Java Code Examples for org.apache.beam.sdk.transforms.Create

The following examples show how to use org.apache.beam.sdk.transforms.Create. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: beam   Source File: AvroIOTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that {@code AvroIO} can read records back as an upgraded version of the class that
 * wrote them, provided Avro schema resolution succeeds. The upgrade exercised here is the
 * addition of a new {@code @Nullable} field.
 *
 * <p>Background: http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadSchemaUpgrade() throws Throwable {
  File outputFile = tmpFolder.newFile("output.avro");
  String outputPath = outputFile.getAbsolutePath();

  // Write phase: records of the original class go to a single, unsharded Avro file.
  List<GenericClass> written =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  writePipeline
      .apply(Create.of(written))
      .apply(AvroIO.write(GenericClass.class).to(outputPath).withoutSharding());
  writePipeline.run();

  // Read phase: the same file is read as the upgraded class; the extra field is expected null.
  List<GenericClassV2> upgraded =
      ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null));
  PAssert.that(
          readPipeline.apply(
              AvroIO.read(GenericClassV2.class)
                  .withBeamSchemas(withBeamSchemas)
                  .from(outputPath)))
      .containsInAnyOrder(upgraded);
  readPipeline.run();
}
 
Example 2
Source Project: beam   Source File: ParDoTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Supplies the elements 1..3 to a {@code ParDo} as a list-valued side input and checks that, of
 * the main-input elements 1..10, only those absent from the side input are emitted.
 */
@Test
public void testSideInputAsList() {
  PCollectionView<List<Integer>> sideInputView =
      pipeline.apply("Create sideInput", Create.of(1, 2, 3)).apply(View.asList());

  // Emits an element only when the side-input list does not contain it.
  DoFn<Integer, Integer> dropSideInputMembers =
      new DoFn<Integer, Integer>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> excluded = c.sideInput(sideInputView);
          if (excluded.contains(c.element())) {
            return;
          }
          c.output(c.element());
        }
      };

  PCollection<Integer> filtered =
      pipeline
          .apply("Create input", Create.of(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))
          .apply(ParDo.of(dropSideInputMembers).withSideInputs(sideInputView));

  PAssert.that(filtered).containsInAnyOrder(4, 5, 6, 7, 8, 9, 10);
  pipeline.run();
}
 
Example 3
Source Project: beam   Source File: QueryDispositionLocationTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes {@code testData} with {@code WriteDisposition.EMPTY} into a fake table that has just
 * been created, then checks through {@code TestUtils.areListsEqual} that the table content
 * matches what was written.
 */
@Test
public void writeWithWriteEmptyDispositionWithEmptyTableSuccess() throws SQLException {
  // The target table exists before the write starts.
  FakeSnowflakeDatabase.createTable(FAKE_TABLE);

  pipeline
      .apply(Create.of(testData))
      .apply(
          "Write SnowflakeIO",
          SnowflakeIO.<Long>write()
              .withDataSourceConfiguration(dc)
              .withTable(FAKE_TABLE)
              .withStagingBucketName(options.getStagingBucketName())
              .withStorageIntegrationName(options.getStorageIntegrationName())
              .withFileNameTemplate("output*")
              .withUserDataMapper(TestUtils.getLongCsvMapper())
              .withWriteDisposition(WriteDisposition.EMPTY)
              .withSnowflakeService(snowflakeService));
  pipeline.run(options).waitUntilFinish();

  List<Long> storedRows = FakeSnowflakeDatabase.getElementsAsLong(FAKE_TABLE);
  boolean sameContent = TestUtils.areListsEqual(testData, storedRows);
  assertTrue(sameContent);
}
 
Example 4
Source Project: beam   Source File: Task.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a pipeline from ten timestamped {@code "event"} strings spread over four dates, passes
 * them through {@code applyTransform}, logs the resulting key/count pairs and runs the pipeline.
 *
 * @param args command-line arguments used to build the pipeline options
 */
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  // The four distinct timestamps carried by the input events.
  Instant june1 = Instant.parse("2019-06-01T00:00:00+00:00");
  Instant june5 = Instant.parse("2019-06-05T00:00:00+00:00");
  Instant june8 = Instant.parse("2019-06-08T00:00:00+00:00");
  Instant june10 = Instant.parse("2019-06-10T00:00:00+00:00");

  // Four events on June 1, two on June 5, three on June 8 and one on June 10.
  PCollection<String> events =
      pipeline.apply(
          Create.timestamped(
              TimestampedValue.of("event", june1),
              TimestampedValue.of("event", june1),
              TimestampedValue.of("event", june1),
              TimestampedValue.of("event", june1),
              TimestampedValue.of("event", june5),
              TimestampedValue.of("event", june5),
              TimestampedValue.of("event", june8),
              TimestampedValue.of("event", june8),
              TimestampedValue.of("event", june8),
              TimestampedValue.of("event", june10)));

  PCollection<KV<String, Long>> counts = applyTransform(events);
  counts.apply(Log.ofElements());

  pipeline.run();
}
 
Example 5
Source Project: beam   Source File: TestDataflowRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs a streaming pipeline that contains no {@code PAssert} through {@code TestDataflowRunner}
 * with a mocked job that reports {@code DONE}; the test passes when {@code run} returns
 * without throwing.
 */
@Test
public void testRunStreamingJobNotUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);
  p.apply(Create.of(1, 2, 3));

  // Mocked job: identifies itself and reports DONE both when polled and when waited on.
  DataflowPipelineJob finishedJob = Mockito.mock(DataflowPipelineJob.class);
  when(finishedJob.getProjectId()).thenReturn("test-project");
  when(finishedJob.getJobId()).thenReturn("test-job");
  when(finishedJob.getState()).thenReturn(State.DONE);
  when(finishedJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  // Mocked underlying runner: hands back the mocked job for any pipeline.
  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(finishedJob);

  // Metrics response is generated from an empty map.
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockStreamingMetricResponse(ImmutableMap.of()));

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  runner.run(p, mockRunner);
}
 
Example 6
Source Project: beam   Source File: TestDataflowRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * A streaming job containing a {@code PAssert} that terminates in state {@code DONE}, with
 * metrics generated as successful and tentative, is treated as a success: {@code run} returns
 * without throwing.
 */
@Test
public void testRunStreamingJobUsingPAssertThatSucceeds() throws Exception {
  options.setStreaming(true);
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // Mocked job: identifies itself and reports DONE both when polled and when waited on.
  DataflowPipelineJob finishedJob = Mockito.mock(DataflowPipelineJob.class);
  when(finishedJob.getProjectId()).thenReturn("test-project");
  when(finishedJob.getJobId()).thenReturn("test-job");
  when(finishedJob.getState()).thenReturn(State.DONE);
  when(finishedJob.waitUntilFinish(any(Duration.class), any(JobMessagesHandler.class)))
      .thenReturn(State.DONE);

  // Mocked underlying runner: hands back the mocked job for any pipeline.
  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(finishedJob);

  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, true /* tentative */));

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  runner.run(p, mockRunner);
}
 
Example 7
Source Project: beam   Source File: TestDataflowRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that a batch job whose final state is {@code FAILED} makes the runner throw a
 * {@link RuntimeException}, even though the mocked metrics are generated with the success flag
 * set.
 */
@Test
public void testRunBatchJobThatFails() throws Exception {
  Pipeline p = TestPipeline.create(options);
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3));
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // Mocked job: identifies itself and reports FAILED.
  DataflowPipelineJob failedJob = Mockito.mock(DataflowPipelineJob.class);
  when(failedJob.getProjectId()).thenReturn("test-project");
  when(failedJob.getJobId()).thenReturn("test-job");
  when(failedJob.getState()).thenReturn(State.FAILED);

  // Mocked underlying runner: hands back the mocked job for any pipeline.
  DataflowRunner mockRunner = Mockito.mock(DataflowRunner.class);
  when(mockRunner.run(any(Pipeline.class))).thenReturn(failedJob);

  TestDataflowRunner runner = TestDataflowRunner.fromOptionsAndClient(options, mockClient);
  when(mockClient.getJobMetrics(anyString()))
      .thenReturn(generateMockMetricResponse(true /* success */, false /* tentative */));

  expectedException.expect(RuntimeException.class);
  runner.run(p, mockRunner);
  // Only reached when run() returned normally, i.e. the expected exception was not thrown.
  fail("AssertionError expected");
}
 
Example 8
/**
 * Publishes the lines of {@code input.ndjson} through the configured pubsub sink without
 * blocking on the pipeline run, then checks that the subscriber receives the lines listed in
 * {@code gzipped.ndjson} in any order. The pipeline is cancelled once the check is done.
 */
@Test(timeout = 30000)
public void canSendGzippedPayloads() throws Exception {
  final List<String> inputLines = Lines.resources("testdata/pubsub-integration/input.ndjson");

  // run() must return immediately so the test can read from the subscription meanwhile.
  pipeline.getOptions().as(DirectOptions.class).setBlockOnRun(false);

  SinkOptions rawSinkOptions = pipeline.getOptions().as(SinkOptions.class);
  rawSinkOptions.setOutputType(OutputType.pubsub);
  rawSinkOptions.setOutput(pipeline.newProvider(topicName.toString()));
  SinkOptions.Parsed parsedOptions = SinkOptions.parseSinkOptions(rawSinkOptions);

  pipeline
      .apply(Create.of(inputLines))
      .apply(InputFileFormat.json.decode())
      .apply(parsedOptions.getOutputType().write(parsedOptions));

  final PipelineResult running = pipeline.run();

  System.err.println("Waiting for subscriber to receive messages published in the pipeline...");
  List<String> expectedLines = Lines.resources("testdata/pubsub-integration/gzipped.ndjson");
  List<String> receivedLines = receiveLines(expectedLines.size());
  assertThat(receivedLines, matchesInAnyOrder(expectedLines));
  running.cancel();
}
 
Example 9
Source Project: DataflowTemplates   Source File: PubsubToPubsubTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that every test message passes through {@code ExtractAndFilterEventsFn} when the
 * function is built without a filter: the global count must equal the number of test messages.
 */
@Test
@Category(NeedsRunner.class)
public void testNoInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);

  PCollection<Long> messageCount =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(ParDo.of(ExtractAndFilterEventsFn.newBuilder().build()))
          .apply(Count.globally());

  Long expectedCount = Long.valueOf(allTestMessages.size());
  PAssert.thatSingleton(messageCount).isEqualTo(expectedCount);

  pipeline.run(options);
}
 
Example 10
Source Project: beam   Source File: DirectRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a root transform that creates the single element {@code 1} and adds it to the given
 * queue from inside a {@code MapElements} step.
 *
 * @param queue the queue that receives the element
 * @return a transform from {@code PBegin} to {@code PDone}
 */
private PTransform<PBegin, PDone> outputStartTo(StaticQueue<Integer> queue) {
  PTransform<PBegin, PDone> enqueueOnStart =
      new PTransform<PBegin, PDone>() {
        @Override
        public PDone expand(PBegin begin) {
          begin
              .apply(Create.of(1))
              .apply(
                  MapElements.into(TypeDescriptors.voids())
                      .via(
                          element -> {
                            // The mapping is used only for its side effect on the queue.
                            queue.add(element);
                            return null;
                          }));
          return PDone.in(begin.getPipeline());
        }
      };
  return enqueueOnStart;
}
 
Example 11
Source Project: gcp-ingestion   Source File: GeoCityLookupTest.java    License: Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Decodes one JSON message carrying {@code remote_addr} and {@code x_forwarded_for} attributes,
 * applies {@code GeoCityLookup} with the {@code milton.txt} city filter file, and checks that the
 * re-encoded output carries the expected geo attributes.
 */
@Test
public void testCityAllowed() {
  final String inputJson = "{\"attributeMap\":"
      + "{\"remote_addr\":\"10.0.0.2\""
      + ",\"x_forwarded_for\":\"192.168.1.2, 216.160.83.56, 60.1.1.1\""
      + "},\"payload\":\"\"}";
  final List<String> input = Arrays.asList(inputJson);

  final String expectedJson = "{\"attributeMap\":"
      + "{\"geo_city\":\"Milton\""
      + ",\"geo_country\":\"US\""
      + ",\"geo_db_version\":\"2019-01-03T21:26:19Z\""
      + ",\"geo_subdivision1\":\"WA\""
      + "},\"payload\":\"\"}";
  final List<String> expected = Arrays.asList(expectedJson);

  final PCollection<String> output = pipeline
      .apply(Create.of(input))
      .apply(InputFileFormat.json.decode())
      .apply(GeoCityLookup.of(pipeline.newProvider(MMDB),
          pipeline.newProvider("src/test/resources/cityFilters/milton.txt")))
      .apply(OutputFileFormat.json.encode());

  PAssert.that(output).containsInAnyOrder(expected);

  // Singletons are cleared before the pipeline actually runs.
  GeoCityLookup.clearSingletonsForTests();
  pipeline.run();
}
 
Example 12
Source Project: beam   Source File: BigtableIOTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes a single row to a freshly created table and checks that the "Wrote 1 records" debug
 * message was logged and that the service holds exactly that one key/value pair.
 */
@Test
public void testWriting() throws Exception {
  final String tableId = "table";
  final String rowKey = "key";
  final String cellValue = "value";

  service.createTable(tableId);

  p.apply("single row", Create.of(makeWrite(rowKey, cellValue)).withCoder(bigtableCoder))
      .apply("write", defaultWrite.withTableId(tableId));
  p.run();

  logged.verifyDebug("Wrote 1 records");

  // Exactly one table exists, and it holds exactly the row written above.
  assertEquals(1, service.tables.size());
  assertNotNull(service.getTable(tableId));
  Map<ByteString, ByteString> storedRows = service.getTable(tableId);
  assertEquals(1, storedRows.size());

  ByteString expectedValue = ByteString.copyFromUtf8(cellValue);
  ByteString storedValue = storedRows.get(ByteString.copyFromUtf8(rowKey));
  assertEquals(expectedValue, storedValue);
}
 
Example 13
Source Project: DataflowTemplates   Source File: PubsubToPubsubTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that only the valid messages pass through {@code ExtractAndFilterEventsFn} when a
 * filter key and value are provided: the global count must equal the number of good messages.
 *
 * <p>The filter options are read while the pipeline is being built and are assigned their
 * static values only afterwards, just before the pipeline runs.
 */
@Test
@Category(NeedsRunner.class)
public void testInputFilterProvided() {
  PubsubToPubsub.Options options =
      TestPipeline.testingPipelineOptions().as(PubsubToPubsub.Options.class);

  PCollection<Long> messageCount =
      pipeline
          .apply(Create.of(allTestMessages))
          .apply(
              ParDo.of(
                  ExtractAndFilterEventsFn.newBuilder()
                      .withFilterKey(options.getFilterKey())
                      .withFilterValue(options.getFilterValue())
                      .build()))
          .apply(Count.globally());

  Long expectedCount = Long.valueOf(goodTestMessages.size());
  PAssert.thatSingleton(messageCount).isEqualTo(expectedCount);

  // Assigned after graph construction; this ordering is kept as in the original test.
  options.setFilterKey(ValueProvider.StaticValueProvider.of(FILTER_KEY));
  options.setFilterValue(ValueProvider.StaticValueProvider.of(FILTER_VALUE));

  pipeline.run(options);
}
 
Example 14
Source Project: beam   Source File: BigQueryIOWriteTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes two rows with streaming inserts to a table spec that ends in a {@code $20171127}
 * decorator, using the fake BigQuery services and with validation disabled. The test contains
 * no explicit assertion; it passes when the pipeline run completes without throwing.
 */
@Test
public void testWriteToTableDecorator() throws Exception {
  TableFieldSchema numberField = new TableFieldSchema().setName("number").setType("INTEGER");
  TableSchema schema = new TableSchema().setFields(ImmutableList.of(numberField));

  TableRow firstRow = new TableRow().set("name", "a").set("number", "1");
  TableRow secondRow = new TableRow().set("name", "b").set("number", "2");

  p.apply(Create.of(firstRow, secondRow))
      .apply(
          BigQueryIO.writeTableRows()
              .to("project-id:dataset-id.table-id$20171127")
              .withTestServices(fakeBqServices)
              .withMethod(BigQueryIO.Write.Method.STREAMING_INSERTS)
              .withSchema(schema)
              .withoutValidation());
  p.run();
}
 
Example 15
Source Project: beam   Source File: DirectRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that the {@link DirectRunner} rejects a {@link DoFn} which outputs a value, mutates
 * it, and outputs it again, when the value type has a good equals(). The run is expected to
 * fail with an {@link IllegalMutationException}.
 */
@Test
public void testMutatingOutputThenOutputDoFnError() throws Exception {
  Pipeline pipeline = getPipeline();

  // Outputs a list, changes its first element in place, then outputs the same list again.
  DoFn<Integer, List<Integer>> mutateBetweenOutputs =
      new DoFn<Integer, List<Integer>>() {
        @ProcessElement
        public void processElement(ProcessContext c) {
          List<Integer> emitted = Arrays.asList(1, 2, 3, 4);
          c.output(emitted);
          emitted.set(0, 37);
          c.output(emitted);
        }
      };

  pipeline.apply(Create.of(42)).apply(ParDo.of(mutateBetweenOutputs));

  thrown.expect(IllegalMutationException.class);
  thrown.expectMessage("output");
  thrown.expectMessage("must not be mutated");
  pipeline.run();
}
 
Example 16
Source Project: beam   Source File: DropFieldsTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Drops {@code field1} from three simple rows and checks that both the resulting schema and the
 * resulting rows contain only the string field {@code field2}.
 */
@Test
@Category(NeedsRunner.class)
public void testDropTopLevelField() {
  Schema expectedSchema = Schema.builder().addStringField("field2").build();

  PCollection<Row> remaining =
      pipeline
          .apply(
              Create.of(simpleRow(1, "one"), simpleRow(2, "two"), simpleRow(3, "three"))
                  .withRowSchema(SIMPLE_SCHEMA))
          .apply(DropFields.fields("field1"));

  // The schema is checked at graph-construction time, before the pipeline runs.
  assertEquals(expectedSchema, remaining.getSchema());

  List<Row> expectedRows =
      Lists.newArrayList(
          Row.withSchema(expectedSchema).addValue("one").build(),
          Row.withSchema(expectedSchema).addValue("two").build(),
          Row.withSchema(expectedSchema).addValue("three").build());
  PAssert.that(remaining).containsInAnyOrder(expectedRows);

  pipeline.run();
}
 
Example 17
Source Project: beam   Source File: SnowflakeIOWriteTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts {@code testData} to key/value pairs with {@code TestUtils.ParseToKv} and writes them
 * through {@code SnowflakeIO}. The test contains no explicit assertion; it passes when the
 * pipeline finishes without throwing.
 */
@Test
public void writeToExternalWithKVInput() {
  pipeline
      .apply(Create.of(testData))
      // Turn each input element into a KV before it reaches the sink.
      .apply(ParDo.of(new TestUtils.ParseToKv()))
      .apply(
          "Write SnowflakeIO",
          SnowflakeIO.<KV<String, Long>>write()
              .withDataSourceConfiguration(dc)
              .withUserDataMapper(TestUtils.getLongCsvMapperKV())
              .withTable(FAKE_TABLE)
              .withStagingBucketName(options.getStagingBucketName())
              .withStorageIntegrationName(options.getStorageIntegrationName())
              .withSnowflakeService(snowflakeService));

  pipeline.run(options).waitUntilFinish();
}
 
Example 18
Source Project: beam   Source File: TaskTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Applies {@code Task.applyTransform} to six fruit names and checks that each name is paired
 * with its first letter as the key.
 */
@SuppressWarnings("unchecked")
@Test
public void groupByKey() {
  PCollection<String> fruits =
      testPipeline.apply(Create.of("apple", "banana", "cherry", "durian", "guava", "melon"));

  PCollection<KV<String, String>> keyedFruits = Task.applyTransform(fruits);

  PAssert.that(keyedFruits)
      .containsInAnyOrder(
          KV.of("a", "apple"),
          KV.of("b", "banana"),
          KV.of("c", "cherry"),
          KV.of("d", "durian"),
          KV.of("g", "guava"),
          KV.of("m", "melon"));

  testPipeline.run().waitUntilFinish();
}
 
Example 19
Source Project: beam   Source File: CacheTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Exercises {@code EvaluationContext.shouldCache} for a collection registered as a cache
 * candidate: expected false while caching is disabled, true for a {@code Create} transform
 * once caching is enabled, and false for a {@code GroupByKey} transform.
 */
@Test
public void shouldCacheTest() {
  SparkPipelineOptions options = createOptions();
  options.setCacheDisabled(true);
  Pipeline pipeline = Pipeline.create(options);

  Values<String> createTransform = Create.of("foo", "bar");
  PCollection candidate = mock(PCollection.class);

  JavaSparkContext sparkContext = SparkContextFactory.getSparkContext(options);
  EvaluationContext evaluationContext = new EvaluationContext(sparkContext, pipeline, options);
  evaluationContext.getCacheCandidates().put(candidate, 2L);

  // Caching disabled through the options: never cache.
  assertFalse(evaluationContext.shouldCache(createTransform, candidate));

  // Caching enabled: the Create output is expected to be cached.
  options.setCacheDisabled(false);
  assertTrue(evaluationContext.shouldCache(createTransform, candidate));

  // A GroupByKey transform is expected not to be cached, even with caching enabled.
  GroupByKey<String, String> groupByKey = GroupByKey.create();
  assertFalse(evaluationContext.shouldCache(groupByKey, candidate));
}
 
Example 20
Source Project: beam   Source File: TaskTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Passes two collections of words to {@code Task.applyTransform} and checks that the result
 * contains every word from both inputs.
 */
@Test
public void flatten() {
  PCollection<String> aWords =
      testPipeline.apply("Words starting with A", Create.of("apple", "ant", "arrow"));
  PCollection<String> bWords =
      testPipeline.apply("Words starting with B", Create.of("ball", "book", "bow"));

  PCollection<String> merged = Task.applyTransform(aWords, bWords);

  PAssert.that(merged).containsInAnyOrder("apple", "ant", "arrow", "ball", "book", "bow");

  testPipeline.run().waitUntilFinish();
}
 
Example 21
Source Project: logparser   Source File: TestParserDoFnClass.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses a single test log line with {@code MyParserDoFn} and checks that the resulting record
 * equals a {@code TestRecord} on which {@code setFullValid()} was called.
 */
@Test
public void testClassDefinition() throws Exception {
    List<String> logLines = Collections.singletonList(TestCase.getInputLine());

    // Create the PCollection from the list, then set the String coder on the result.
    PCollection<String> input = pipeline
        .apply(Create.of(logLines))
        .setCoder(StringUtf8Coder.of());

    MyParserDoFn parser = new MyParserDoFn();
    PCollection<TestRecord> parsedRecords =
        input.apply("Extract Elements from logline", ParDo.of(parser));

    TestRecord expected = new TestRecord().setFullValid();
    PAssert.that(parsedRecords).containsInAnyOrder(expected);

    pipeline.run().waitUntilFinish();
}
 
Example 22
Source Project: beam   Source File: PortablePipelineDotRendererTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders a Create -> Window -> Sum-per-key pipeline to DOT and compares it, with line
 * separators stripped, against the expected graph text.
 */
@Test
public void testCompositePipeline() {
  p.apply(Create.timestamped(TimestampedValue.of(KV.of(1, 1), new Instant(1))))
      .apply(Window.into(FixedWindows.of(Duration.millis(10))))
      .apply(Sum.integersPerKey());

  String expectedDot =
      "digraph {"
          + "    rankdir=LR"
          + "    0 [label=\"Create.TimestampedValues\\n\"]"
          + "    1 [label=\"Window.Into()\\n\"]"
          + "    0 -> 1 [style=solid label=\"Create.TimestampedValues/ParDo(ConvertTimestamps)/ParMultiDo(ConvertTimestamps).output\"]"
          + "    2 [label=\"Combine.perKey(SumInteger)\\nbeam:transform:combine_per_key:v1\"]"
          + "    1 -> 2 [style=solid label=\"Window.Into()/Window.Assign.out\"]"
          + "}";

  // Line separators are removed so the comparison is against a single-line string.
  String actualDot =
      PortablePipelineDotRenderer.toDotString(PipelineTranslation.toProto(p))
          .replaceAll(System.lineSeparator(), "");

  assertEquals(expectedDot, actualDot);
}
 
Example 23
Source Project: component-runtime   Source File: IndexedRecordToJsonTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Converts two indexed records to JSON with {@code IndexedRecordToJson} and checks that the
 * sorted {@code name} values are "first" and "second", and that the pipeline ends in state
 * {@code DONE}.
 */
@Test
public void test() {
    PAssert
            .that(pipeline
                    .apply(Create
                            .of(newIndexedRecord("first"), newIndexedRecord("second"))
                            .withCoder(AvroCoder.of(IndexedRecord.class, getSchema())))
                    .apply(new IndexedRecordToJson()))
            .satisfies(jsonRecords -> {
                // Names are sorted so the check does not depend on element order.
                assertEquals(asList("first", "second"),
                        StreamSupport
                                .stream(jsonRecords.spliterator(), false)
                                .map(json -> json.getString("name"))
                                .sorted()
                                .collect(toList()));
                return null;
            });

    final PipelineResult.State finalState = pipeline.run().waitUntilFinish();
    assertEquals(PipelineResult.State.DONE, finalState);
}
 
Example 24
Source Project: component-runtime   Source File: Pipelines.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a collection of records from the strings "a" and "b". Each output record has two array
 * entries, {@code b1} and {@code b2}, each holding a single nested record whose string field
 * ({@code foo} or {@code bar} respectively) carries the input string.
 *
 * @param pipeline the test pipeline to attach the transforms to
 * @return the resulting collection, with {@code SchemaRegistryCoder} set as its coder
 */
static PCollection<Record> buildBasePipeline(final TestPipeline pipeline) {
    final RecordBuilderFactory factory = new AvroRecordBuilderFactoryProvider().apply(null);
    return pipeline
            .apply(Create.of("a", "b"))
            .apply(MapElements.into(TypeDescriptor.of(Record.class)).via((String value) -> {
                // The two nested records differ only in the name of their string field.
                final Record fooRecord = factory.newRecordBuilder().withString("foo", value).build();
                final Record barRecord = factory.newRecordBuilder().withString("bar", value).build();
                return factory
                        .newRecordBuilder()
                        .withArray(factory
                                .newEntryBuilder()
                                .withName("b1")
                                .withType(Schema.Type.ARRAY)
                                .withElementSchema(fooRecord.getSchema())
                                .build(), singletonList(fooRecord))
                        .withArray(factory
                                .newEntryBuilder()
                                .withName("b2")
                                .withType(Schema.Type.ARRAY)
                                .withElementSchema(barRecord.getSchema())
                                .build(), singletonList(barRecord))
                        .build();
            }))
            .setCoder(SchemaRegistryCoder.of());
}
 
Example 25
/**
 * Builds the file-sharding part of the pipeline for a single file: the file is matched for
 * reading and split into ranges by {@code SplitIntoRangesFn}, which receives a side-input map
 * from the file's resource id to the table name "testtable".
 *
 * @param fileMetadata metadata of the file to shard
 * @param splitSize split size handed to {@code SplitIntoRangesFn}
 * @return the resulting file shards, with {@code FileShard.Coder} set as the coder
 */
private PCollection<FileShard> runFileShardingPipeline(Metadata fileMetadata, int splitSize) {
    String fileName = fileMetadata.resourceId().toString();

    PCollectionView<Map<String, String>> filenamesToTableNamesMapView =
        p.apply(
                "Create File/Table names Map",
                Create.of(ImmutableMap.<String, String>of(fileName, "testtable")))
            .apply(View.asMap());

    SplitIntoRangesFn splitFn = new SplitIntoRangesFn(splitSize, filenamesToTableNamesMapView);

    return p.apply("Create Metadata", Create.of(fileMetadata))
        // Produces a PCollection<FileIO.ReadableFile>.
        .apply(FileIO.readMatches())
        .apply(
            "Split into ranges",
            ParDo.of(splitFn).withSideInputs(filenamesToTableNamesMapView))
        .setCoder(FileShard.Coder.of());
}
 
Example 26
Source Project: beam   Source File: DataflowRunnerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that translating a pipeline which contains a transform without a registered
 * translator fails with an {@link IllegalStateException} whose message contains
 * "no translator registered".
 *
 * <p>The translate call is the last statement on purpose: the test expects it to throw, so any
 * statement placed after it could never take part in a passing run.
 */
@Test
public void testTransformTranslatorMissing() throws IOException {
  DataflowPipelineOptions options = buildPipelineOptions();
  Pipeline p = Pipeline.create(options);

  p.apply(Create.of(Arrays.asList(1, 2, 3))).apply(new TestTransform());

  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(containsString("no translator registered"));
  SdkComponents sdkComponents = SdkComponents.create(options);
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(p, sdkComponents, true);
  DataflowPipelineTranslator.fromOptions(options)
      .translate(
          p,
          pipelineProto,
          sdkComponents,
          DataflowRunner.fromOptions(options),
          Collections.emptyList());
}
 
Example 27
Source Project: beam   Source File: ApproximateDistinctTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs {@code ApproximateDistinct} globally over a shuffled stream in which each of 15000
 * distinct integers appears twice, and hands the result to {@code VerifyAccuracy} together
 * with the expected cardinality and error tolerance.
 */
@Test
public void bigCardinality() {
  final int cardinality = 15000;
  final int precision = 15;
  final int sparsePrecision = 20;
  // NOTE(review): the tolerance is derived from the precision value itself rather than from
  // 2^precision — confirm this is the intended bound.
  final double expectedErr = 1.04 / Math.sqrt(precision);

  // Every value in 1..cardinality is added exactly twice, then the order is randomized.
  List<Integer> stream = new ArrayList<>();
  for (int value = 1; value <= cardinality; value++) {
    stream.add(value);
    stream.add(value);
  }
  Collections.shuffle(stream);

  PCollection<Long> estimate =
      tp.apply("big stream", Create.of(stream))
          .apply(
              "big cardinality",
              ApproximateDistinct.<Integer>globally()
                  .withPrecision(precision)
                  .withSparsePrecision(sparsePrecision));

  PAssert.that("Verify Accuracy for big cardinality", estimate)
      .satisfies(new VerifyAccuracy(cardinality, expectedErr));

  tp.run();
}
 
Example 28
Source Project: beam   Source File: PAssertTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that calling {@code hashCode()} on the assertion object returned by
 * {@code PAssert.that} is rejected with an {@link UnsupportedOperationException}. See {@link
 * #testPAssertEqualsIterableUnsupported}.
 */
@SuppressWarnings("deprecation") // test of deprecated function
@Test
public void testPAssertHashCodeIterableUnsupported() throws Exception {
  PCollection<Integer> singleElement = pipeline.apply(Create.of(42));

  thrown.expect(UnsupportedOperationException.class);
  thrown.expectMessage(".hashCode() is not supported.");
  PAssert.that(singleElement).hashCode();
}
 
Example 29
Source Project: DataflowTemplates   Source File: DatastoreConvertersTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Unit test for {@link DatastoreConverters.CheckSameKey}: appends an entity that reuses the key
 * of the first test entity and expects both entities sharing that key on the error output,
 * with all remaining entities on the good output.
 */
@Test
@Category(NeedsRunner.class)
public void testCheckSameKey() throws Exception {
  Entity firstEntity = entities.get(0);
  Entity duplicateKeyEntity = Entity.newBuilder()
      .setKey(firstEntity.getKey())
      .putProperties("SomeBSProp", Value.newBuilder().setStringValue("Some BS Value").build())
      .build();

  // Input: a copy of all test entities with the key-conflicting entity appended at the end.
  List<Entity> inputEntities = new ArrayList<>(entities);
  inputEntities.add(duplicateKeyEntity);

  // One "Duplicate Datastore Key" error is expected per entity that shares the key.
  EntityJsonPrinter jsonPrinter = new EntityJsonPrinter();
  List<String> expectedErrors = new ArrayList<>();
  for (Entity conflicting : Arrays.asList(firstEntity, duplicateKeyEntity)) {
    String errorJson = ErrorMessage.newBuilder()
        .setMessage("Duplicate Datastore Key")
        .setData(jsonPrinter.print(conflicting))
        .build()
        .toJson();
    expectedErrors.add(errorJson);
  }

  TupleTag<Entity> goodTag = new TupleTag<Entity>("entities"){};
  TupleTag<String> errorTag = new TupleTag<String>("errors"){};

  PCollectionTuple results = pipeline
      .apply("Create", Create.of(inputEntities))
      .apply("RemoveDupKeys", CheckSameKey.newBuilder()
          .setGoodTag(goodTag)
          .setErrorTag(errorTag)
          .build());

  PAssert.that(results.get(goodTag)).containsInAnyOrder(entities.subList(1, entities.size()));
  PAssert.that(results.get(errorTag)).containsInAnyOrder(expectedErrors);

  pipeline.run();
}
 
Example 30
Source Project: beam   Source File: CassandraIOTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Deletes the {@code Scientist} with id 0 through {@code CassandraIO.delete()} and checks that
 * the table holds one row fewer afterwards. The deleted row is then re-inserted so the table
 * is left as it was found.
 */
@Test
public void testDelete() throws Exception {
  List<Row> results = getRows(CASSANDRA_TABLE);
  assertEquals(NUM_ROWS, results.size());

  Scientist einstein = new Scientist();
  einstein.id = 0;
  einstein.name = "Einstein";
  pipeline
      .apply(Create.of(einstein))
      .apply(
          CassandraIO.<Scientist>delete()
              .withHosts(Collections.singletonList(CASSANDRA_HOST))
              .withPort(cassandraPort)
              .withKeyspace(CASSANDRA_KEYSPACE)
              .withEntity(Scientist.class));

  pipeline.run();
  results = getRows(CASSANDRA_TABLE);
  assertEquals(NUM_ROWS - 1, results.size());
  // re-insert suppressed doc to make the test autonomous
  // The id and name are passed as format arguments rather than concatenated into the format
  // pattern, so a '%' in the data can never be parsed as a format specifier.
  session.execute(
      String.format(
          "INSERT INTO %s.%s(person_id, person_name) values(%s, '%s');",
          CASSANDRA_KEYSPACE,
          CASSANDRA_TABLE,
          einstein.id,
          einstein.name));
}