org.apache.beam.sdk.options.PipelineOptionsFactory Java Examples

The following examples show how to use org.apache.beam.sdk.options.PipelineOptionsFactory. The originating project, source file, and license are noted above each example.
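For orientation, here is a minimal, self-contained sketch of the pattern the examples below share: define a PipelineOptions sub-interface, parse the command-line flags with PipelineOptionsFactory, and create a Pipeline from the result. The MyOptions interface and its --input flag are hypothetical illustrations, not taken from any of the listed projects.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class MyOptionsExample {

  /** Hypothetical options interface; each getter/setter pair becomes a command-line flag (--input). */
  public interface MyOptions extends PipelineOptions {
    @Description("Path of the file to read from")
    @Default.String("gs://my-bucket/input.txt")
    String getInput();

    void setInput(String value);
  }

  public static void main(String[] args) {
    // Register the interface so --help can describe its flags, then parse and validate the args.
    PipelineOptionsFactory.register(MyOptions.class);
    MyOptions options =
        PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);

    // Build and run the pipeline with the parsed options.
    Pipeline pipeline = Pipeline.create(options);
    // ... apply transforms here ...
    pipeline.run().waitUntilFinish();
  }
}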
Example #1
Source File: UserScore.java    From beam with Apache License 2.0
/** Run a batch pipeline. */
// [START DocInclude_USMain]
public static void main(String[] args) throws Exception {
  // Begin constructing a pipeline configured by commandline flags.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  // Read events from a text file and parse them.
  pipeline
      .apply(TextIO.read().from(options.getInput()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      .apply(
          "WriteUserScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), false));

  // Run the batch pipeline.
  pipeline.run().waitUntilFinish();
}
 
Example #2
Source File: CompressedSourceTest.java    From beam with Apache License 2.0
/**
 * Tests that a concatenation of gzip files is correctly decompressed.
 *
 * <p>Concatenating gzip files into one file yields a valid gzip file, which should decompress to
 * the concatenation of the individual files' contents.
 */
@Test
public void testReadConcatenatedGzip() throws IOException {
  byte[] header = "a,b,c\n".getBytes(StandardCharsets.UTF_8);
  byte[] body = "1,2,3\n4,5,6\n7,8,9\n".getBytes(StandardCharsets.UTF_8);
  byte[] expected = concat(header, body);
  byte[] totalGz = concat(compressGzip(header), compressGzip(body));
  File tmpFile = tmpFolder.newFile();
  try (FileOutputStream os = new FileOutputStream(tmpFile)) {
    os.write(totalGz);
  }

  CompressedSource<Byte> source =
      CompressedSource.from(new ByteSource(tmpFile.getAbsolutePath(), 1))
          .withDecompression(CompressionMode.GZIP);
  List<Byte> actual = SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create());
  assertEquals(Bytes.asList(expected), actual);
}
 
Example #3
Source File: BigtableIOTest.java    From beam with Apache License 2.0
@Test
public void testReadWithRuntimeParametersValidationDisabled() {
  ReadOptions options = PipelineOptionsFactory.fromArgs().withValidation().as(ReadOptions.class);

  BigtableIO.Read read =
      BigtableIO.read()
          .withoutValidation()
          .withProjectId(options.getBigtableProject())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  // Not running a pipeline, so this exception is expected.
  thrown.expect(PipelineRunMissingException.class);

  p.apply(read);
}
 
Example #4
Source File: DatastoreToText.java    From DataflowTemplates with Apache License 2.0
/**
 * Runs a pipeline that reads Entities from Datastore, passes the JSON-encoded Entities
 * to a Javascript UDF, and writes the resulting JSON to a TextIO sink.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
  DatastoreToTextOptions options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(DatastoreToTextOptions.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(ReadJsonEntities.newBuilder()
          .setGqlQuery(options.getDatastoreReadGqlQuery())
          .setProjectId(options.getDatastoreReadProjectId())
          .setNamespace(options.getDatastoreReadNamespace())
          .build())
      .apply(TransformTextViaJavascript.newBuilder()
          .setFileSystemPath(options.getJavascriptTextTransformGcsPath())
          .setFunctionName(options.getJavascriptTextTransformFunctionName())
          .build())
      .apply(TextIO.write()
          .to(options.getTextWritePrefix())
          .withSuffix(".json"));

  pipeline.run();
}
 
Example #5
Source File: GrpcWindmillServer.java    From beam with Apache License 2.0
private GrpcWindmillServer(String name, boolean enableStreamingEngine) {
  this.options = PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
  this.streamingRpcBatchLimit = Integer.MAX_VALUE;
  options.setProject("project");
  options.setJobId("job");
  options.setWorkerId("worker");
  if (enableStreamingEngine) {
    List<String> experiments = this.options.getExperiments();
    if (experiments == null) {
      experiments = new ArrayList<>();
    }
    experiments.add(GcpOptions.STREAMING_ENGINE_EXPERIMENT);
    options.setExperiments(experiments);
  }
  this.stubList.add(CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel(name)));
}
 
Example #6
Source File: Task.java    From beam with Apache License 2.0
public static void main(String[] args) {
  String[] lines = {
      "apple orange grape banana apple banana",
      "banana orange banana papaya"
  };

  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> wordCounts =
      pipeline.apply(Create.of(Arrays.asList(lines)));

  PCollection<String> output = applyTransform(wordCounts);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example #7
Source File: FlinkExecutionEnvironmentsTest.java    From beam with Apache License 2.0
@Test
public void shouldSupportIPv6Batch() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);

  options.setFlinkMaster("[FE80:CD00:0000:0CDE:1257:0000:211E:729C]:1234");
  ExecutionEnvironment bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", 1234);

  options.setFlinkMaster("FE80:CD00:0000:0CDE:1257:0000:211E:729C");
  bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(
      bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", RestOptions.PORT.defaultValue());
}
 
Example #8
Source File: Task.java    From beam with Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<Event> events =
      pipeline.apply(
          Create.of(
              new Event("1", "book-order", DateTime.parse("2019-06-01T00:00:00+00:00")),
              new Event("2", "pencil-order", DateTime.parse("2019-06-02T00:00:00+00:00")),
              new Event("3", "paper-order", DateTime.parse("2019-06-03T00:00:00+00:00")),
              new Event("4", "pencil-order", DateTime.parse("2019-06-04T00:00:00+00:00")),
              new Event("5", "book-order", DateTime.parse("2019-06-05T00:00:00+00:00"))
          )
      );

  PCollection<Event> output = applyTransform(events);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example #9
Source File: ElasticsearchIOIT.java    From beam with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  PipelineOptionsFactory.register(ElasticsearchPipelineOptions.class);
  options = TestPipeline.testingPipelineOptions().as(ElasticsearchPipelineOptions.class);
  readConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.READ);
  writeConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE);
  updateConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE_PARTIAL);
  restClient = readConnectionConfiguration.createClient();
  elasticsearchIOTestCommon =
      new ElasticsearchIOTestCommon(readConnectionConfiguration, restClient, true);
}
 
Example #10
Source File: FileBasedSourceTest.java    From beam with Apache License 2.0
@Test
public void testReadEverythingFromFileWithSplits() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);

  TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 64, header);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data);
  // Remove all occurrences of header from expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  assertEquals(expectedResults, readFromSource(source, options));
}
 
Example #11
Source File: FlinkPipelineExecutionEnvironmentTest.java    From beam with Apache License 2.0
@Test
public void shouldUseTransformOverrides() {
  boolean[] testParameters = {true, false};
  for (boolean streaming : testParameters) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setStreaming(streaming);
    options.setRunner(FlinkRunner.class);
    FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
    Pipeline p = Mockito.spy(Pipeline.create(options));

    flinkEnv.translate(p);

    ArgumentCaptor<ImmutableList> captor = ArgumentCaptor.forClass(ImmutableList.class);
    Mockito.verify(p).replaceAll(captor.capture());
    ImmutableList<PTransformOverride> overridesList = captor.getValue();

    assertThat(overridesList.isEmpty(), is(false));
    assertThat(
        overridesList.size(), is(FlinkTransformOverrides.getDefaultOverrides(options).size()));
  }
}
 
Example #12
Source File: GoogleApiDebugOptionsTest.java    From beam with Apache License 2.0
@Test
public void testMatchingAgainstClient() throws Exception {
  GcsOptions options = PipelineOptionsFactory.as(GcsOptions.class);
  options.setGcpCredential(new TestCredential());
  options.setGoogleApiTrace(
      new GoogleApiTracer()
          .addTraceFor(Transport.newStorageClient(options).build(), "TraceDestination"));

  Storage.Objects.Get getRequest =
      Transport.newStorageClient(options).build().objects().get("testBucketId", "testObjectId");
  assertEquals("TraceDestination", getRequest.get("$trace"));

  Delete deleteRequest =
      GcpOptions.GcpTempLocationFactory.newCloudResourceManagerClient(
              options.as(CloudResourceManagerOptions.class))
          .build()
          .projects()
          .delete("testProjectId");
  assertNull(deleteRequest.get("$trace"));
}
 
Example #13
Source File: FileBasedSourceTest.java    From beam with Apache License 2.0
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = createFileWithData("file", createStringDataset(3, 100));

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null);
  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options);
  assertSplitAtFractionFails(source, 0, 0.0, options);
  assertSplitAtFractionFails(source, 70, 0.3, options);
  assertSplitAtFractionFails(source, 100, 1.0, options);
  assertSplitAtFractionFails(source, 100, 0.99, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options);
}
 
Example #14
Source File: NexmarkUtilsTest.java    From beam with Apache License 2.0
private void testTableName(
    NexmarkUtils.ResourceNameMode nameMode,
    String baseTableName,
    String queryName,
    Long salt,
    String version,
    Class runner,
    Boolean isStreaming,
    final String expected) {
  NexmarkOptions options = PipelineOptionsFactory.as(NexmarkOptions.class);
  options.setResourceNameMode(nameMode);
  options.setBigQueryTable(baseTableName);
  options.setRunner(runner);
  options.setStreaming(isStreaming);

  String tableName = NexmarkUtils.tableName(options, queryName, salt, version);

  assertEquals(expected, tableName);
}
 
Example #15
Source File: BigQueryServicesImplTest.java    From beam with Apache License 2.0
/** Tests that table creation succeeds when the table already exists. */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("column1").setType("String"),
                  new TableFieldSchema().setName("column2").setType("Integer")));
  Table testTable = new Table().setTableReference(ref).setSchema(schema);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table ret =
      services.tryCreateTable(
          testTable, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

  assertNull(ret);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Example #16
Source File: ParDoEvaluatorTest.java    From beam with Apache License 2.0
private ParDoEvaluator<Integer> createEvaluator(
    PCollectionView<Integer> singletonView,
    RecorderFn fn,
    PCollection<Integer> input,
    PCollection<Integer> output) {
  when(evaluationContext.createSideInputReader(ImmutableList.of(singletonView)))
      .thenReturn(new ReadyInGlobalWindowReader());
  DirectExecutionContext executionContext = mock(DirectExecutionContext.class);
  DirectStepContext stepContext = mock(DirectStepContext.class);
  when(executionContext.getStepContext(Mockito.any(String.class))).thenReturn(stepContext);
  when(stepContext.getTimerUpdate()).thenReturn(TimerUpdate.empty());
  when(evaluationContext.getExecutionContext(
          Mockito.any(AppliedPTransform.class), Mockito.any(StructuralKey.class)))
      .thenReturn(executionContext);

  DirectGraphs.performDirectOverrides(p);
  @SuppressWarnings("unchecked")
  AppliedPTransform<PCollection<Integer>, ?, ?> transform =
      (AppliedPTransform<PCollection<Integer>, ?, ?>) DirectGraphs.getProducer(output);
  return ParDoEvaluator.create(
      evaluationContext,
      PipelineOptionsFactory.create(),
      stepContext,
      transform,
      input.getCoder(),
      input.getWindowingStrategy(),
      fn,
      null /* key */,
      ImmutableList.of(singletonView),
      mainOutputTag,
      additionalOutputTags,
      ImmutableMap.of(mainOutputTag, output),
      DoFnSchemaInformation.create(),
      Collections.emptyMap(),
      ParDoEvaluator.defaultRunnerFactory());
}
 
Example #17
Source File: FlinkExecutionEnvironmentsTest.java    From beam with Apache License 2.0
@Test
public void shouldAcceptExplicitlySetIdleSourcesFlagWithoutCheckpointing() {
  // Checkpointing disabled, accept flag
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setShutdownSourcesAfterIdleMs(42L);
  FlinkExecutionEnvironments.createStreamExecutionEnvironment(options, Collections.emptyList());
  assertThat(options.getShutdownSourcesAfterIdleMs(), is(42L));
}
 
Example #18
Source File: CreateTest.java    From beam with Apache License 2.0
@Test
public void testSourceSplitVoid() throws Exception {
  CreateSource<Void> source =
      CreateSource.fromIterable(Lists.newArrayList(null, null, null, null, null), VoidCoder.of());
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<Void>> splitSources = source.split(3, options);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splitSources, options);
}
 
Example #19
Source File: GcsUtilTest.java    From beam with Apache License 2.0
@Test
public void testMultipleThreadsCanCompleteOutOfOrderWithDefaultThreadPool() throws Exception {
  GcsOptions pipelineOptions = PipelineOptionsFactory.as(GcsOptions.class);
  ExecutorService executorService = pipelineOptions.getExecutorService();

  int numThreads = 100;
  final CountDownLatch[] countDownLatches = new CountDownLatch[numThreads];
  for (int i = 0; i < numThreads; i++) {
    final int currentLatch = i;
    countDownLatches[i] = new CountDownLatch(1);
    executorService.execute(
        () -> {
          // Wait for latch N and then release latch N - 1
          try {
            countDownLatches[currentLatch].await();
            if (currentLatch > 0) {
              countDownLatches[currentLatch - 1].countDown();
            }
          } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
          }
        });
  }

  // Release the last latch starting the chain reaction.
  countDownLatches[countDownLatches.length - 1].countDown();
  executorService.shutdown();
  assertTrue(
      "Expected tasks to complete", executorService.awaitTermination(10, TimeUnit.SECONDS));
}
 
Example #20
Source File: ConfigGeneratorTest.java    From beam with Apache License 2.0
@Test
public void testBeamStoreConfig() {
  SamzaPipelineOptions options = PipelineOptionsFactory.create().as(SamzaPipelineOptions.class);
  options.setJobName("TestStoreConfig");
  options.setRunner(SamzaRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(Sum.integersGlobally());

  pipeline.replaceAll(SamzaTransformOverrides.getDefaultOverrides());

  final Map<PValue, String> idMap = PViewToIdMapper.buildIdMap(pipeline);
  final ConfigBuilder configBuilder = new ConfigBuilder(options);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config = configBuilder.build();

  assertEquals(
      RocksDbKeyValueStorageEngineFactory.class.getName(),
      config.get("stores.beamStore.factory"));
  assertEquals("byteArraySerde", config.get("stores.beamStore.key.serde"));
  assertEquals("byteSerde", config.get("stores.beamStore.msg.serde"));
  assertNull(config.get("stores.beamStore.changelog"));

  options.setStateDurable(true);
  SamzaPipelineTranslator.createConfig(pipeline, options, idMap, configBuilder);
  final Config config2 = configBuilder.build();
  assertEquals(
      "TestStoreConfig-1-beamStore-changelog", config2.get("stores.beamStore.changelog"));
}
 
Example #21
Source File: PubSubToElasticsearch.java    From DataflowTemplates with Apache License 2.0
/**
 * Main entry point for executing the pipeline.
 *
 * @param args The command-line arguments to the pipeline.
 */
public static void main(String[] args) {

  // Parse the user options passed from the command-line.
  PubSubToElasticsearchOptions pubSubToElasticsearchOptions =
      PipelineOptionsFactory.fromArgs(args)
          .withValidation()
          .as(PubSubToElasticsearchOptions.class);

  run(pubSubToElasticsearchOptions);
}
 
Example #22
Source File: BigQueryToElasticsearchTest.java    From DataflowTemplates with Apache License 2.0
/** Test the {@link BigQueryToElasticsearch} pipeline end-to-end. */
@Test
public void testBigQueryToElasticsearchE2E() {

  BigQueryToElasticsearch.BigQueryToElasticsearchReadOptions options =
      PipelineOptionsFactory.create()
          .as(BigQueryToElasticsearch.BigQueryToElasticsearchReadOptions.class);

  options.setNodeAddresses("http://my-node");
  options.setIndex("test");
  options.setDocumentType("_doc");
  options.setInputTableSpec("my-project:my-dataset.my-table");
  options.setQuery(null);

  // Build pipeline
  PCollection<String> testStrings =
      pipeline
          .apply("CreateInput", Create.of(rows))
          .apply("TestTableRowToJson", ParDo.of(new BigQueryConverters.TableRowToJsonFn()));

  PAssert.that(testStrings)
      .satisfies(
          collection -> {
            String result = collection.iterator().next();
            assertThat(result, is(equalTo(jsonifiedTableRow)));
            return null;
          });

  // Execute pipeline
  pipeline.run();
}
 
Example #23
Source File: FlinkDoFnFunctionTest.java    From beam with Apache License 2.0
@Test
public void testAccumulatorRegistrationOnOperatorClose() throws Exception {
  FlinkDoFnFunction doFnFunction =
      new TestDoFnFunction(
          "step",
          WindowingStrategy.globalDefault(),
          Collections.emptyMap(),
          PipelineOptionsFactory.create(),
          Collections.emptyMap(),
          new TupleTag<>(),
          null,
          Collections.emptyMap(),
          DoFnSchemaInformation.create(),
          Collections.emptyMap());

  doFnFunction.open(new Configuration());

  String metricContainerFieldName = "metricContainer";
  FlinkMetricContainer monitoredContainer =
      Mockito.spy(
          (FlinkMetricContainer)
              Whitebox.getInternalState(doFnFunction, metricContainerFieldName));
  Whitebox.setInternalState(doFnFunction, metricContainerFieldName, monitoredContainer);

  doFnFunction.close();
  Mockito.verify(monitoredContainer).registerMetricsForPipelineResult();
}
 
Example #24
Source File: CombineTest.java    From beam with Apache License 2.0
@BeforeClass
public static void beforeClass() {
  SparkStructuredStreamingPipelineOptions options =
      PipelineOptionsFactory.create().as(SparkStructuredStreamingPipelineOptions.class);
  options.setRunner(SparkStructuredStreamingRunner.class);
  options.setTestMode(true);
  pipeline = Pipeline.create(options);
}
 
Example #25
Source File: AvroByteReaderTest.java    From beam with Apache License 2.0
@Test
public void testRequestDynamicSplitExhaustive() throws Exception {
  List<List<String>> elements = generateInputBlocks(5, 10 * 10, 10);
  Coder<String> coder = StringUtf8Coder.of();
  AvroFileInfo<String> fileInfo = initInputFile(elements, coder);
  AvroByteReader<String> reader =
      new AvroByteReader<String>(
          fileInfo.filename, 0L, Long.MAX_VALUE, coder, PipelineOptionsFactory.create());
  for (float splitFraction = 0.0F; splitFraction < 1.0F; splitFraction += 0.02F) {
    for (int recordsToRead = 0; recordsToRead <= 500; recordsToRead += 5) {
      testRequestDynamicSplitInternal(
          reader, splitFraction, recordsToRead, SplitVerificationBehavior.DO_NOT_VERIFY);
    }
  }
}
 
Example #26
Source File: CreateTest.java    From beam with Apache License 2.0
@Test
public void testSourceSplit() throws Exception {
  CreateSource<Integer> source =
      CreateSource.fromIterable(
          ImmutableList.of(1, 2, 3, 4, 5, 6, 7, 8), BigEndianIntegerCoder.of());
  PipelineOptions options = PipelineOptionsFactory.create();
  List<? extends BoundedSource<Integer>> splitSources = source.split(12, options);
  assertThat(splitSources, hasSize(3));
  SourceTestUtils.assertSourcesEqualReferenceSource(source, splitSources, options);
}
 
Example #27
Source File: DataflowRunnerTest.java    From beam with Apache License 2.0
@Test
public void testGcsStagingLocationInitialization() throws Exception {
  // Set temp location (required), and check that staging location is set.
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setTempLocation(VALID_TEMP_BUCKET);
  options.setProject(PROJECT_ID);
  options.setRegion(REGION_ID);
  options.setGcpCredential(new TestCredential());
  options.setGcsUtil(mockGcsUtil);
  options.setRunner(DataflowRunner.class);

  DataflowRunner.fromOptions(options);

  assertNotNull(options.getStagingLocation());
}
 
Example #28
Source File: AvroByteReaderTest.java    From beam with Apache License 2.0
private <T> List<T> readElems(
    String filename, long startOffset, long endOffset, Coder<T> coder, List<Integer> actualSizes)
    throws Exception {
  AvroByteReader<T> avroReader =
      new AvroByteReader<>(
          filename, startOffset, endOffset, coder, PipelineOptionsFactory.create());
  new ExecutorTestUtils.TestReaderObserver(avroReader, actualSizes);
  return readAllFromReader(avroReader);
}
 
Example #29
Source File: FlinkExecutionEnvironmentsTest.java    From beam with Apache License 2.0
@Test
public void shouldInferParallelismFromEnvironmentStreaming() throws IOException {
  String confDir = extractFlinkConfig();

  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(TestFlinkRunner.class);
  options.setFlinkMaster("host:80");

  StreamExecutionEnvironment sev =
      FlinkExecutionEnvironments.createStreamExecutionEnvironment(
          options, Collections.emptyList(), confDir);

  assertThat(options.getParallelism(), is(23));
  assertThat(sev.getParallelism(), is(23));
}
 
Example #30
Source File: KafkaToGCSTest.java    From DataflowTemplates with Apache License 2.0
/**
 * Tests that {@link FileFormatFactory} maps the output file format to the transform to be
 * carried out, and throws an {@link IllegalArgumentException} if an invalid file format is passed.
 */
@Test
@Category(NeedsRunner.class)
public void testFileFormatFactoryInvalid() {

  // Create the test input.
  final String key = "Name";
  final String value = "Generic";
  final KV<String, String> message = KV.of(key, value);

  final String outputDirectory = "gs://bucket_name/path/to/output-location";
  final String outputFileFormat = "json".toUpperCase();
  final String outputFilenamePrefix = "output";
  final Integer numShards = 1;
  final String tempOutputDirectory = "gs://bucket_name/path/to/temp-location";

  KafkaToGCSOptions options = PipelineOptionsFactory.create().as(KafkaToGCSOptions.class);

  options.setOutputFileFormat(outputFileFormat);
  options.setOutputDirectory(outputDirectory);
  options.setOutputFilenamePrefix(outputFilenamePrefix);
  options.setNumShards(numShards);
  options.setTempLocation(tempOutputDirectory);

  exception.expect(IllegalArgumentException.class);

  PCollection<KV<String, String>> records =
      pipeline.apply(
          "CreateInput",
          Create.of(message).withCoder(KvCoder.of(StringUtf8Coder.of(), StringUtf8Coder.of())));

  records.apply("WriteToGCS", FileFormatFactory.newBuilder().setOptions(options).build());

  // Run the pipeline.
  pipeline.run();
}