Java Code Examples for org.apache.beam.sdk.options.PipelineOptionsFactory

The following examples show how to use org.apache.beam.sdk.options.PipelineOptionsFactory. They are extracted from open source projects; the source project, file, and license are noted above each example.
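As a quick orientation before the examples: the factory has three entry points that recur throughout this page. The sketch below is illustrative only; MyOptions is a hypothetical options interface (not part of the Beam SDK), and the statements are assumed to run inside a main(String[] args) method.

// Hypothetical options interface, for illustration only.
public interface MyOptions extends PipelineOptions {
  String getInput();
  void setInput(String value);
}

// 1. Default options, no command-line parsing.
PipelineOptions defaults = PipelineOptionsFactory.create();

// 2. Parse command-line arguments, validate required options, and view the
//    result as MyOptions.
MyOptions parsed = PipelineOptionsFactory.fromArgs(args).withValidation().as(MyOptions.class);

// 3. Instantiate a specific options interface directly, using its defaults.
MyOptions typed = PipelineOptionsFactory.as(MyOptions.class);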
Example 1
Source Project: beam   Source File: GrpcWindmillServer.java    License: Apache License 2.0
private GrpcWindmillServer(String name, boolean enableStreamingEngine) {
  this.options = PipelineOptionsFactory.create().as(StreamingDataflowWorkerOptions.class);
  this.streamingRpcBatchLimit = Integer.MAX_VALUE;
  options.setProject("project");
  options.setJobId("job");
  options.setWorkerId("worker");
  if (enableStreamingEngine) {
    List<String> experiments = this.options.getExperiments();
    if (experiments == null) {
      experiments = new ArrayList<>();
    }
    experiments.add(GcpOptions.STREAMING_ENGINE_EXPERIMENT);
    options.setExperiments(experiments);
  }
  this.stubList.add(CloudWindmillServiceV1Alpha1Grpc.newStub(inProcessChannel(name)));
}
 
Example 2
Source Project: beam   Source File: ElasticsearchIOIT.java    License: Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  PipelineOptionsFactory.register(ElasticsearchPipelineOptions.class);
  options = TestPipeline.testingPipelineOptions().as(ElasticsearchPipelineOptions.class);
  readConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.READ);
  writeConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE);
  updateConnectionConfiguration =
      ElasticsearchIOITCommon.getConnectionConfiguration(
          options, ElasticsearchIOITCommon.IndexMode.WRITE_PARTIAL);
  restClient = readConnectionConfiguration.createClient();
  elasticsearchIOTestCommon =
      new ElasticsearchIOTestCommon(readConnectionConfiguration, restClient, true);
}
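Example 2 calls PipelineOptionsFactory.register before parsing so that the factory recognizes the custom interface's flags and includes them in --help output. A minimal sketch of that pattern, using a hypothetical EsTestOptions interface in place of ElasticsearchPipelineOptions:

// Hypothetical stand-in for ElasticsearchPipelineOptions.
public interface EsTestOptions extends PipelineOptions {
  @Description("Address of the Elasticsearch server under test.")
  @Default.String("localhost")
  String getServerAddress();
  void setServerAddress(String value);
}

// Register before parsing so --serverAddress is recognized.
PipelineOptionsFactory.register(EsTestOptions.class);
EsTestOptions options = TestPipeline.testingPipelineOptions().as(EsTestOptions.class);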
 
Example 3
@Test
public void shouldUseTransformOverrides() {
  boolean[] testParameters = {true, false};
  for (boolean streaming : testParameters) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setStreaming(streaming);
    options.setRunner(FlinkRunner.class);
    FlinkPipelineExecutionEnvironment flinkEnv = new FlinkPipelineExecutionEnvironment(options);
    Pipeline p = Mockito.spy(Pipeline.create(options));

    flinkEnv.translate(p);

    ArgumentCaptor<ImmutableList> captor = ArgumentCaptor.forClass(ImmutableList.class);
    Mockito.verify(p).replaceAll(captor.capture());
    ImmutableList<PTransformOverride> overridesList = captor.getValue();

    assertThat(overridesList.isEmpty(), is(false));
    assertThat(
        overridesList.size(), is(FlinkTransformOverrides.getDefaultOverrides(options).size()));
  }
}
 
Example 4
Source Project: beam   Source File: GoogleApiDebugOptionsTest.java    License: Apache License 2.0
@Test
public void testMatchingAgainstClient() throws Exception {
  GcsOptions options = PipelineOptionsFactory.as(GcsOptions.class);
  options.setGcpCredential(new TestCredential());
  options.setGoogleApiTrace(
      new GoogleApiTracer()
          .addTraceFor(Transport.newStorageClient(options).build(), "TraceDestination"));

  Storage.Objects.Get getRequest =
      Transport.newStorageClient(options).build().objects().get("testBucketId", "testObjectId");
  assertEquals("TraceDestination", getRequest.get("$trace"));

  Delete deleteRequest =
      GcpOptions.GcpTempLocationFactory.newCloudResourceManagerClient(
              options.as(CloudResourceManagerOptions.class))
          .build()
          .projects()
          .delete("testProjectId");
  assertNull(deleteRequest.get("$trace"));
}
 
Example 5
Source Project: beam   Source File: BigQueryServicesImplTest.java    License: Apache License 2.0
/** Tests that table creation succeeds when the table already exists. */
@Test
public void testCreateTableSucceedsAlreadyExists() throws IOException {
  TableReference ref =
      new TableReference().setProjectId("project").setDatasetId("dataset").setTableId("table");
  TableSchema schema =
      new TableSchema()
          .setFields(
              ImmutableList.of(
                  new TableFieldSchema().setName("column1").setType("String"),
                  new TableFieldSchema().setName("column2").setType("Integer")));
  Table testTable = new Table().setTableReference(ref).setSchema(schema);

  when(response.getStatusCode()).thenReturn(409); // 409 means already exists

  BigQueryServicesImpl.DatasetServiceImpl services =
      new BigQueryServicesImpl.DatasetServiceImpl(bigquery, PipelineOptionsFactory.create());
  Table ret =
      services.tryCreateTable(
          testTable, new RetryBoundedBackOff(0, BackOff.ZERO_BACKOFF), Sleeper.DEFAULT);

  assertNull(ret);
  verify(response, times(1)).getStatusCode();
  verify(response, times(1)).getContent();
  verify(response, times(1)).getContentType();
}
 
Example 6
Source Project: beam   Source File: UserScore.java    License: Apache License 2.0
/** Run a batch pipeline. */
// [START DocInclude_USMain]
public static void main(String[] args) throws Exception {
  // Begin constructing a pipeline configured by command-line flags.
  Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline pipeline = Pipeline.create(options);

  // Read events from a text file and parse them.
  pipeline
      .apply(TextIO.read().from(options.getInput()))
      .apply("ParseGameEvent", ParDo.of(new ParseEventFn()))
      // Extract and sum username/score pairs from the event data.
      .apply("ExtractUserScore", new ExtractAndSumScore("user"))
      .apply(
          "WriteUserScoreSums", new WriteToText<>(options.getOutput(), configureOutput(), false));

  // Run the batch pipeline.
  pipeline.run().waitUntilFinish();
}
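The Options interface used above is defined elsewhere in UserScore.java and is not shown on this page. A plausible minimal shape, reconstructed here only for illustration:

// Illustrative reconstruction; the real interface lives in UserScore.java.
public interface Options extends PipelineOptions {
  @Description("Path to the input file of game events.")
  String getInput();
  void setInput(String value);

  @Description("Prefix for the output files.")
  @Validation.Required
  String getOutput();
  void setOutput(String value);
}

Declaring getOutput() with @Validation.Required is what makes the withValidation() call in main fail fast when the flag is missing.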
 
Example 7
Source Project: beam   Source File: NexmarkUtilsTest.java    License: Apache License 2.0
private void testTableName(
    NexmarkUtils.ResourceNameMode nameMode,
    String baseTableName,
    String queryName,
    Long salt,
    String version,
    Class runner,
    Boolean isStreaming,
    final String expected) {
  NexmarkOptions options = PipelineOptionsFactory.as(NexmarkOptions.class);
  options.setResourceNameMode(nameMode);
  options.setBigQueryTable(baseTableName);
  options.setRunner(runner);
  options.setStreaming(isStreaming);

  String tableName = NexmarkUtils.tableName(options, queryName, salt, version);

  assertEquals(expected, tableName);
}
 
Example 8
Source Project: beam   Source File: FileBasedSourceTest.java    License: Apache License 2.0
@Test
public void testSplitAtFraction() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = createFileWithData("file", createStringDataset(3, 100));

  Metadata metadata = FileSystems.matchSingleFileSpec(file.getPath());
  TestFileBasedSource source = new TestFileBasedSource(metadata, 1, 0, file.length(), null);
  // Shouldn't be able to split while unstarted.
  assertSplitAtFractionFails(source, 0, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 1, 0.7, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 30, 0.7, options);
  assertSplitAtFractionFails(source, 0, 0.0, options);
  assertSplitAtFractionFails(source, 70, 0.3, options);
  assertSplitAtFractionFails(source, 100, 1.0, options);
  assertSplitAtFractionFails(source, 100, 0.99, options);
  assertSplitAtFractionSucceedsAndConsistent(source, 100, 0.995, options);
}
 
Example 9
Source Project: beam   Source File: FileBasedSourceTest.java    License: Apache License 2.0
@Test
public void testReadEverythingFromFileWithSplits() throws IOException {
  PipelineOptions options = PipelineOptionsFactory.create();
  String header = "<h>";
  List<String> data = new ArrayList<>();
  for (int i = 0; i < 10; i++) {
    data.add(header);
    data.addAll(createStringDataset(3, 9));
  }
  String fileName = "file";
  File file = createFileWithData(fileName, data);

  TestFileBasedSource source = new TestFileBasedSource(file.getPath(), 64, header);

  List<String> expectedResults = new ArrayList<>();
  expectedResults.addAll(data);
  // Remove all occurrences of header from expected results.
  expectedResults.removeAll(Collections.singletonList(header));

  assertEquals(expectedResults, readFromSource(source, options));
}
 
Example 10
Source Project: beam   Source File: Task.java    License: Apache License 2.0
public static void main(String[] args) {
  String[] lines = {
      "apple orange grape banana apple banana",
      "banana orange banana papaya"
  };

  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> wordCounts =
      pipeline.apply(Create.of(Arrays.asList(lines)));

  PCollection<String> output = applyTransform(wordCounts);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example 11
Source Project: beam   Source File: Task.java    License: Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<Event> events =
      pipeline.apply(
          Create.of(
              new Event("1", "book-order", DateTime.parse("2019-06-01T00:00:00+00:00")),
              new Event("2", "pencil-order", DateTime.parse("2019-06-02T00:00:00+00:00")),
              new Event("3", "paper-order", DateTime.parse("2019-06-03T00:00:00+00:00")),
              new Event("4", "pencil-order", DateTime.parse("2019-06-04T00:00:00+00:00")),
              new Event("5", "book-order", DateTime.parse("2019-06-05T00:00:00+00:00"))
          )
      );

  PCollection<Event> output = applyTransform(events);

  output.apply(Log.ofElements());

  pipeline.run();
}
 
Example 12
Source Project: beam   Source File: FlinkExecutionEnvironmentsTest.java    License: Apache License 2.0
@Test
public void shouldSupportIPv6Batch() {
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setRunner(FlinkRunner.class);

  options.setFlinkMaster("[FE80:CD00:0000:0CDE:1257:0000:211E:729C]:1234");
  ExecutionEnvironment bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", 1234);

  options.setFlinkMaster("FE80:CD00:0000:0CDE:1257:0000:211E:729C");
  bev =
      FlinkExecutionEnvironments.createBatchExecutionEnvironment(
          options, Collections.emptyList());
  checkHostAndPort(
      bev, "FE80:CD00:0000:0CDE:1257:0000:211E:729C", RestOptions.PORT.defaultValue());
}
 
Example 13
Source Project: DataflowTemplates   Source File: DatastoreToText.java    License: Apache License 2.0
/**
 * Runs a pipeline that reads Entities from Datastore, passes the JSON-encoded Entities
 * to a JavaScript UDF, and writes the resulting JSON to a TextIO sink.
 *
 * @param args arguments to the pipeline
 */
public static void main(String[] args) {
  DatastoreToTextOptions options = PipelineOptionsFactory.fromArgs(args)
      .withValidation()
      .as(DatastoreToTextOptions.class);

  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(ReadJsonEntities.newBuilder()
          .setGqlQuery(options.getDatastoreReadGqlQuery())
          .setProjectId(options.getDatastoreReadProjectId())
          .setNamespace(options.getDatastoreReadNamespace())
          .build())
      .apply(TransformTextViaJavascript.newBuilder()
          .setFileSystemPath(options.getJavascriptTextTransformGcsPath())
          .setFunctionName(options.getJavascriptTextTransformFunctionName())
          .build())
      .apply(TextIO.write()
          .to(options.getTextWritePrefix())
          .withSuffix(".json"));

  pipeline.run();
}
 
Example 14
Source Project: beam   Source File: BigtableIOTest.java    License: Apache License 2.0
@Test
public void testReadWithRuntimeParametersValidationDisabled() {
  ReadOptions options = PipelineOptionsFactory.fromArgs().withValidation().as(ReadOptions.class);

  BigtableIO.Read read =
      BigtableIO.read()
          .withoutValidation()
          .withProjectId(options.getBigtableProject())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  // We never run the pipeline, so this exception is expected.
  thrown.expect(PipelineRunMissingException.class);

  p.apply(read);
}
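Example 14 exercises options whose concrete values may arrive only at run time. Beam models these with ValueProvider-typed getters; the ReadOptions interface is not shown above, but a hypothetical shape consistent with the calls in the test would be:

// Hypothetical shape of ReadOptions, for illustration only.
public interface ReadOptions extends PipelineOptions {
  ValueProvider<String> getBigtableProject();
  void setBigtableProject(ValueProvider<String> value);

  ValueProvider<String> getBigtableInstanceId();
  void setBigtableInstanceId(ValueProvider<String> value);

  ValueProvider<String> getBigtableTableId();
  void setBigtableTableId(ValueProvider<String> value);
}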
 
Example 15
Source Project: beam   Source File: CompressedSourceTest.java    License: Apache License 2.0
/**
 * Tests that a concatenation of gzip files is correctly decompressed.
 *
 * <p>A concatenation of gzip files as one file is a valid gzip file and should decompress to be
 * the concatenation of those individual files.
 */
@Test
public void testReadConcatenatedGzip() throws IOException {
  byte[] header = "a,b,c\n".getBytes(StandardCharsets.UTF_8);
  byte[] body = "1,2,3\n4,5,6\n7,8,9\n".getBytes(StandardCharsets.UTF_8);
  byte[] expected = concat(header, body);
  byte[] totalGz = concat(compressGzip(header), compressGzip(body));
  File tmpFile = tmpFolder.newFile();
  try (FileOutputStream os = new FileOutputStream(tmpFile)) {
    os.write(totalGz);
  }

  CompressedSource<Byte> source =
      CompressedSource.from(new ByteSource(tmpFile.getAbsolutePath(), 1))
          .withDecompression(CompressionMode.GZIP);
  List<Byte> actual = SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create());
  assertEquals(Bytes.asList(expected), actual);
}
 
Example 16
Source Project: beam   Source File: SyntheticBoundedSourceTest.java    License: Apache License 2.0
private void testSplitIntoBundlesP(long splitPointFrequency) throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  testSourceOptions.splitPointFrequencyRecords = splitPointFrequency;
  testSourceOptions.numRecords = 100;
  SyntheticBoundedSource source = new SyntheticBoundedSource(testSourceOptions);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, source.split(10, options), options);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, source.split(40, options), options);
  SourceTestUtils.assertSourcesEqualReferenceSource(source, source.split(100, options), options);
}
 
Example 17
Source Project: component-runtime   Source File: BeamIOTransformerTest.java    License: Apache License 2.0
@Test
void boundedSource() {
    scenario((transformer, loader) -> {
        final Class<?> aClass = loader.loadClass(MyBoundedSource.class.getName());
        assertEquals(loader, aClass.getClassLoader());
        final Object instance = newInstance(aClass, loader);
        final PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
        aClass.getMethod("split", long.class, PipelineOptions.class).invoke(instance, 1, pipelineOptions);
        aClass.getMethod("createReader", PipelineOptions.class).invoke(instance, pipelineOptions);
        aClass.getMethod("getEstimatedSizeBytes", PipelineOptions.class).invoke(instance, pipelineOptions);
        aClass.getMethod("getDefaultOutputCoder").invoke(instance);
        aClass.getMethod("populateDisplayData", DisplayData.Builder.class).invoke(instance, new Object[] { null });
    });
}
 
Example 18
Source Project: beam   Source File: DoFnOperatorTest.java    License: Apache License 2.0
@Test
public void nonKeyedParDoPushbackDataCheckpointing() throws Exception {
  pushbackDataCheckpointing(
      () -> {
        Coder<WindowedValue<String>> coder =
            WindowedValue.getFullCoder(StringUtf8Coder.of(), IntervalWindow.getCoder());

        TupleTag<String> outputTag = new TupleTag<>("main-output");

        ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
            ImmutableMap.<Integer, PCollectionView<?>>builder()
                .put(1, view1)
                .put(2, view2)
                .build();

        DoFnOperator<String, String> doFnOperator =
            new DoFnOperator<>(
                new IdentityDoFn<>(),
                "stepName",
                coder,
                Collections.emptyMap(),
                outputTag,
                Collections.emptyList(),
                new DoFnOperator.MultiOutputOutputManagerFactory<>(outputTag, coder),
                WindowingStrategy.of(FixedWindows.of(Duration.millis(100))),
                sideInputMapping, /* side-input mapping */
                ImmutableList.of(view1, view2), /* side inputs */
                PipelineOptionsFactory.as(FlinkPipelineOptions.class),
                null,
                null,
                DoFnSchemaInformation.create(),
                Collections.emptyMap());

        return new TwoInputStreamOperatorTestHarness<>(doFnOperator);
      });
}
 
Example 19
Source Project: beam   Source File: JdbcDriverTest.java    License: Apache License 2.0
@Test
@Ignore("https://issues.apache.org/jira/browse/CALCITE-2394")
public void testTimestampWithNonzeroTimezone() throws Exception {
  Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("Asia/Tokyo"), Locale.ROOT);
  TestTableProvider tableProvider = new TestTableProvider();
  Connection connection = JdbcDriver.connect(tableProvider, PipelineOptionsFactory.create());

  // A table with one TIMESTAMP column
  Schema schema = Schema.builder().addDateTimeField("ts").build();
  connection
      .createStatement()
      .executeUpdate("CREATE EXTERNAL TABLE test (ts TIMESTAMP) TYPE 'test'");

  ReadableInstant july1 =
      ISODateTimeFormat.dateTimeParser().parseDateTime("2018-07-01T01:02:03Z");
  tableProvider.addRows("test", Row.withSchema(schema).addValue(july1).build());

  ResultSet selectResult = connection.createStatement().executeQuery("SELECT ts FROM test");
  selectResult.next();
  Timestamp ts = selectResult.getTimestamp(1, cal);

  assertThat(
      String.format(
          "Wrote %s to a table, but got back %s",
          ISODateTimeFormat.basicDateTime().print(july1),
          ISODateTimeFormat.basicDateTime().print(ts.getTime())),
      ts.getTime(),
      equalTo(july1.getMillis()));
}
 
Example 20
Source Project: beam   Source File: HarnessStreamObserverFactoriesTest.java    License: Apache License 2.0
@Test
public void testBufferedStreamInstantiation() {
  StreamObserver<String> observer =
      HarnessStreamObserverFactories.fromOptions(
              PipelineOptionsFactory.fromArgs(
                      new String[] {"--experiments=beam_fn_api_buffered_stream"})
                  .create())
          .outboundObserverFor(this::fakeFactory, mockRequestObserver);
  assertThat(observer, instanceOf(BufferingStreamObserver.class));
}
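Note that fromArgs is a varargs method, so the explicit new String[] {...} wrapper in Example 20 is not required. A compact equivalent that also reads the experiment list back, assuming the standard ExperimentalOptions interface:

// fromArgs is varargs; no explicit String[] is needed.
List<String> experiments =
    PipelineOptionsFactory.fromArgs("--experiments=beam_fn_api_buffered_stream")
        .create()
        .as(ExperimentalOptions.class)
        .getExperiments();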
 
Example 21
Source Project: beam   Source File: UnboundedReadEvaluatorFactoryTest.java    License: Apache License 2.0
@Test
public void generatesInitialSplits() throws Exception {
  when(context.createRootBundle()).thenAnswer(invocation -> bundleFactory.createRootBundle());

  int numSplits = 5;
  Collection<CommittedBundle<?>> initialInputs =
      new UnboundedReadEvaluatorFactory.InputProvider(context, options)
          .getInitialInputs(graph.getProducer(longs), numSplits);
  // CountingSource.unbounded has very good splitting behavior
  assertThat(initialInputs, hasSize(numSplits));

  int readPerSplit = 100;
  int totalSize = numSplits * readPerSplit;
  Set<Long> expectedOutputs =
      ContiguousSet.create(Range.closedOpen(0L, (long) totalSize), DiscreteDomain.longs());

  Collection<Long> readItems = new ArrayList<>(totalSize);
  for (CommittedBundle<?> initialInput : initialInputs) {
    CommittedBundle<UnboundedSourceShard<Long, ?>> shardBundle =
        (CommittedBundle<UnboundedSourceShard<Long, ?>>) initialInput;
    WindowedValue<UnboundedSourceShard<Long, ?>> shard =
        Iterables.getOnlyElement(shardBundle.getElements());
    assertThat(shard.getTimestamp(), equalTo(BoundedWindow.TIMESTAMP_MIN_VALUE));
    assertThat(shard.getWindows(), Matchers.contains(GlobalWindow.INSTANCE));
    UnboundedSource<Long, ?> shardSource = shard.getValue().getSource();
    readItems.addAll(
        SourceTestUtils.readNItemsFromUnstartedReader(
            shardSource.createReader(
                PipelineOptionsFactory.create(), null /* No starting checkpoint */),
            readPerSplit));
  }
  assertThat(readItems, containsInAnyOrder(expectedOutputs.toArray(new Long[0])));
}
 
Example 22
Source Project: beam   Source File: TextIOReadTest.java    License: Apache License 2.0
@Test
public void testProgressTextFile() throws IOException {
  String file = "line1\nline2\nline3";
  try (BoundedSource.BoundedReader<String> reader =
      prepareSource(file.getBytes(Charsets.UTF_8))
          .createReader(PipelineOptionsFactory.create())) {
    // Check preconditions before starting
    assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(
        BoundedSource.BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Line 1
    assertTrue(reader.start());
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(
        BoundedSource.BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Line 2
    assertTrue(reader.advance());
    assertEquals(1, reader.getSplitPointsConsumed());
    assertEquals(
        BoundedSource.BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());

    // Line 3
    assertTrue(reader.advance());
    assertEquals(2, reader.getSplitPointsConsumed());
    assertEquals(1, reader.getSplitPointsRemaining());

    // Check postconditions after finishing
    assertFalse(reader.advance());
    assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
    assertEquals(3, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
  }
}
 
Example 23
Source Project: beam   Source File: DataflowPipelineDebugOptionsTest.java    License: Apache License 2.0
@Test
public void testTransformNameMapping() throws Exception {
  DataflowPipelineDebugOptions options =
      PipelineOptionsFactory.fromArgs(
              "--transformNameMapping={\"a\":\"b\",\"foo\":\"\",\"bar\":\"baz\"}")
          .as(DataflowPipelineDebugOptions.class);
  assertEquals(3, options.getTransformNameMapping().size());
  assertThat(options.getTransformNameMapping(), hasEntry("a", "b"));
  assertThat(options.getTransformNameMapping(), hasEntry("foo", ""));
  assertThat(options.getTransformNameMapping(), hasEntry("bar", "baz"));
}
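The JSON value in Example 23 deserializes because the option's getter is declared with a Map type, which the factory populates from the JSON object. A hypothetical declaration of such an option:

// Hypothetical interface showing a Map-valued option.
public interface RenameOptions extends PipelineOptions {
  @Description("Mapping of old transform names to new ones, given as a JSON object.")
  Map<String, String> getTransformNameMapping();
  void setTransformNameMapping(Map<String, String> value);
}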
 
Example 24
Source Project: beam   Source File: DataflowRunnerTest.java    License: Apache License 2.0
@Test
public void testGcpTempAndNoTempLocationSucceeds() throws Exception {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setRunner(DataflowRunner.class);
  options.setGcpCredential(new TestCredential());
  options.setProject("foo-project");
  options.setRegion(REGION_ID);
  options.setGcpTempLocation(VALID_TEMP_BUCKET);
  options.setGcsUtil(mockGcsUtil);

  DataflowRunner.fromOptions(options);
}
 
Example 25
Source Project: beam   Source File: OffsetBasedSourceTest.java    License: Apache License 2.0
@Test
public void testEmptyOffsetRange() throws Exception {
  CoarseRangeSource empty = new CoarseRangeSource(0, 0, 1, 1);
  try (CoarseRangeReader reader = empty.createReader(PipelineOptionsFactory.create())) {
    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(OffsetBasedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
    assertEquals(0.0, reader.getFractionConsumed(), 0.0001);

    assertFalse(reader.start());

    assertEquals(0, reader.getSplitPointsConsumed());
    assertEquals(0, reader.getSplitPointsRemaining());
    assertEquals(1.0, reader.getFractionConsumed(), 0.0001);
  }
}
 
Example 26
Source Project: beam   Source File: XmlSourceTest.java    License: Apache License 2.0
@Test
public void testSplitAtFractionExhaustiveSingleByte() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  File file = tempFolder.newFile("trainXMLSmall");
  Files.write(file.toPath(), trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8));

  BoundedSource<Train> source =
      XmlIO.<Train>read()
          .from(file.toPath().toString())
          .withRootElement("trains")
          .withRecordElement("train")
          .withRecordClass(Train.class)
          .createSource();
  assertSplitAtFractionExhaustive(source, options);
}
 
Example 27
Source Project: beam   Source File: CrashingRunnerTest.java    License: Apache License 2.0
@Test
public void runThrows() {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(CrashingRunner.class);

  Pipeline p = Pipeline.create(opts);
  p.apply(Create.of(1, 2, 3));

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Cannot call #run");
  thrown.expectMessage(TestPipeline.PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);

  p.run();
}
 
Example 28
Source Project: beam   Source File: DataflowWorkUnitClientTest.java    License: Apache License 2.0
@Before
public void setUp() throws Exception {
  MockitoAnnotations.initMocks(this);
  when(transport.buildRequest(anyString(), anyString())).thenReturn(request);
  doCallRealMethod().when(request).getContentAsString();

  Dataflow service = new Dataflow(transport, Transport.getJsonFactory(), null);
  pipelineOptions = PipelineOptionsFactory.as(DataflowWorkerHarnessOptions.class);
  pipelineOptions.setProject(PROJECT_ID);
  pipelineOptions.setJobId(JOB_ID);
  pipelineOptions.setWorkerId(WORKER_ID);
  pipelineOptions.setGcpCredential(new TestCredential());
  pipelineOptions.setDataflowClient(service);
  pipelineOptions.setRegion("us-central1");
}
 
Example 29
Source Project: beam   Source File: FlinkExecutionEnvironmentsTest.java    License: Apache License 2.0
@Test
public void shouldAcceptExplicitlySetIdleSourcesFlagWithCheckpointing() {
  // Checkpointing is enabled; the explicitly set flag should still be honored.
  FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
  options.setCheckpointingInterval(1000L);
  options.setShutdownSourcesAfterIdleMs(42L);
  FlinkExecutionEnvironments.createStreamExecutionEnvironment(options, Collections.emptyList());
  assertThat(options.getShutdownSourcesAfterIdleMs(), is(42L));
}
 
Example 30
Source Project: beam   Source File: Task.java    License: Apache License 2.0
public static void main(String[] args) {
  PipelineOptions options = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> words =
      pipeline.apply(
          Create.of("apple", "ball", "car", "bear", "cheetah", "ant")
      );

  PCollection<KV<String, Iterable<String>>> output = applyTransform(words);

  output.apply(Log.ofElements());

  pipeline.run();
}