Java Code Examples for org.apache.beam.sdk.options.PipelineOptions#setRunner()

The following examples show how to use org.apache.beam.sdk.options.PipelineOptions#setRunner(). Each example comes from an open-source project; the source file and license are noted above each snippet.
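
All of the snippets share the same core pattern: create a PipelineOptions instance, call setRunner() with the class of the PipelineRunner that should execute the pipeline, and build a Pipeline from those options. Here is a minimal, self-contained sketch of that pattern (the class name SetRunnerExample is illustrative, and it assumes the DirectRunner artifact is on the classpath):

import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class SetRunnerExample {

  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    // The runner class decides where and how the pipeline executes.
    options.setRunner(DirectRunner.class);
    Pipeline pipeline = Pipeline.create(options);
    pipeline.run().waitUntilFinish();
  }
}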
Example 1
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
/**
 * Tests that a {@link DoFn} that mutates an output with a good equals() succeeds in the {@link
 * DirectRunner} when immutability enforcement is disabled.
 */
@Test
public void testMutatingOutputWithEnforcementDisabledSucceeds() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(DirectRunner.class);
  options.as(DirectOptions.class).setEnforceImmutability(false);
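  // With enforcement disabled, the DirectRunner skips the immutability checks that would otherwise reject the mutated output.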
  Pipeline pipeline = Pipeline.create(options);

  pipeline
      .apply(Create.of(42))
      .apply(
          ParDo.of(
              new DoFn<Integer, List<Integer>>() {
                @ProcessElement
                public void processElement(ProcessContext c) {
                  List<Integer> outputList = Arrays.asList(1, 2, 3, 4);
                  c.output(outputList);
                  outputList.set(0, 37);
                  c.output(outputList);
                }
              }));

  pipeline.run();
}
 
Example 2
Source File: PipelineTest.java    From beam with Apache License 2.0
@Test
public void testPipelineSDKExceptionHandling() {
  PipelineOptions options = TestPipeline.testingPipelineOptions();
  options.setRunner(TestPipelineRunnerThrowingSdkException.class);
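  // TestPipelineRunnerThrowingSdkException is a test-only runner whose run() throws an SDK exception.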
  Pipeline p = Pipeline.create(options);

  // Check pipeline runner correctly catches SDK errors.
  try {
    p.run();
    fail("Should have thrown an exception.");
  } catch (RuntimeException exn) {
    // Make sure the exception isn't a UserCodeException.
    assertThat(exn, not(instanceOf(UserCodeException.class)));
    // Assert that the message is correct.
    assertThat(exn.getMessage(), containsString("SDK exception"));
    // The thrown RuntimeException should specifically be an IllegalStateException.
    assertThat(exn, instanceOf(IllegalStateException.class));
  }
}
 
Example 3
Source File: CrashingRunnerTest.java    From beam with Apache License 2.0
@Test
public void fromOptionsCreatesInstance() {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(CrashingRunner.class);
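  // fromOptions reflectively instantiates the runner class configured through setRunner.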
  PipelineRunner<? extends PipelineResult> runner = PipelineRunner.fromOptions(opts);

  assertTrue("Should have created a CrashingRunner", runner instanceof CrashingRunner);
}
 
Example 4
Source File: PipelineTranslationModeOptimizerTest.java    From beam with Apache License 2.0
@Test
public void testBoundedCollectionProducingTransform() {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(FlinkRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(GenerateSequence.from(0).to(10));

  assertThat(PipelineTranslationModeOptimizer.hasUnboundedOutput(pipeline), is(false));
}
 
Example 5
Source File: PipelineTranslationModeOptimizerTest.java    From beam with Apache License 2.0
@Test
public void testUnboundedCollectionProducingTransform() {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(FlinkRunner.class);
  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(GenerateSequence.from(0));

  assertThat(PipelineTranslationModeOptimizer.hasUnboundedOutput(pipeline), is(true));
}
 
Example 6
Source File: SparkPortableExecutionTest.java    From beam with Apache License 2.0
@Test(timeout = 120_000)
public void testExecStageWithMultipleOutputs() throws Exception {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(CrashingRunner.class);
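  // CrashingRunner guards against an accidental Pipeline#run(); the job is submitted through SparkJobInvoker below instead.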
  options
      .as(PortablePipelineOptions.class)
      .setDefaultEnvironmentType(Environments.ENVIRONMENT_EMBEDDED);
  Pipeline pipeline = Pipeline.create(options);
  PCollection<KV<String, String>> a =
      pipeline
          .apply("impulse", Impulse.create())
          .apply("A", ParDo.of(new DoFnWithSideEffect<>("A")));
  PCollection<KV<String, String>> b = a.apply("B", ParDo.of(new DoFnWithSideEffect<>("B")));
  PCollection<KV<String, String>> c = a.apply("C", ParDo.of(new DoFnWithSideEffect<>("C")));
  // Use GBKs to force re-computation of executable stage unless cached.
  b.apply(GroupByKey.create());
  c.apply(GroupByKey.create());
  RunnerApi.Pipeline pipelineProto = PipelineTranslation.toProto(pipeline);
  JobInvocation jobInvocation =
      SparkJobInvoker.createJobInvocation(
          "testExecStageWithMultipleOutputs",
          "testExecStageWithMultipleOutputsRetrievalToken",
          sparkJobExecutor,
          pipelineProto,
          options.as(SparkPipelineOptions.class));
  jobInvocation.start();
  Assert.assertEquals(Enum.DONE, jobInvocation.getState());
}
 
Example 7
Source File: PipelineRunnerTest.java    From beam with Apache License 2.0
@Test
public void testInstantiation() {
  PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(CrashingRunner.class);
  PipelineRunner<?> runner = PipelineRunner.fromOptions(options);
  assertTrue(runner instanceof CrashingRunner);
}
 
Example 8
Source File: CrashingRunnerTest.java    From beam with Apache License 2.0
@Test
public void runThrows() {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(CrashingRunner.class);
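  // CrashingRunner permits graph construction but throws as soon as the pipeline is run.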

  Pipeline p = Pipeline.create(opts);
  p.apply(Create.of(1, 2, 3));

  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage("Cannot call #run");
  thrown.expectMessage(TestPipeline.PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);

  p.run();
}
 
Example 9
Source File: CrashingRunnerTest.java    From beam with Apache License 2.0
@Test
public void applySucceeds() {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(CrashingRunner.class);

  Pipeline p = Pipeline.create(opts);
  p.apply(Create.of(1, 2, 3));
}
 
Example 10
Source File: WindowRuntimeTest.java    From components with Apache License 2.0
@Test
public void testFixedWindow() {

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create a PCollection of IndexedRecord elements with different timestamps.

    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(TimestampedValue.of(irA, new Instant(1L)),
            TimestampedValue.of(irB, new Instant(2L)), TimestampedValue.of(irC, new Instant(3L)));

    PCollection<IndexedRecord> input = p.apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    WindowProperties windowProperties = new WindowProperties("window");
    // Fixed window of length 2: a negative slide length disables sliding, and session windows are off.
    windowProperties.setValue("windowLength", 2);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowedCounts = test.apply(Count.<IndexedRecord>perElement());

    // Fixed window duration: 2. Each record appears exactly once, so every per-element count is 1.
    PAssert.that(windowedCounts).containsInAnyOrder(KV.of(irA, 1L), KV.of(irB, 1L), KV.of(irC, 1L));

    p.run();
}
 
Example 11
Source File: PipelineTest.java    From beam with Apache License 2.0
@Test
public void testPipelineUserExceptionHandling() {
  PipelineOptions options = TestPipeline.testingPipelineOptions();
  options.setRunner(TestPipelineRunnerThrowingUserException.class);
  Pipeline p = Pipeline.create(options);

  // Check pipeline runner correctly catches user errors.
  thrown.expect(PipelineExecutionException.class);
  thrown.expectCause(isA(IllegalStateException.class));
  thrown.expectMessage("user code exception");
  p.run();
}
 
Example 12
Source File: TestPipeline.java    From beam with Apache License 2.0
/** Creates {@link PipelineOptions} for testing. */
public static PipelineOptions testingPipelineOptions() {
  try {
    @Nullable
    String beamTestPipelineOptions = System.getProperty(PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);

    PipelineOptions options =
        Strings.isNullOrEmpty(beamTestPipelineOptions)
            ? PipelineOptionsFactory.create()
            : PipelineOptionsFactory.fromArgs(
                    MAPPER.readValue(beamTestPipelineOptions, String[].class))
                .as(TestPipelineOptions.class);

    // If no options were specified, set some reasonable defaults
    if (Strings.isNullOrEmpty(beamTestPipelineOptions)) {
      // If there are no provided options, check to see if a dummy runner should be used.
      String useDefaultDummy = System.getProperty(PROPERTY_USE_DEFAULT_DUMMY_RUNNER);
      if (!Strings.isNullOrEmpty(useDefaultDummy) && Boolean.valueOf(useDefaultDummy)) {
        options.setRunner(CrashingRunner.class);
      }
    }
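    // With CheckEnabled.ERROR, pipelines fail when transform names are not stable and unique.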
    options.setStableUniqueNames(CheckEnabled.ERROR);

    FileSystems.setDefaultPipelineOptions(options);
    return options;
  } catch (IOException e) {
    throw new RuntimeException(
        "Unable to instantiate test options from system property "
            + PROPERTY_BEAM_TEST_PIPELINE_OPTIONS
            + ":"
            + System.getProperty(PROPERTY_BEAM_TEST_PIPELINE_OPTIONS),
        e);
  }
}
 
Example 13
Source File: ConvertToIndexedRecordTest.java    From components with Apache License 2.0
/**
 * Demonstrates the basic use case for the {@link ConvertToIndexedRecord}.
 */
@Test
public void testBasic() {

    String[] inputValues = { "one", "two", "three" };
    // The output values should use the standard primitive converter.
    SingleColumnIndexedRecordConverter<String> converter =
            new SingleColumnIndexedRecordConverter<>(String.class, Schema.create(Schema.Type.STRING));
    IndexedRecord[] outputExpected = new IndexedRecord[inputValues.length];
    for (int i = 0; i < inputValues.length; i++) {
        outputExpected[i] = converter.convertToAvro(inputValues[i]);
    }

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    PCollection<String> input = p.apply(Create.of(Arrays.asList(inputValues)));

    // Collect the results before and after the transformation.
    PCollection<IndexedRecord> output = input.apply(ConvertToIndexedRecord.<String> of());

    // Validate the contents of the collections in the pipeline.
    PAssert.that(input).containsInAnyOrder(inputValues);
    PAssert.that(output).containsInAnyOrder(outputExpected);

    // Run the pipeline to fill the collectors.
    p.run().waitUntilFinish();
}
 
Example 14
Source File: Broadcast.java    From nemo with Apache License 2.0
/**
 * Main function for the BEAM program.
 * @param args arguments.
 */
public static void main(final String[] args) {
  final String inputFilePath = args[0];
  final String outputFilePath = args[1];
  final PipelineOptions options = PipelineOptionsFactory.create();
  options.setRunner(NemoPipelineRunner.class);

  final Pipeline p = Pipeline.create(options);
  final PCollection<String> elemCollection = GenericSourceSink.read(p, inputFilePath);
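  // View.asIterable materializes the whole collection as a side input, so each element can be processed against all elements.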
  final PCollectionView<Iterable<String>> allCollection = elemCollection.apply(View.<String>asIterable());

  final PCollection<String> result = elemCollection.apply(ParDo.of(new DoFn<String, String>() {
        @ProcessElement
        public void processElement(final ProcessContext c) {
          final String line = c.element();
          final Iterable<String> all = c.sideInput(allCollection);
          final Optional<String> appended = StreamSupport.stream(all.spliterator(), false)
              .reduce((l, r) -> l + '\n' + r);
          if (appended.isPresent()) {
            c.output("line: " + line + "\n" + appended.get());
          } else {
            c.output("error");
          }
        }
      }).withSideInputs(allCollection)
  );

  GenericSourceSink.write(result, outputFilePath);
  p.run();
}
 
Example 15
Source File: ReplicateRuntimeTest.java    From components with Apache License 2.0
/**
 * Check {@link ReplicateRuntime#build(BeamJobContext)}
 */
@Test
public void testBuild() {

    // Create pipeline
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create PCollection for test
    Schema a = GenericDataRecordHelper.createSchemaFromObject("a", new Object[] { "a" });
    IndexedRecord irA = GenericDataRecordHelper.createRecord(a, new Object[] { "a" });
    IndexedRecord irB = GenericDataRecordHelper.createRecord(a, new Object[] { "b" });
    IndexedRecord irC = GenericDataRecordHelper.createRecord(a, new Object[] { "c" });

    List<IndexedRecord> data = Arrays.asList( //
            irA, //
            irB, //
            irC, //
            irA, //
            irA, //
            irC //
    );

    PCollection<IndexedRecord> input = p.apply(Create.of(data).withCoder(LazyAvroCoder.of()));

    ReplicateProperties replicateProperties = new ReplicateProperties("test");
    replicateRuntime.initialize(null, replicateProperties);
    BeamJobContext context = Mockito.mock(BeamJobContext.class);
    replicateRuntime.build(context);
    verify(context, times(1)).getLinkNameByPortName(anyString());
    verify(context, times(0)).getPCollectionByLinkName(anyString());

    BeamJobContext ctx = Mockito.mock(BeamJobContext.class);
    when(ctx.getLinkNameByPortName(anyString())).thenReturn("test");
    when(ctx.getPCollectionByLinkName(anyString())).thenReturn(input);
    replicateRuntime.build(ctx);
    verify(ctx, times(3)).getLinkNameByPortName(anyString());
    verify(ctx, times(1)).getPCollectionByLinkName(anyString());
}
 
Example 16
Source File: TransMetaPipelineConverter.java    From kettle-beam with Apache License 2.0
public Pipeline createPipeline( PipelineOptions pipelineOptions ) throws Exception {

    LogChannelInterface log = LogChannel.GENERAL;

    // Create a new Pipeline
    //
    RunnerType runnerType = RunnerType.getRunnerTypeByName( beamJobConfig.getRunnerTypeName() );
    Class<? extends PipelineRunner<?>> runnerClass = getPipelineRunnerClass(runnerType);

    pipelineOptions.setRunner( runnerClass );
    Pipeline pipeline = Pipeline.create( pipelineOptions );

    pipeline.getCoderRegistry().registerCoderForClass( KettleRow.class, new KettleRowCoder() );

    log.logBasic( "Created pipeline job with name '" + pipelineOptions.getJobName() + "'" );

    // Keep track of which step outputs which Collection
    //
    Map<String, PCollection<KettleRow>> stepCollectionMap = new HashMap<>();

    // Handle io
    //
    handleBeamInputSteps( log, stepCollectionMap, pipeline );

    // Transform all the other steps...
    //
    handleGenericStep( stepCollectionMap, pipeline );

    // Output handling
    //
    handleBeamOutputSteps( log, stepCollectionMap, pipeline );

    return pipeline;
  }
 
Example 17
Source File: HopPipelineMetaToBeamPipelineConverter.java    From hop with Apache License 2.0
public Pipeline createPipeline() throws Exception {

    ILogChannel log = LogChannel.GENERAL;

    // Create a new Pipeline
    //
    RunnerType runnerType = pipelineRunConfiguration.getRunnerType();
    Class<? extends PipelineRunner<?>> runnerClass = getPipelineRunnerClass( runnerType );

    PipelineOptions pipelineOptions = pipelineRunConfiguration.getPipelineOptions();
    // The generic options
    //
    pipelineOptions.setUserAgent( pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getUserAgent() ) );
    pipelineOptions.setTempLocation( pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getTempLocation() ) );
    pipelineOptions.setJobName( pipelineMeta.getName() );

    pipelineOptions.setRunner( runnerClass );
    Pipeline pipeline = Pipeline.create( pipelineOptions );

    pipeline.getCoderRegistry().registerCoderForClass( HopRow.class, new HopRowCoder() );

    log.logBasic( "Created Apache Beam pipeline with name '" + pipelineOptions.getJobName() + "'" );

    // Keep track of which transform outputs which Collection
    //
    Map<String, PCollection<HopRow>> stepCollectionMap = new HashMap<>();

    // Handle io
    //
    handleBeamInputSteps( log, stepCollectionMap, pipeline );

    // Transform all the other transforms...
    //
    handleGenericStep( stepCollectionMap, pipeline );

    // Output handling
    //
    handleBeamOutputSteps( log, stepCollectionMap, pipeline );

    return pipeline;
  }
 
Example 18
Source File: DirectRunnerTest.java    From beam with Apache License 2.0
private Pipeline getPipeline(boolean blockOnRun) {
  PipelineOptions opts = PipelineOptionsFactory.create();
  opts.setRunner(DirectRunner.class);
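  // blockOnRun=false makes DirectRunner#run() return immediately instead of blocking until the pipeline finishes.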
  opts.as(DirectOptions.class).setBlockOnRun(blockOnRun);
  return Pipeline.create(opts);
}
 
Example 19
Source File: SparkRunnerDebuggerTest.java    From beam with Apache License 2.0
@Test
public void debugBatchPipeline() {
  PipelineOptions options = PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setRunner(SparkRunnerDebugger.class);
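  // SparkRunnerDebugger translates the pipeline into Spark code without executing it and exposes the result as a debug string.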

  Pipeline pipeline = Pipeline.create(options);

  PCollection<String> lines =
      pipeline.apply(Create.of(Collections.<String>emptyList()).withCoder(StringUtf8Coder.of()));

  PCollection<KV<String, Long>> wordCounts = lines.apply(new WordCount.CountWords());

  wordCounts.apply(GroupByKey.create()).apply(Combine.groupedValues(Sum.ofLongs()));

  PCollection<KV<String, Long>> wordCountsPlusOne =
      wordCounts.apply(MapElements.via(new PlusOne()));

  PCollectionList.of(wordCounts).and(wordCountsPlusOne).apply(Flatten.pCollections());

  wordCounts
      .apply(MapElements.via(new WordCount.FormatAsTextFn()))
      .apply(TextIO.write().to("!!PLACEHOLDER-OUTPUT-DIR!!").withNumShards(3).withSuffix(".txt"));

  final String expectedPipeline =
      "sparkContext.<readFrom(org.apache.beam.sdk.transforms.Create$Values$CreateSource)>()\n"
          + "_.mapPartitions("
          + "new org.apache.beam.runners.spark.examples.WordCount$ExtractWordsFn())\n"
          + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
          + "_.combineByKey(..., new org.apache.beam.sdk.transforms.Count$CountFn(), ...)\n"
          + "_.groupByKey()\n"
          + "_.map(new org.apache.beam.sdk.transforms.Sum$SumLongFn())\n"
          + "_.mapPartitions(new org.apache.beam.sdk.transforms.Contextful())\n"
          + "sparkContext.union(...)\n"
          + "_.mapPartitions("
          + "new org.apache.beam.sdk.transforms.Contextful())\n"
          + "_.<org.apache.beam.sdk.io.TextIO$Write>";

  SparkRunnerDebugger.DebugSparkPipelineResult result =
      (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();

  assertThat(
      "Debug pipeline did not equal expected",
      result.getDebugString(),
      Matchers.equalTo(expectedPipeline));
}
 
Example 20
Source File: WindowRuntimeTest.java    From components with Apache License 2.0
@Test
public void testSessionWindow() {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create a PCollection of IndexedRecord elements with different timestamps.
    List<TimestampedValue<IndexedRecord>> data = Arrays.asList( //
            TimestampedValue.of(irA, new Instant(0L)), //
            TimestampedValue.of(irB, new Instant(0L)), //
            TimestampedValue.of(irC, new Instant(1L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(30L)), //
            TimestampedValue.of(irA, new Instant(30L)), //
            TimestampedValue.of(irA, new Instant(50L)), //
            TimestampedValue.of(irC, new Instant(55L)), //
            TimestampedValue.of(irA, new Instant(59L)));

    PCollection<IndexedRecord> input =
            p.apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.setValue("windowLength", 10);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", true);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowedCounts = test.apply(Count.<IndexedRecord>perElement());

    // Session windows with a gap duration of 10: a new session starts after a gap of 10 or more in event time.
    PAssert.that(windowedCounts).containsInAnyOrder( //
            KV.of(irA, 3L), //
            KV.of(irB, 2L), //
            KV.of(irC, 1L), //

            KV.of(irB, 1L), //
            KV.of(irA, 1L), //

            KV.of(irA, 2L), //
            KV.of(irC, 1L));

    p.run();
}