org.apache.beam.runners.direct.DirectRunner Java Examples

The following examples show how to use org.apache.beam.runners.direct.DirectRunner. Each example is an excerpt from an open-source project; the source file, project, and license are noted above each snippet.
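DirectRunner executes a pipeline in-process on the local machine, adding extra model-validation checks as it runs, which makes it the usual choice for unit tests and local development. As a minimal, self-contained sketch (the class name DirectRunnerHello is illustrative; the rest is the standard Beam API):

import org.apache.beam.runners.direct.DirectRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class DirectRunnerHello {

  public static void main(String[] args) {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class); // execute in-process
    Pipeline p = Pipeline.create(options);

    // A trivial source, just to give the runner something to execute.
    p.apply(Create.of("one", "two", "three"));

    p.run().waitUntilFinish(); // block until the local run completes
  }
}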
Example #1
Source File: HopPipelineMetaToBeamPipelineConverter.java    From hop with Apache License 2.0
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws HopException {
  if ( runnerType == null ) {
    throw new HopException( "Please specify a valid runner type" );
  }
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.class;
    case Flink:
      return FlinkRunner.class;
    case Spark:
      return SparkRunner.class;
    case DataFlow:
      return DataflowRunner.class;
    default:
      throw new HopException( "Unsupported runner type: " + runnerType.name() );
  }
}
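The returned class is typically handed straight to the pipeline options, along the lines of pipelineOptions.setRunner( getPipelineRunnerClass( runnerType ) ); the return type matches the Class<? extends PipelineRunner<?>> that PipelineOptions.setRunner expects.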
 
Example #2
Source File: BeamPipelineEngine.java    From hop with Apache License 2.0
private PipelineResult executePipeline( org.apache.beam.sdk.Pipeline pipeline ) throws HopException {

    RunnerType runnerType = beamEngineRunConfiguration.getRunnerType();
    switch ( runnerType ) {
      case Direct:
        return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Flink:
        return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case DataFlow:
        return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Spark:
        return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      default:
        throw new HopException( "Execution on runner '" + runnerType.name() + "' is not supported yet." );
    }
  }
 
Example #3
Source File: DirectRunnerJobManager.java    From feast with Apache License 2.0
private ImportOptions getPipelineOptions(
    String jobName, SourceProto.Source source, Set<StoreProto.Store> sinks)
    throws IOException, IllegalAccessException {
  ImportOptions pipelineOptions =
      PipelineOptionsFactory.fromArgs(defaultOptions.toArgs()).as(ImportOptions.class);

  JsonFormat.Printer printer = JsonFormat.printer();
  pipelineOptions.setSpecsStreamingUpdateConfigJson(printer.print(specsStreamingUpdateConfig));
  pipelineOptions.setSourceJson(printer.print(source));
  pipelineOptions.setJobName(jobName);
  pipelineOptions.setStoresJson(
      sinks.stream().map(wrapException(printer::print)).collect(Collectors.toList()));
  pipelineOptions.setRunner(DirectRunner.class);
  pipelineOptions.setDefaultFeastProject(Project.DEFAULT_NAME);
  pipelineOptions.setProject(""); // set to default value to satisfy validation
  if (metrics.isEnabled()) {
    pipelineOptions.setMetricsExporterType(metrics.getType());
    if (metrics.getType().equals("statsd")) {
      pipelineOptions.setStatsdHost(metrics.getHost());
      pipelineOptions.setStatsdPort(metrics.getPort());
    }
  }
  pipelineOptions.setBlockOnRun(false);
  return pipelineOptions;
}
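Because the manager sets blockOnRun to false, DirectRunner.run(...) returns a PipelineResult as soon as the job starts instead of blocking until it finishes, and the job is then tracked or stopped through that handle. A rough sketch of that interaction (the class and method names here are illustrative):

import java.io.IOException;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;

class NonBlockingLaunch {

  // Start the pipeline without blocking; the caller keeps the handle.
  static PipelineResult start(Pipeline pipeline) {
    PipelineResult result = pipeline.run(); // returns once the job is started
    System.out.println("State after submit: " + result.getState());
    return result;
  }

  // Stop a previously started job.
  static void stop(PipelineResult result) throws IOException {
    result.cancel();
  }
}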
 
Example #4
Source File: BeamDirectTestResource.java    From components with Apache License 2.0
/**
 * @return the options used to create this pipeline. These can be changed before the Pipeline is created.
 */
public DirectOptions getOptions() {
    if (options == null) {
        options = PipelineOptionsFactory.create().as(DirectOptions.class);
        options.setRunner(DirectRunner.class);
    }
    return options;
}
 
Example #5
Source File: ReplicateRuntimeTest.java    From components with Apache License 2.0
/**
 * Check {@link ReplicateRuntime#build(BeamJobContext)}
 */
@Test
public void testBuild() {

    // Create pipeline
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create PCollection for test
    Schema a = GenericDataRecordHelper.createSchemaFromObject("a", new Object[] { "a" });
    IndexedRecord irA = GenericDataRecordHelper.createRecord(a, new Object[] { "a" });
    IndexedRecord irB = GenericDataRecordHelper.createRecord(a, new Object[] { "b" });
    IndexedRecord irC = GenericDataRecordHelper.createRecord(a, new Object[] { "c" });

    List<IndexedRecord> data = Arrays.asList( //
            irA, //
            irB, //
            irC, //
            irA, //
            irA, //
            irC //
    );

    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) p.apply(Create.of(data).withCoder(LazyAvroCoder.of()));

    ReplicateProperties replicateProperties = new ReplicateProperties("test");
    replicateRuntime.initialize(null, replicateProperties);
    BeamJobContext context = Mockito.mock(BeamJobContext.class);
    replicateRuntime.build(context);
    verify(context, times(1)).getLinkNameByPortName(anyString());
    verify(context, times(0)).getPCollectionByLinkName(anyString());

    BeamJobContext ctx = Mockito.mock(BeamJobContext.class);
    when(ctx.getLinkNameByPortName(anyString())).thenReturn("test");
    when(ctx.getPCollectionByLinkName(anyString())).thenReturn(input);
    replicateRuntime.build(ctx);
    verify(ctx, times(3)).getLinkNameByPortName(anyString());
    verify(ctx, times(1)).getPCollectionByLinkName(anyString());
}
 
Example #6
Source File: WindowRuntimeTest.java    From components with Apache License 2.0
@Test
public void testFixedWindow() {

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create a PCollection<IndexedRecord> whose elements carry different timestamps.

    List<TimestampedValue<IndexedRecord>> data = Arrays.asList(TimestampedValue.of(irA, new Instant(1L)),
            TimestampedValue.of(irB, new Instant(2L)), TimestampedValue.of(irC, new Instant(3L)));

    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) p
            .apply(Create.timestamped(data).withCoder(LazyAvroCoder.of()));

    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.windowLength.setValue(2);
    windowProperties.windowSlideLength.setValue(-1);
    windowProperties.windowSession.setValue(false);

    windowProperties.setValue("windowLength", 2);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowed_counts = test.apply(Count.<IndexedRecord> perElement());

    // Fixed window duration: 2

    PAssert.that(windowed_counts).containsInAnyOrder(KV.of(irA, 1L), KV.of(irB, 1L), KV.of(irC, 1L));

    p.run();
}
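With a window length of 2, irA (timestamp 1) falls into the window [0, 2) while irB and irC (timestamps 2 and 3) fall into [2, 4), so each element is counted exactly once within its window. WindowRuntime presumably wraps Beam's own windowing transform; as a sketch under that assumption, the equivalent fixed windowing in plain Beam (using org.apache.beam.sdk.transforms.windowing.Window and FixedWindows, plus org.joda.time.Duration) would be:

    // Assign each element to a 2 ms fixed window (a sketch; WindowRuntime's
    // internals may differ).
    PCollection<IndexedRecord> windowed = input.apply(
            Window.<IndexedRecord> into(FixedWindows.of(Duration.millis(2))));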
 
Example #7
Source File: ConvertToIndexedRecordTest.java    From components with Apache License 2.0
/**
 * Demonstrates the basic use case for the {@link ConvertToIndexedRecord}.
 */
@Test
public void testBasic() {

    String[] inputValues = { "one", "two", "three" };
    // The output values should use the standard primitive converter.
    SingleColumnIndexedRecordConverter<String> converter = new SingleColumnIndexedRecordConverter<>(String.class,
            Schema.create(Schema.Type.STRING));
    IndexedRecord[] outputExpected = new IndexedRecord[inputValues.length];
    for (int i = 0; i < inputValues.length; i++) {
        outputExpected[i] = converter.convertToAvro(inputValues[i]);
    }

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    PCollection<String> input = p.apply(Create.of(Arrays.asList(inputValues)));

    // Collect the results before and after the transformation.
    PCollection<IndexedRecord> output = input.apply(ConvertToIndexedRecord.<String> of());

    // Validate the contents of the collections in the pipeline.
    PAssert.that(input).containsInAnyOrder(inputValues);
    PAssert.that(output).containsInAnyOrder(outputExpected);

    // Run the pipeline to fill the collectors.
    p.run().waitUntilFinish();
}
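Note that PAssert assertions are only evaluated when the pipeline actually executes, which is why the test ends with p.run().waitUntilFinish().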
 
Example #8
Source File: BeamLocalRunnerOption.java    From components with Apache License 2.0
public static DirectOptions getOptions() {
    if (options == null) {
        LOGGER.info("Create DirectOption");
        options = PipelineOptionsFactory.as(DirectOptions.class);
        options.setTargetParallelism(1);
        options.setRunner(DirectRunner.class);
        options.setEnforceEncodability(false);
        options.setEnforceImmutability(false);
    }
    return options;
}
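The two enforce* flags switch off extra validation that is specific to the DirectRunner: enforceImmutability checks that transforms do not mutate their input elements, and enforceEncodability checks that every element can round-trip through its PCollection's coder. Disabling them, together with setTargetParallelism(1), trades those safety checks for faster, more deterministic local runs.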
 
Example #9
Source File: PerfsToBigQueryTest.java    From beam with Apache License 2.0
@Before
public void before() {
  options = PipelineOptionsFactory.create().as(NexmarkOptions.class);
  options.setBigQueryTable("nexmark");
  options.setBigQueryDataset("nexmark");
  options.setRunner(DirectRunner.class);
  options.setStreaming(true);
  options.setProject("nexmark-test");
  options.setResourceNameMode(NexmarkUtils.ResourceNameMode.QUERY_RUNNER_AND_MODE);

  publisher = new FakeBigQueryResultsPublisher();
}
 
Example #10
Source File: KettleBeamPipelineExecutor.java    From kettle-beam with Apache License 2.0
private PipelineResult asyncExecutePipeline( Pipeline pipeline ) throws KettleException {

    RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( jobConfig.getRunnerTypeName() ) );
    if (runnerType==null) {
      throw new KettleException( "Runner type '"+jobConfig.getRunnerTypeName()+"' is not recognized");
    }
    switch ( runnerType ) {
      case Direct: return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Flink: return FlinkRunner.fromOptions(pipeline.getOptions()).run( pipeline );
      case DataFlow: return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Spark: return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      default:
        throw new KettleException( "Execution on runner '" + runnerType.name() + "' is not supported yet, sorry." );
    }
  }
 
Example #11
Source File: TransMetaPipelineConverter.java    From kettle-beam with Apache License 2.0
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws KettleException {
  if (runnerType==null) {
    throw new KettleException( "Please specify a valid runner type");
  }
  switch(runnerType) {
    case Direct: return DirectRunner.class;
    case Flink: return FlinkRunner.class;
    case Spark: return SparkRunner.class;
    case DataFlow: return DataflowRunner.class;
    default:
      throw new KettleException( "Unsupported runner type: "+runnerType.name() );
  }
}
 
Example #12
Source File: Spec11PipelineTest.java    From nomulus with Apache License 2.0
@BeforeClass
public static void initializePipelineOptions() {
  pipelineOptions = PipelineOptionsFactory.create();
  pipelineOptions.setRunner(DirectRunner.class);
}
 
Example #13
Source File: __componentNameClass__RuntimeTest.java    From components with Apache License 2.0
/**
 * Check {@link ${componentNameClass}Runtime#build(BeamJobContext)}
 */
@Test
public void testBuild() {

    // Create pipeline
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create PCollection for test
    Schema a = GenericDataRecordHelper.createSchemaFromObject("a", new Object[] { "a" });
    IndexedRecord irA = GenericDataRecordHelper.createRecord(a, new Object[] { "a" });
    IndexedRecord irB = GenericDataRecordHelper.createRecord(a, new Object[] { "b" });
    IndexedRecord irC = GenericDataRecordHelper.createRecord(a, new Object[] { "c" });
    /*
    *   Example of test
    *
    List<IndexedRecord> data = Arrays.asList( //
            irA, //
            irB, //
            irC, //
            irA, //
            irA, //
            irC //
    );

    PCollection<IndexedRecord> input = (PCollection<IndexedRecord>) p.apply(Create.of(data).withCoder(LazyAvroCoder.of()));

    ${componentNameClass}Properties ${componentNameLowerCase}Properties = new ${componentNameClass}Properties("test");
    ${componentNameLowerCase}Runtime.initialize(null, ${componentNameLowerCase}Properties);
    BeamJobContext context = Mockito.mock(BeamJobContext.class);
    ${componentNameLowerCase}Runtime.build(context);
    verify(context, times(1)).getLinkNameByPortName(anyString());
    verify(context, times(0)).getPCollectionByLinkName(anyString());

    BeamJobContext ctx = Mockito.mock(BeamJobContext.class);
    when(ctx.getLinkNameByPortName(anyString())).thenReturn("test");
    when(ctx.getPCollectionByLinkName(anyString())).thenReturn(input);
    ${componentNameLowerCase}Runtime.build(ctx);
    verify(ctx, times(2)).getLinkNameByPortName(anyString());
    verify(ctx, times(1)).getPCollectionByLinkName(anyString());
     */
}
 
Example #14
Source File: InvoicingPipelineTest.java    From nomulus with Apache License 2.0
@BeforeClass
public static void initializePipelineOptions() {
  pipelineOptions = PipelineOptionsFactory.create();
  pipelineOptions.setRunner(DirectRunner.class);
}
 
Example #15
Source File: DirectRunnerJobManagerTest.java    From feast with Apache License 2.0
@Test
public void shouldStartDirectJobAndRegisterPipelineResult() throws IOException {
  StoreProto.Store store =
      StoreProto.Store.newBuilder()
          .setName("SERVING")
          .setType(StoreType.REDIS)
          .setRedisConfig(RedisConfig.newBuilder().setHost("localhost").setPort(6379).build())
          .addSubscriptions(Subscription.newBuilder().setProject("*").setName("*").build())
          .build();

  SourceProto.Source source =
      SourceProto.Source.newBuilder()
          .setType(SourceType.KAFKA)
          .setKafkaSourceConfig(
              KafkaSourceConfig.newBuilder()
                  .setTopic("topic")
                  .setBootstrapServers("servers:9092")
                  .build())
          .build();

  FeatureSetProto.FeatureSet featureSet =
      FeatureSetProto.FeatureSet.newBuilder()
          .setSpec(
              FeatureSetSpec.newBuilder()
                  .setName("featureSet")
                  .setMaxAge(Duration.newBuilder())
                  .setSource(source)
                  .build())
          .build();

  Printer printer = JsonFormat.printer();

  String expectedJobId = "feast-job-0";
  ImportOptions expectedPipelineOptions =
      PipelineOptionsFactory.fromArgs("").as(ImportOptions.class);
  expectedPipelineOptions.setJobName(expectedJobId);
  expectedPipelineOptions.setAppName("DirectRunnerJobManager");
  expectedPipelineOptions.setRunner(DirectRunner.class);
  expectedPipelineOptions.setBlockOnRun(false);
  expectedPipelineOptions.setTargetParallelism(1);
  expectedPipelineOptions.setStoresJson(Lists.newArrayList(printer.print(store)));
  expectedPipelineOptions.setProject("");
  expectedPipelineOptions.setSourceJson(printer.print(source));

  ArgumentCaptor<ImportOptions> pipelineOptionsCaptor =
      ArgumentCaptor.forClass(ImportOptions.class);
  ArgumentCaptor<DirectJob> directJobCaptor = ArgumentCaptor.forClass(DirectJob.class);

  PipelineResult mockPipelineResult = Mockito.mock(PipelineResult.class);
  doReturn(mockPipelineResult).when(drJobManager).runPipeline(any());

  Job job =
      Job.builder()
          .setId(expectedJobId)
          .setExtId("")
          .setRunner(Runner.DIRECT)
          .setSource(Source.fromProto(source))
          .setStores(ImmutableSet.of(Store.fromProto(store)))
          .setFeatureSetJobStatuses(makeFeatureSetJobStatus(FeatureSet.fromProto(featureSet)))
          .setStatus(JobStatus.PENDING)
          .build();

  Job actual = drJobManager.startJob(job);

  verify(drJobManager, times(1)).runPipeline(pipelineOptionsCaptor.capture());
  verify(directJobRegistry, times(1)).add(directJobCaptor.capture());
  assertThat(actual.getStatus(), equalTo(JobStatus.RUNNING));

  ImportOptions actualPipelineOptions = pipelineOptionsCaptor.getValue();
  DirectJob jobStarted = directJobCaptor.getValue();
  expectedPipelineOptions.setOptionsId(
      actualPipelineOptions.getOptionsId()); // avoid comparing this value

  assertThat(
      actualPipelineOptions.getDeadLetterTableSpec(),
      equalTo(expectedPipelineOptions.getDeadLetterTableSpec()));
  assertThat(
      actualPipelineOptions.getStatsdHost(), equalTo(expectedPipelineOptions.getStatsdHost()));
  assertThat(
      actualPipelineOptions.getMetricsExporterType(),
      equalTo(expectedPipelineOptions.getMetricsExporterType()));
  assertThat(
      actualPipelineOptions.getStoresJson(), equalTo(expectedPipelineOptions.getStoresJson()));
  assertThat(
      actualPipelineOptions.getSourceJson(), equalTo(expectedPipelineOptions.getSourceJson()));
  assertThat(
      actualPipelineOptions.getSpecsStreamingUpdateConfigJson(),
      equalTo(printer.print(specsStreamingUpdateConfig)));

  assertThat(jobStarted.getPipelineResult(), equalTo(mockPipelineResult));
  assertThat(jobStarted.getJobId(), equalTo(expectedJobId));
  assertThat(actual.getExtId(), equalTo(expectedJobId));
}
 
Example #16
Source File: WindowRuntimeTest.java    From components with Apache License 2.0
@Test
public void testSlidingWindow() {

    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create a PCollection<IndexedRecord> whose elements carry different timestamps.
    List<TimestampedValue<IndexedRecord>> data = Arrays.asList( //
            TimestampedValue.of(irA, new Instant(0L)), //
            TimestampedValue.of(irB, new Instant(0L)), //
            TimestampedValue.of(irC, new Instant(1L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(3L)), //
            TimestampedValue.of(irC, new Instant(3L)), //
            TimestampedValue.of(irA, new Instant(4L)));

    Create.TimestampedValues<IndexedRecord> pt = Create.timestamped(data);
    pt = (Create.TimestampedValues<IndexedRecord>) pt.withCoder(LazyAvroCoder.of());
    PCollection<IndexedRecord> input = p.apply(pt);

    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.setValue("windowLength", 4);
    windowProperties.setValue("windowSlideLength", 2);
    windowProperties.setValue("windowSession", false);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowed_counts = test.apply(Count.<IndexedRecord> perElement());

    // window duration: 4 - sliding: 2
    PAssert.that(windowed_counts).containsInAnyOrder( //
            KV.of(irA, 1L), //
            KV.of(irA, 1L), //
            KV.of(irA, 3L), //
            KV.of(irA, 3L), //
            KV.of(irB, 1L), //
            KV.of(irB, 3L), //
            KV.of(irB, 2L), //
            KV.of(irC, 1L), //
            KV.of(irC, 1L), //
            KV.of(irC, 2L));
    p.run();
}
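Because the window length (4) is twice the slide interval (2), every element lands in two overlapping windows, here [-2, 2), [0, 4), [2, 6) and [4, 8), which is why the same key appears with several different counts in the expected results.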
 
Example #17
Source File: WindowRuntimeTest.java    From components with Apache License 2.0
@Test
public void testSessionWindow() {
    PipelineOptions options = PipelineOptionsFactory.create();
    options.setRunner(DirectRunner.class);
    final Pipeline p = Pipeline.create(options);

    // Create a PCollection<IndexedRecord> whose elements carry different timestamps.
    List<TimestampedValue<IndexedRecord>> data = Arrays.asList( //
            TimestampedValue.of(irA, new Instant(0L)), //
            TimestampedValue.of(irB, new Instant(0L)), //
            TimestampedValue.of(irC, new Instant(1L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irA, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(2L)), //
            TimestampedValue.of(irB, new Instant(30L)), //
            TimestampedValue.of(irA, new Instant(30L)), //
            TimestampedValue.of(irA, new Instant(50L)), //
            TimestampedValue.of(irC, new Instant(55L)), //
            TimestampedValue.of(irA, new Instant(59L)));

    Create.TimestampedValues<IndexedRecord> pt = Create.timestamped(data);
    pt = (Create.TimestampedValues<IndexedRecord>) pt.withCoder(LazyAvroCoder.of());
    PCollection<IndexedRecord> input = p.apply(pt);

    WindowProperties windowProperties = new WindowProperties("window");
    windowProperties.setValue("windowLength", 10);
    windowProperties.setValue("windowSlideLength", -1);
    windowProperties.setValue("windowSession", true);

    WindowRuntime windowRun = new WindowRuntime();
    windowRun.initialize(null, windowProperties);

    PCollection<IndexedRecord> test = windowRun.expand(input);

    PCollection<KV<IndexedRecord, Long>> windowed_counts = test.apply(Count.<IndexedRecord> perElement());

    // Session windows with a gap duration of 10
    PAssert.that(windowed_counts).containsInAnyOrder( //
            KV.of(irA, 3L), //
            KV.of(irB, 2L), //
            KV.of(irC, 1L), //

            KV.of(irB, 1L), //
            KV.of(irA, 1L), //

            KV.of(irA, 2L), //
            KV.of(irC, 1L));

    p.run();
}
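Session windows are merged per key: with a gap of 10, irA's timestamps (0, 2, 2, 30, 50, 59) collapse into three sessions of 3, 1 and 2 elements, irB's (0, 2, 30) into sessions of 2 and 1, and irC's (1, 55) into two single-element sessions, which matches the counts asserted above.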