org.apache.beam.sdk.PipelineRunner Java Examples

The following examples show how to use org.apache.beam.sdk.PipelineRunner. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HopPipelineMetaToBeamPipelineConverter.java    From hop with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a {@link RunnerType} onto the Beam runner class that executes pipelines for it.
 *
 * @param runnerType the runner type to resolve
 * @return the runner class matching {@code runnerType}
 * @throws HopException if {@code runnerType} is {@code null} or is not one of the handled types
 */
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws HopException {
  if ( runnerType == null ) {
    throw new HopException( "Please specify a valid runner type" );
  }
  if ( runnerType == RunnerType.Direct ) {
    return DirectRunner.class;
  }
  if ( runnerType == RunnerType.Flink ) {
    return FlinkRunner.class;
  }
  if ( runnerType == RunnerType.Spark ) {
    return SparkRunner.class;
  }
  if ( runnerType == RunnerType.DataFlow ) {
    return DataflowRunner.class;
  }
  // Any remaining enum constant has no runner mapping.
  throw new HopException( "Unsupported runner type: " + runnerType.name() );
}
 
Example #2
Source File: PipelineOptionsFactoryTest.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a registered runner can be looked up by its lower-cased simple name with the
 * trailing "Runner" removed.
 */
@Test
public void testAutomaticRegistrationInculdesWithoutRunnerSuffix() {
  final String simpleName = REGISTERED_RUNNER.getSimpleName();
  final int prefixLength = simpleName.length() - "Runner".length();

  // Sanity check to make sure the substring works appropriately
  assertEquals("RegisteredTest", simpleName.substring(0, prefixLength));

  Map<String, Class<? extends PipelineRunner<?>>> registered =
      PipelineOptionsFactory.CACHE.get().getSupportedPipelineRunners();
  final String suffixlessKey = simpleName.toLowerCase().substring(0, prefixLength);
  assertEquals(REGISTERED_RUNNER, registered.get(suffixlessKey));
}
 
Example #3
Source File: SerializableCoder.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Logs a warning, at most once per class, when {@code clazz} is an interface or only inherits
 * {@code Object#equals}.
 *
 * @param clazz the serializable type to inspect
 */
private static <T extends Serializable> void checkEqualsMethodDefined(Class<T> clazz) {
  boolean equalsUnverified;
  if (clazz.isInterface()) {
    // Interfaces are not inspected; they always qualify for the warning.
    equalsUnverified = true;
  } else {
    Method equalsMethod;
    try {
      equalsMethod = clazz.getMethod("equals", Object.class);
    } catch (NoSuchMethodException e) {
      // All concrete classes have an equals method declared in their class hierarchy.
      throw new AssertionError(String.format("Concrete class %s has no equals method", clazz));
    }
    // Still the default implementation when the declaring class is Object itself.
    equalsUnverified = Object.class.equals(equalsMethod.getDeclaringClass());
  }

  if (!equalsUnverified) {
    return;
  }

  // The set insertion happens only after the check above, so classes that do not need the
  // warning are never recorded in the set.
  if (MISSING_EQUALS_METHOD.add(clazz)) {
    LOG.warn(
        "Can't verify serialized elements of type {} have well defined equals method. "
            + "This may produce incorrect results on some {}",
        clazz.getSimpleName(),
        PipelineRunner.class.getSimpleName());
  }
}
 
Example #4
Source File: PipelineOptions.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Resolves the default runner by reflectively loading
 * {@code org.apache.beam.runners.direct.DirectRunner}.
 *
 * @param options the pipeline options; not consulted by this implementation
 * @return the DirectRunner class
 * @throws IllegalArgumentException if the DirectRunner class cannot be found; the underlying
 *     {@link ClassNotFoundException} is attached as the cause
 */
@Override
public Class<? extends PipelineRunner<?>> create(PipelineOptions options) {
  try {
    @SuppressWarnings({"unchecked", "rawtypes"})
    Class<? extends PipelineRunner<?>> direct =
        (Class<? extends PipelineRunner<?>>)
            Class.forName(
                "org.apache.beam.runners.direct.DirectRunner",
                true,
                ReflectHelpers.findClassLoader());
    return direct;
  } catch (ClassNotFoundException e) {
    // String.format is used only to expand %n into the platform line separator.
    // Keep the original exception as the cause so the failing class loader lookup stays visible.
    throw new IllegalArgumentException(
        String.format(
            "No Runner was specified and the DirectRunner was not found on the classpath.%n"
                + "Specify a runner by either:%n"
                + "    Explicitly specifying a runner by providing the 'runner' property%n"
                + "    Adding the DirectRunner to the classpath%n"
                + "    Calling 'PipelineOptions.setRunner(PipelineRunner)' directly"),
        e);
  }
}
 
Example #5
Source File: PipelineOptionsFactory.java    From beam with Apache License 2.0 6 votes vote down vote up
/**
 * Collects every runner class offered by the discovered {@code PipelineRunnerRegistrar}
 * services, keyed by lower-cased simple name, and then initializes the registry.
 *
 * <p>A runner whose lower-cased name ends in "runner" is additionally keyed by that name with
 * the suffix removed.
 */
private Cache() {
  final ClassLoader loader = ReflectHelpers.findClassLoader();
  final int suffixLength = "Runner".length();

  // Store the list of all available pipeline runners.
  ImmutableMap.Builder<String, Class<? extends PipelineRunner<?>>> runnersByName =
      ImmutableMap.builder();
  for (PipelineRunnerRegistrar registrar :
      ReflectHelpers.loadServicesOrdered(PipelineRunnerRegistrar.class, loader)) {
    for (Class<? extends PipelineRunner<?>> runnerClass : registrar.getPipelineRunners()) {
      String lowerCaseName = runnerClass.getSimpleName().toLowerCase();
      runnersByName.put(lowerCaseName, runnerClass);
      if (!lowerCaseName.endsWith("runner")) {
        continue;
      }
      String shortName = lowerCaseName.substring(0, lowerCaseName.length() - suffixLength);
      runnersByName.put(shortName, runnerClass);
    }
  }
  supportedPipelineRunners = runnersByName.build();
  initializeRegistry(loader);
}
 
Example #6
Source File: DirectRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that freshly created options report {@code DirectRunner} as their runner. */
@Test
public void defaultRunnerLoaded() {
  Class<? extends PipelineRunner> defaultRunner = PipelineOptionsFactory.create().getRunner();
  assertThat(
      DirectRunner.class, Matchers.<Class<? extends PipelineRunner>>equalTo(defaultRunner));
}
 
Example #7
Source File: DirectRunnerApiSurfaceTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the API surface of this test's package, after pruning the known exposures,
 * contains only the allowed packages.
 */
@Test
public void testDirectRunnerApiSurface() throws Exception {
  final Package runnerPackage = getClass().getPackage();
  final ClassLoader runnerClassLoader = getClass().getClassLoader();

  ApiSurface prunedSurface =
      ApiSurface.ofPackage(runnerPackage, runnerClassLoader)
          // Do not include dependencies that are required based on the known exposures. This
          // could alternatively prune everything exposed by the public parts of the Core SDK
          .pruningClass(Pipeline.class)
          .pruningClass(PipelineRunner.class)
          .pruningClass(PipelineOptions.class)
          .pruningClass(PipelineOptionsRegistrar.class)
          .pruningClass(PipelineOptions.DirectRunner.class)
          .pruningClass(DisplayData.Builder.class)
          .pruningClass(MetricResults.class)
          .pruningClass(DirectGraphs.class)
          .pruningClass(
              WatermarkManager.class /* TODO: BEAM-4237 Consider moving to local-java */)
          .pruningPattern(
              "org[.]apache[.]beam[.]runners[.]direct[.]portable.*"
              /* TODO: BEAM-4237 reconsider package layout with the ReferenceRunner */ )
          .pruningPattern("org[.]apache[.]beam[.].*Test.*")
          .pruningPattern("org[.]apache[.]beam[.].*IT")
          .pruningPattern("java[.]io.*")
          .pruningPattern("java[.]lang.*")
          .pruningPattern("java[.]util.*");

  // The DirectRunner can expose the Core SDK, anything exposed by the Core SDK, and itself
  @SuppressWarnings("unchecked")
  final Set<String> permittedPackages =
      ImmutableSet.of(
          "org.apache.beam.sdk",
          "org.apache.beam.runners.direct",
          "org.joda.time",
          "javax.annotation",
          "java.math");

  assertThat(prunedSurface, containsOnlyPackages(permittedPackages));
}
 
Example #8
Source File: PipelineRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a small transform directly through a runner built from the test pipeline's options and
 * checks the resulting counter metric.
 */
@Test
@Category({NeedsRunner.class, UsesCommittedMetrics.class, UsesCounterMetrics.class})
public void testRunPTransform() {
  final String namespace = PipelineRunnerTest.class.getName();
  final Counter counter = Metrics.counter(namespace, "count");

  final PipelineRunner<?> runner = PipelineRunner.fromOptions(p.getOptions());
  final PTransform<PBegin, POutput> scaleAndVerify =
      new PTransform<PBegin, POutput>() {
        @Override
        public POutput expand(PBegin input) {
          PCollection<Double> output =
              input
                  .apply(Create.of(1, 2, 3, 4))
                  .apply("ScaleByTwo", MapElements.via(new ScaleFn<>(2.0, counter)));
          PAssert.that(output).containsInAnyOrder(2.0, 4.0, 6.0, 8.0);
          return output;
        }
      };
  final PipelineResult result = runner.run(scaleAndVerify);

  // Checking counters to verify the pipeline actually ran.
  final MetricsFilter namespaceFilter =
      MetricsFilter.builder().addNameFilter(MetricNameFilter.inNamespace(namespace)).build();
  assertThat(
      result.metrics().queryMetrics(namespaceFilter).getCounters(),
      hasItem(metricsResult(namespace, "count", "ScaleByTwo", 4L, true)));
}
 
Example #9
Source File: PipelineRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that {@code fromOptions} yields an instance of the runner class set on the options. */
@Test
public void testInstantiation() {
  PipelineOptions crashingOptions = PipelineOptionsFactory.create();
  crashingOptions.setRunner(CrashingRunner.class);
  assertTrue(PipelineRunner.fromOptions(crashingOptions) instanceof CrashingRunner);
}
 
Example #10
Source File: CrashingRunnerTest.java    From beam with Apache License 2.0 5 votes vote down vote up
/** Verifies that options configured with {@code CrashingRunner} produce a CrashingRunner. */
@Test
public void fromOptionsCreatesInstance() {
  PipelineOptions crashingOptions = PipelineOptionsFactory.create();
  crashingOptions.setRunner(CrashingRunner.class);

  boolean isCrashingRunner =
      PipelineRunner.fromOptions(crashingOptions) instanceof CrashingRunner;
  assertTrue("Should have created a CrashingRunner", isCrashingRunner);
}
 
Example #11
Source File: CrashingRunner.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Always fails: this runner cannot execute pipelines.
 *
 * @param pipeline ignored
 * @return never returns normally
 * @throws IllegalArgumentException on every call, with a message explaining how to supply a
 *     real runner
 */
@Override
public PipelineResult run(Pipeline pipeline) {
  final String selfName = CrashingRunner.class.getSimpleName();
  final String message =
      String.format(
          "Cannot call #run(Pipeline) on an instance "
              + "of %s. %s should only be used as the default to construct a Pipeline "
              + "using %s, and cannot execute Pipelines. Instead, specify a %s "
              + "by providing PipelineOptions in the system property '%s'.",
          selfName,
          selfName,
          TestPipeline.class.getSimpleName(),
          PipelineRunner.class.getSimpleName(),
          TestPipeline.PROPERTY_BEAM_TEST_PIPELINE_OPTIONS);
  throw new IllegalArgumentException(message);
}
 
Example #12
Source File: PipelineOptionsFactory.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the simple class names of all supported runners as a naturally ordered set.
 *
 * @return the sorted simple names of the supported runner classes
 */
@VisibleForTesting
Set<String> getSupportedRunners() {
  ImmutableSortedSet.Builder<String> simpleNames = ImmutableSortedSet.naturalOrder();
  supportedPipelineRunners
      .values()
      .forEach(runnerClass -> simpleNames.add(runnerClass.getSimpleName()));
  return simpleNames.build();
}
 
Example #13
Source File: TransMetaPipelineConverter.java    From kettle-beam with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a Beam {@link Pipeline} from the given options: installs the configured runner,
 * registers the {@code KettleRow} coder, then wires input, generic and output steps in that
 * order.
 *
 * @param pipelineOptions the options to create the pipeline with; their runner is overwritten
 * @return the assembled pipeline
 * @throws Exception if the runner cannot be resolved or any step handler fails
 */
public Pipeline createPipeline( PipelineOptions pipelineOptions ) throws Exception {
  final LogChannelInterface logChannel = LogChannel.GENERAL;

  // Resolve the configured runner and install it before the pipeline is created.
  final String runnerTypeName = beamJobConfig.getRunnerTypeName();
  final RunnerType configuredType = RunnerType.getRunnerTypeByName( runnerTypeName );
  pipelineOptions.setRunner( getPipelineRunnerClass( configuredType ) );

  final Pipeline beamPipeline = Pipeline.create( pipelineOptions );
  beamPipeline.getCoderRegistry().registerCoderForClass( KettleRow.class, new KettleRowCoder() );

  logChannel.logBasic( "Created pipeline job with name '" + pipelineOptions.getJobName() + "'" );

  // Remembers which collection each step produced so later steps can consume it.
  final Map<String, PCollection<KettleRow>> collectionsByStep = new HashMap<>();

  // Inputs first, then every other step, and outputs last.
  handleBeamInputSteps( logChannel, collectionsByStep, beamPipeline );
  handleGenericStep( collectionsByStep, beamPipeline );
  handleBeamOutputSteps( logChannel, collectionsByStep, beamPipeline );

  return beamPipeline;
}
 
Example #14
Source File: PipelineOptions.java    From beam with Apache License 2.0 5 votes vote down vote up
/**
 * The pipeline runner that will be used to execute the pipeline. For registered runners, the
 * class name can be specified, otherwise the fully qualified name needs to be specified.
 *
 * <p>This option is marked as required. Its default value is supplied by the
 * {@code DirectRunner} instance factory named in the {@code @Default.InstanceFactory}
 * annotation below.
 *
 * @return the runner class that will execute the pipeline
 */
@Validation.Required
@Description(
    "The pipeline runner that will be used to execute the pipeline. "
        + "For registered runners, the class name can be specified, otherwise the fully "
        + "qualified name needs to be specified.")
@Default.InstanceFactory(DirectRunner.class)
Class<? extends PipelineRunner<?>> getRunner();
 
Example #15
Source File: NemoPipelineOptionsFactory.java    From incubator-nemo with Apache License 2.0 5 votes vote down vote up
/**
 * Create a PipelineOptions for nemo runner.
 *
 * <p>The runner class {@code org.apache.nemo.client.beam.NemoRunner} is loaded reflectively by
 * name and set on the options.
 *
 * @return pipeline options with the Nemo runner set
 * @throws RuntimeException if the Nemo runner class cannot be found; the underlying
 *     {@link ClassNotFoundException} is attached as the cause
 */
public static PipelineOptions create() {
  final PipelineOptions options = PipelineOptionsFactory.create().as(NemoPipelineOptions.class);
  final String runnerClassName = "org.apache.nemo.client.beam.NemoRunner";
  try {
    // Class.forName only yields Class<?>, so the cast to the runner type is unchecked.
    @SuppressWarnings("unchecked")
    final Class<? extends PipelineRunner<?>> runnerClass =
      (Class<? extends PipelineRunner<?>>) Class.forName(runnerClassName);
    options.setRunner(runnerClass);
    return options;
  } catch (final ClassNotFoundException e) {
    // The cause travels with the rethrown exception, so no separate stack trace is printed here.
    throw new RuntimeException("Nemo runner class not found: " + runnerClassName, e);
  }
}
 
Example #16
Source File: TransMetaPipelineConverter.java    From kettle-beam with Apache License 2.0 5 votes vote down vote up
/**
 * Maps a {@link RunnerType} onto the Beam runner class that executes pipelines for it.
 *
 * @param runnerType the runner type to resolve
 * @return the runner class matching {@code runnerType}
 * @throws KettleException if {@code runnerType} is {@code null} or is not one of the handled types
 */
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws KettleException {
  if ( runnerType == null ) {
    throw new KettleException( "Please specify a valid runner type" );
  }

  Class<? extends PipelineRunner<?>> runnerClass;
  switch ( runnerType ) {
    case Direct:
      runnerClass = DirectRunner.class;
      break;
    case Flink:
      runnerClass = FlinkRunner.class;
      break;
    case Spark:
      runnerClass = SparkRunner.class;
      break;
    case DataFlow:
      runnerClass = DataflowRunner.class;
      break;
    default:
      throw new KettleException( "Unsupported runner type: " + runnerType.name() );
  }
  return runnerClass;
}
 
Example #17
Source File: HopPipelineMetaToBeamPipelineConverter.java    From hop with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a Beam {@link Pipeline} from the pipeline run configuration: resolves the runner,
 * fills in the generic options, registers the {@code HopRow} coder, then wires input, generic
 * and output transforms in that order.
 *
 * @return the assembled pipeline
 * @throws Exception if the runner cannot be resolved or any handler fails
 */
public Pipeline createPipeline() throws Exception {
  final ILogChannel logChannel = LogChannel.GENERAL;

  // The runner class is resolved before the options are fetched or modified.
  final Class<? extends PipelineRunner<?>> resolvedRunner =
    getPipelineRunnerClass( pipelineRunConfiguration.getRunnerType() );

  // The generic options
  final PipelineOptions options = pipelineRunConfiguration.getPipelineOptions();
  final String userAgent =
    pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getUserAgent() );
  options.setUserAgent( userAgent );
  final String tempLocation =
    pipelineRunConfiguration.environmentSubstitute( pipelineRunConfiguration.getTempLocation() );
  options.setTempLocation( tempLocation );
  options.setJobName( pipelineMeta.getName() );
  options.setRunner( resolvedRunner );

  final Pipeline beamPipeline = Pipeline.create( options );
  beamPipeline.getCoderRegistry().registerCoderForClass( HopRow.class, new HopRowCoder() );

  logChannel.logBasic( "Created Apache Beam pipeline with name '" + options.getJobName() + "'" );

  // Remembers which collection each transform produced so later transforms can consume it.
  final Map<String, PCollection<HopRow>> collectionsByTransform = new HashMap<>();

  // Inputs first, then every other transform, and outputs last.
  handleBeamInputSteps( logChannel, collectionsByTransform, beamPipeline );
  handleGenericStep( collectionsByTransform, beamPipeline );
  handleBeamOutputSteps( logChannel, collectionsByTransform, beamPipeline );

  return beamPipeline;
}
 
Example #18
Source File: PipelineOptionsFactory.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner map held by the current cache instance.
 *
 * @return the {@code supportedPipelineRunners} map of the cached state
 */
static Map<String, Class<? extends PipelineRunner<?>>> getRegisteredRunners() {
  final Map<String, Class<? extends PipelineRunner<?>>> registeredRunners =
      CACHE.get().supportedPipelineRunners;
  return registeredRunners;
}
 
Example #19
Source File: DirectRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list containing only {@code DirectRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(DirectRunner.class);
  return runners;
}
 
Example #20
Source File: DataflowPipelineRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list of {@code DataflowRunner} and {@code TestDataflowRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(DataflowRunner.class, TestDataflowRunner.class);
  return runners;
}
 
Example #21
Source File: SamzaRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list of {@code SamzaRunner} and {@code TestSamzaRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(SamzaRunner.class, TestSamzaRunner.class);
  return runners;
}
 
Example #22
Source File: FlinkRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list of {@code FlinkRunner} and {@code TestFlinkRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(FlinkRunner.class, TestFlinkRunner.class);
  return runners;
}
 
Example #23
Source File: SparkRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list of {@code SparkRunner}, {@code TestSparkRunner} and
 *     {@code SparkStructuredStreamingRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(
          SparkRunner.class, TestSparkRunner.class, SparkStructuredStreamingRunner.class);
  return runners;
}
 
Example #24
Source File: JetTestRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return a single-element list containing {@code TestJetRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      Collections.singletonList(TestJetRunner.class);
  return runners;
}
 
Example #25
Source File: JetRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list containing only {@code JetRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(JetRunner.class);
  return runners;
}
 
Example #26
Source File: PortableRunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list containing only {@code PortableRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(PortableRunner.class);
  return runners;
}
 
Example #27
Source File: Twister2RunnerRegistrar.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list of {@code Twister2Runner} and {@code Twister2TestRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(Twister2Runner.class, Twister2TestRunner.class);
  return runners;
}
 
Example #28
Source File: NemoRunnerRegistrar.java    From incubator-nemo with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list containing only {@code NemoRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(NemoRunner.class);
  return runners;
}
 
Example #29
Source File: PipelineOptionsFactoryTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the runner classes this method advertises.
 *
 * @return an immutable list containing only {@code RegisteredTestRunner}
 */
@Override
public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
  final Iterable<Class<? extends PipelineRunner<?>>> runners =
      ImmutableList.of(RegisteredTestRunner.class);
  return runners;
}
 
Example #30
Source File: PipelineOptionsFactoryTest.java    From beam with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a new {@code RegisteredTestRunner}.
 *
 * @param options the pipeline options; not consulted by this implementation
 * @return a fresh runner instance
 */
public static PipelineRunner<PipelineResult> fromOptions(PipelineOptions options) {
  final PipelineRunner<PipelineResult> runner = new RegisteredTestRunner();
  return runner;
}