org.apache.beam.runners.spark.SparkRunner Java Examples

The following examples show how to use org.apache.beam.runners.spark.SparkRunner. Each example is taken from an open source project; the source file and originating project are noted above it.
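Before the individual examples, here is a minimal, self-contained sketch of the typical way to run a Beam pipeline on the Spark runner. It uses only standard Beam APIs; the class name, local master setting, and sample data are illustrative assumptions, not drawn from the examples below.

import org.apache.beam.runners.spark.SparkPipelineOptions;
import org.apache.beam.runners.spark.SparkRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class MinimalSparkRunnerExample {
  public static void main(String[] args) {
    // Configure Beam to execute on Spark, here against an embedded local master.
    SparkPipelineOptions options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
    options.setRunner(SparkRunner.class);
    options.setSparkMaster("local[2]");

    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(1, 2, 3));

    // run() is asynchronous; waitUntilFinish() blocks until the job completes.
    p.run().waitUntilFinish();
  }
}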
Example #1
Source File: BeamPipelineEngine.java    From hop with Apache License 2.0
private PipelineResult executePipeline( org.apache.beam.sdk.Pipeline pipeline ) throws HopException {

    // Pick the runner from the Beam engine run configuration and delegate execution to it.
    RunnerType runnerType = beamEngineRunConfiguration.getRunnerType();
    switch ( runnerType ) {
      case Direct:
        return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Flink:
        return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case DataFlow:
        return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Spark:
        return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      default:
        throw new HopException( "Execution on runner '" + runnerType.name() + "' is not supported yet." );
    }
  }
 
Example #2
Source File: HopPipelineMetaToBeamPipelineConverter.java    From hop with Apache License 2.0
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws HopException {
  if ( runnerType == null ) {
    throw new HopException( "Please specify a valid runner type" );
  }
  switch ( runnerType ) {
    case Direct:
      return DirectRunner.class;
    case Flink:
      return FlinkRunner.class;
    case Spark:
      return SparkRunner.class;
    case DataFlow:
      return DataflowRunner.class;
    default:
      throw new HopException( "Unsupported runner type: " + runnerType.name() );
  }
}
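A hedged sketch of how such a helper is typically consumed: resolve the runner class once, then install it on the pipeline options before the pipeline is created. The runnerType variable and the surrounding context are assumed here, not part of the original source.

// Hypothetical call site for getPipelineRunnerClass (runnerType assumed in scope).
PipelineOptions pipelineOptions = PipelineOptionsFactory.create();
pipelineOptions.setRunner( getPipelineRunnerClass( runnerType ) );
Pipeline pipeline = Pipeline.create( pipelineOptions );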
 
Example #3
Source File: TranslationUtils.java    From beam with Apache License 2.0
/**
 * Rejects a {@link DoFn} that declares state or timers, which the SparkRunner does not yet
 * support.
 *
 * @param doFn the {@link DoFn} to possibly reject.
 */
public static void rejectStateAndTimers(DoFn<?, ?> doFn) {
  DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());

  if (signature.stateDeclarations().size() > 0) {
    throw new UnsupportedOperationException(
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with state in the %s.",
            DoFn.StateId.class.getSimpleName(),
            doFn.getClass().getName(),
            DoFn.class.getSimpleName(),
            SparkRunner.class.getSimpleName()));
  }

  if (signature.timerDeclarations().size() > 0
      || signature.timerFamilyDeclarations().size() > 0) {
    throw new UnsupportedOperationException(
        String.format(
            "Found %s annotations on %s, but %s cannot yet be used with timers in the %s.",
            DoFn.TimerId.class.getSimpleName(),
            doFn.getClass().getName(),
            DoFn.class.getSimpleName(),
            SparkRunner.class.getSimpleName()));
  }
}
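To make the guard concrete, here is a hedged sketch of a stateful DoFn that this method rejects. The class and its counting logic are hypothetical, but the @StateId declaration is exactly what stateDeclarations() picks up.

import org.apache.beam.runners.spark.translation.TranslationUtils;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.state.StateSpec;
import org.apache.beam.sdk.state.StateSpecs;
import org.apache.beam.sdk.state.ValueState;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;

public class RejectStateExample {
  // Hypothetical stateful DoFn: keeps a per-key counter in a ValueState.
  static class CountingFn extends DoFn<KV<String, Integer>, Integer> {
    @StateId("count")
    private final StateSpec<ValueState<Integer>> countSpec = StateSpecs.value(VarIntCoder.of());

    @ProcessElement
    public void process(ProcessContext c, @StateId("count") ValueState<Integer> count) {
      Integer current = count.read();
      int next = (current == null ? 0 : current) + 1;
      count.write(next);
      c.output(next);
    }
  }

  public static void main(String[] args) {
    // Throws UnsupportedOperationException: the SparkRunner cannot yet run stateful DoFns.
    TranslationUtils.rejectStateAndTimers(new CountingFn());
  }
}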
 
Example #4
Source File: TrackStreamingSourcesTest.java    From beam with Apache License 2.0
@Test
public void testTrackSingle() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> emptyStream =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
 
Example #5
Source File: TrackStreamingSourcesTest.java    From beam with Apache License 2.0
@Test
public void testTrackFlattened() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc =
      new JavaStreamingContext(
          jsc, new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> queueStream1 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();
  CreateStream<Integer> queueStream2 =
      CreateStream.of(VarIntCoder.of(), Duration.millis(options.getBatchIntervalMillis()))
          .emptyBatch();

  PCollection<Integer> pcol1 = p.apply(queueStream1);
  PCollection<Integer> pcol2 = p.apply(queueStream2);
  PCollection<Integer> flattened =
      PCollectionList.of(pcol1).and(pcol2).apply(Flatten.pCollections());
  flattened.apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class, 0, 1));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
 
Example #6
Source File: TransMetaPipelineConverter.java    From kettle-beam with Apache License 2.0
public static Class<? extends PipelineRunner<?>> getPipelineRunnerClass( RunnerType runnerType ) throws KettleException {
  if (runnerType==null) {
    throw new KettleException( "Please specify a valid runner type");
  }
  switch(runnerType) {
    case Direct: return DirectRunner.class;
    case Flink: return FlinkRunner.class;
    case Spark: return SparkRunner.class;
    case DataFlow: return DataflowRunner.class;
    default:
      throw new KettleException( "Unsupported runner type: "+runnerType.name() );
  }
}
 
Example #7
Source File: KettleBeamPipelineExecutor.java    From kettle-beam with Apache License 2.0
private PipelineResult asyncExecutePipeline( Pipeline pipeline ) throws KettleException {

    RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( jobConfig.getRunnerTypeName() ) );
    if (runnerType==null) {
      throw new KettleException( "Runner type '"+jobConfig.getRunnerTypeName()+"' is not recognized");
    }
    switch ( runnerType ) {
      case Direct: return DirectRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Flink: return FlinkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case DataFlow: return DataflowRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      case Spark: return SparkRunner.fromOptions( pipeline.getOptions() ).run( pipeline );
      default:
        throw new KettleException( "Execution on runner '" + runnerType.name() + "' is not supported yet, sorry." );
    }
  }
 
Example #8
Source File: SparkRunnerStreamingContextFactory.java    From beam with Apache License 2.0
@Override
public JavaStreamingContext call() throws Exception {
  LOG.info("Creating a new Spark Streaming Context");
  // validate unbounded read properties.
  checkArgument(
      options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
      "Minimum read time has to be less than batch time.");
  checkArgument(
      options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
      "Read time percentage is bound to (0, 1).");

  SparkPipelineTranslator translator =
      new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
  Duration batchDuration = new Duration(options.getBatchIntervalMillis());
  LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);

  // We must first init accumulators since translators expect them to be instantiated.
  SparkRunner.initAccumulators(options, jsc);
  // No need to create a MetricsPusher instance here because it is created in SparkRunner.run().

  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
  // update cache candidates
  SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
  pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
  ctxt.computeOutputs();

  checkpoint(jssc, checkpointDir);

  return jssc;
}
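For context, a hedged sketch of how this factory is consumed: Spark's JavaStreamingContext.getOrCreate either recovers a context from the checkpoint directory or invokes call() above to build a fresh one. This mirrors what SparkRunner.run() does for streaming pipelines, but the variable names below are assumptions.

// Hedged sketch (names assumed): recover from the checkpoint if present, else build anew.
SparkRunnerStreamingContextFactory factory =
    new SparkRunnerStreamingContextFactory(pipeline, options, checkpointDir);
JavaStreamingContext jssc =
    JavaStreamingContext.getOrCreate(checkpointDir.getSparkCheckpointDir().toString(), factory);
jssc.start();
jssc.awaitTermination();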
 
Example #9
Source File: TrackStreamingSourcesTest.java    From beam with Apache License 2.0
private StreamingSourceTracker(
    JavaStreamingContext jssc,
    Pipeline pipeline,
    Class<? extends PTransform> transformClassToAssert,
    Integer... expected) {
  this.ctxt = new EvaluationContext(jssc.sparkContext(), pipeline, options, jssc);
  this.evaluator =
      new SparkRunner.Evaluator(
          new StreamingTransformTranslator.Translator(new TransformTranslator.Translator()),
          ctxt);
  this.transformClassToAssert = transformClassToAssert;
  this.expected = expected;
}
 
Example #10
Source File: SparkRunnerTestUtils.java    From components with Apache License 2.0
public Pipeline createPipeline() {
    // Hand the SparkRunner a pre-built local SparkContext instead of letting it create its own.
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
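A short, hedged usage note: because the options carry a provided SparkContext, a pipeline built this way executes on the caller's JavaSparkContext rather than one the runner creates. The transform and data below are assumed for illustration.

// Hypothetical usage of the factory method above.
Pipeline pipeline = createPipeline();
pipeline.apply(Create.of("a", "b", "c"));
pipeline.run().waitUntilFinish();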
 
Example #11
Source File: S3SparkRuntimeTestIT.java    From components with Apache License 2.0
@Before
public void setupLazyAvroCoder() {
    options = PipelineOptionsFactory.as(SparkPipelineOptions.class);
    options.setRunner(SparkRunner.class);
    options.setSparkMaster("local");
    options.setStreaming(false);
    pWrite = Pipeline.create(options);
    pRead = Pipeline.create(options);
}
 
Example #12
Source File: SparkIntegrationTestResource.java    From components with Apache License 2.0
/**
 * @return the options used to create this pipeline. These can be changed before the Pipeline is created.
 */
public SparkContextOptions getOptions() {
    if (options == null) {
        options = PipelineOptionsFactory.as(SparkContextOptions.class);
        options.setRunner(SparkRunner.class);
        options.setFilesToStage(emptyList()); // useless for us and broken on java > 8 with beam <= 2.10.0
    }
    return options;
}
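A hedged sketch of using this resource in a test; the resource field, test name, and transform are assumptions. The point is that the returned options are live and can still be adjusted before Pipeline.create() snapshots them.

@Test
public void runsOnSpark() {
    // Adjust the shared options before the pipeline is created from them.
    SparkContextOptions options = resource.getOptions();
    options.setAppName("spark-integration-demo");

    Pipeline p = Pipeline.create(options);
    p.apply(Create.of(1, 2, 3));
    p.run().waitUntilFinish();
}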