org.apache.beam.runners.spark.SparkContextOptions Java Examples

The following examples show how to use org.apache.beam.runners.spark.SparkContextOptions. Each example notes the source file and the project it is taken from.
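At its core, this interface lets you hand Beam's Spark runner a pre-built JavaSparkContext instead of letting the runner create one. As a quick orientation before the excerpts below, here is a minimal sketch of that pattern (the master URL and app name are placeholders):

import org.apache.beam.runners.spark.SparkContextOptions;
import org.apache.beam.runners.spark.SparkRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

// Build a Spark context yourself...
SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("sketch");
JavaSparkContext jsc = new JavaSparkContext(conf);

// ...then tell Beam to use it rather than creating its own.
SparkContextOptions opts = PipelineOptionsFactory.as(SparkContextOptions.class);
opts.setRunner(SparkRunner.class);
opts.setUsesProvidedSparkContext(true);
opts.setProvidedSparkContext(jsc);

Pipeline pipeline = Pipeline.create(opts);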
Example #1
Source File: SparkContextFactory.java    From beam with Apache License 2.0
public static synchronized JavaSparkContext getSparkContext(SparkPipelineOptions options) {
  SparkContextOptions contextOptions = options.as(SparkContextOptions.class);
  usesProvidedSparkContext = contextOptions.getUsesProvidedSparkContext();
  // reuse should be ignored if the context is provided.
  if (Boolean.getBoolean(TEST_REUSE_SPARK_CONTEXT) && !usesProvidedSparkContext) {

    // if the context is null or stopped for some reason, re-create it.
    if (sparkContext == null || sparkContext.sc().isStopped()) {
      sparkContext = createSparkContext(contextOptions);
      sparkMaster = options.getSparkMaster();
    } else if (!options.getSparkMaster().equals(sparkMaster)) {
      throw new IllegalArgumentException(
          String.format(
              "Cannot reuse spark context "
                  + "with different spark master URL. Existing: %s, requested: %s.",
              sparkMaster, options.getSparkMaster()));
    }
    return sparkContext;
  } else {
    return createSparkContext(contextOptions);
  }
}
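The reuse branch is gated on a JVM system property. A sketch of how a test might opt in before the first call, assuming the property name matches this factory's TEST_REUSE_SPARK_CONTEXT constant ("beam.spark.test.reuseSparkContext" in the Beam versions this snippet comes from; verify against yours):

// Opt this JVM into context reuse before the first pipeline runs.
System.setProperty("beam.spark.test.reuseSparkContext", "true"); // assumed property name
JavaSparkContext shared = SparkContextFactory.getSparkContext(options);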
 
Example #2
Source File: SparkRunnerKryoRegistratorTest.java    From beam with Apache License 2.0
private void runSimplePipelineWithSparkContext(SparkConf conf) {
  SparkPipelineOptions options =
      PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setRunner(TestSparkRunner.class);

  conf.set("spark.master", "local");
  conf.setAppName("test");

  JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
  options.setUsesProvidedSparkContext(true);
  options.as(SparkContextOptions.class).setProvidedSparkContext(javaSparkContext);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of("a")); // some operation to trigger pipeline construction
  p.run().waitUntilFinish();
  javaSparkContext.stop();
}
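Note that Beam never stops a provided context: the test calls javaSparkContext.stop() itself after waitUntilFinish(). A slightly more defensive sketch of the same lifecycle, using only calls from the snippet above:

JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
try {
  options.setUsesProvidedSparkContext(true);
  options.as(SparkContextOptions.class).setProvidedSparkContext(javaSparkContext);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of("a")); // some operation to trigger pipeline construction
  p.run().waitUntilFinish();
} finally {
  javaSparkContext.stop(); // the owner of a provided context must stop it
}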
 
Example #3
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
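In the else-branch, the new context is configured entirely from the options object. A sketch of driving that path through the public factory method from Example #1 (the master URL and app name are placeholders):

SparkContextOptions contextOptions = PipelineOptionsFactory.as(SparkContextOptions.class);
contextOptions.setUsesProvidedSparkContext(false); // take the fresh-context branch
contextOptions.setSparkMaster("local[4]");         // placeholder master URL
contextOptions.setAppName("fresh-context-sketch"); // placeholder app name
JavaSparkContext jsc = SparkContextFactory.getSparkContext(contextOptions);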
 
Example #4
Source File: SparkRunnerTestUtils.java    From components with Apache License 2.0
public Pipeline createPipeline() {
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
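The spark.driver.allowMultipleContexts setting is what lets each test build its own context even if another is still alive in the JVM. A hypothetical usage of this helper (the constructor and appName wiring are assumptions, not shown above):

SparkRunnerTestUtils utils = new SparkRunnerTestUtils("components-test"); // hypothetical constructor
Pipeline pipeline = utils.createPipeline();
pipeline.apply(Create.of("a", "b"));
pipeline.run().waitUntilFinish();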
 
Example #5
Source File: SparkIntegrationTestResource.java    From components with Apache License 2.0
/**
 * @return the options used to create this pipeline. These can be modified before the Pipeline is created.
 */
public SparkContextOptions getOptions() {
    if (options == null) {
        options = PipelineOptionsFactory.as(SparkContextOptions.class);
        options.setRunner(SparkRunner.class);
        options.setFilesToStage(emptyList()); // useless for us and broken on java > 8 with beam <= 2.10.0
    }
    return options;
}
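Because the resource caches the options, callers can adjust them before the first pipeline is created. A sketch (the resource variable is an assumption):

SparkContextOptions opts = resource.getOptions(); // hypothetical resource instance
opts.setAppName("spark-integration-test");        // tweak before Pipeline.create(...)
Pipeline pipeline = Pipeline.create(opts);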
 
Example #6
Source File: KettleBeamPipelineExecutor.java    From kettle-beam with Apache License 2.0
public Pipeline getPipeline( TransMeta transMeta, BeamJobConfig config ) throws KettleException {

    try {

      if ( StringUtils.isEmpty( config.getRunnerTypeName() ) ) {
        // Arrays.toString(...) prints the enum constants; toString() on the array itself would not.
        throw new KettleException( "You need to specify a runner type, one of: " + Arrays.toString( RunnerType.values() ) );
      }
      PipelineOptions pipelineOptions = null;
      VariableSpace space = transMeta;

      RunnerType runnerType = RunnerType.getRunnerTypeByName( transMeta.environmentSubstitute( config.getRunnerTypeName() ) );
      switch ( runnerType ) {
        case Direct:
          pipelineOptions = PipelineOptionsFactory.create();
          break;
        case DataFlow:
          DataflowPipelineOptions dfOptions = PipelineOptionsFactory.as( DataflowPipelineOptions.class );
          configureDataFlowOptions( config, dfOptions, space );
          pipelineOptions = dfOptions;
          break;
        case Spark:
          SparkPipelineOptions sparkOptions;
          if (sparkContext!=null) {
            SparkContextOptions sparkContextOptions = PipelineOptionsFactory.as( SparkContextOptions.class );
            sparkContextOptions.setProvidedSparkContext( sparkContext );
            sparkOptions = sparkContextOptions;
          } else {
            sparkOptions = PipelineOptionsFactory.as( SparkPipelineOptions.class );
          }
          configureSparkOptions( config, sparkOptions, space, transMeta.getName() );
          pipelineOptions = sparkOptions;
          break;
        case Flink:
          FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as( FlinkPipelineOptions.class );
          configureFlinkOptions( config, flinkOptions, space );
          pipelineOptions = flinkOptions;
          break;
        default:
          throw new KettleException( "Sorry, this isn't implemented yet" );
      }

      configureStandardOptions( config, transMeta.getName(), pipelineOptions, space );

      setVariablesInTransformation( config, transMeta );

      TransMetaPipelineConverter converter;
      if (stepPluginClasses!=null && xpPluginClasses!=null) {
        converter = new TransMetaPipelineConverter( transMeta, metaStore, stepPluginClasses, xpPluginClasses, jobConfig );
      } else {
        converter = new TransMetaPipelineConverter( transMeta, metaStore, config.getPluginsToStage(), jobConfig );
      }
      Pipeline pipeline = converter.createPipeline( pipelineOptions );

      // Also set the pipeline options...
      //
      FileSystems.setDefaultPipelineOptions(pipelineOptions);

      return pipeline;
    } catch ( Exception e ) {
      throw new KettleException( "Error configuring local Beam Engine", e );
    }

  }
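The Spark branch compiles because SparkContextOptions extends SparkPipelineOptions, so both variants flow through the same configureSparkOptions(...) call. One caveat, going by the factory in Example #1: Beam only honors a provided context when usesProvidedSparkContext is also set, so this path may additionally need:

          sparkContextOptions.setUsesProvidedSparkContext( true ); // likely required per SparkContextFactory above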