Java Code Examples for org.apache.spark.SparkConf#setJars()

The following examples show how to use org.apache.spark.SparkConf#setJars(). Follow the source-file link above each example to browse the original project.
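Before the examples, a minimal sketch of the call itself: setJars() takes the paths (or URLs) of the JARs that Spark should ship to every executor, which matters as soon as the master is a real cluster rather than local mode. The master URL and jar path below are hypothetical placeholders for your own cluster and build artifact.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetJarsSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("spark://master-host:7077") // hypothetical cluster master
                .setAppName("SetJarsSketch")
                // Ship the application jar to the executors; replace the path
                // with your own build artifact.
                .setJars(new String[]{"target/my-app-1.0.jar"});
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            // driver-side work goes here
        }
    }
}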
Example 1
Source File: ChronixSparkLoader.java    From chronix.spark with Apache License 2.0
public ChronixSparkContext createChronixSparkContext() throws IOException {
    if (chronixSparkContext != null) {
        return chronixSparkContext;
    }

    SparkConf sparkConf = new SparkConf()
            .setMaster(chronixYAMLConfiguration.getSparkMaster())
            .setAppName(chronixYAMLConfiguration.getAppName());

    ChronixSparkContext.tuneSparkConf(sparkConf);

    // Apply the Spark settings from the YAML config
    for (Map.Entry<String, String> setting : chronixYAMLConfiguration.getSparkSettings().entrySet()) {
        sparkConf.set(setting.getKey(), setting.getValue());
    }

    if (chronixYAMLConfiguration.isDistributed()) {
        sparkConf.setJars(chronixYAMLConfiguration.getJars());
    }

    chronixSparkContext = new ChronixSparkContext(new JavaSparkContext(sparkConf));
    return chronixSparkContext;
}
 
Example 2
Source File: JavaSparkUtil.java    From incubator-retired-blur with Apache License 2.0
public static void packProjectJars(SparkConf conf) throws IOException {
  String classPath = System.getProperty(JAVA_CLASS_PATH);
  String pathSeparator = System.getProperty(PATH_SEPARATOR);
  Splitter splitter = Splitter.on(pathSeparator);
  Iterable<String> split = splitter.split(classPath);
  List<String> list = toList(split);
  List<String> classPathThatNeedsToBeIncluded = removeSparkLibs(list);
  List<String> jars = new ArrayList<String>();
  for (String s : classPathThatNeedsToBeIncluded) {
    if (isJarFile(s)) {
      jars.add(s);
    } else {
      jars.add(createJar(s));
    }
  }
  conf.setJars(jars.toArray(new String[jars.size()]));
}
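JAVA_CLASS_PATH and PATH_SEPARATOR are constants defined elsewhere in JavaSparkUtil, presumably the standard system property names "java.class.path" and "path.separator". The helper walks the driver's classpath, strips Spark's own libraries, jars up any plain directories, and passes everything that remains to setJars(), so the executors see the same classpath as the driver.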
 
Example 3
Source File: StreamingContextConfiguration.java    From Decision with Apache License 2.0
private JavaStreamingContext create(String streamingContextName, int port, long streamingBatchTime, String sparkHost) {
    SparkConf conf = new SparkConf();
    conf.set("spark.ui.port", String.valueOf(port));
    conf.setAppName(streamingContextName);
    conf.setJars(JavaStreamingContext.jarOfClass(StreamingEngine.class));
    conf.setMaster(sparkHost);

    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.registerKryoClasses(new Class[] { StratioStreamingMessage.class, InsertMessage.class, ColumnType.class,
            Action.class});

    HashMap<String, String> tuningProperties = configurationContext.getSparkTunningProperties();
    if (tuningProperties != null && tuningProperties.size() > 0) {
        tuningProperties.forEach((key, value) -> conf.set(key, value));
    }

    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(streamingBatchTime));

    return streamingContext;
}
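JavaStreamingContext.jarOfClass(StreamingEngine.class) returns the path of the JAR from which the given class was loaded, as an array suitable for passing straight to setJars(), which avoids hardcoding the application jar's location.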
 
Example 4
Source File: SparkStreamingJob.java    From zipkin-sparkstreaming with Apache License 2.0
@Memoized
JavaStreamingContext jsc() {
  SparkConf conf = new SparkConf(true)
      .setMaster(master())
      .setAppName(getClass().getName());
  if (!jars().isEmpty()) conf.setJars(jars().toArray(new String[0]));
  for (Map.Entry<String, String> entry : conf().entrySet()) {
    conf.set(entry.getKey(), entry.getValue());
  }
  return new JavaStreamingContext(conf, new Duration(batchDuration()));
}
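@Memoized here is presumably AutoValue's memoization extension, so the JavaStreamingContext is built once and cached on first call. Note also jars().toArray(new String[0]), the usual idiom for converting a List<String> into the array that setJars() expects.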
 
Example 5
Source File: SparkUtils.java    From SparkDemo with MIT License
public static JavaSparkContext getRemoteSparkContext(Class<?> clazz) {
    System.setProperty("HADOOP_USER_NAME", "root");
    /**
     * SparkConf: the first step is to create a SparkConf. On it you can set the
     * run mode (local, standalone, YARN), the app name (visible in the Web UI),
     * and Spark's runtime resource requirements.
     */
    SparkConf conf = getRemoteSparkConf(clazz);
    conf.setJars(new String[]{"target/SparkDemo-1.0-SNAPSHOT-jar-with-dependencies.jar"});
    /**
     * From the SparkConf a SparkContext can be created. The SparkContext is the
     * sole gateway to the cluster; creating it also creates the task scheduler.
     */
    return new JavaSparkContext(conf);
}
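A hypothetical call site (MyJob is a placeholder class): build the fat jar first with mvn package, since the relative path handed to setJars() must exist when the context is created.

// Hypothetical usage: run 'mvn package' first so that
// target/SparkDemo-1.0-SNAPSHOT-jar-with-dependencies.jar exists.
JavaSparkContext sc = SparkUtils.getRemoteSparkContext(MyJob.class);
try {
    // distributed work goes here
} finally {
    sc.close();
}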
 
Example 6
Source File: TranslationContext.java    From beam with Apache License 2.0
public TranslationContext(SparkStructuredStreamingPipelineOptions options) {
  SparkConf sparkConf = new SparkConf();
  sparkConf.setMaster(options.getSparkMaster());
  sparkConf.setAppName(options.getAppName());
  if (options.getFilesToStage() != null && !options.getFilesToStage().isEmpty()) {
    sparkConf.setJars(options.getFilesToStage().toArray(new String[0]));
  }

  // By default, Spark uses 200 SQL shuffle partitions. That seems too many for
  // local mode, so try to align with the thread count in the "sparkMaster" option.
  // We should not overwrite this value (or any user-defined spark configuration value) if the
  // user has already configured it.
  String sparkMaster = options.getSparkMaster();
  if (sparkMaster != null
      && sparkMaster.startsWith("local[")
      && System.getProperty("spark.sql.shuffle.partitions") == null) {
    int numPartitions =
        Integer.parseInt(sparkMaster.substring("local[".length(), sparkMaster.length() - 1));
    if (numPartitions > 0) {
      sparkConf.set("spark.sql.shuffle.partitions", String.valueOf(numPartitions));
    }
  }

  this.sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
  this.serializablePipelineOptions = new SerializablePipelineOptions(options);
  this.datasets = new HashMap<>();
  this.leaves = new HashSet<>();
  this.broadcastDataSets = new HashMap<>();
}
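One caveat in the local-mode branch: the substring parse assumes a numeric thread count, so local[4] yields four shuffle partitions, while a wildcard master such as local[*] would make Integer.parseInt throw NumberFormatException unless it is rejected before this point.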
 
Example 7
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Example 8
Source File: SparkScheduler.java    From oodt with Apache License 2.0
public SparkScheduler(JobQueue queue) {
    SparkConf conf = new SparkConf();
    conf.setMaster(System.getProperty("resource.runner.spark.host", "local"));
    conf.setAppName("OODT Spark Job");

    // Ship this component's jar to the executors; the relative path assumes
    // the standard OODT distribution layout.
    conf.setJars(new String[]{"../lib/cas-resource-0.8-SNAPSHOT.jar"});
    sc = new SparkContext(conf);
    ssc = new StreamingContext(sc, new Duration(10000));
    this.queue = queue;
}
 
Example 9
Source File: BlurSparkUtil.java    From incubator-retired-blur with Apache License 2.0
public static void packJars(SparkConf conf, Class<?>... clazzes) throws IOException {
  String[] jarPaths = StreamUtil.getJarPaths(clazzes);
  conf.setJars(jarPaths);
}
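StreamUtil.getJarPaths(clazzes) is a Blur helper; presumably it resolves each class to the JAR it was loaded from, making this two-liner a compact alternative to the classpath walk in Example 2.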