Java Code Examples for org.apache.spark.SparkConf#setAppName()

The following examples show how to use org.apache.spark.SparkConf#setAppName(). They are drawn from open-source projects; the source file, project, and license for each snippet are noted above it.
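Before the project examples, here is a minimal, self-contained sketch of the basic pattern. It is illustrative only: the class name, application name, and master URL below are placeholders, not taken from any of the projects that follow.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetAppNameSketch {
    public static void main(String[] args) {
        // setAppName labels the application in the Spark UI, logs, and cluster manager.
        SparkConf conf = new SparkConf()
                .setAppName("setAppName-demo")   // placeholder application name
                .setMaster("local[2]");          // placeholder master; normally supplied via spark-submit
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // ... build and run RDD or streaming jobs here ...
        } finally {
            sc.stop();
        }
    }
}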
Example 1
Source File: SparkRunnerKryoRegistratorTest.java    From beam with Apache License 2.0
private void runSimplePipelineWithSparkContext(SparkConf conf) {
  SparkPipelineOptions options =
      PipelineOptionsFactory.create().as(TestSparkPipelineOptions.class);
  options.setRunner(TestSparkRunner.class);

  conf.set("spark.master", "local");
  conf.setAppName("test");

  JavaSparkContext javaSparkContext = new JavaSparkContext(conf);
  options.setUsesProvidedSparkContext(true);
  options.as(SparkContextOptions.class).setProvidedSparkContext(javaSparkContext);
  Pipeline p = Pipeline.create(options);
  p.apply(Create.of("a")); // some operation to trigger pipeline construction
  p.run().waitUntilFinish();
  javaSparkContext.stop();
}
 
Example 2
Source File: BlurLoadSparkProcessor.java    From incubator-retired-blur with Apache License 2.0
public void run() throws IOException {
  SparkConf conf = new SparkConf();
  conf.setAppName(getAppName());
  conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
  JavaSparkUtil.packProjectJars(conf);
  setupSparkConf(conf);

  JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
  List<JavaDStream<T>> streamsList = getStreamsList(ssc);

  // Union all the streams if there is more than 1 stream
  JavaDStream<T> streams = unionStreams(ssc, streamsList);

  JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
    public Tuple2<String, RowMutation> call(T t) {
      RowMutation rowMutation = convert(t);
      return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
    }
  });

  pairDStream.foreachRDD(getFunction());

  ssc.start();
  ssc.awaitTermination();
}
 
Example 3
Source File: LensAPI.java    From cognition with Apache License 2.0
/**
 * Helper method for creating the Spark context from the given cognition configuration.
 *
 * @return a new, configured Spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();

  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }

  return sc;
}
 
Example 4
Source File: SparkContextProvider.java    From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // set serialization registration required if you want to make sure you registered all your classes
    // some spark internal classes will need to be registered as well
    // config.set("spark.kryo.registrationRequired", "true");


    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
 
Example 5
Source File: SparkAppLauncher.java    From sylph with Apache License 2.0
public ApplicationId run(Job job)
        throws Exception
{
    SparkJobConfig jobConfig = job.getConfig();

    System.setProperty("SPARK_YARN_MODE", "true");
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.driver.extraJavaOptions", "-XX:PermSize=64M -XX:MaxPermSize=128M");
    sparkConf.set("spark.yarn.stagingDir", appHome);
    //-------------
    sparkConf.set("spark.executor.instances", jobConfig.getNumExecutors() + "");   //EXECUTOR_COUNT
    sparkConf.set("spark.executor.memory", jobConfig.getExecutorMemory());  //EXECUTOR_MEMORY
    sparkConf.set("spark.executor.cores", jobConfig.getExecutorCores() + "");

    sparkConf.set("spark.driver.cores", jobConfig.getDriverCores() + "");
    sparkConf.set("spark.driver.memory", jobConfig.getDriverMemory());
    //--------------

    sparkConf.setSparkHome(sparkHome);

    sparkConf.setMaster("yarn");
    sparkConf.setAppName(job.getName());

    sparkConf.set("spark.submit.deployMode", "cluster"); // worked
    //set Depends set spark.yarn.dist.jars and spark.yarn.dist.files
    setDistJars(job, sparkConf);

    String[] args = getArgs();
    ClientArguments clientArguments = new ClientArguments(args);   // spark-2.0.0
    //yarnClient.getConfig().iterator().forEachRemaining(x -> sparkConf.set("spark.hadoop." + x.getKey(), x.getValue()));

    Client appClient = new SylphSparkYarnClient(clientArguments, sparkConf, yarnClient, jobConfig.getQueue());
    return appClient.submitApplication();
}
 
Example 6
Source File: PreprocessSpark.java    From Java-Deep-Learning-Cookbook with MIT License
protected void entryPoint(String[] args) throws Exception {
    JCommander jcmdr = new JCommander(this);
    jcmdr.parse(args);
    //JCommanderUtils.parseArgs(this, args);
    SparkConf conf = new SparkConf();
    conf.setMaster("local[*]");
    conf.setAppName("DL4JTinyImageNetSparkPreproc");
    JavaSparkContext sc = new JavaSparkContext(conf);

    //Create training set
    JavaRDD<String> filePathsTrain = SparkUtils.listPaths(sc, sourceDir + "/train", true, NativeImageLoader.ALLOWED_FORMATS);
    SparkDataUtils.createFileBatchesSpark(filePathsTrain, saveDir, batchSize, sc);

    //Create test set
    JavaRDD<String> filePathsTest = SparkUtils.listPaths(sc, sourceDir + "/test", true, NativeImageLoader.ALLOWED_FORMATS);
    SparkDataUtils.createFileBatchesSpark(filePathsTest, saveDir, batchSize, sc);


    System.out.println("----- Data Preprocessing Complete -----");
}
 
Example 7
Source File: AbstractSparkLayer.java    From spark-streaming-direct-kafka with Apache License 2.0
public SparkConf getSparkConf() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
            config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
    sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());

    if (config.getLocalMode()) {
        sparkConf.setMaster("local[4]");
    }
    return sparkConf;
}
 
Example 8
Source File: SparkStreaming.java    From kafka-spark-avro-example with Apache License 2.0
public static void main(String... args) {
  SparkConf conf = new SparkConf();
  conf.setMaster("local[2]");
  conf.setAppName("Spark Streaming Test Java");

  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

  processStream(ssc, sc);

  ssc.start();
  ssc.awaitTermination();
}
 
Example 9
Source File: SparkTestEnvironment.java    From geowave with Apache License 2.0
@Override
public void setup() throws Exception {
  if (defaultSession == null) {
    final SparkConf addonOptions = new SparkConf();
    addonOptions.setMaster("local[*]");
    addonOptions.setAppName("CoreGeoWaveSparkITs");
    defaultSession = GeoWaveSparkConf.createDefaultSession(addonOptions);
    if (defaultSession == null) {
      LOGGER.error("Unable to create default spark session for tests");
      return;
    }
  }
}
 
Example 10
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Example 11
Source File: SparkTestBase.java    From spark-transformers with Apache License 2.0
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
 
Example 12
Source File: TestNd4jKryoSerialization.java    From deeplearning4j with Apache License 2.0
@Before
public void before() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    sparkConf.set("spark.driver.host", "localhost");
    sparkConf.setAppName("Iris");

    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryo.registrator", "org.nd4j.kryo.Nd4jRegistrator");

    sc = new JavaSparkContext(sparkConf);
}
 
Example 13
Source File: LensTest.java    From cognition with Apache License 2.0
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException, TableExistsException, TableNotFoundException {

  /* Connector conn = instance.getConnector("root", new PasswordToken());
  Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
  for (Map.Entry<Key, Value> entry : scan) {
    System.out.println(entry);
  } */

  SparkConf conf = new SparkConf();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName("test");
  conf.setMaster("local[2]");

  SparkContext sc = new SparkContext(conf);

  CognitionConfiguration pip = new CognitionConfiguration(new AccumuloConfiguration(instance, user, password, true));
  LensAPI lens = new LensAPI(sc, pip);
  Criteria criteria = new Criteria();
  criteria.addKeyword("test");
  criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));
  SchemaAdapter s = new SchemaAdapter();
  s.loadJson("moreover-schema.json");
  criteria.setSchema(s);
  criteria.setAccumuloTable("moreover");
  String json = lens.query(criteria);
  assertEquals("[moreover json]", json);
}
 
Example 14
Source File: SparkRunnerTestUtils.java    From components with Apache License 2.0
public Pipeline createPipeline() {
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
 
Example 15
Source File: SparkIntegrationTestResource.java    From components with Apache License 2.0
/**
 * @return a clean spark configuration created from the options in this resource.
 */
public SparkConf createSparkConf(String appName) {
    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster(sm);
    // conf.set("spark.driver.host", "10.42.30.148");
    for (Map.Entry<String, String> kv : hadoopConf.entrySet())
        conf.set("spark.hadoop." + kv.getKey(), kv.getValue());
    return conf;
}
 
Example 16
Source File: Spark.java    From tinkerpop with Apache License 2.0
public static SparkContext create(final SparkConf sparkConf) {
    if (isContextNullOrStopped()) {
        sparkConf.setAppName("Apache TinkerPop's Spark-Gremlin");
        CONTEXT = SparkContext.getOrCreate(sparkConf);
    }
    return CONTEXT;
}
 
Example 17
Source File: SparkRefine.java    From p3-batchrefine with Apache License 2.0
public SparkRefine() {
    LogManager.getRootLogger().setLevel(Level.ERROR);
    fLogger.setLevel(Level.INFO);
    SparkConf sparkConfiguration = new SparkConf(true);
    sparkConfiguration.setAppName(APP_NAME);
    sparkConfiguration.setMaster(sparkConfiguration.get("spark.master", "local"));
    sparkConfiguration.set("spark.task.cpus", sparkConfiguration.get("spark.executor.cores", "1"));
    sparkContext = new JavaSparkContext(sparkConfiguration);
    new ConsoleProgressBar(sparkContext.sc());
}
 
Example 18
Source File: WordCountJava.java    From BigDataArchitect with Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {

        SparkConf conf = new SparkConf();
        conf.setAppName("java-wordcount");
        conf.setMaster("local");

        JavaSparkContext jsc = new JavaSparkContext(conf);

        JavaRDD<String> fileRDD = jsc.textFile("bigdata-spark/data/testdata.txt");

        JavaRDD<String> words = fileRDD.flatMap(new FlatMapFunction<String, String>() {
            public Iterator<String> call(String line) throws Exception {
                return Arrays.asList(line.split(" ")).iterator();
            }
        });

        JavaPairRDD<String, Integer> pairWord = words.mapToPair(new PairFunction<String, String, Integer>() {
            public Tuple2<String, Integer> call(String word) throws Exception {
                return new Tuple2<String, Integer>(word, 1);
            }
        });

        JavaPairRDD<String, Integer> res = pairWord.reduceByKey(new Function2<Integer, Integer, Integer>() {
            public Integer call(Integer oldV, Integer v) throws Exception {
                return oldV + v;
            }
        });

        res.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> value) throws Exception {
                System.out.println(value._1+"\t"+value._2);
            }
        });

    }
 
Example 19
Source File: WordCountingAppWithCheckpoint.java    From tutorials with MIT License
public static void main(String[] args) throws InterruptedException {

        Logger.getLogger("org")
            .setLevel(Level.OFF);
        Logger.getLogger("akka")
            .setLevel(Level.OFF);

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList("messages");

        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local[2]");
        sparkConf.setAppName("WordCountingAppWithCheckpoint");
        sparkConf.set("spark.cassandra.connection.host", "127.0.0.1");

        JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        sparkContext = streamingContext.sparkContext();

        streamingContext.checkpoint("./.checkpoint");

        JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(streamingContext, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String> Subscribe(topics, kafkaParams));

        JavaPairDStream<String, String> results = messages.mapToPair(record -> new Tuple2<>(record.key(), record.value()));

        JavaDStream<String> lines = results.map(tuple2 -> tuple2._2());

        JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split("\\s+"))
            .iterator());

        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);

        JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function((word, one, state) -> {
            int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<>(word, sum);
            state.update(sum);
            return output;
        }));

        cumulativeWordCounts.foreachRDD(javaRdd -> {
            List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
            for (Tuple2<String, Integer> tuple : wordCountList) {
                List<Word> wordList = Arrays.asList(new Word(tuple._1, tuple._2));
                JavaRDD<Word> rdd = sparkContext.parallelize(wordList);
                javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class))
                    .saveToCassandra();
            }
        });

        streamingContext.start();
        streamingContext.awaitTermination();
    }
 
Example 20
Source File: LocalPropertyTest.java    From tinkerpop with Apache License 2.0
@Test
public void shouldSetThreadLocalProperties() throws Exception {
    final String testName = "ThreadLocalProperties";
    final String rddName = TestHelper.makeTestDataDirectory(LocalPropertyTest.class, UUID.randomUUID().toString());
    final Configuration configuration = new BaseConfiguration();
    configuration.setProperty("spark.master", "local[4]");
    configuration.setProperty("spark.serializer", GryoSerializer.class.getCanonicalName());
    configuration.setProperty(Graph.GRAPH, HadoopGraph.class.getName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, SparkHadoopGraphProvider.PATHS.get("tinkerpop-modern-v3d0.kryo"));
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, GryoInputFormat.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, PersistedOutputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, false);
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "22");
    Graph graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.EDGES)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(Computer.compute(SparkGraphComputer.class)),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ////////
    SparkConf sparkConfiguration = new SparkConf();
    sparkConfiguration.setAppName(testName);
    ConfUtil.makeHadoopConfiguration(configuration).forEach(entry -> sparkConfiguration.set(entry.getKey(), entry.getValue()));
    JavaSparkContext sparkContext = new JavaSparkContext(SparkContext.getOrCreate(sparkConfiguration));
    JavaSparkStatusTracker statusTracker = sparkContext.statusTracker();
    assertTrue(statusTracker.getJobIdsForGroup("22").length >= 1);
    assertTrue(Spark.hasRDD(Constants.getGraphLocation(rddName)));
    ///////
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_READER, PersistedInputRDD.class.getCanonicalName());
    configuration.setProperty(Constants.GREMLIN_HADOOP_INPUT_LOCATION, rddName);
    configuration.setProperty(Constants.GREMLIN_HADOOP_GRAPH_WRITER, null);
    configuration.setProperty(Constants.GREMLIN_HADOOP_OUTPUT_LOCATION, null);

    // Note: this value should always have been set to true, but it was false from the initial commit.
    // Interestingly, the last assertion always passed up to Spark 2.3.x, when it started to fail. That
    // assertion likely should never have passed, which suggests a bug in Spark 2.2.x that was resolved
    // in 2.3.x.
    configuration.setProperty(Constants.GREMLIN_SPARK_PERSIST_CONTEXT, true);
    configuration.setProperty("spark.jobGroup.id", "44");
    graph = GraphFactory.open(configuration);
    graph.compute(SparkGraphComputer.class)
            .result(GraphComputer.ResultGraph.NEW)
            .persist(GraphComputer.Persist.NOTHING)
            .program(TraversalVertexProgram.build()
                    .traversal(graph.traversal().withComputer(SparkGraphComputer.class),
                            "gremlin-groovy",
                            "g.V()").create(graph)).submit().get();
    ///////
    assertTrue(statusTracker.getJobIdsForGroup("44").length >= 1);
}