Java Code Examples for org.apache.spark.SparkConf#setMaster()

The following examples show how to use org.apache.spark.SparkConf#setMaster(). Each example is taken from an open-source project; the source file, originating project, and license are noted above it.
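Before the project examples, here is a minimal, self-contained sketch of the call for orientation. It is an illustrative snippet rather than code from any of the projects below; the class name SetMasterSketch, the master URL "local[2]", and the app name are placeholder values.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SetMasterSketch {
    public static void main(String[] args) {
        // "local[2]" runs Spark locally with two threads; in a real deployment the
        // master would be a cluster URL such as "spark://host:7077" or "yarn",
        // or it can be left unset here and supplied via spark-submit --master.
        SparkConf conf = new SparkConf()
                .setMaster("local[2]")
                .setAppName("SetMasterSketch");

        JavaSparkContext sc = new JavaSparkContext(conf);
        // ... build RDDs and run jobs here ...
        sc.close();
    }
}
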
Example 1
Source File: LensAPI.java    From cognition with Apache License 2.0
/**
 * Helper method for creating the spark context from the given cognition configuration
 * @return a new configured spark context
 */
public SparkContext createSparkContext() {
  SparkConf conf = new SparkConf();

  Configuration config = cognition.getProperties();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName(config.getString("app.name"));
  conf.setMaster(config.getString("master"));

  Iterator<String> iterator = config.getKeys("spark");
  while (iterator.hasNext()) {
    String key = iterator.next();
    conf.set(key, config.getString(key));
  }

  SparkContext sc = new SparkContext(conf);
  for (String jar : config.getStringArray("jars")) {
    sc.addJar(jar);
  }

  return sc;
}
 
Example 2
Source File: SparkContextProvider.java    From rdf2x with Apache License 2.0
/**
 * Provide a {@link JavaSparkContext} based on default settings
 *
 * @return a {@link JavaSparkContext} based on default settings
 */
public static JavaSparkContext provide() {
    SparkConf config = new SparkConf()
            .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
            .registerKryoClasses(getSerializableClasses());

    if (!config.contains("spark.app.name")) {
        config.setAppName("RDF2X");
    }
    if (!config.contains("spark.master")) {
        config.setMaster("local");
    }

    // set serialization registration required if you want to make sure you registered all your classes
    // some spark internal classes will need to be registered as well
    // config.set("spark.kryo.registrationRequired", "true");


    log.info("Getting Spark Context for config: \n{}", config.toDebugString());
    return new JavaSparkContext(config);
}
 
Example 3
Source File: SparkUtils.java    From BigDataPlatform with GNU General Public License v3.0
/**
 * Decides how to set the SparkConf master
 * based on whether local-test mode is currently configured.
 */
public static void setMaster(SparkConf conf) {
	boolean local = ConfigurationManager.getBoolean(Constants.SPARK_LOCAL);
	if(local) {
		conf.setMaster("local");  
	}  
}
 
Example 4
Source File: SparkTestBase.java    From spark-transformers with Apache License 2.0
@Before
public void setup() {
    SparkConf sparkConf = new SparkConf();
    String master = "local[2]";
    sparkConf.setMaster(master);
    sparkConf.setAppName("Local Spark Unit Test");
    sc = new JavaSparkContext(new SparkContext(sparkConf));
    sqlContext = new SQLContext(sc);
}
 
Example 5
Source File: JavaDemo.java    From spark-on-cassandra-quickstart with Apache License 2.0
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Syntax: com.datastax.spark.demo.JavaDemo <Spark Master URL> <Cassandra contact point>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf();
    conf.setAppName("Java API demo");
    conf.setMaster(args[0]);
    conf.set("spark.cassandra.connection.host", args[1]);

    JavaDemo app = new JavaDemo(conf);
    app.run();
}
 
Example 6
Source File: TestNd4jKryoSerialization.java    From nd4j with Apache License 2.0
@Before
public void before() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    sparkConf.set("spark.driver.host", "localhost");
    sparkConf.setAppName("Iris");

    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryo.registrator", "org.nd4j.Nd4jRegistrator");

    sc = new JavaSparkContext(sparkConf);
}
 
Example 7
Source File: LensTest.java    From cognition with Apache License 2.0
@Test
public void test() throws AccumuloSecurityException, IOException, AccumuloException, TableExistsException, TableNotFoundException {

/*Connector conn = instance.getConnector("root", new PasswordToken());
Scanner scan = conn.createScanner("moreover", Authorizations.EMPTY);
for(Map.Entry<Key, Value> entry : scan){
	System.out.println(entry);
}*/

  SparkConf conf = new SparkConf();

  conf.set("spark.serializer", KryoSerializer.class.getName());
  conf.setAppName("test");
  conf.setMaster("local[2]");

  SparkContext sc = new SparkContext(conf);

  CognitionConfiguration pip = new CognitionConfiguration(new AccumuloConfiguration(instance, user, password, true));
  LensAPI lens = new LensAPI(sc, pip);
  Criteria criteria = new Criteria();
  criteria.addKeyword("test");
  criteria.setDates(Instant.parse("2015-10-20T09:19:12Z"), Instant.parse("2015-10-20T09:19:13Z"));
  SchemaAdapter s = new SchemaAdapter();
  s.loadJson("moreover-schema.json");
  criteria.setSchema(s);
  criteria.setAccumuloTable("moreover");
  String json = lens.query(criteria);
  assertEquals("[moreover json]", json);
}
 
Example 8
Source File: SparkStreaming.java    From kafka-spark-avro-example with Apache License 2.0
public static void main(String... args) {
  SparkConf conf = new SparkConf();
  conf.setMaster("local[2]");
  conf.setAppName("Spark Streaming Test Java");

  JavaSparkContext sc = new JavaSparkContext(conf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, Durations.seconds(10));

  processStream(ssc, sc);

  ssc.start();
  ssc.awaitTermination();
}
 
Example 9
Source File: AbstractSparkLayer.java    From spark-streaming-direct-kafka with Apache License 2.0
public SparkConf getSparkConf() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition",
            config.getSparkStreamingKafkaMaxRatePerPartition()); // rate limiting
    sparkConf.setAppName("StreamingEngine-" + config.getTopicSet().toString() + "-" + config.getNamespace());

    if (config.getLocalMode()) {
        sparkConf.setMaster("local[4]");
    }
    return sparkConf;
}
 
Example 10
Source File: KafkaProcessingApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaDataProcessingActivityLevel() throws Exception {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    //run main class
    String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_ACTIVITY, "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY, "-d", "|", "-sr", "false", "-dl", "activity", "-sm", "overwrite", "-of", "parquet", "-wd", "./src/test/resources/config/kafka_processing_activity/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();

    // run main class
    KafkaProcessingApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read()
            .option("inferSchema", "true")
            .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_ACTIVITY + "/result/parquet");

    //check that dataset contains 12 lines
    assertEquals(12, importedDataset.count());

    //check that dataset contains 43 columns
    assertEquals(43, importedDataset.columns().length);

    //check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    System.out.println(hash);
    assertEquals("A8BBFC3B17C00C40C9883DA1F396D453", hash);

    //close Spark session
    sparkSession.close();
}
 
Example 11
Source File: CSVImportAndProcessingApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@BeforeClass
public static void setUpBeforeClass() throws IOException {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    String args[] = {"-fs", TEST_INPUT_FILE_NAME, "-fd", TEST_OUTPUT_FILE_PATH, "-d", ";", "-sr", "true", "-sm", "overwrite", "-of", "csv", "-wd", "./src/test/resources/config/csv/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();

    // run main class
    CSVImportAndProcessingApplication.main(args);

    //read result csv
    BufferedReader resultFileReader = new BufferedReader(new FileReader(new File(TEST_OUTPUT_FILE_NAME)));

    headerValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    firstLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    secondLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    thirdLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    fourthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    fifthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);

    //result should only contain 5 value lines
    try {
        sixthLineValues = resultFileReader.readLine().split(RESULT_FILE_DELIMITER);
    } catch (NullPointerException e) {
        //expected, so continue. will be tested later
    }

    resultFileReader.close();
}
 
Example 12
Source File: TestNd4jKryoSerialization.java    From deeplearning4j with Apache License 2.0
@Before
public void before() {
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    sparkConf.set("spark.driver.host", "localhost");
    sparkConf.setAppName("Iris");

    sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    sparkConf.set("spark.kryo.registrator", "org.nd4j.kryo.Nd4jRegistrator");

    sc = new JavaSparkContext(sparkConf);
}
 
Example 13
Source File: SparkScheduler.java    From oodt with Apache License 2.0
public SparkScheduler(JobQueue queue) {
    SparkConf conf = new SparkConf();
    conf.setMaster(System.getProperty("resource.runner.spark.host","local"));
    conf.setAppName("OODT Spark Job");

    URL location = SparkScheduler.class.getResource('/'+SparkScheduler.class.getName().replace('.', '/')+".class");
    conf.setJars(new String[]{"../lib/cas-resource-0.8-SNAPSHOT.jar"});
    sc = new SparkContext(conf);
    ssc = new StreamingContext(sc,new Duration(10000));
    this.queue = queue;
}
 
Example 14
Source File: GeoWaveSparkConf.java    From geowave with Apache License 2.0
public static SparkConf getDefaultConfig() {
  SparkConf defaultConfig = new SparkConf();
  defaultConfig = defaultConfig.setMaster("yarn");
  defaultConfig =
      defaultConfig.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
  defaultConfig =
      defaultConfig.set(
          "spark.kryo.registrator",
          "org.locationtech.geowave.analytic.spark.GeoWaveRegistrator");
  return defaultConfig;
}
 
Example 15
Source File: SparkRefine.java    From p3-batchrefine with Apache License 2.0
public SparkRefine() {
    LogManager.getRootLogger().setLevel(Level.ERROR);
    fLogger.setLevel(Level.INFO);
    SparkConf sparkConfiguration = new SparkConf(true);
    sparkConfiguration.setAppName(APP_NAME);
    sparkConfiguration.setMaster(sparkConfiguration.get("spark.master", "local"));
    sparkConfiguration.set("spark.task.cpus", sparkConfiguration.get("spark.executor.cores", "1"));
    sparkContext = new JavaSparkContext(sparkConfiguration);
    new ConsoleProgressBar(sparkContext.sc());
}
 
Example 16
Source File: SparkUtils.java    From SparkDemo with MIT License
public static SparkConf getRemoteSparkConf(Class clazz) {
    SparkConf conf = new SparkConf().setAppName(clazz.getName());
    conf.setMaster(Constant.SPARK_REMOTE_SERVER_ADDRESS);
    conf.set("deploy-mode", "client");
    return conf;
}
 
Example 17
Source File: KafkaProcessingApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaDataProcessingProcessLevel() throws Exception {
    //System.setProperty("hadoop.home.dir", "C:\\Users\\b60\\Desktop\\hadoop-2.6.0\\hadoop-2.6.0");

    //run main class
    String args[] = {"-fs", DATA_PROCESSING_TEST_INPUT_DIRECTORY_PROCESS, "-fd", DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS, "-d", "|", "-sr", "false", "-sm", "overwrite", "-of", "parquet", "-wd", "./src/test/resources/config/kafka_processing_process/"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();

    // run main class
    KafkaProcessingApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read()
            .option("inferSchema", "true")
            .load(DATA_PROCESSING_TEST_OUTPUT_DIRECTORY_PROCESS + "/result/parquet");

    //check that dataset contains 4 lines
    assertEquals(4, importedDataset.count());

    //check that dataset contains 42 columns
    assertEquals(42, importedDataset.columns().length);

    //convert rows to string
    String[] resultLines = (String[]) importedDataset.map(row -> row.mkString(), Encoders.STRING()).collectAsList().toArray();
    for(String l : resultLines) {
        System.out.println(l);
    }

    //check if hashes of line values are correct
    //kept in for easier amendment after test case change
//        System.out.println(DigestUtils.md5Hex(resultLines[0]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[1]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[2]).toUpperCase());
//        System.out.println(DigestUtils.md5Hex(resultLines[3]).toUpperCase());

    assertEquals("9088849D6374163C3E9DACB3090D4E56", DigestUtils.md5Hex(resultLines[0]).toUpperCase());
    assertEquals("415A0A505F9A32002C1342171E7649F9", DigestUtils.md5Hex(resultLines[1]).toUpperCase());
    assertEquals("C83F9CC0618D7FA50D63753FBC429188", DigestUtils.md5Hex(resultLines[2]).toUpperCase());
    assertEquals("0559C383855FDE566069B483188E06C0", DigestUtils.md5Hex(resultLines[3]).toUpperCase());

    //close Spark session
    sparkSession.close();
}
 
Example 18
Source File: Spark.java    From tinkerpop with Apache License 2.0
public static SparkContext create(final String master) {
    final SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster(master);
    return Spark.create(sparkConf);
}
 
Example 19
Source File: UsingBlurRDD.java    From incubator-retired-blur with Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) throws IOException {
  SparkConf sparkConf = new SparkConf();
  sparkConf.setAppName("test");
  sparkConf.setMaster("local[2]");
  BlurSparkUtil.packJars(sparkConf, UsingBlurRDD.class);
  JavaSparkContext context = new JavaSparkContext(sparkConf);

  Iface client = BlurClient.getClient("127.0.0.1:40020");
  BlurRDD blurRDD = new BlurRDD(client, sparkConf);
  String table = "test1234";
  final String field = "fam0.col0";

  for (int i = 0; i < 1; i++) {
    long s = System.nanoTime();
    JavaRDD<String> rdd = blurRDD.executeStream(context, table, new StreamFunction<String>() {
      @Override
      public void call(IndexContext indexContext, StreamWriter<String> writer) throws Exception {
        IndexReader indexReader = indexContext.getIndexReader();
        for (AtomicReaderContext atomicReaderContext : indexReader.leaves()) {
          AtomicReader reader = atomicReaderContext.reader();
          Terms terms = reader.fields().terms(field);
          if (terms != null) {
            TermsEnum termsEnum = terms.iterator(null);
            BytesRef ref;
            while ((ref = termsEnum.next()) != null) {
              writer.write(ref.utf8ToString());
            }
          }
        }
      }
    });
    long count = rdd.distinct().count();
    long e = System.nanoTime();

    System.out.println(count + " " + (e - s) / 1000000.0 + " ms");

  }
  // Iterator<String> iterator = rdd.distinct().toLocalIterator();
  // while (iterator.hasNext()) {
  // System.out.println(iterator.next());
  // }
  context.close();
}
 
Example 20
Source File: WordCountingAppWithCheckpoint.java    From tutorials with MIT License
public static void main(String[] args) throws InterruptedException {

        Logger.getLogger("org")
            .setLevel(Level.OFF);
        Logger.getLogger("akka")
            .setLevel(Level.OFF);

        Map<String, Object> kafkaParams = new HashMap<>();
        kafkaParams.put("bootstrap.servers", "localhost:9092");
        kafkaParams.put("key.deserializer", StringDeserializer.class);
        kafkaParams.put("value.deserializer", StringDeserializer.class);
        kafkaParams.put("group.id", "use_a_separate_group_id_for_each_stream");
        kafkaParams.put("auto.offset.reset", "latest");
        kafkaParams.put("enable.auto.commit", false);

        Collection<String> topics = Arrays.asList("messages");

        SparkConf sparkConf = new SparkConf();
        sparkConf.setMaster("local[2]");
        sparkConf.setAppName("WordCountingAppWithCheckpoint");
        sparkConf.set("spark.cassandra.connection.host", "127.0.0.1");

        JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        sparkContext = streamingContext.sparkContext();

        streamingContext.checkpoint("./.checkpoint");

        JavaInputDStream<ConsumerRecord<String, String>> messages = KafkaUtils.createDirectStream(streamingContext, LocationStrategies.PreferConsistent(), ConsumerStrategies.<String, String> Subscribe(topics, kafkaParams));

        JavaPairDStream<String, String> results = messages.mapToPair(record -> new Tuple2<>(record.key(), record.value()));

        JavaDStream<String> lines = results.map(tuple2 -> tuple2._2());

        JavaDStream<String> words = lines.flatMap(x -> Arrays.asList(x.split("\\s+"))
            .iterator());

        JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (i1, i2) -> i1 + i2);

        JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> cumulativeWordCounts = wordCounts.mapWithState(StateSpec.function((word, one, state) -> {
            int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
            Tuple2<String, Integer> output = new Tuple2<>(word, sum);
            state.update(sum);
            return output;
        }));

        cumulativeWordCounts.foreachRDD(javaRdd -> {
            List<Tuple2<String, Integer>> wordCountList = javaRdd.collect();
            for (Tuple2<String, Integer> tuple : wordCountList) {
                List<Word> wordList = Arrays.asList(new Word(tuple._1, tuple._2));
                JavaRDD<Word> rdd = sparkContext.parallelize(wordList);
                javaFunctions(rdd).writerBuilder("vocabulary", "words", mapToRow(Word.class))
                    .saveToCassandra();
            }
        });

        streamingContext.start();
        streamingContext.awaitTermination();
    }