org.apache.spark.SparkConf Java Examples

The following examples show how to use org.apache.spark.SparkConf. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
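Before the individual examples, here is a minimal, self-contained sketch of the typical SparkConf lifecycle: build the configuration, set the master URL and application name, set any further properties as plain key/value strings, and hand the configuration to a JavaSparkContext. The class name and the serializer property below are illustrative choices for this sketch, not taken from any of the projects listed here.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkConfOverview {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf()
                .setMaster("local[*]")            // run locally on all available cores
                .setAppName("SparkConfOverview")
                // any other setting is a plain key/value pair
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

        // The context clones the configuration, so later changes to 'conf'
        // do not affect the running context.
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            System.out.println("spark.app.name = " + sc.getConf().get("spark.app.name"));
        } finally {
            sc.close();
        }
    }
}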
Example #1
Source File: SparkTrainWorker.java    From ytk-learn with MIT License
public SparkTrainWorker(
                        SparkConf conf,
                        String modelName,
                        String configPath,
                        String configFile,
                        String pyTransformScript,
                        boolean needPyTransform,
                        String loginName,
                        String hostName,
                        int hostPort,
                        int slaveNum,
                        int threadNum) throws Exception {
    super(modelName, configPath, configFile, pyTransformScript, needPyTransform,
            loginName, hostName, hostPort, threadNum);
    this.slaveNum = slaveNum;

    conf.set("spark.files.fetchTimeout", "3200");
    conf.set("spark.network.timeout", "3200");
    conf.set("spark.dynamicAllocation.executorIdleTimeout", "3200");
    conf.set("spark.dynamicAllocation.schedulerBacklogTimeout", "300");
    conf.set("spark.core.connection.auth.wait.timeout", "3200");
    conf.set("spark.memory.fraction", "0.01");
}
 
Example #2
Source File: SparkDistributor.java    From DataGenerator with Apache License 2.0
@Override
public void distribute(final List<Frontier> frontierList) {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("dg-spark").setMaster(masterURL));

    generatedMaps = sc
            .parallelize(frontierList)
            .flatMap(new FlatMapFunction<Frontier, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Frontier frontier) {
                    LinkedList<Map<String, String>> storage = new LinkedList<>();
                    frontier.searchForScenarios(new CatchAndStoreProcessing(storage), searchExitFlag);

                    return storage;
                }
            })
            .flatMap(new FlatMapFunction<Map<String, String>, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Map<String, String> initialVars) {
                    return SparkDistributor.dataConsumer.transformAndReturn(initialVars);
                }
            });
}
 
Example #3
Source File: NotFilterExample.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {

    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(NotFilterExample.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    long count = MmtfReader
            .readSequenceFile(path, sc) // read MMTF Hadoop sequence file
            .filter(new ContainsLProteinChain()) // retain PDB entries that exclusively contain L-peptide chains
            .filter(new NotFilter(new ContainsDnaChain())) // a NotFilter reverses a filter: exclude entries with any DNA chain
            .count();

    System.out.println("# PDB entries with L-protein and without DNA chains: " + count);
    sc.close();
}
 
Example #4
Source File: Word2VecPerformerVoid.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
 
Example #5
Source File: PolymerCompositionTest.java    From mmtf-spark with Apache License 2.0
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain (with std. nucleotides)
    // 5L2G: DNA chain (with non-std. nucleotide)
    // 2MK1: D-saccharide
    // 5UZT: RNA chain (with std. nucleotides)
    // 1AA6: contains SEC, selenocysteine (21st amino acid)
    // 1NTH: contains PYL, pyrrolysine (22nd amino acid)
    List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","5UZT","1AA6","1NTH");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
 
Example #6
Source File: ReduceByKeyAndWindow.java    From sparkResearch with Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("reduceByKeyAndWindow").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10));
    // set the checkpoint directory
    streamingContext.checkpoint("hdfs://localhost:9300");
    // data source: a socket text stream
    JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080);

    JavaPairDStream<String, Long> ipPairDstream = dStream.mapToPair(new GetIp());

    JavaPairDStream<String, Long> result = ipPairDstream.reduceByKeyAndWindow(new AddLongs(),
            new SubtractLongs(), Durations.seconds(30), Durations.seconds(10));

    try {
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
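The helper classes GetIp, AddLongs, and SubtractLongs are not shown in this snippet. Hypothetical implementations, inferred only from how they are used above (PairFunction and Function2 come from org.apache.spark.api.java.function, Tuple2 from Scala), might look like this:

// Hypothetical helpers, inferred from their usage above.
class GetIp implements PairFunction<String, String, Long> {
    @Override
    public Tuple2<String, Long> call(String line) {
        // assume the IP address is the first whitespace-separated token of each line
        return new Tuple2<>(line.split(" ")[0], 1L);
    }
}

class AddLongs implements Function2<Long, Long, Long> {
    @Override
    public Long call(Long a, Long b) {
        return a + b; // adds counts entering the window
    }
}

class SubtractLongs implements Function2<Long, Long, Long> {
    @Override
    public Long call(Long a, Long b) {
        return a - b; // removes counts leaving the window
    }
}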
 
Example #7
Source File: TestManifestFileSerialization.java    From iceberg with Apache License 2.0
@Test
public void testManifestFileKryoSerialization() throws IOException {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  ManifestFile manifest = writeManifest(FILE_A);

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, manifest);
    kryo.writeClassAndObject(out, manifest.copy());
    kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 3; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile);
      checkManifestFile(manifest, (ManifestFile) obj);
    }
  }
}
 
Example #8
Source File: ContainsDProteinChainTest.java    From mmtf-spark with Apache License 2.0
@Before
public void setUp() throws Exception {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDProteinChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    // 2V5W: Chain C: GLY-GLY-GLY matches both D-protein and L-protein
    // 5XDP: L-protein and D-protein (modified)
    // 5GOD: 2 L-protein + 2 D-protein
    List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","2V5W","5XDP","5GOD");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
 
Example #9
Source File: JavaKernelDensityEstimationExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // an RDD of sample data
    JavaRDD<Double> data = jsc.parallelize(
      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));

    // Construct the density estimator with the sample data
    // and a standard deviation for the Gaussian kernels
    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);

    // Find density estimates for the given values
    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});

    System.out.println(Arrays.toString(densities));
    // $example off$

    jsc.stop();
  }
 
Example #10
Source File: SparkExtensionTest.java    From component-runtime with Apache License 2.0
public static void main(final String[] args) {
    final SparkConf conf =
            new SparkConf().setAppName(SparkClusterRuleTest.SubmittableMain.class.getName()).setMaster(args[0]);
    final JavaSparkContext context = new JavaSparkContext(conf);

    context
            .parallelize(singletonList("a b"))
            .flatMap((FlatMapFunction<String, String>) text -> asList(text.split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey((a, b) -> a + b)
            .foreach(result -> {
                try (final FileWriter writer = new FileWriter(args[1], true)) {
                    writer.write(result._1 + " -> " + result._2 + '\n');
                }
            });
}
 
Example #11
Source File: WordCount.java    From Apache-Spark-2x-for-Java-Developers with MIT License
public static void wordCountJava8( String filename )
{
    // Define a configuration to use to interact with Spark
    SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count App");

    // Create a Java version of the Spark Context from the configuration
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Load the input data, which is a text file read from the command line
    JavaRDD<String> input = sc.textFile( filename );

    // Java 8 with lambdas: split the input lines into words
    JavaRDD<String> words = input.flatMap(s -> Arrays.asList(s.split(" ")).iterator());

    // Java 8 with lambdas: map each word to a (word, 1) pair, then sum the counts per word
    JavaPairRDD<String, Integer> counts = words.mapToPair(t -> new Tuple2<>(t, 1)).reduceByKey((x, y) -> x + y);

    // Save the word count back out to a text file, causing evaluation.
    counts.saveAsTextFile( "output" );
}
 
Example #12
Source File: SparkApplication.java    From kylin-on-parquet-v2 with Apache License 2.0
private void autoSetSparkConf(SparkConf sparkConf) throws Exception {
    logger.info("Start set spark conf automatically.");
    SparkConfHelper helper = new SparkConfHelper();
    helper.setFetcher(KylinBuildEnv.get().clusterInfoFetcher());
    Path shareDir = config.getJobTmpShareDir(project, jobId);
    String contentSize = chooseContentSize(shareDir);

    // add content size with unit
    helper.setOption(SparkConfHelper.SOURCE_TABLE_SIZE, contentSize);
    helper.setOption(SparkConfHelper.LAYOUT_SIZE, Integer.toString(layoutSize));
    Map<String, String> configOverride = config.getSparkConfigOverride();
    helper.setConf(SparkConfHelper.DEFAULT_QUEUE, configOverride.get(SparkConfHelper.DEFAULT_QUEUE));
    helper.setOption(SparkConfHelper.REQUIRED_CORES, calculateRequiredCores());
    helper.setConf(SparkConfHelper.COUNT_DISTICT, hasCountDistinct().toString());
    helper.generateSparkConf();
    helper.applySparkConf(sparkConf);
}
 
Example #13
Source File: WordCount.java    From tutorials with MIT License
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }
    SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount")
        .setMaster("local");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
    JavaPairRDD<String, Integer> wordAsTuple = words.mapToPair(word -> new Tuple2<>(word, 1));
    JavaPairRDD<String, Integer> wordWithCount = wordAsTuple.reduceByKey((Integer i1, Integer i2)->i1 + i2);
    List<Tuple2<String, Integer>> output = wordWithCount.collect();
    for (Tuple2<?, ?> tuple : output) {
         System.out.println(tuple._1() + ": " + tuple._2());
    }
    ctx.stop();
}
 
Example #14
Source File: StreamingRsvpsDStreamCountWindow.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {

        System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

        final SparkConf conf = new SparkConf()
                .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
                .setAppName(APPLICATION_NAME)
                .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
                .set("spark.streaming.kafka.consumer.cache.enabled", "false");

        final JavaStreamingContext streamingContext
                = new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

        streamingContext.checkpoint(CHECKPOINT_FOLDER);

        final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
                );
                
        // transformations, streaming algorithms, etc
        JavaDStream<Long> countStream  
            = meetupStream.countByWindow(
                 new Duration(WINDOW_LENGTH_MS), 
                 new Duration(SLIDING_INTERVAL_MS));        

        countStream.foreachRDD((JavaRDD<Long> countRDD) -> {                
            MongoSpark.save(        
                    countRDD.map(
                        r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")
                    )
            );            
        });
        
        // some time later, after outputs have completed
        meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {        
            OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();            

            ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
        });
        
        streamingContext.start();
        streamingContext.awaitTermination();    
    }
 
Example #15
Source File: SparkSequenceVectorsTest.java    From deeplearning4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    if (sequencesCyclic == null) {
        sequencesCyclic = new ArrayList<>();

        // 10 sequences in total
        for (int c = 0; c < 10; c++) {

            Sequence<VocabWord> sequence = new Sequence<>();

            for (int e = 0; e < 10; e++) {
                // we will have 9 equal elements, with total frequency of 10
                sequence.addElement(new VocabWord(1.0, "" + e, (long) e));
            }

            // and 1 element with frequency of 20
            sequence.addElement(new VocabWord(1.0, "0", 0L));
            sequencesCyclic.add(sequence);
        }
    }

    SparkConf sparkConf = new SparkConf().setMaster("local[8]")
            .set("spark.driver.host", "localhost")
            .setAppName("SeqVecTests");
    sc = new JavaSparkContext(sparkConf);
}
 
Example #16
Source File: JavaHypothesisTestingKolmogorovSmirnovTestExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf =
      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
    KolmogorovSmirnovTestResult testResult =
      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
    // summary of the test including the p-value, test statistic, and null hypothesis
    // if our p-value indicates significance, we can reject the null hypothesis
    System.out.println(testResult);
    // $example off$

    jsc.stop();
  }
 
Example #17
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java    From net.jgp.labs.spark with Apache License 2.0
private void start() {
  // Create a local StreamingContext with two working threads and a
  // batch interval of 5 seconds
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
      .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
      .getInputDirectory());

  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example #18
Source File: SparkTestEnvironment.java    From geowave with Apache License 2.0
@Override
public void setup() throws Exception {
  if (defaultSession == null) {
    final SparkConf addonOptions = new SparkConf();
    addonOptions.setMaster("local[*]");
    addonOptions.setAppName("CoreGeoWaveSparkITs");
    defaultSession = GeoWaveSparkConf.createDefaultSession(addonOptions);
    if (defaultSession == null) {
      LOGGER.error("Unable to create default spark session for tests");
      return;
    }
  }
}
 
Example #19
Source File: SparkContextFactory.java    From beam with Apache License 2.0
private static JavaSparkContext createSparkContext(SparkContextOptions contextOptions) {
  if (usesProvidedSparkContext) {
    LOG.info("Using a provided Spark Context");
    JavaSparkContext jsc = contextOptions.getProvidedSparkContext();
    if (jsc == null || jsc.sc().isStopped()) {
      LOG.error("The provided Spark context " + jsc + " was not created or was stopped");
      throw new RuntimeException("The provided Spark context was not created or was stopped");
    }
    return jsc;
  } else {
    LOG.info("Creating a brand new Spark Context.");
    SparkConf conf = new SparkConf();
    if (!conf.contains("spark.master")) {
      // set master if not set.
      conf.setMaster(contextOptions.getSparkMaster());
    }

    if (contextOptions.getFilesToStage() != null && !contextOptions.getFilesToStage().isEmpty()) {
      conf.setJars(contextOptions.getFilesToStage().toArray(new String[0]));
    }

    conf.setAppName(contextOptions.getAppName());
    // register immutable collections serializers because the SDK uses them.
    conf.set("spark.kryo.registrator", SparkRunnerKryoRegistrator.class.getName());
    return new JavaSparkContext(conf);
  }
}
 
Example #20
Source File: FileStreamingEx.java    From Apache-Spark-2x-for-Java-Developers with MIT License
public static void main(String[] args) {
    // Windows-specific property if Hadoop is not installed or HADOOP_HOME is not set
    System.setProperty("hadoop.home.dir", "E:\\hadoop");

    SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
    String inputDirectory = "E:\\hadoop\\streamFolder\\";

    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
    // streamingContext.checkpoint("E:\\hadoop\\checkpoint");
    Logger rootLogger = LogManager.getRootLogger();
    rootLogger.setLevel(Level.WARN);

    // stream the directory as a plain text stream
    JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
    streamfile.print();
    streamfile.foreachRDD(rdd -> rdd.foreach(x -> System.out.println(x)));

    // stream the same directory as a Hadoop file stream of (LongWritable, Text) records
    JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
    streamedFile.print();

    streamingContext.start();
    try {
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
 
Example #21
Source File: JavaEmbeddedIgniteRDDWithLocalStoreSelfTest.java    From ignite with Apache License 2.0
/**
 * Creates default spark context
 *
 * @return Context.
 */
private JavaSparkContext createContext() {
    SparkConf conf = new SparkConf();

    conf.set("spark.executor.instances", String.valueOf(GRID_CNT));

    return new JavaSparkContext("local[" + GRID_CNT + "]", "test", conf);
}
 
Example #22
Source File: SparkSession.java    From incubator-nemo with Apache License 2.0
@Override
public synchronized SparkSession getOrCreate() {
  if (!options.containsKey("spark.master")) { // default spark_master option.
    return this.master("local[*]").getOrCreate();
  }
  if (!options.containsKey("spark.driver.allowMultipleContexts")) {
    return this.config("spark.driver.allowMultipleContexts", "true").getOrCreate();
  }

  UserGroupInformation.setLoginUser(UserGroupInformation.createRemoteUser("ubuntu"));

  // Set up spark context with given options.
  final SparkConf sparkConf = new SparkConf();
  if (!options.containsKey("spark.app.name")) {
    try {
      // get and override configurations from JobLauncher.
      final Configuration configurations = JobLauncher.getBuiltJobConf();
      final Injector injector = Tang.Factory.getTang().newInjector(configurations);
      options.put("spark.app.name", injector.getNamedInstance(JobConf.JobId.class));
    } catch (final InjectionException e) {
      throw new RuntimeException(e);
    }
  }
  options.forEach(sparkConf::set);
  final SparkContext sparkContext = new org.apache.nemo.compiler.frontend.spark.core.SparkContext(sparkConf);
  super.sparkContext(sparkContext);

  return SparkSession.from(super.getOrCreate(), this.options);
}
 
Example #23
Source File: SpringSparkDemoApplication.java    From articles with Apache License 2.0
@Bean
public SparkConf sparkConf()
{
    return new SparkConf()
        .setAppName(appName)
        .setSparkHome(sparkHome)
        .setMaster(masterUri);
}
 
Example #24
Source File: KafkaImportApplicationIntegrationTest.java    From bpmn.ai with BSD 3-Clause "New" or "Revised" License
@Test
public void testKafkaStreamingImportProcessLevel() throws Exception {
    //run main class
    String args[] = {"-kb", KAFKA_HOST + ":" + KAFKA_PORT, "-fd", IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS, "-bm", "true", "-sr", "false", "-dl", "process", "-wd", "./src/test/resources/config/kafka_import_process/", "-sm", "overwrite"};
    SparkConf sparkConf = new SparkConf();
    sparkConf.setMaster("local[*]");
    SparkSession.builder().config(sparkConf).getOrCreate();
    KafkaImportApplication.main(args);

    //start Spark session
    SparkSession sparkSession = SparkSession.builder()
            .master("local[*]")
            .appName("IntegrationTest")
            .getOrCreate();

    //generate Dataset and create hash to compare
    Dataset<Row> importedDataset = sparkSession.read().load(IMPORT_TEST_OUTPUT_DIRECTORY_PROCESS);

    //check that dataset contains 43 lines
    assertEquals(43, importedDataset.count());

    //check hash of dataset
    String hash = BpmnaiUtils.getInstance().md5CecksumOfObject(importedDataset.collect());
    assertEquals("15254E402E5D700FB125E2BD670FE716", hash);

    //close Spark session
    sparkSession.close();
}
 
Example #25
Source File: SparkUtils.java    From DataVec with Apache License 2.0
/**
 * Register the DataVec writable classes for Kryo
 */
public static void registerKryoClasses(SparkConf conf) {
    List<Class<?>> classes = Arrays.<Class<?>>asList(BooleanWritable.class, ByteWritable.class,
                    DoubleWritable.class, FloatWritable.class, IntWritable.class, LongWritable.class,
                    NullWritable.class, Text.class);

    // use the typed toArray overload so the result is a Class<?>[]; a bare
    // (Class<?>[]) cast of toArray() is not guaranteed to succeed on newer JDKs
    conf.registerKryoClasses(classes.toArray(new Class<?>[0]));
}
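Registration as in the helper above only takes effect when Spark is actually configured to use Kryo. A minimal sketch of wiring it into a job setup (assuming the SparkUtils class above is on the classpath) could look like this:

SparkConf conf = new SparkConf()
        .setMaster("local[*]")
        .setAppName("kryo-registration-demo")
        // switch Spark to the Kryo serializer; explicit registration keeps
        // serialized records from carrying full class names
        .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

// register the DataVec writable classes shown in the example above
SparkUtils.registerKryoClasses(conf);

JavaSparkContext sc = new JavaSparkContext(conf);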
 
Example #26
Source File: JavaSqlNetworkWordCount.java    From SparkDemo with MIT License
public static SparkSession getInstance(SparkConf sparkConf) {
  if (instance == null) {
    instance = SparkSession
      .builder()
      .config(sparkConf)
      .getOrCreate();
  }
  return instance;
}
 
Example #27
Source File: PSRpcFactory.java    From systemds with Apache License 2.0
public static SparkPSProxy createSparkPSProxy(SparkConf conf, int port, LongAccumulator aRPC) throws IOException {
	long rpcTimeout = conf.contains("spark.rpc.askTimeout") ?
		conf.getTimeAsMs("spark.rpc.askTimeout") :
		conf.getTimeAsMs("spark.network.timeout", "120s");
	String host = conf.get("spark.driver.host");
	TransportContext context = createTransportContext(conf, new LocalParamServer());
	return new SparkPSProxy(context.createClientFactory().createClient(host, port), rpcTimeout, aRPC);
}
 
Example #28
Source File: MetroAnalysisJob.java    From hui-bigdata-spark with Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf()
            .setAppName("test")
            .setMaster("local[4]");

    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

    MetroAnalysisJob metroAnalysisJob = new MetroAnalysisJob();

    metroAnalysisJob.execute(sparkContext, args);
}
 
Example #29
Source File: JavaEmbeddedIgniteRDDSelfTest.java    From ignite with Apache License 2.0
/**
 * Creates default spark context
 *
 * @return Context.
 */
private JavaSparkContext createContext() {
    SparkConf conf = new SparkConf();

    conf.set("spark.executor.instances", String.valueOf(GRID_CNT));

    return new JavaSparkContext("local[" + GRID_CNT + "]", "test", conf);
}
 
Example #30
Source File: GeoWaveSparkConf.java    From geowave with Apache License 2.0
public static SparkSession createSessionFromParams(
    final String appName,
    String master,
    final String host,
    final String jars) {
  // Grab default config for GeoWave
  SparkConf defaultConfig = GeoWaveSparkConf.getDefaultConfig();
  // Apply master from default
  if (master == null) {
    master = "yarn";
  }

  // Apply user options if set, correctly handling host for yarn.
  if (appName != null) {
    defaultConfig = defaultConfig.setAppName(appName);
  }
  defaultConfig = defaultConfig.setMaster(master);
  if (host != null) {
    if (!"yarn".equals(master)) {
      defaultConfig = defaultConfig.set("spark.driver.host", host);
    } else {
      LOGGER.warn(
          "Attempting to set spark driver host for yarn master. Normally this is handled via hadoop configuration. Remove host or set another master designation and try again.");
    }
  }

  if (jars != null) {
    defaultConfig = defaultConfig.set("spark.jars", jars);
  }

  // Finally return the session from builder
  return GeoWaveSparkConf.internalCreateSession(defaultConfig, null);
}