org.apache.spark.SparkConf Java Examples
The following examples show how to use
org.apache.spark.SparkConf.
They are drawn from open-source projects; the originating project, source file, and license are noted above each example.
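Before the project-specific examples, here is a minimal, self-contained sketch of the basic SparkConf workflow: build the configuration, set the application name and master, add key/value properties, and pass the result to a JavaSparkContext. The class name and the property values below are placeholders chosen for illustration, not settings taken from any of the projects on this page.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkConfBasics {
    public static void main(String[] args) {
        // Build the configuration; app name and master are required when they are not supplied by spark-submit
        SparkConf conf = new SparkConf()
                .setAppName("SparkConfBasics")
                .setMaster("local[2]") // placeholder; use your cluster's master URL in a real deployment
                .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");

        // setIfMissing only applies the value if the key has not been set already
        conf.setIfMissing("spark.ui.enabled", "false");

        // Values can be read back, optionally with a default
        System.out.println("serializer = " + conf.get("spark.serializer", "org.apache.spark.serializer.JavaSerializer"));

        // The configuration is cloned into the context at construction time;
        // changing this SparkConf afterwards does not affect the running context.
        JavaSparkContext sc = new JavaSparkContext(conf);
        long count = sc.parallelize(Arrays.asList(1, 2, 3, 4)).count();
        System.out.println("count = " + count);
        sc.stop();
    }
}

The examples that follow use this same pattern, differing mainly in which properties they set and whether the resulting configuration is handed to a JavaSparkContext, a JavaStreamingContext, or a SparkSession.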
Example #1
Source File: ReduceByKeyAndWindow.java From sparkResearch with Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("reduceByKeyAndWindow").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10));

    // set the checkpoint directory
    streamingContext.checkpoint("hdfs://localhost:9300");

    // data source
    JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080);

    JavaPairDStream<String, Long> ipPairDstream = dStream.mapToPair(new GetIp());

    JavaPairDStream<String, Long> result = ipPairDstream.reduceByKeyAndWindow(
            new AddLongs(), new SubtractLongs(), Durations.seconds(30), Durations.seconds(10));

    try {
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #2
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java From net.jgp.labs.spark with Apache License 2.0
private void start() {
    // Create a local StreamingContext with two working threads and a batch
    // interval of 5 seconds
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
            "Streaming Ingestion File System Text File to Dataframe");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
    msgDataStream.print();

    // Create JavaRDD<Row>
    msgDataStream.foreachRDD(new RowProcessor());

    jssc.start();
    try {
        jssc.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example #3
Source File: SparkExtensionTest.java From component-runtime with Apache License 2.0
public static void main(final String[] args) {
    final SparkConf conf = new SparkConf()
            .setAppName(SparkClusterRuleTest.SubmittableMain.class.getName())
            .setMaster(args[0]);
    final JavaSparkContext context = new JavaSparkContext(conf);
    context
        .parallelize(singletonList("a b"))
        .flatMap((FlatMapFunction<String, String>) text -> asList(text.split(" ")).iterator())
        .mapToPair(word -> new Tuple2<>(word, 1))
        .reduceByKey((a, b) -> a + b)
        .foreach(result -> {
            try (final FileWriter writer = new FileWriter(args[1], true)) {
                writer.write(result._1 + " -> " + result._2 + '\n');
            }
        });
}
Example #4
Source File: SparkApplication.java From kylin-on-parquet-v2 with Apache License 2.0
private void autoSetSparkConf(SparkConf sparkConf) throws Exception {
    logger.info("Start set spark conf automatically.");
    SparkConfHelper helper = new SparkConfHelper();
    helper.setFetcher(KylinBuildEnv.get().clusterInfoFetcher());
    Path shareDir = config.getJobTmpShareDir(project, jobId);
    String contentSize = chooseContentSize(shareDir);

    // add content size with unit
    helper.setOption(SparkConfHelper.SOURCE_TABLE_SIZE, contentSize);
    helper.setOption(SparkConfHelper.LAYOUT_SIZE, Integer.toString(layoutSize));
    Map<String, String> configOverride = config.getSparkConfigOverride();
    helper.setConf(SparkConfHelper.DEFAULT_QUEUE, configOverride.get(SparkConfHelper.DEFAULT_QUEUE));
    helper.setOption(SparkConfHelper.REQUIRED_CORES, calculateRequiredCores());
    helper.setConf(SparkConfHelper.COUNT_DISTICT, hasCountDistinct().toString());
    helper.generateSparkConf();
    helper.applySparkConf(sparkConf);
}
Example #5
Source File: SparkSequenceVectorsTest.java From deeplearning4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    if (sequencesCyclic == null) {
        sequencesCyclic = new ArrayList<>();

        // 10 sequences in total
        for (int c = 0; c < 10; c++) {
            Sequence<VocabWord> sequence = new Sequence<>();

            for (int e = 0; e < 10; e++) {
                // we will have 9 equal elements, with total frequency of 10
                sequence.addElement(new VocabWord(1.0, "" + e, (long) e));
            }

            // and 1 element with frequency of 20
            sequence.addElement(new VocabWord(1.0, "0", 0L));
            sequencesCyclic.add(sequence);
        }
    }

    SparkConf sparkConf = new SparkConf().setMaster("local[8]")
            .set("spark.driver.host", "localhost")
            .setAppName("SeqVecTests");
    sc = new JavaSparkContext(sparkConf);
}
Example #6
Source File: JavaHypothesisTestingKolmogorovSmirnovTestExample.java From SparkDemo with MIT License
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25)); KolmogorovSmirnovTestResult testResult = Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0); // summary of the test including the p-value, test statistic, and null hypothesis // if our p-value indicates significance, we can reject the null hypothesis System.out.println(testResult); // $example off$ jsc.stop(); }
Example #7
Source File: StreamingRsvpsDStreamCountWindow.java From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {
    System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

    final SparkConf conf = new SparkConf()
            .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
            .setAppName(APPLICATION_NAME)
            .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
            .set("spark.streaming.kafka.consumer.cache.enabled", "false");

    final JavaStreamingContext streamingContext =
            new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

    streamingContext.checkpoint(CHECKPOINT_FOLDER);

    final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
            KafkaUtils.createDirectStream(
                    streamingContext,
                    LocationStrategies.PreferConsistent(),
                    ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES));

    // transformations, streaming algorithms, etc
    JavaDStream<Long> countStream = meetupStream.countByWindow(
            new Duration(WINDOW_LENGTH_MS), new Duration(SLIDING_INTERVAL_MS));

    countStream.foreachRDD((JavaRDD<Long> countRDD) -> {
        MongoSpark.save(
                countRDD.map(r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")));
    });

    // some time later, after outputs have completed
    meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {
        OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();
        ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
    });

    streamingContext.start();
    streamingContext.awaitTermination();
}
Example #8
Source File: WordCount.java From Apache-Spark-2x-for-Java-Developers with MIT License
public static void wordCountJava8(String filename) {
    // Define a configuration to use to interact with Spark
    SparkConf conf = new SparkConf().setMaster("local").setAppName("Work Count App");

    // Create a Java version of the Spark Context from the configuration
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Load the input data, which is a text file read from the command line
    JavaRDD<String> input = sc.textFile(filename);

    // Java 8 with lambdas: split the input string into words
    JavaRDD<String> words = input.flatMap(s -> Arrays.asList(s.split(" ")).iterator());

    // Java 8 with lambdas: transform the collection of words into pairs (word and 1) and then count them
    JavaPairRDD<Object, Object> counts =
            words.mapToPair(t -> new Tuple2(t, 1)).reduceByKey((x, y) -> (int) x + (int) y);

    // Save the word count back out to a text file, causing evaluation.
    counts.saveAsTextFile("output");
}
Example #9
Source File: WordCount.java From tutorials with MIT License
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount")
            .setMaster("local");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
    JavaPairRDD<String, Integer> wordAsTuple = words.mapToPair(word -> new Tuple2<>(word, 1));
    JavaPairRDD<String, Integer> wordWithCount = wordAsTuple.reduceByKey((Integer i1, Integer i2) -> i1 + i2);

    List<Tuple2<String, Integer>> output = wordWithCount.collect();
    for (Tuple2<?, ?> tuple : output) {
        System.out.println(tuple._1() + ": " + tuple._2());
    }
    ctx.stop();
}
Example #10
Source File: ContainsDProteinChainTest.java From mmtf-spark with Apache License 2.0
@Before public void setUp() throws Exception { SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDProteinChainTest.class.getSimpleName()); sc = new JavaSparkContext(conf); // 2ONX: only L-protein chain // 1JLP: single L-protein chains with non-polymer capping group (NH2) // 5X6H: L-protein and DNA chain // 5L2G: DNA chain // 2MK1: D-saccharide // 2V5W: Chain C: GLY-GLY-GLY matches both D-protein and L-protein // 5XDP: L-protein and D-protein (modified) // 5GOD: 2 L-protein + 2 D-protein List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","2V5W","5XDP","5GOD"); pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc); }
Example #11
Source File: Word2VecPerformerVoid.java From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
Example #12
Source File: NotFilterExample.java From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {
    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(NotFilterExample.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    long count = MmtfReader
            .readSequenceFile(path, sc) // read MMTF hadoop sequence file
            .filter(new ContainsLProteinChain()) // retain pdb entries that exclusively contain L-peptide chains
            // a NotFilter can be used to reverse a filter
            .filter(new NotFilter(new ContainsDnaChain())) // should not contain any DNA chains
            .count();

    System.out.println("# PDB entries with L-protein and without DNA chains: " + count);

    sc.close();
}
Example #13
Source File: SparkDistributor.java From DataGenerator with Apache License 2.0
@Override
public void distribute(final List<Frontier> frontierList) {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("dg-spark").setMaster(masterURL));

    generatedMaps = sc
            .parallelize(frontierList)
            .flatMap(new FlatMapFunction<Frontier, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Frontier frontier) {
                    LinkedList<Map<String, String>> storage = new LinkedList<>();
                    frontier.searchForScenarios(new CatchAndStoreProcessing(storage), searchExitFlag);
                    return storage;
                }
            })
            .flatMap(new FlatMapFunction<Map<String, String>, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Map<String, String> initialVars) {
                    return SparkDistributor.dataConsumer.transformAndReturn(initialVars);
                }
            });
}
Example #14
Source File: TestManifestFileSerialization.java From iceberg with Apache License 2.0
@Test
public void testManifestFileKryoSerialization() throws IOException {
    File data = temp.newFile();
    Assert.assertTrue(data.delete());

    Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

    ManifestFile manifest = writeManifest(FILE_A);

    try (Output out = new Output(new FileOutputStream(data))) {
        kryo.writeClassAndObject(out, manifest);
        kryo.writeClassAndObject(out, manifest.copy());
        kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build());
    }

    try (Input in = new Input(new FileInputStream(data))) {
        for (int i = 0; i < 3; i += 1) {
            Object obj = kryo.readClassAndObject(in);
            Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile);
            checkManifestFile(manifest, (ManifestFile) obj);
        }
    }
}
Example #15
Source File: SparkTrainWorker.java From ytk-learn with MIT License
public SparkTrainWorker(
        SparkConf conf,
        String modelName,
        String configPath,
        String configFile,
        String pyTransformScript,
        boolean needPyTransform,
        String loginName,
        String hostName,
        int hostPort,
        int slaveNum,
        int threadNum) throws Exception {
    super(modelName, configPath, configFile, pyTransformScript, needPyTransform,
            loginName, hostName, hostPort, threadNum);
    this.slaveNum = slaveNum;

    conf.set("spark.files.fetchTimeout", "3200");
    conf.set("spark.network.timeout", "3200");
    conf.set("spark.dynamicAllocation.executorIdleTimeout", "3200");
    conf.set("spark.dynamicAllocation.schedulerBacklogTimeout", "300");
    conf.set("spark.core.connection.auth.wait.timeout", "3200");
    conf.set("spark.memory.fraction", "0.01");
}
Example #16
Source File: JavaKernelDensityEstimationExample.java From SparkDemo with MIT License
public static void main(String[] args) { SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample"); JavaSparkContext jsc = new JavaSparkContext(conf); // $example on$ // an RDD of sample data JavaRDD<Double> data = jsc.parallelize( Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0)); // Construct the density estimator with the sample data // and a standard deviation for the Gaussian kernels KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0); // Find density estimates for the given values double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0}); System.out.println(Arrays.toString(densities)); // $example off$ jsc.stop(); }
Example #17
Source File: PolymerCompositionTest.java From mmtf-spark with Apache License 2.0
@Before public void setUp() throws Exception { SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName()); sc = new JavaSparkContext(conf); // 2ONX: only L-protein chain // 1JLP: single L-protein chains with non-polymer capping group (NH2) // 5X6H: L-protein and DNA chain (with std. nucleotides) // 5L2G: DNA chain (with non-std. nucleotide) // 2MK1: D-saccharide // 5UZT: RNA chain (with std. nucleotides) // 1AA6: contains SEC, selenocysteine (21st amino acid) // 1NTH: contains PYL, pyrrolysine (22nd amino acid) List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","5UZT","1AA6","1NTH"); pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc); }
Example #18
Source File: GeoWaveSparkConf.java From geowave with Apache License 2.0
public static SparkSession createSessionFromParams(
        final String appName,
        String master,
        final String host,
        final String jars) {
    // Grab default config for GeoWave
    SparkConf defaultConfig = GeoWaveSparkConf.getDefaultConfig();
    // Apply master from default
    if (master == null) {
        master = "yarn";
    }

    // Apply user options if set, correctly handling host for yarn.
    if (appName != null) {
        defaultConfig = defaultConfig.setAppName(appName);
    }
    defaultConfig = defaultConfig.setMaster(master);
    if (host != null) {
        if (!"yarn".equals(master)) {
            defaultConfig = defaultConfig.set("spark.driver.host", host);
        } else {
            LOGGER.warn(
                "Attempting to set spark driver host for yarn master. Normally this is handled via hadoop configuration. Remove host or set another master designation and try again.");
        }
    }

    if (jars != null) {
        defaultConfig = defaultConfig.set("spark.jars", jars);
    }

    // Finally return the session from builder
    return GeoWaveSparkConf.internalCreateSession(defaultConfig, null);
}
Example #19
Source File: BoxClient.java From render with GNU General Public License v2.0
public void run(final SparkConf sparkConf) throws IOException {
    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    LogUtilities.logSparkClusterInfo(sparkContext);

    setupForRun();

    boolean foundBoxesRenderedForPriorRun = false;
    if (parameters.cleanUpPriorRun) {
        foundBoxesRenderedForPriorRun = cleanUpPriorRun(sparkContext);
    }

    final JavaRDD<BoxData> distributedBoxDataRdd = partitionBoxes(sparkContext, foundBoxesRenderedForPriorRun);

    final Broadcast<BoxGenerator> broadcastBoxGenerator = sparkContext.broadcast(boxGenerator);

    if (parameters.validateLabelsOnly) {
        validateLabelBoxes(sparkContext, distributedBoxDataRdd);
    } else {
        for (int level = 0; level <= parameters.box.maxLevel; level++) {
            renderBoxesForLevel(level, distributedBoxDataRdd, broadcastBoxGenerator);
        }
    }

    if (parameters.box.isOverviewNeeded() && (! parameters.explainPlan) && (! parameters.validateLabelsOnly)) {
        renderOverviewImages(sparkContext, broadcastBoxGenerator);
    }

    LogUtilities.logSparkClusterInfo(sparkContext); // log cluster info again here to add run stats to driver log

    sparkContext.stop();
}
Example #20
Source File: SpringSparkDemoApplication.java From articles with Apache License 2.0
@Bean
public SparkSession spark(SparkConf sparkConf) {
    SparkSession sparkSession = SparkSession.builder()
            .sparkContext(javaSparkContext(sparkConf).sc())
            .config(sparkConf)
            .getOrCreate();

    logger.info("Using Spark Version {}", sparkSession.version());

    return sparkSession;
}
Example #21
Source File: MarkDuplicatesSparkUtilsUnitTest.java From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testChangingContigsOnHeaderlessSAMRecord() {
    final SparkConf conf = new SparkConf().set("spark.kryo.registrator",
            "org.broadinstitute.hellbender.tools.spark.transforms.markduplicates.MarkDuplicatesSparkUtilsUnitTest$TestGATKRegistrator");

    final SAMRecord read = ((SAMRecordToGATKReadAdapter) ArtificialReadUtils.createHeaderlessSamBackedRead("read1", "1", 100, 50)).getEncapsulatedSamRecord();
    final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 2500, null);
    final OpticalDuplicateFinder roundTrippedRead = SparkTestUtils.roundTripInKryo(finder, OpticalDuplicateFinder.class, conf);

    Assert.assertEquals(roundTrippedRead.opticalDuplicatePixelDistance, finder.opticalDuplicatePixelDistance);
}
Example #22
Source File: Grep.java From flink-perf with Apache License 2.0
public static void main(String[] args) {
    String master = args[0];
    String inFile = args[1];
    String outFile = args[2];

    String[] patterns = new String[args.length - 3];
    System.arraycopy(args, 3, patterns, 0, args.length - 3);
    System.err.println("Starting spark with master=" + master + " in=" + inFile);
    System.err.println("Using patterns: " + Arrays.toString(patterns));

    SparkConf conf = new SparkConf().setAppName("Grep job").setMaster(master).set("spark.hadoop.validateOutputSpecs", "false");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaRDD<String> file = sc.textFile(inFile);

    for (int p = 0; p < patterns.length; p++) {
        final String pattern = patterns[p];
        JavaRDD<String> res = file.filter(new Function<String, Boolean>() {
            private static final long serialVersionUID = 1L;
            Pattern p = Pattern.compile(pattern);

            @Override
            public Boolean call(String value) throws Exception {
                if (value == null || value.length() == 0) {
                    return false;
                }
                final Matcher m = p.matcher(value);
                if (m.find()) {
                    return true;
                }
                return false;
            }
        });
        res.saveAsTextFile(outFile + "_" + pattern);
    }
}
Example #23
Source File: SparkStreamingSqlEngine.java From sylph with Apache License 2.0
private static Serializable compile(String jobId, SqlFlow sqlFlow, ConnectorStore connectorStore,
        SparkJobConfig sparkJobConfig, URLClassLoader jobClassLoader) throws JVMException {
    int batchDuration = sparkJobConfig.getSparkStreamingBatchDuration();
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get()
                ? new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext ssc = new StreamingContext(sparkSession.sparkContext(), Duration.apply(batchDuration));

        //build sql
        SqlAnalyse analyse = new SparkStreamingSqlAnalyse(ssc, connectorStore, isCompile.get());
        try {
            buildSql(analyse, jobId, sqlFlow);
        } catch (Exception e) {
            throwsException(e);
        }
        return ssc;
    };

    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .setCallable(() -> {
                System.out.println("************ job start ***************");
                appGetter.get();
                return true;
            })
            .addUserURLClassLoader(jobClassLoader)
            .setClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
Example #24
Source File: TestKerberosUtils.java From envelope with Apache License 2.0
@Test
public void testGetKerberosPrincFromSpark() {
    SparkConf sparkConf = new SparkConf(false);
    sparkConf.set("spark.yarn.principal", "boom-oo-ya-ta-ta-ta");

    KerberosUtils.setSparkConf(sparkConf);

    Map<String, Object> configMap = new HashMap<>();
    Config config = ConfigFactory.parseMap(configMap);

    String principal = KerberosUtils.getKerberosPrincipal(config);

    assertEquals("boom-oo-ya-ta-ta-ta", principal);
}
Example #25
Source File: TransformationRDD.java From hui-bigdata-spark with Apache License 2.0
/**
 * Element transformation performed within each partition.
 * Purpose of this demo: compute squares. (The first lambda parameter is the partition index.)
 *
 * @since hui_project 1.0.0
 */
public void testMapPartitionsWithIndex() {
    SparkConf sparkConf = new SparkConf().setMaster("local[4]").setAppName("test");
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    JavaRDD<Integer> parallelize = sparkContext.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 3);
    JavaRDD<Tuple2<Integer, Integer>> rdd =
            parallelize.mapPartitionsWithIndex((x, y) -> getSquareWithIndex(x, y), false);
    checkResult(rdd.collect());
}
Example #26
Source File: KinesisConsumer.java From real-time-analytics-spark-streaming with Apache License 2.0
public static SparkSession getInstance(SparkConf sparkConf) {
    if (instance == null) {
        synchronized (JavaSparkSessionSingleton.class) {
            if (instance == null) {
                instance = SparkSession.builder().config(sparkConf).getOrCreate();
            }
        }
    }
    return instance;
}
Example #27
Source File: TestKerberosUtils.java From envelope with Apache License 2.0
@Test
public void testGetKerberosPrincFromConfig() {
    SparkConf sparkConf = new SparkConf(false);
    sparkConf.set("spark.yarn.principal", "boom-oo-ya-ta-ta-ta");

    KerberosUtils.setSparkConf(sparkConf);

    Map<String, Object> configMap = new HashMap<>();
    configMap.put(USER_PRINC_CONFIG, "foo");
    Config config = ConfigFactory.parseMap(configMap);

    String principal = KerberosUtils.getKerberosPrincipal(config);

    assertEquals("foo", principal);
}
Example #28
Source File: SparkRunnerTestUtils.java From components with Apache License 2.0
public Pipeline createPipeline() {
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
Example #29
Source File: WordCountRecoverableEx.java From Apache-Spark-2x-for-Java-Developers with MIT License
protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
    SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
    streamingContext.checkpoint(checkpointDirectory);

    // Initial state RDD input to mapWithState
    @SuppressWarnings("unchecked")
    List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
    JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);

    JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(ip, port,
            StorageLevels.MEMORY_AND_DISK_SER);

    JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
            .reduceByKey((count1, count2) -> count1 + count2);

    // Update the cumulative count function
    Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
            new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
                @Override
                public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
                    int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
                    Tuple2<String, Integer> output = new Tuple2<>(word, sum);
                    state.update(sum);
                    return output;
                }
            };

    // DStream made of cumulative counts that get updated in every batch
    JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts
            .mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

    stateDstream.print();
    return streamingContext;
}
Example #30
Source File: SparkConnectorCreatorTest.java From Explorer with Apache License 2.0
@Before
public void setUp() {
    keysToInspect = new ArrayList<>();
    keysToInspect.add(AttributteNames.CT_MASTER);
    properties = new Properties();
    creator = new ConnectorCreator<SparkConf>(new SparkConfComparator(), " Porperty spark master is not filled ");
}