org.apache.spark.SparkConf Java Examples

The following examples show how to use org.apache.spark.SparkConf. They are drawn from a range of open-source projects; the source file, project, and license are noted above each example.
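As a quick orientation before the project-sourced examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the usual SparkConf pattern: build a configuration, pass it to a context, and read settings back. The application name, master URL, and the "spark.some.key" lookup are placeholder values.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class SparkConfBasics {
    public static void main(String[] args) {
        // Build a configuration; app name and master are placeholders
        SparkConf conf = new SparkConf()
                .setAppName("SparkConfBasics")
                .setMaster("local[2]")              // run locally with two threads
                .set("spark.ui.enabled", "false");  // any key/value setting

        // A SparkConf is normally handed to a context (or a SparkSession builder)
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Settings can be read back, optionally with a default for missing keys
        System.out.println(sc.getConf().get("spark.app.name"));
        System.out.println(sc.getConf().get("spark.some.key", "default-value"));

        sc.stop();
    }
}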
Example #1
Source File: ReduceByKeyAndWindow.java    From sparkResearch with Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("reduceByKeyAndWindow").setMaster("local[2]");
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10));
    //set up the checkpoint directory
    streamingContext.checkpoint("hdfs://localhost:9300");
    //data source: lines read from a socket
    JavaDStream<String> dStream = streamingContext.socketTextStream("localhost", 8080);

    JavaPairDStream<String, Long> ipPairDstream = dStream.mapToPair(new GetIp());

    JavaPairDStream<String, Long> result = ipPairDstream.reduceByKeyAndWindow(new AddLongs(),
            new SubtractLongs(), Durations.seconds(30), Durations.seconds(10));

    try {
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
 
Example #2
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java    From net.jgp.labs.spark with Apache License 2.0
private void start() {
  // Create a local StreamingContext with two working threads and a batch
  // interval of 5 seconds
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
      .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
      .getInputDirectory());

  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example #3
Source File: SparkExtensionTest.java    From component-runtime with Apache License 2.0
public static void main(final String[] args) {
    final SparkConf conf =
            new SparkConf().setAppName(SparkClusterRuleTest.SubmittableMain.class.getName()).setMaster(args[0]);
    final JavaSparkContext context = new JavaSparkContext(conf);

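    // Word count: split the text into words, count each word, and append "word -> count" lines to the file at args[1]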
    context
            .parallelize(singletonList("a b"))
            .flatMap((FlatMapFunction<String, String>) text -> asList(text.split(" ")).iterator())
            .mapToPair(word -> new Tuple2<>(word, 1))
            .reduceByKey((a, b) -> a + b)
            .foreach(result -> {
                try (final FileWriter writer = new FileWriter(args[1], true)) {
                    writer.write(result._1 + " -> " + result._2 + '\n');
                }
            });
}
 
Example #4
Source File: SparkApplication.java    From kylin-on-parquet-v2 with Apache License 2.0
private void autoSetSparkConf(SparkConf sparkConf) throws Exception {
    logger.info("Start set spark conf automatically.");
    SparkConfHelper helper = new SparkConfHelper();
    helper.setFetcher(KylinBuildEnv.get().clusterInfoFetcher());
    Path shareDir = config.getJobTmpShareDir(project, jobId);
    String contentSize = chooseContentSize(shareDir);

    // add content size with unit
    helper.setOption(SparkConfHelper.SOURCE_TABLE_SIZE, contentSize);
    helper.setOption(SparkConfHelper.LAYOUT_SIZE, Integer.toString(layoutSize));
    Map<String, String> configOverride = config.getSparkConfigOverride();
    helper.setConf(SparkConfHelper.DEFAULT_QUEUE, configOverride.get(SparkConfHelper.DEFAULT_QUEUE));
    helper.setOption(SparkConfHelper.REQUIRED_CORES, calculateRequiredCores());
    helper.setConf(SparkConfHelper.COUNT_DISTICT, hasCountDistinct().toString());
    helper.generateSparkConf();
    helper.applySparkConf(sparkConf);
}
 
Example #5
Source File: SparkSequenceVectorsTest.java    From deeplearning4j with Apache License 2.0
@Before
public void setUp() throws Exception {
    if (sequencesCyclic == null) {
        sequencesCyclic = new ArrayList<>();

        // 10 sequences in total
        for (int c = 0; c < 10; c++) {

            Sequence<VocabWord> sequence = new Sequence<>();

            for (int e = 0; e < 10; e++) {
                // we will have 9 equal elements, with total frequency of 10
                sequence.addElement(new VocabWord(1.0, "" + e, (long) e));
            }

            // and 1 element with frequency of 20
            sequence.addElement(new VocabWord(1.0, "0", 0L));
            sequencesCyclic.add(sequence);
        }
    }

    SparkConf sparkConf = new SparkConf().setMaster("local[8]")
            .set("spark.driver.host", "localhost")
            .setAppName("SeqVecTests");
    sc = new JavaSparkContext(sparkConf);
}
 
Example #6
Source File: JavaHypothesisTestingKolmogorovSmirnovTestExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf =
      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
    KolmogorovSmirnovTestResult testResult =
      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
    // summary of the test including the p-value, test statistic, and null hypothesis
    // if our p-value indicates significance, we can reject the null hypothesis
    System.out.println(testResult);
    // $example off$

    jsc.stop();
  }
 
Example #7
Source File: StreamingRsvpsDStreamCountWindow.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {

        System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

        final SparkConf conf = new SparkConf()
                .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
                .setAppName(APPLICATION_NAME)
                .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
                .set("spark.streaming.kafka.consumer.cache.enabled", "false");

        final JavaStreamingContext streamingContext
                = new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

        streamingContext.checkpoint(CHECKPOINT_FOLDER);

        final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
                );
                
        // transformations, streaming algorithms, etc
        JavaDStream<Long> countStream  
            = meetupStream.countByWindow(
                 new Duration(WINDOW_LENGTH_MS), 
                 new Duration(SLIDING_INTERVAL_MS));        

        countStream.foreachRDD((JavaRDD<Long> countRDD) -> {                
            MongoSpark.save(        
                    countRDD.map(
                        r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")
                    )
            );            
        });
        
        // some time later, after outputs have completed
        meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {        
            OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();            

            ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
        });
        
        streamingContext.start();
        streamingContext.awaitTermination();    
    }
 
Example #8
Source File: WordCount.java    From Apache-Spark-2x-for-Java-Developers with MIT License
public static void wordCountJava8( String filename )
{
    // Define a configuration to use to interact with Spark
    SparkConf conf = new SparkConf().setMaster("local").setAppName("Word Count App");

    // Create a Java version of the Spark Context from the configuration
    JavaSparkContext sc = new JavaSparkContext(conf);

    // Load the input data, which is a text file read from the command line
    JavaRDD<String> input = sc.textFile( filename );

    // Java 8 with lambdas: split the input string into words
    JavaRDD<String> words = input.flatMap( s -> Arrays.asList( s.split( " " ) ).iterator() );

    // Java 8 with lambdas: transform the collection of words into pairs (word and 1) and then count them
    JavaPairRDD<String, Integer> counts = words.mapToPair( t -> new Tuple2<>( t, 1 ) ).reduceByKey( (x, y) -> x + y );

    // Save the word count back out to a text file, causing evaluation.
    counts.saveAsTextFile( "output" );
}
 
Example #9
Source File: WordCount.java    From tutorials with MIT License
public static void main(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage: JavaWordCount <file>");
        System.exit(1);
    }
    SparkConf sparkConf = new SparkConf().setAppName("JavaWordCount")
        .setMaster("local");
    JavaSparkContext ctx = new JavaSparkContext(sparkConf);
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    JavaRDD<String> words = lines.flatMap(s -> Arrays.asList(SPACE.split(s)).iterator());
    JavaPairRDD<String, Integer> wordAsTuple = words.mapToPair(word -> new Tuple2<>(word, 1));
    JavaPairRDD<String, Integer> wordWithCount = wordAsTuple.reduceByKey((Integer i1, Integer i2)->i1 + i2);
    List<Tuple2<String, Integer>> output = wordWithCount.collect();
    for (Tuple2<?, ?> tuple : output) {
         System.out.println(tuple._1() + ": " + tuple._2());
    }
    ctx.stop();
}
 
Example #10
Source File: ContainsDProteinChainTest.java    From mmtf-spark with Apache License 2.0
@Before
public void setUp() throws Exception {
	SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(ContainsDProteinChainTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain
    // 5L2G: DNA chain
    // 2MK1: D-saccharide
    // 2V5W: Chain C: GLY-GLY-GLY matches both D-protein and L-protein
    // 5XDP: L-protein and D-protein (modified)
    // 5GOD: 2 L-protein + 2 D-protein
    List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","2V5W","5XDP","5GOD");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
 
Example #11
Source File: Word2VecPerformerVoid.java    From deeplearning4j with Apache License 2.0
public void setup(SparkConf conf) {
    useAdaGrad = conf.getBoolean(ADAGRAD, false);
    negative = conf.getDouble(NEGATIVE, 5);
    numWords = conf.getInt(NUM_WORDS, 1);
    window = conf.getInt(WINDOW, 5);
    alpha = conf.getDouble(ALPHA, 0.025f);
    minAlpha = conf.getDouble(MIN_ALPHA, 1e-2f);
    totalWords = conf.getInt(NUM_WORDS, 1);
    iterations = conf.getInt(ITERATIONS, 5);
    vectorLength = conf.getInt(VECTOR_LENGTH, 100);

    initExpTable();

    if (negative > 0 && conf.contains(TABLE)) {
        ByteArrayInputStream bis = new ByteArrayInputStream(conf.get(TABLE).getBytes());
        DataInputStream dis = new DataInputStream(bis);
        table = Nd4j.read(dis);
    }
}
 
Example #12
Source File: NotFilterExample.java    From mmtf-spark with Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {

    String path = MmtfReader.getMmtfReducedPath();

    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(NotFilterExample.class.getSimpleName());
    JavaSparkContext sc = new JavaSparkContext(conf);

    long count = MmtfReader
            .readSequenceFile(path, sc) // read MMTF hadoop sequence file
            .filter(new ContainsLProteinChain()) // retain pdb entries that exclusively contain L-peptide chains
            // a NotFilter can be used to reverse a filter
            .filter(new NotFilter(new ContainsDnaChain())) // should not contain any DNA chains
            .count();

    System.out.println("# PDB entries with L-protein and without DNA chains: " + count);
    sc.close();
}
 
Example #13
Source File: SparkDistributor.java    From DataGenerator with Apache License 2.0
@Override
public void distribute(final List<Frontier> frontierList) {
    JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("dg-spark").setMaster(masterURL));

    generatedMaps = sc
            .parallelize(frontierList)
            .flatMap(new FlatMapFunction<Frontier, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Frontier frontier) {
                    LinkedList<Map<String, String>> storage = new LinkedList<>();
                    frontier.searchForScenarios(new CatchAndStoreProcessing(storage), searchExitFlag);

                    return storage;
                }
            })
            .flatMap(new FlatMapFunction<Map<String, String>, Map<String, String>>() {
                @Override
                public Iterable<Map<String, String>> call(Map<String, String> initialVars) {
                    return SparkDistributor.dataConsumer.transformAndReturn(initialVars);
                }
            });
}
 
Example #14
Source File: TestManifestFileSerialization.java    From iceberg with Apache License 2.0
@Test
public void testManifestFileKryoSerialization() throws IOException {
  File data = temp.newFile();
  Assert.assertTrue(data.delete());

  Kryo kryo = new KryoSerializer(new SparkConf()).newKryo();

  ManifestFile manifest = writeManifest(FILE_A);

  try (Output out = new Output(new FileOutputStream(data))) {
    kryo.writeClassAndObject(out, manifest);
    kryo.writeClassAndObject(out, manifest.copy());
    kryo.writeClassAndObject(out, GenericManifestFile.copyOf(manifest).build());
  }

  try (Input in = new Input(new FileInputStream(data))) {
    for (int i = 0; i < 3; i += 1) {
      Object obj = kryo.readClassAndObject(in);
      Assert.assertTrue("Should be a ManifestFile", obj instanceof ManifestFile);
      checkManifestFile(manifest, (ManifestFile) obj);
    }
  }
}
 
Example #15
Source File: SparkTrainWorker.java    From ytk-learn with MIT License
public SparkTrainWorker(
                        SparkConf conf,
                        String modelName,
                        String configPath,
                        String configFile,
                        String pyTransformScript,
                        boolean needPyTransform,
                        String loginName,
                        String hostName,
                        int hostPort,
                        int slaveNum,
                        int threadNum) throws Exception {
    super(modelName, configPath, configFile, pyTransformScript, needPyTransform,
            loginName, hostName, hostPort, threadNum);
    this.slaveNum = slaveNum;

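    // Relax Spark fetch/network/idle timeouts for long-running training, and shrink Spark's managed memory fraction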
    conf.set("spark.files.fetchTimeout", "3200");
    conf.set("spark.network.timeout", "3200");
    conf.set("spark.dynamicAllocation.executorIdleTimeout", "3200");
    conf.set("spark.dynamicAllocation.schedulerBacklogTimeout", "300");
    conf.set("spark.core.connection.auth.wait.timeout", "3200");
    conf.set("spark.memory.fraction", "0.01");
}
 
Example #16
Source File: JavaKernelDensityEstimationExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // an RDD of sample data
    JavaRDD<Double> data = jsc.parallelize(
      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));

    // Construct the density estimator with the sample data
    // and a standard deviation for the Gaussian kernels
    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);

    // Find density estimates for the given values
    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});

    System.out.println(Arrays.toString(densities));
    // $example off$

    jsc.stop();
  }
 
Example #17
Source File: PolymerCompositionTest.java    From mmtf-spark with Apache License 2.0
@Before
public void setUp() throws Exception {
	SparkConf conf = new SparkConf().setMaster("local[*]").setAppName(PolymerCompositionTest.class.getSimpleName());
    sc = new JavaSparkContext(conf);
    
    // 2ONX: only L-protein chain
    // 1JLP: single L-protein chains with non-polymer capping group (NH2)
    // 5X6H: L-protein and DNA chain (with std. nucleotides)
    // 5L2G: DNA chain (with non-std. nucleotide)
    // 2MK1: D-saccharide
    // 5UZT: RNA chain (with std. nucleotides)
    // 1AA6: contains SEC, selenocysteine (21st amino acid)
    // 1NTH: contains PYL, pyrrolysine (22nd amino acid)
    List<String> pdbIds = Arrays.asList("2ONX","1JLP","5X6H","5L2G","2MK1","5UZT","1AA6","1NTH");
    pdb = MmtfReader.downloadReducedMmtfFiles(pdbIds, sc);
}
 
Example #18
Source File: GeoWaveSparkConf.java    From geowave with Apache License 2.0
public static SparkSession createSessionFromParams(
    final String appName,
    String master,
    final String host,
    final String jars) {
  // Grab default config for GeoWave
  SparkConf defaultConfig = GeoWaveSparkConf.getDefaultConfig();
  // Apply master from default
  if (master == null) {
    master = "yarn";
  }

  // Apply user options if set, correctly handling host for yarn.
  if (appName != null) {
    defaultConfig = defaultConfig.setAppName(appName);
  }
  defaultConfig = defaultConfig.setMaster(master);
  if (host != null) {
    if (!"yarn".equals(master)) {
      defaultConfig = defaultConfig.set("spark.driver.host", host);
    } else {
      LOGGER.warn(
          "Attempting to set spark driver host for yarn master. Normally this is handled via hadoop configuration. Remove host or set another master designation and try again.");
    }
  }

  if (jars != null) {
    defaultConfig = defaultConfig.set("spark.jars", jars);
  }

  // Finally return the session from builder
  return GeoWaveSparkConf.internalCreateSession(defaultConfig, null);
}
 
Example #19
Source File: BoxClient.java    From render with GNU General Public License v2.0
public void run(final SparkConf sparkConf)
        throws IOException {

    final JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);

    LogUtilities.logSparkClusterInfo(sparkContext);

    setupForRun();

    boolean foundBoxesRenderedForPriorRun = false;
    if (parameters.cleanUpPriorRun) {
        foundBoxesRenderedForPriorRun = cleanUpPriorRun(sparkContext);
    }

    final JavaRDD<BoxData> distributedBoxDataRdd = partitionBoxes(sparkContext,
                                                                  foundBoxesRenderedForPriorRun);

    final Broadcast<BoxGenerator> broadcastBoxGenerator = sparkContext.broadcast(boxGenerator);

    if (parameters.validateLabelsOnly) {
        validateLabelBoxes(sparkContext, distributedBoxDataRdd);
    } else {
        for (int level = 0; level <= parameters.box.maxLevel; level++) {
            renderBoxesForLevel(level, distributedBoxDataRdd, broadcastBoxGenerator);
        }
    }

    if (parameters.box.isOverviewNeeded() && (! parameters.explainPlan) && (! parameters.validateLabelsOnly)) {
        renderOverviewImages(sparkContext,
                             broadcastBoxGenerator);
    }

    LogUtilities.logSparkClusterInfo(sparkContext); // log cluster info again here to add run stats to driver log

    sparkContext.stop();
}
 
Example #20
Source File: SpringSparkDemoApplication.java    From articles with Apache License 2.0
@Bean
public SparkSession spark(SparkConf sparkConf)
{
    SparkSession sparkSession = SparkSession.builder()
        .sparkContext(javaSparkContext(sparkConf).sc())
        .config(sparkConf)
        .getOrCreate();

    logger.info("Using Spark Version {}", sparkSession.version());

    return sparkSession;
}
 
Example #21
Source File: MarkDuplicatesSparkUtilsUnitTest.java    From gatk with BSD 3-Clause "New" or "Revised" License
@Test
public void testChangingContigsOnHeaderlessSAMRecord() {
    final SparkConf conf = new SparkConf().set("spark.kryo.registrator",
            "org.broadinstitute.hellbender.tools.spark.transforms.markduplicates.MarkDuplicatesSparkUtilsUnitTest$TestGATKRegistrator");
    final SAMRecord read = ((SAMRecordToGATKReadAdapter) ArtificialReadUtils.createHeaderlessSamBackedRead("read1", "1", 100, 50)).getEncapsulatedSamRecord();
    final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX,2500, null);

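    // Round-trip the finder through Kryo (with the custom registrator) and verify its settings survive serialization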
    final OpticalDuplicateFinder roundTrippedRead = SparkTestUtils.roundTripInKryo(finder, OpticalDuplicateFinder.class, conf);
    Assert.assertEquals(roundTrippedRead.opticalDuplicatePixelDistance, finder.opticalDuplicatePixelDistance);
}
 
Example #22
Source File: Grep.java    From flink-perf with Apache License 2.0
public static void main(String[] args) {
	String master = args[0];
	String inFile = args[1];
	String outFile = args[2];

	String[] patterns = new String[args.length - 3];
	System.arraycopy(args, 3, patterns, 0, args.length - 3);
	System.err.println("Starting spark with master="+master+" in="+inFile);
	System.err.println("Using patterns: "+ Arrays.toString(patterns));

	SparkConf conf = new SparkConf().setAppName("Grep job").setMaster(master).set("spark.hadoop.validateOutputSpecs", "false");
	JavaSparkContext sc = new JavaSparkContext(conf);

	JavaRDD<String> file = sc.textFile(inFile);
	for(int p = 0; p < patterns.length; p++) {
		final String pattern = patterns[p];
		JavaRDD<String> res = file.filter(new Function<String, Boolean>() {
			private static final long serialVersionUID = 1L;
			Pattern p = Pattern.compile(pattern);

			@Override
			public Boolean call(String value) throws Exception {
				if (value == null || value.length() == 0) {
					return false;
				}
				final Matcher m = p.matcher(value);
				if (m.find()) {
					return true;
				}
				return false;
			}
		});
		res.saveAsTextFile(outFile+"_"+pattern);
	}
}
 
Example #23
Source File: SparkStreamingSqlEngine.java    From sylph with Apache License 2.0
private static Serializable compile(String jobId, SqlFlow sqlFlow, ConnectorStore connectorStore, SparkJobConfig sparkJobConfig, URLClassLoader jobClassLoader)
        throws JVMException
{
    int batchDuration = sparkJobConfig.getSparkStreamingBatchDuration();
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext ssc = new StreamingContext(sparkSession.sparkContext(), Duration.apply(batchDuration));

        //build sql
        SqlAnalyse analyse = new SparkStreamingSqlAnalyse(ssc, connectorStore, isCompile.get());
        try {
            buildSql(analyse, jobId, sqlFlow);
        }
        catch (Exception e) {
            throwsException(e);
        }
        return ssc;
    };

    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .setCallable(() -> {
                System.out.println("************ job start ***************");
                appGetter.get();
                return true;
            })
            .addUserURLClassLoader(jobClassLoader)
            .setClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .build();

    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
 
Example #24
Source File: TestKerberosUtils.java    From envelope with Apache License 2.0
@Test
public void testGetKerberosPrincFromSpark() {
  SparkConf sparkConf = new SparkConf(false);
  sparkConf.set("spark.yarn.principal", "boom-oo-ya-ta-ta-ta");
  KerberosUtils.setSparkConf(sparkConf);
  Map<String, Object> configMap = new HashMap<>();
  Config config = ConfigFactory.parseMap(configMap);

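  // No principal in the config, so the value falls back to spark.yarn.principal from the SparkConf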
  String principal = KerberosUtils.getKerberosPrincipal(config);

  assertEquals("boom-oo-ya-ta-ta-ta", principal);
}
 
Example #25
Source File: TransformationRDD.java    From hui-bigdata-spark with Apache License 2.0
/**
 * Element transformation: transforms the elements inside each partition.
 * Purpose of the demo: compute squares (the first parameter is the partition index).
 *
 * @since hui_project 1.0.0
 */
public void testMapPartitionsWithIndex() {
    SparkConf sparkConf = new SparkConf().setMaster("local[4]").setAppName("test");
    JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);
    JavaRDD<Integer> parallelize = sparkContext.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 3);
    JavaRDD<Tuple2<Integer, Integer>> rdd = parallelize.mapPartitionsWithIndex((x, y) -> getSquareWithIndex(x, y), false);
    checkResult(rdd.collect());
}
 
Example #26
Source File: KinesisConsumer.java    From real-time-analytics-spark-streaming with Apache License 2.0
public static SparkSession getInstance(SparkConf sparkConf) {
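    // Lazily create a single shared SparkSession, guarded by double-checked locking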
    if (instance == null) {
        synchronized (JavaSparkSessionSingleton.class) {
            if (instance == null) {
                instance = SparkSession.builder().config(sparkConf).getOrCreate();
            }
        }
    }

    return instance;
}
 
Example #27
Source File: TestKerberosUtils.java    From envelope with Apache License 2.0
@Test
public void testGetKerberosPrincFromConfig() {
  SparkConf sparkConf = new SparkConf(false);
  sparkConf.set("spark.yarn.principal", "boom-oo-ya-ta-ta-ta");
  KerberosUtils.setSparkConf(sparkConf);
  Map<String, Object> configMap = new HashMap<>();
  configMap.put(USER_PRINC_CONFIG, "foo");
  Config config = ConfigFactory.parseMap(configMap);

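  // An explicitly configured principal takes precedence over spark.yarn.principal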
  String principal = KerberosUtils.getKerberosPrincipal(config);

  assertEquals("foo", principal);
}
 
Example #28
Source File: SparkRunnerTestUtils.java    From components with Apache License 2.0
public Pipeline createPipeline() {
    SparkContextOptions sparkOpts = options.as(SparkContextOptions.class);
    sparkOpts.setFilesToStage(emptyList());

    SparkConf conf = new SparkConf();
    conf.setAppName(appName);
    conf.setMaster("local[2]");
    conf.set("spark.driver.allowMultipleContexts", "true");
    JavaSparkContext jsc = new JavaSparkContext(new SparkContext(conf));
    sparkOpts.setProvidedSparkContext(jsc);
    sparkOpts.setUsesProvidedSparkContext(true);
    sparkOpts.setRunner(SparkRunner.class);

    return Pipeline.create(sparkOpts);
}
 
Example #29
Source File: WordCountRecoverableEx.java    From Apache-Spark-2x-for-Java-Developers with MIT License
protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
	SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
	JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
	streamingContext.checkpoint(checkpointDirectory);
	// Initial state RDD input to mapWithState
	@SuppressWarnings("unchecked")
	List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
	JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);

	JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream(ip,port, StorageLevels.MEMORY_AND_DISK_SER);

	JavaDStream<String> words = StreamingLines.flatMap(str -> Arrays.asList(str.split(" ")).iterator());

	JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str -> new Tuple2<>(str, 1))
			.reduceByKey((count1, count2) -> count1 + count2);

	// Update the cumulative count function
	Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc = new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
		@Override
		public Tuple2<String, Integer> call(String word, Optional<Integer> one, State<Integer> state) {
			int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
			Tuple2<String, Integer> output = new Tuple2<>(word, sum);
			state.update(sum);
			return output;
		}
	};

	// DStream made of get cumulative counts that get updated in every batch
	JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts
			.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

	stateDstream.print();
	return streamingContext;
}
 
Example #30
Source File: SparkConnectorCreatorTest.java    From Explorer with Apache License 2.0
@Before
public void setUp(){
    keysToInspect = new ArrayList<>();
    keysToInspect.add(AttributteNames.CT_MASTER);
    properties = new Properties();
    creator = new ConnectorCreator<SparkConf>(new SparkConfComparator(), "Property spark master is not filled");
}