Java Code Examples for org.apache.spark.api.java.JavaSparkContext#stop()

The following examples show how to use org.apache.spark.api.java.JavaSparkContext#stop(). Each example is taken from an open-source project; the source file and project are noted above each snippet so you can trace it back to the original code. In every case the context is stopped exactly once when the job or test finishes, often from a finally block so that cluster resources are released even if the work in between throws.
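Before the project-specific examples, a minimal sketch of the underlying pattern may help. The class name, local master URL and trivial job below are illustrative assumptions rather than code from any of the projects; the point is simply that stop() is called exactly once, in a finally block, after the work is done.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class StopExample {
    public static void main(String[] args) {
        // App name and local master are placeholders for illustration only.
        SparkConf conf = new SparkConf().setAppName("StopExample").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        try {
            // Real job logic would go here; a trivial count stands in for it.
            JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5));
            System.out.println("count = " + numbers.count());
        } finally {
            // Stop the context even if the job throws, releasing executors and the driver UI.
            sc.stop();
        }
    }
}

JavaSparkContext also implements Closeable, and close() simply delegates to stop(), so the same pattern can be written with try-with-resources; a sketch of that variant appears after Example 8 below.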
Example 1
Source File: TestKryoWarning.java    From deeplearning4j with Apache License 2.0
private static void doTestCG(SparkConf sparkConf) {
    JavaSparkContext sc = new JavaSparkContext(sparkConf);

    try {

        ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder().graphBuilder().addInputs("in")
                        .addLayer("0", new OutputLayer.Builder().nIn(10).nOut(10).build(), "in").setOutputs("0")
                        .build();

        TrainingMaster tm = new ParameterAveragingTrainingMaster.Builder(1).build();

        SparkListenable scg = new SparkComputationGraph(sc, conf, tm);
    } finally {
        sc.stop();
    }
}
 
Example 2
Source File: JavaStandaloneIgniteRDDSelfTest.java    From ignite with Apache License 2.0
/**
 * @throws Exception If failed.
 */
@Test
public void testQueryObjectsFromIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");

    try {
        JavaIgniteContext<String, Entity> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());

        JavaIgniteRDD<String, Entity> cache = ic.fromCache(ENTITY_CACHE_NAME);

        cache.savePairs(sc.parallelize(F.range(0, 1001), 2).mapToPair(INT_TO_ENTITY_F));

        List<Entity> res = cache.objectSql("Entity", "name = ? and salary = ?", "name50", 5000)
            .map(STR_ENTITY_PAIR_TO_ENTITY_F).collect();

        assertEquals("Invalid result length", 1, res.size());
        assertEquals("Invalid result", 50, res.get(0).id());
        assertEquals("Invalid result", "name50", res.get(0).name());
        assertEquals("Invalid result", 5000, res.get(0).salary());
        assertEquals("Invalid count", 500, cache.objectSql("Entity", "id > 500").count());
    }
    finally {
        sc.stop();
    }
}
 
Example 3
Source File: JavaStandaloneIgniteRDDSelfTest.java    From ignite with Apache License 2.0
/**
 * @throws Exception If failed.
 */
@Test
public void testStoreDataToIgnite() throws Exception {
    JavaSparkContext sc = new JavaSparkContext("local[*]", "test");

    try {
        JavaIgniteContext<String, String> ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider());

        ic.fromCache(ENTITY_CACHE_NAME)
            .savePairs(sc.parallelize(F.range(0, KEYS_CNT), 2).mapToPair(TO_PAIR_F));

        Ignite ignite = Ignition.ignite("grid-0");

        IgniteCache<String, String> cache = ignite.cache(ENTITY_CACHE_NAME);

        for (int i = 0; i < KEYS_CNT; i++) {
            String val = cache.get(String.valueOf(i));

            assertNotNull("Value was not put to cache for key: " + i, val);
            assertEquals("Invalid value stored for key: " + i, "val" + i, val);
        }
    }
    finally {
        sc.stop();
    }
}
 
Example 4
Source File: JavaKernelDensityEstimationExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaKernelDensityEstimationExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // an RDD of sample data
    JavaRDD<Double> data = jsc.parallelize(
      Arrays.asList(1.0, 1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0));

    // Construct the density estimator with the sample data
    // and a standard deviation for the Gaussian kernels
    KernelDensity kd = new KernelDensity().setSample(data).setBandwidth(3.0);

    // Find density estimates for the given values
    double[] densities = kd.estimate(new double[]{-1.0, 2.0, 5.0});

    System.out.println(Arrays.toString(densities));
    // $example off$

    jsc.stop();
  }
 
Example 5
Source File: JavaHypothesisTestingKolmogorovSmirnovTestExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf =
      new SparkConf().setAppName("JavaHypothesisTestingKolmogorovSmirnovTestExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    JavaDoubleRDD data = jsc.parallelizeDoubles(Arrays.asList(0.1, 0.15, 0.2, 0.3, 0.25));
    KolmogorovSmirnovTestResult testResult =
      Statistics.kolmogorovSmirnovTest(data, "norm", 0.0, 1.0);
    // summary of the test including the p-value, test statistic, and null hypothesis
    // if our p-value indicates significance, we can reject the null hypothesis
    System.out.println(testResult);
    // $example off$

    jsc.stop();
  }
 
Example 6
Source File: JavaCorrelationsExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaCorrelationsExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    JavaDoubleRDD seriesX = jsc.parallelizeDoubles(
      Arrays.asList(1.0, 2.0, 3.0, 3.0, 5.0));  // a series

    // must have the same number of partitions and cardinality as seriesX
    JavaDoubleRDD seriesY = jsc.parallelizeDoubles(
      Arrays.asList(11.0, 22.0, 33.0, 33.0, 555.0));

    // compute the correlation using Pearson's method. Enter "spearman" for Spearman's method.
    // If a method is not specified, Pearson's method will be used by default.
    Double correlation = Statistics.corr(seriesX.srdd(), seriesY.srdd(), "pearson");
    System.out.println("Correlation is: " + correlation);

    // note that each Vector is a row and not a column
    JavaRDD<Vector> data = jsc.parallelize(
      Arrays.asList(
        Vectors.dense(1.0, 10.0, 100.0),
        Vectors.dense(2.0, 20.0, 200.0),
        Vectors.dense(5.0, 33.0, 366.0)
      )
    );

    // calculate the correlation matrix using Pearson's method.
    // Use "spearman" for Spearman's method.
    // If a method is not specified, Pearson's method will be used by default.
    Matrix correlMatrix = Statistics.corr(data.rdd(), "pearson");
    System.out.println(correlMatrix.toString());
    // $example off$

    jsc.stop();
  }
 
Example 7
Source File: ElasticsearchDependenciesJob.java    From spark-dependencies with Apache License 2.0
void run(String[] spanIndices, String[] depIndices, String peerServiceTag) {
  JavaSparkContext sc = new JavaSparkContext(conf);
  try {
    for (int i = 0; i < spanIndices.length; i++) {
      String spanIndex = spanIndices[i];
      String depIndex = depIndices[i];
      log.info("Running Dependencies job for {}, reading from {} index, result storing to {}", day, spanIndex, depIndex);
      // Send raw query to ES to select only the docs / spans we want to consider for this job
      // This doesn't change the default behavior as the daily indexes only contain up to 24h of data
      String esQuery = String.format("{\"range\": {\"startTimeMillis\": { \"gte\": \"now-%s\" }}}", spanRange);
      JavaPairRDD<String, Iterable<Span>> traces = JavaEsSpark.esJsonRDD(sc, spanIndex, esQuery)
          .map(new ElasticTupleToSpan())
          .groupBy(Span::getTraceId);
      List<Dependency> dependencyLinks = DependenciesSparkHelper.derive(traces, peerServiceTag);
      EsMajorVersion esMajorVersion = getEsVersion();
      // Add type for ES < 7
      // WARN log is produced for older ES versions, however it's produced by spark-es library and not ES itself, it cannot be disabled
      //  WARN Resource: Detected type name in resource [jaeger-dependencies-2019-08-14/dependencies]. Type names are deprecated and will be removed in a later release.
      if (esMajorVersion.before(EsMajorVersion.V_7_X)) {
        depIndex = depIndex + "/dependencies";
      }
      store(sc, dependencyLinks, depIndex);
      log.info("Done, {} dependency objects created", dependencyLinks.size());
      if (dependencyLinks.size() > 0) {
        // we do not derive dependencies for old prefix "prefix:" if new prefix "prefix-" contains data
        break;
      }
    }
  } finally {
    sc.stop();
  }
}
 
Example 8
Source File: JavaDemo.java    From spark-on-cassandra-quickstart with Apache License 2.0
private void run() {
    JavaSparkContext sc = new JavaSparkContext(conf);
    generateData(sc);
    compute(sc);
    showResults(sc);
    sc.stop();
}
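Example 8 stops the context only on the happy path: if generateData, compute or showResults throws, stop() is never reached. Because JavaSparkContext implements Closeable and close() delegates to stop(), one alternative is try-with-resources. The sketch below assumes a standalone class with a placeholder job; it is not part of the spark-on-cassandra-quickstart project.

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class TryWithResourcesSketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("TryWithResourcesSketch").setMaster("local[*]");
        // close() is invoked automatically when the block exits and delegates to stop().
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            long n = sc.parallelize(Arrays.asList(1, 2, 3)).count();
            System.out.println("count = " + n);
        }
    }
}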
 
Example 9
Source File: SplitFasta.java    From ViraPipe with MIT License
public static void main(String[] args) throws IOException {
    Options options = new Options();
    Option pathOpt = new Option( "in", true, "Path to fastq file in hdfs." );
    Option opOpt = new Option( "out", true, "HDFS path for output files. If not present, the output files are not moved to HDFS." );
    options.addOption( new Option( "partitions", true, "Divide or merge to n partitions" ) ); // takes a value, so getOptionValue("partitions") works below
    options.addOption( pathOpt );
    options.addOption( opOpt );

    CommandLineParser parser = new BasicParser();
    CommandLine cmd = null;
    try {
        // parse the command line arguments
        cmd = parser.parse( options, args );

    }
    catch( ParseException exp ) {
        // parsing failed; report the reason and bail out rather than continuing with a null CommandLine
        System.err.println( "Parsing failed. Reason: " + exp.getMessage() );
        return;
    }

    String out = cmd.hasOption("out") ? cmd.getOptionValue("out") : null;
    String in = cmd.hasOption("in") ? cmd.getOptionValue("in") : null;
    String partitions = cmd.hasOption("partitions") ? cmd.getOptionValue("partitions") : null;

    SparkConf conf = new SparkConf().setAppName("SplitFasta");
    JavaSparkContext sc = new JavaSparkContext(conf);
    sc.hadoopConfiguration().set("textinputformat.record.delimiter", ">");

    JavaRDD<String> rdd = sc.textFile(in);
    JavaRDD<String> crdd = rdd.map(v->">"+v.trim()).repartition(Integer.valueOf(partitions));

    crdd.saveAsTextFile(out);
    sc.stop();
}
 
Example 10
Source File: JavaEmbeddedIgniteRDDSelfTest.java    From ignite with Apache License 2.0
/**
 * @throws Exception If failed.
 */
@Test
public void testStoreDataToIgnite() throws Exception {
    JavaSparkContext sc = createContext();

    JavaIgniteContext<String, String> ic = null;

    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);

        ic.fromCache(PARTITIONED_CACHE_NAME)
            .savePairs(sc.parallelize(F.range(0, KEYS_CNT), GRID_CNT).mapToPair(TO_PAIR_F), true, false);

        Ignite ignite = ic.ignite();

        IgniteCache<String, String> cache = ignite.cache(PARTITIONED_CACHE_NAME);

        for (int i = 0; i < KEYS_CNT; i++) {
            String val = cache.get(String.valueOf(i));

            assertNotNull("Value was not put to cache for key: " + i, val);
            assertEquals("Invalid value stored for key: " + i, "val" + i, val);
        }
    }
    finally {
        if (ic != null)
            ic.close(true);

        sc.stop();
    }
}
 
Example 11
Source File: BatchProcessor.java    From lambda-arch with Apache License 2.0
public static void main(String[] args) throws Exception {
    Properties prop = PropertyFileReader.readPropertyFile("iot-spark.properties");
    String file = prop.getProperty("com.iot.app.hdfs") + "iot-data-parque";
    String[] jars = {prop.getProperty("com.iot.app.jar")};

    JavaSparkContext sparkContext = getSparkContext(prop, jars);
    SQLContext sqlContext = new SQLContext(sparkContext);
    Dataset<Row> dataFrame = getDataFrame(sqlContext, file);
    JavaRDD<IoTData> rdd = dataFrame.javaRDD().map(getRowIoTDataFunction());
    BatchHeatMapProcessor processor = new BatchHeatMapProcessor();
    processor.processHeatMap(rdd);
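    // Note: close() already delegates to stop(), so the explicit stop() call below is a redundant (but harmless) no-op.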
    sparkContext.close();
    sparkContext.stop();
}
 
Example 12
Source File: JavaSVDExample.java    From SparkDemo with MIT License
public static void main(String[] args) {
  SparkConf conf = new SparkConf().setAppName("SVD Example");
  SparkContext sc = new SparkContext(conf);
  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(sc);

  // $example on$
  double[][] array = {{1.12, 2.05, 3.12}, {5.56, 6.28, 8.94}, {10.2, 8.0, 20.5}};
  LinkedList<Vector> rowsList = new LinkedList<>();
  for (int i = 0; i < array.length; i++) {
    Vector currentRow = Vectors.dense(array[i]);
    rowsList.add(currentRow);
  }
  JavaRDD<Vector> rows = jsc.parallelize(rowsList);

  // Create a RowMatrix from JavaRDD<Vector>.
  RowMatrix mat = new RowMatrix(rows.rdd());

  // Compute the top 3 singular values and corresponding singular vectors.
  SingularValueDecomposition<RowMatrix, Matrix> svd = mat.computeSVD(3, true, 1.0E-9d);
  RowMatrix U = svd.U();
  Vector s = svd.s();
  Matrix V = svd.V();
  // $example off$
  Vector[] collectPartitions = (Vector[]) U.rows().collect();
  System.out.println("U factor is:");
  for (Vector vector : collectPartitions) {
    System.out.println("\t" + vector);
  }
  System.out.println("Singular values are: " + s);
  System.out.println("V factor is:\n" + V);

  jsc.stop();
}
 
Example 13
Source File: JavaGradientBoostingClassificationExample.java    From SparkDemo with MIT License
public static void main(String[] args) {
  // $example on$
  SparkConf sparkConf = new SparkConf()
    .setAppName("JavaGradientBoostedTreesClassificationExample");
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);

  // Load and parse the data file.
  String datapath = "data/mllib/sample_libsvm_data.txt";
  JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
  // Split the data into training and test sets (30% held out for testing)
  JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
  JavaRDD<LabeledPoint> trainingData = splits[0];
  JavaRDD<LabeledPoint> testData = splits[1];

  // Train a GradientBoostedTrees model.
  // The defaultParams for Classification use LogLoss by default.
  BoostingStrategy boostingStrategy = BoostingStrategy.defaultParams("Classification");
  boostingStrategy.setNumIterations(3); // Note: Use more iterations in practice.
  boostingStrategy.getTreeStrategy().setNumClasses(2);
  boostingStrategy.getTreeStrategy().setMaxDepth(5);
  // Empty categoricalFeaturesInfo indicates all features are continuous.
  Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
  boostingStrategy.treeStrategy().setCategoricalFeaturesInfo(categoricalFeaturesInfo);

  final GradientBoostedTreesModel model =
    GradientBoostedTrees.train(trainingData, boostingStrategy);

  // Evaluate model on test instances and compute test error
  JavaPairRDD<Double, Double> predictionAndLabel =
    testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {
      @Override
      public Tuple2<Double, Double> call(LabeledPoint p) {
        return new Tuple2<>(model.predict(p.features()), p.label());
      }
    });
  Double testErr =
    1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {
      @Override
      public Boolean call(Tuple2<Double, Double> pl) {
        return !pl._1().equals(pl._2());
      }
    }).count() / testData.count();
  System.out.println("Test Error: " + testErr);
  System.out.println("Learned classification GBT model:\n" + model.toDebugString());

  // Save and load model
  model.save(jsc.sc(), "target/tmp/myGradientBoostingClassificationModel");
  GradientBoostedTreesModel sameModel = GradientBoostedTreesModel.load(jsc.sc(),
    "target/tmp/myGradientBoostingClassificationModel");
  // $example off$

  jsc.stop();
}
 
Example 14
Source File: SamToFastq.java    From ViraPipe with MIT License
public static void main(String[] args) throws IOException {
  SparkConf conf = new SparkConf().setAppName("SamToFastq");
  JavaSparkContext sc = new JavaSparkContext(conf); // declared locally here so the snippet is self-contained

  String in = args[0];
  String out = args[1];

  JavaPairRDD<LongWritable, SAMRecordWritable> bamPairRDD = sc.newAPIHadoopFile(in, AnySAMInputFormat.class, LongWritable.class, SAMRecordWritable.class, sc.hadoopConfiguration());
  //Map to SAMRecord RDD
  JavaRDD<SAMRecord> samRDD = bamPairRDD.map(v1 -> v1._2().get());

  JavaPairRDD<Text, SequencedFragment> fastqrdd = mapSAMRecordsToFastq(samRDD);

  fastqrdd.saveAsNewAPIHadoopFile(out, Text.class, SequencedFragment.class, FastqOutputFormat.class, sc.hadoopConfiguration());

  sc.stop();

}
 
Example 15
Source File: JavaRandomForestClassificationExample.java    From SparkDemo with MIT License
public static void main(String[] args) {
  // $example on$
  SparkConf sparkConf = new SparkConf().setAppName("JavaRandomForestClassificationExample");
  JavaSparkContext jsc = new JavaSparkContext(sparkConf);
  // Load and parse the data file.
  String datapath = "data/mllib/sample_libsvm_data.txt";
  JavaRDD<LabeledPoint> data = MLUtils.loadLibSVMFile(jsc.sc(), datapath).toJavaRDD();
  // Split the data into training and test sets (30% held out for testing)
  JavaRDD<LabeledPoint>[] splits = data.randomSplit(new double[]{0.7, 0.3});
  JavaRDD<LabeledPoint> trainingData = splits[0];
  JavaRDD<LabeledPoint> testData = splits[1];

  // Train a RandomForest model.
  // Empty categoricalFeaturesInfo indicates all features are continuous.
  Integer numClasses = 2;
  HashMap<Integer, Integer> categoricalFeaturesInfo = new HashMap<>();
  Integer numTrees = 3; // Use more in practice.
  String featureSubsetStrategy = "auto"; // Let the algorithm choose.
  String impurity = "gini";
  Integer maxDepth = 5;
  Integer maxBins = 32;
  Integer seed = 12345;

  final RandomForestModel model = RandomForest.trainClassifier(trainingData, numClasses,
    categoricalFeaturesInfo, numTrees, featureSubsetStrategy, impurity, maxDepth, maxBins,
    seed);

  // Evaluate model on test instances and compute test error
  JavaPairRDD<Double, Double> predictionAndLabel =
    testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {
      @Override
      public Tuple2<Double, Double> call(LabeledPoint p) {
        return new Tuple2<>(model.predict(p.features()), p.label());
      }
    });
  Double testErr =
    1.0 * predictionAndLabel.filter(new Function<Tuple2<Double, Double>, Boolean>() {
      @Override
      public Boolean call(Tuple2<Double, Double> pl) {
        return !pl._1().equals(pl._2());
      }
    }).count() / testData.count();
  System.out.println("Test Error: " + testErr);
  System.out.println("Learned classification forest model:\n" + model.toDebugString());

  // Save and load model
  model.save(jsc.sc(), "target/tmp/myRandomForestClassificationModel");
  RandomForestModel sameModel = RandomForestModel.load(jsc.sc(),
    "target/tmp/myRandomForestClassificationModel");
  // $example off$

  jsc.stop();
}
 
Example 16
Source File: JavaEmbeddedIgniteRDDSelfTest.java    From ignite with Apache License 2.0
/**
 * @throws Exception If failed.
 */
@Test
public void testQueryFieldsFromIgnite() throws Exception {
    JavaSparkContext sc = createContext();

    JavaIgniteContext<String, Entity> ic = null;

    try {
        ic = new JavaIgniteContext<>(sc, new IgniteConfigProvider(), false);

        JavaIgniteRDD<String, Entity> cache = ic.fromCache(PARTITIONED_CACHE_NAME);

        cache.savePairs(sc.parallelize(F.range(0, 1001), GRID_CNT).mapToPair(INT_TO_ENTITY_F), true, false);

        Dataset<Row> df =
            cache.sql("select id, name, salary from Entity where name = ? and salary = ?", "name50", 5000);

        df.printSchema();

        Row[] res = (Row[])df.collect();

        assertEquals("Invalid result length", 1, res.length);
        assertEquals("Invalid result", 50, res[0].get(0));
        assertEquals("Invalid result", "name50", res[0].get(1));
        assertEquals("Invalid result", 5000, res[0].get(2));

        Column exp = new Column("NAME").equalTo("name50").and(new Column("SALARY").equalTo(5000));

        Dataset<Row> df0 = cache.sql("select id, name, salary from Entity").where(exp);

        df.printSchema();

        Row[] res0 = (Row[])df0.collect();

        assertEquals("Invalid result length", 1, res0.length);
        assertEquals("Invalid result", 50, res0[0].get(0));
        assertEquals("Invalid result", "name50", res0[0].get(1));
        assertEquals("Invalid result", 5000, res0[0].get(2));

        assertEquals("Invalid count", 500, cache.sql("select id from Entity where id > 500").count());
    }
    finally {
        if (ic != null)
            ic.close(true);

        sc.stop();
    }
}
 
Example 17
Source File: TestCompareParameterAveragingSparkVsSingleMachine.java    From deeplearning4j with Apache License 2.0
@Test
    public void testAverageEveryStepGraphCNN() {
        //Idea: averaging every step with SGD (SGD updater + optimizer) is mathematically identical to doing the learning
        // on a single machine for synchronous distributed training
        //BUT: This is *ONLY* the case if all workers get an identical number of examples. This won't be the case if
        // we use RDD.randomSplit (which is what occurs if we use .fit(JavaRDD<DataSet> on a data set that needs splitting),
        // which might give a number of examples that isn't divisible by number of workers (like 39 examples on 4 executors)
        //This is also ONLY the case using SGD updater

        int miniBatchSizePerWorker = 10;
        int nWorkers = 4;


        for (boolean saveUpdater : new boolean[] {true, false}) {
            JavaSparkContext sc = getContext(nWorkers);

            try {
                //Do training locally, for 3 minibatches
                int[] seeds = {1, 2, 3};

                ComputationGraph net = new ComputationGraph(getGraphConfCNN(12345, new Sgd(0.5)));
                net.init();
                INDArray initialParams = net.params().dup();

                for (int i = 0; i < seeds.length; i++) {
                    DataSet ds = getOneDataSetCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    if (!saveUpdater)
                        net.setUpdater(null);
                    net.fit(ds);
                }
                INDArray finalParams = net.params().dup();

                //Do training on Spark with one executor, for 3 separate minibatches
                TrainingMaster tm = getTrainingMaster(1, miniBatchSizePerWorker, saveUpdater);
                SparkComputationGraph sparkNet = new SparkComputationGraph(sc, getGraphConfCNN(12345, new Sgd(0.5)), tm);
                sparkNet.setCollectTrainingStats(true);
                INDArray initialSparkParams = sparkNet.getNetwork().params().dup();

                for (int i = 0; i < seeds.length; i++) {
                    List<DataSet> list =
                                    getOneDataSetAsIndividalExamplesCNN(miniBatchSizePerWorker * nWorkers, seeds[i]);
                    JavaRDD<DataSet> rdd = sc.parallelize(list);

                    sparkNet.fit(rdd);
                }

//                System.out.println(sparkNet.getSparkTrainingStats().statsAsString());
                sparkNet.getSparkTrainingStats().statsAsString();

                INDArray finalSparkParams = sparkNet.getNetwork().params().dup();

//                System.out.println("Initial (Local) params:  " + Arrays.toString(initialParams.data().asFloat()));
//                System.out.println("Initial (Spark) params:  " + Arrays.toString(initialSparkParams.data().asFloat()));
//                System.out.println("Final (Local) params:    " + Arrays.toString(finalParams.data().asFloat()));
//                System.out.println("Final (Spark) params:    " + Arrays.toString(finalSparkParams.data().asFloat()));
                assertArrayEquals(initialParams.data().asFloat(), initialSparkParams.data().asFloat(), 1e-8f);
                assertArrayEquals(finalParams.data().asFloat(), finalSparkParams.data().asFloat(), 1e-6f);

                double sparkScore = sparkNet.getScore();
                assertTrue(sparkScore > 0.0);

                assertEquals(net.score(), sparkScore, 1e-3);
            } finally {
                sc.stop();
            }
        }
    }
 
Example 18
Source File: SparkKickoff.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {

        System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

        final SparkConf conf = new SparkConf()
                .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
                .setAppName(APPLICATION_NAME)
                .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI);

        JavaSparkContext sparkContext = new JavaSparkContext(conf);

        // business logic                      

        // stop() and close() are equivalent: close() simply delegates to stop(), so a single call would suffice.
        sparkContext.stop();
        sparkContext.close();
    }
 
Example 19
Source File: JavaKMeansExample.java    From SparkDemo with MIT License
public static void main(String[] args) {

    SparkConf conf = new SparkConf().setAppName("JavaKMeansExample");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // $example on$
    // Load and parse data
    String path = "data/mllib/kmeans_data.txt";
    JavaRDD<String> data = jsc.textFile(path);
    JavaRDD<Vector> parsedData = data.map(
      new Function<String, Vector>() {
        public Vector call(String s) {
          String[] sarray = s.split(" ");
          double[] values = new double[sarray.length];
          for (int i = 0; i < sarray.length; i++) {
            values[i] = Double.parseDouble(sarray[i]);
          }
          return Vectors.dense(values);
        }
      }
    );
    parsedData.cache();

    // Cluster the data into two classes using KMeans
    int numClusters = 2;
    int numIterations = 20;
    KMeansModel clusters = KMeans.train(parsedData.rdd(), numClusters, numIterations);

    System.out.println("Cluster centers:");
    for (Vector center: clusters.clusterCenters()) {
      System.out.println(" " + center);
    }
    double cost = clusters.computeCost(parsedData.rdd());
    System.out.println("Cost: " + cost);

    // Evaluate clustering by computing Within Set Sum of Squared Errors
    double WSSSE = clusters.computeCost(parsedData.rdd());
    System.out.println("Within Set Sum of Squared Errors = " + WSSSE);

    // Save and load model
    clusters.save(jsc.sc(), "target/org/apache/spark/JavaKMeansExample/KMeansModel");
    KMeansModel sameModel = KMeansModel.load(jsc.sc(),
      "target/org/apache/spark/JavaKMeansExample/KMeansModel");
    // $example off$

    jsc.stop();
  }
 
Example 20
Source File: FeatureClient.java    From render with GNU General Public License v2.0
public void run(final SparkConf conf) throws IOException, URISyntaxException {

        final JavaSparkContext sparkContext = new JavaSparkContext(conf);

        final String sparkAppId = sparkContext.getConf().getAppId();
        final String executorsJson = LogUtilities.getExecutorsApiJson(sparkAppId);

        LOG.info("run: appId is {}, executors data is {}", sparkAppId, executorsJson);

        for (final String pairJsonFileName : parameters.pairJson) {
            generateFeatureListsForPairFile(sparkContext, pairJsonFileName);
        }

        sparkContext.stop();
    }