Java Code Examples for org.apache.spark.api.java.JavaSparkContext.fromSparkContext()

The following are Java code examples showing how to use the fromSparkContext() method of the org.apache.spark.api.java.JavaSparkContext class. You can vote up the examples you find useful; your votes help us surface more good examples.
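Before the project examples, here is a minimal, self-contained sketch (not taken from any project below; the class, app, and variable names are illustrative) of the typical pattern: build a SparkSession, wrap its existing SparkContext with fromSparkContext(), use the resulting JavaSparkContext, and stop the session. fromSparkContext() reuses the running context rather than creating a second one.

import java.util.Arrays;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;

public class FromSparkContextDemo {
    public static void main(String[] args) {
        // Illustrative local-mode session; use your own master/app name.
        SparkSession spark = SparkSession.builder()
            .appName("fromSparkContextDemo")
            .master("local[*]")
            .getOrCreate();

        // Wrap the session's existing Scala SparkContext in a Java-friendly API.
        JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        long count = jsc.parallelize(Arrays.asList(1, 2, 3)).count();
        System.out.println("count = " + count); // count = 3

        spark.stop(); // also stops the shared SparkContext
    }
}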
Example 1
Project: MinoanER   File: EntityBasedCNPMapPhaseTest.java   Vote up 6 votes
@Before
public void setUp() {
    System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Documents\\hadoop_home"); //only for local mode
    
    spark = SparkSession.builder()
        .appName("test") 
        .config("spark.sql.warehouse.dir", "/file:/tmp")                
        .config("spark.executor.instances", 1)
        .config("spark.executor.cores", 1)
        .config("spark.executor.memory", "1G")            
        .config("spark.driver.maxResultSize", "1g")
        .config("spark.master", "local")
        .getOrCreate();

    jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
 
Example 2
Project: MinoanER   File: BlocksFromEntityIndexTest.java   Vote up 6 votes
@Before
public void setUp() {
    System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Documents\\hadoop_home"); //only for local mode
    
    spark = SparkSession.builder()
        .appName("test") 
        .config("spark.sql.warehouse.dir", "/file:/tmp")                
        .config("spark.executor.instances", 1)
        .config("spark.executor.cores", 1)
        .config("spark.executor.memory", "1G")            
        .config("spark.driver.maxResultSize", "1g")
        .config("spark.master", "local")
        .getOrCreate();

    jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
 
Example 3
Project: MinoanER   File: BlockFilteringAdvancedTest.java   Vote up 6 votes
@Before
public void setUp() {        
    System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Documents\\hadoop_home"); //only for local mode
    
    spark = SparkSession.builder()
        .appName("test") 
        .config("spark.sql.warehouse.dir", "/file:/tmp")                
        .config("spark.executor.instances", 1)
        .config("spark.executor.cores", 1)
        .config("spark.executor.memory", "1G")            
        .config("spark.driver.maxResultSize", "1g")
        .config("spark.master", "local")
        .getOrCreate();

    jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());
}
 
Example 4
Project: oryx2   File: AbstractSparkLayer.java   Vote up 5 votes
protected final JavaStreamingContext buildStreamingContext() {
  log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

  SparkConf sparkConf = new SparkConf();

  // Only for tests, really
  if (sparkConf.getOption("spark.master").isEmpty()) {
    log.info("Overriding master to {} for tests", streamingMaster);
    sparkConf.setMaster(streamingMaster);
  }
  // Only for tests, really
  if (sparkConf.getOption("spark.app.name").isEmpty()) {
    String appName = "Oryx" + getLayerName();
    if (id != null) {
      appName = appName + "-" + id;
    }
    log.info("Overriding app name to {} for tests", appName);
    sparkConf.setAppName(appName);
  }
  extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

  // Turn this down to prevent long blocking at shutdown
  sparkConf.setIfMissing(
      "spark.streaming.gracefulStopTimeout",
      Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
  sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
  long generationIntervalMS =
      TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
  return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
 
Example 5
Project: oryx2   File: AbstractSparkIT.java   Vote up 5 votes
@BeforeClass
public static void setUp() {
  SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("SparkIT");
  javaSparkContext = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
}
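None of the test setups above show the matching teardown. A minimal sketch, assuming JUnit 4 (which the @Before/@BeforeClass annotations suggest): stop the shared context after the test class so later tests can create a fresh one.

@AfterClass
public static void tearDown() {
    if (javaSparkContext != null) {
        javaSparkContext.stop(); // also stops the underlying SparkContext
    }
}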
 
Example 6
Project: MinoanER   File: MetaBlockingOnlyValuesCBS.java   Vote up 4 votes
public static void main(String[] args) {
    String tmpPath;
    String master;
    String inputPath;        
    String outputPath;
    
    if (args.length == 0) {
        System.setProperty("hadoop.home.dir", "C:\\Users\\VASILIS\\Documents\\hadoop_home"); //only for local mode
        
        tmpPath = "/file:C:\\tmp";
        master = "local[2]";
        inputPath = "/file:C:\\Users\\VASILIS\\Documents\\OAEI_Datasets\\exportedBlocks\\testInput";            
        outputPath = "/file:C:\\Users\\VASILIS\\Documents\\OAEI_Datasets\\exportedBlocks\\testOutput";            
    } else {            
        tmpPath = "/file:/tmp/";
        //master = "spark://master:7077";
        inputPath = args[0];            
        outputPath = args[1];
        // delete existing output directories
        try {                                
            Utils.deleteHDFSPath(outputPath);
        } catch (IOException | URISyntaxException ex) {
            Logger.getLogger(MetaBlockingOnlyValuesCBS.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    String appName = "MetaBlocking CBS only values on "+inputPath.substring(inputPath.lastIndexOf("/", inputPath.length()-2)+1);
    SparkSession spark = Utils.setUpSpark(appName, 288, 8, 3, tmpPath);
    int PARALLELISM = spark.sparkContext().getConf().getInt("spark.default.parallelism", 152);        
    JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

    ////////////////////////
    //start the processing//
    ////////////////////////
    
    //Block Filtering
    System.out.println("\n\nStarting BlockFiltering, reading from "+inputPath);
    LongAccumulator BLOCK_ASSIGNMENTS_ACCUM = jsc.sc().longAccumulator();
    BlockFilteringAdvanced bf = new BlockFilteringAdvanced();
    JavaPairRDD<Integer,IntArrayList> entityIndex = bf.run(jsc.textFile(inputPath), BLOCK_ASSIGNMENTS_ACCUM); 
    entityIndex.cache();        
            
    //Blocks From Entity Index
    System.out.println("\n\nStarting BlocksFromEntityIndex...");
            
    LongAccumulator CLEAN_BLOCK_ACCUM = jsc.sc().longAccumulator();
    LongAccumulator NUM_COMPARISONS_ACCUM = jsc.sc().longAccumulator();
    
    BlocksFromEntityIndex bFromEI = new BlocksFromEntityIndex();
    JavaPairRDD<Integer, IntArrayList> blocksFromEI = bFromEI.run(entityIndex, CLEAN_BLOCK_ACCUM, NUM_COMPARISONS_ACCUM);
    blocksFromEI.persist(StorageLevel.DISK_ONLY());
    
    blocksFromEI.count(); //the simplest action just to run blocksFromEI and get the actual value for the counters below
    
    double BCin = (double) BLOCK_ASSIGNMENTS_ACCUM.value() / entityIndex.count(); //BCin = average number of block assignments per entity
    final int K = Math.max(1, ((Double)Math.floor(BCin)).intValue()); //K = max(1, floor(BCin))
    System.out.println(BLOCK_ASSIGNMENTS_ACCUM.value()+" block assignments");
    System.out.println(CLEAN_BLOCK_ACCUM.value()+" clean blocks");
    System.out.println(NUM_COMPARISONS_ACCUM.value()+" comparisons");
    System.out.println("BCin = "+BCin);
    System.out.println("K = "+K);
    
    entityIndex.unpersist();
    
    //CNP
    System.out.println("\n\nStarting CNP...");
    CNPCBSValuesOnly cnp = new CNPCBSValuesOnly();
    JavaPairRDD<Integer,IntArrayList> metablockingResults = cnp.run(blocksFromEI, K);
    
    metablockingResults
            .mapValues(x -> x.toString()).saveAsTextFile(outputPath); //only to see the output and add an action (saving to file may not be needed)
    System.out.println("Job finished successfully. Output written in "+outputPath);
}
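One pattern in Example 6 deserves a closing note: Spark transformations are lazy, so the accumulator values printed there are only dependable after an action (the blocksFromEI.count() call) has forced the computation. A minimal standalone sketch of that pattern, with illustrative names throughout:

import java.util.Arrays;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.util.LongAccumulator;

public class AccumulatorPatternDemo {
    public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .appName("accumulatorPatternDemo")
            .master("local[*]")
            .getOrCreate();
        JavaSparkContext jsc = JavaSparkContext.fromSparkContext(spark.sparkContext());

        LongAccumulator seen = jsc.sc().longAccumulator();
        JavaRDD<Integer> doubled = jsc.parallelize(Arrays.asList(1, 2, 3, 4))
            .map(x -> { seen.add(1); return x * 2; });

        // Nothing has executed yet; 'seen' is still 0 here.
        doubled.count(); // the action runs the map, populating the accumulator
        System.out.println(seen.value() + " elements seen"); // 4 elements seen

        spark.stop();
    }
}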