Java Code Examples for org.apache.spark.api.java.JavaSparkContext#addJar()

The following examples show how to use org.apache.spark.api.java.JavaSparkContext#addJar(). The examples are drawn from open source projects; the source file, project, and license are noted above each example.
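Before the project examples, here is a minimal, self-contained sketch of the call itself. addJar(path) ships a JAR to the cluster so that tasks running on the executors can load its classes; the path may be a local file, an hdfs:// (or other Hadoop-supported) URI, an http://, https://, or ftp:// URI, or a local:/ path already present on every worker. The application name, master URL, and JAR path below are placeholders:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class AddJarSketch {
  public static void main(String[] args) {
    SparkConf conf = new SparkConf()
        .setAppName("AddJarSketch")  // placeholder application name
        .setMaster("local[2]");      // placeholder master URL
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Placeholder path: ship a dependency JAR to every executor so its
    // classes are available to tasks.
    jsc.addJar("/path/to/dependency.jar");

    // ... run jobs that use classes from the added JAR ...

    jsc.stop();
  }
}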
Example 1
Source File: SparkSegmentGenerationJobRunner.java    From incubator-pinot with Apache License 2.0
protected void addDepsJarToDistributedCache(JavaSparkContext sparkContext, String depsJarDir)
    throws IOException {
  if (depsJarDir != null) {
    URI depsJarDirURI = URI.create(depsJarDir);
    if (depsJarDirURI.getScheme() == null) {
      depsJarDirURI = new File(depsJarDir).toURI();
    }
    PinotFS pinotFS = PinotFSFactory.create(depsJarDirURI.getScheme());
    String[] files = pinotFS.listFiles(depsJarDirURI, true);
    for (String file : files) {
      if (!pinotFS.isDirectory(URI.create(file))) {
        if (file.endsWith(".jar")) {
          LOGGER.info("Adding deps jar: {} to distributed cache", file);
          sparkContext.addJar(file);
        }
      }
    }
  }
}
 
Example 2
Source File: PinotSparkJobPreparationHelper.java    From incubator-pinot with Apache License 2.0
public static void addDepsJarToDistributedCacheHelper(FileSystem fileSystem, JavaSparkContext sparkContext,
    Path depsJarDir)
    throws IOException {
  FileStatus[] fileStatuses = fileSystem.listStatus(depsJarDir);
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      addDepsJarToDistributedCacheHelper(fileSystem, sparkContext, fileStatus.getPath());
    } else {
      Path depJarPath = fileStatus.getPath();
      if (depJarPath.getName().endsWith(".jar")) {
        _logger.info("Adding deps jar: {} to distributed cache", depJarPath);
        sparkContext.addJar(depJarPath.toUri().getPath());
      }
    }
  }
}
 
Example 3
Source File: JavaHBaseBulkDeleteExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println("JavaHBaseBulkDeleteExample {master} {tableName}");
    return;
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkDeleteExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkDelete(rdd, tableName, new DeleteFunction(), 4);

}
 
Example 4
Source File: TestJavaLocalMainExample.java    From learning-hadoop with Apache License 2.0
public static void setUp() {
  jsc = new JavaSparkContext("local", "JavaHBaseContextSuite");
  jsc.addJar("SparkHBase.jar");
  
  tempDir = Files.createTempDir();
  tempDir.deleteOnExit();

  htu = HBaseTestingUtility.createLocalHTU();
  try {
    System.out.println("cleaning up test dir");

    htu.cleanupTestDir();

    System.out.println("starting minicluster");

    htu.startMiniZKCluster();
    htu.startMiniHBaseCluster(1, 1);

    System.out.println(" - minicluster started");

    try {
      htu.deleteTable(Bytes.toBytes(tableName));
    } catch (Exception e) {
      System.out.println(" - no table " + tableName + " found");
    }

    System.out.println(" - creating table " + tableName);
    htu.createTable(Bytes.toBytes(tableName), Bytes.toBytes(columnFamily));
    System.out.println(" - created table");
  } catch (Exception e1) {
    throw new RuntimeException(e1);
  }
}
 
Example 5
Source File: JavaHBaseStreamingBulkPutExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 5) {
    System.out
        .println("JavaHBaseStreamingBulkPutExample {master} {host} {port} {tableName} {columnFamily}");
    return;
  }

  String master = args[0];
  String host = args[1];
  String port = args[2];
  String tableName = args[3];
  String columnFamily = args[4];

  System.out.println("master:" + master);
  System.out.println("host:" + host);
  System.out.println("port:" + Integer.parseInt(port));
  System.out.println("tableName:" + tableName);
  System.out.println("columnFamily:" + columnFamily);
  
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.cleaner.ttl", "120000");
  
  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkPutExample");
  jsc.addJar("SparkHBase.jar");
  
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));

  JavaReceiverInputDStream<String> javaDstream = jssc.socketTextStream(host, Integer.parseInt(port));
  
  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.streamBulkPut(javaDstream, tableName, new PutFunction(), true);

  // Start the streaming job and block until it terminates; without
  // start(), the registered bulk-put output operation never runs.
  jssc.start();
  jssc.awaitTermination();
}
 
Example 6
Source File: JavaHBaseBulkGetExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println("JavaHBaseBulkGetExample {master} {tableName}");
    return;
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkGetExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkGet(tableName, 2, rdd, new GetFunction(),
      new ResultFunction());
}
 
Example 7
Source File: JavaHBaseBulkIncrementExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 3) {
    System.out.println("JavaHBaseBulkIncrementExample {master} {tableName} {columnFamily}");
    return;
  }

  String master = args[0];
  String tableName = args[1];
  String columnFamily = args[2];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkIncrementExample");
  jsc.addJar("SparkHBase.jar");

  List<String> list = new ArrayList<String>();
  list.add("1," + columnFamily + ",counter,1");
  list.add("2," + columnFamily + ",counter,2");
  list.add("3," + columnFamily + ",counter,3");
  list.add("4," + columnFamily + ",counter,4");
  list.add("5," + columnFamily + ",counter,5");

  JavaRDD<String> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkIncrement(rdd, tableName, new IncrementFunction(), 4);

}
 
Example 8
Source File: JavaHBaseBulkPutExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 3) {
    System.out.println("JavaHBaseBulkPutExample {master} {tableName} {columnFamily}");
    return;
  }

  String master = args[0];
  String tableName = args[1];
  String columnFamily = args[2];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkPutExample");
  jsc.addJar("SparkHBase.jar");

  List<String> list = new ArrayList<String>();
  list.add("1," + columnFamily + ",a,1");
  list.add("2," + columnFamily + ",a,2");
  list.add("3," + columnFamily + ",a,3");
  list.add("4," + columnFamily + ",a,4");
  list.add("5," + columnFamily + ",a,5");

  JavaRDD<String> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkPut(rdd, tableName, new PutFunction(), true);
}
 
Example 9
Source File: JavaHBaseDistributedScan.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println("JavaHBaseDistributedScan {master} {tableName}");
    return;
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseDistributedScan");
  jsc.addJar("SparkHBase.jar");


  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  Scan scan = new Scan();
  scan.setCaching(100);
  
  JavaRDD<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> javaRdd = hbaseContext.hbaseRDD(tableName, scan);
  
  List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> results = javaRdd.collect();
  
  System.out.println("Scanned " + results.size() + " rows");
}
 
Example 10
Source File: SparkSessionRollup.java    From aerospike-hadoop with Apache License 2.0
public static void main(String[] args) {
    com.aerospike.client.Log.setCallback(new AerospikeLogger());
    com.aerospike.client.Log.setLevel(com.aerospike.client.Log.Level.DEBUG);
    
    SparkConf conf = new SparkConf()
        .setAppName(appName)
        .set("spark.executor.memory", "2g")
        .setMaster(master);
    JavaSparkContext sc = new JavaSparkContext(conf);
    sc.addJar("build/libs/spark_session_rollup.jar");

    JavaRDD<String> entries = sc.textFile("hdfs://localhost:54310/tmp/input");

    JavaPairRDD<Long, Iterable<Long>> userhits =
        entries.mapToPair(new ExtractHits()).groupByKey();

    JavaPairRDD<String, Session> sessions =
        userhits.flatMapToPair(new FindSessions());

    System.err.println(sessions.count());

    JobConf job = new JobConf();
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(Session.class);
    job.setOutputFormat(SessionOutputFormat.class);

    AerospikeConfigUtil.setOutputHost(job, "localhost");
    AerospikeConfigUtil.setOutputPort(job, 3000);
    AerospikeConfigUtil.setOutputNamespace(job, "test");
    AerospikeConfigUtil.setOutputSetName(job, "sessions3");

    sessions.saveAsHadoopDataset(job);
}
 
Example 11
Source File: GraknSparkComputer.java    From grakn with GNU Affero General Public License v3.0
@Override
protected void loadJar(final Configuration hadoopConfiguration, final File file, final Object... params) {
    JavaSparkContext sparkContext = (JavaSparkContext) params[0];
    sparkContext.addJar(file.getAbsolutePath());
}
 
Example 12
Source File: SparkGraphComputer.java    From tinkerpop with Apache License 2.0
@Override
protected void loadJar(final Configuration hadoopConfiguration, final File file, final Object... params) {
    final JavaSparkContext sparkContext = (JavaSparkContext) params[0];
    sparkContext.addJar(file.getAbsolutePath());
}
 
Example 13
Source File: JavaHBaseMapGetPutExample.java    From learning-hadoop with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 2) {
    System.out.println("JavaHBaseMapGetPutExample {master} {tableName}");
    return;
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseMapGetPutExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  // Parallelize the row keys into a Spark RDD
  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  // Load the HBase cluster configuration
  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  // Wire Spark and HBase together
  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  // For each row key, read the row and, if it exists, write it back
  hbaseContext.foreach(rdd, new VoidFunction<Tuple2<byte[], HConnection>>() {
    public void call(Tuple2<byte[], HConnection> t) throws Exception {
      HTableInterface table = t._2.getTable(Bytes.toBytes("Foo"));
      try {
        byte[] rowKey = t._1;
        Result result = table.get(new Get(rowKey));
        // Result.getExists() is only populated for existence-only checks,
        // so test for a non-empty result instead.
        if (!result.isEmpty()) {
          // A Put must carry at least one column; the family and
          // qualifier below are placeholders.
          Put put = new Put(rowKey);
          put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), rowKey);
          table.put(put);
        }
      } finally {
        table.close();
      }
    }
  });
  
}