Java Code Examples for org.apache.spark.api.java.JavaSparkContext#addJar()
The following examples show how to use org.apache.spark.api.java.JavaSparkContext#addJar(). You can follow the link above each example to view the original project or source file.
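For context, addJar() registers a JAR dependency with the SparkContext so that it is shipped to the executors and its classes become available to future tasks on that context. Before the project examples, here is a minimal sketch of the basic call pattern; it is not taken from any of the projects below, and the application name, master URL, and jar path are illustrative assumptions.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;

public class AddJarSketch {
  public static void main(String[] args) {
    // App name and local master are placeholders for illustration only.
    SparkConf conf = new SparkConf()
        .setAppName("AddJarSketch")
        .setMaster("local[*]");
    JavaSparkContext jsc = new JavaSparkContext(conf);

    // Ship an extra jar to the executors; the path here is hypothetical.
    jsc.addJar("/path/to/extra-deps.jar");

    // ... define RDDs and run jobs that use classes from that jar ...

    jsc.stop();
  }
}

Besides local file paths, addJar() also accepts HDFS (and other Hadoop-supported filesystem) paths, HTTP/HTTPS/FTP URIs, and local:/ URIs for files already present on every worker node.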
Example 1
Source File: SparkSegmentGenerationJobRunner.java From incubator-pinot with Apache License 2.0

protected void addDepsJarToDistributedCache(JavaSparkContext sparkContext, String depsJarDir)
    throws IOException {
  if (depsJarDir != null) {
    URI depsJarDirURI = URI.create(depsJarDir);
    if (depsJarDirURI.getScheme() == null) {
      depsJarDirURI = new File(depsJarDir).toURI();
    }
    PinotFS pinotFS = PinotFSFactory.create(depsJarDirURI.getScheme());
    String[] files = pinotFS.listFiles(depsJarDirURI, true);
    for (String file : files) {
      if (!pinotFS.isDirectory(URI.create(file))) {
        if (file.endsWith(".jar")) {
          LOGGER.info("Adding deps jar: {} to distributed cache", file);
          sparkContext.addJar(file);
        }
      }
    }
  }
}
Example 2
Source File: PinotSparkJobPreparationHelper.java From incubator-pinot with Apache License 2.0

public static void addDepsJarToDistributedCacheHelper(FileSystem fileSystem, JavaSparkContext sparkContext,
    Path depsJarDir)
    throws IOException {
  FileStatus[] fileStatuses = fileSystem.listStatus(depsJarDir);
  for (FileStatus fileStatus : fileStatuses) {
    if (fileStatus.isDirectory()) {
      addDepsJarToDistributedCacheHelper(fileSystem, sparkContext, fileStatus.getPath());
    } else {
      Path depJarPath = fileStatus.getPath();
      if (depJarPath.getName().endsWith(".jar")) {
        _logger.info("Adding deps jar: {} to distributed cache", depJarPath);
        sparkContext.addJar(depJarPath.toUri().getPath());
      }
    }
  }
}
Example 3
Source File: JavaHBaseBulkDeleteExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkDeleteExample {master} {tableName}");
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkDeleteExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkDelete(rdd, tableName, new DeleteFunction(), 4);
}
Example 4
Source File: TestJavaLocalMainExample.java From learning-hadoop with Apache License 2.0

public static void setUp() {
  jsc = new JavaSparkContext("local", "JavaHBaseContextSuite");
  jsc.addJar("SparkHBase.jar");

  tempDir = Files.createTempDir();
  tempDir.deleteOnExit();

  htu = HBaseTestingUtility.createLocalHTU();
  try {
    System.out.println("cleaning up test dir");
    htu.cleanupTestDir();

    System.out.println("starting minicluster");
    htu.startMiniZKCluster();
    htu.startMiniHBaseCluster(1, 1);
    System.out.println(" - minicluster started");

    try {
      htu.deleteTable(Bytes.toBytes(tableName));
    } catch (Exception e) {
      System.out.println(" - no table " + tableName + " found");
    }

    System.out.println(" - creating table " + tableName);
    htu.createTable(Bytes.toBytes(tableName), Bytes.toBytes(columnFamily));
    System.out.println(" - created table");
  } catch (Exception e1) {
    throw new RuntimeException(e1);
  }
}
Example 5
Source File: JavaHBaseStreamingBulkPutExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkPutExample {master} {host} {post} {tableName} {columnFamily}");
  }

  String master = args[0];
  String host = args[1];
  String port = args[2];
  String tableName = args[3];
  String columnFamily = args[4];

  System.out.println("master:" + master);
  System.out.println("host:" + host);
  System.out.println("port:" + Integer.parseInt(port));
  System.out.println("tableName:" + tableName);
  System.out.println("columnFamily:" + columnFamily);

  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.cleaner.ttl", "120000");

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkPutExample");
  jsc.addJar("SparkHBase.jar");

  JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));

  JavaReceiverInputDStream<String> javaDstream =
      jssc.socketTextStream(host, Integer.parseInt(port));

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.streamBulkPut(javaDstream, tableName, new PutFunction(), true);
}
Example 6
Source File: JavaHBaseBulkGetExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkGetExample {master} {tableName}");
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkGetExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkGet(tableName, 2, rdd, new GetFunction(), new ResultFunction());
}
Example 7
Source File: JavaHBaseBulkIncrementExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkIncrementExample {master} {tableName} {columnFamily}");
  }

  String master = args[0];
  String tableName = args[1];
  String columnFamily = args[2];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkIncrementExample");
  jsc.addJar("SparkHBase.jar");

  List<String> list = new ArrayList<String>();
  list.add("1," + columnFamily + ",counter,1");
  list.add("2," + columnFamily + ",counter,2");
  list.add("3," + columnFamily + ",counter,3");
  list.add("4," + columnFamily + ",counter,4");
  list.add("5," + columnFamily + ",counter,5");

  JavaRDD<String> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkIncrement(rdd, tableName, new IncrementFunction(), 4);
}
Example 8
Source File: JavaHBaseBulkPutExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkPutExample {master} {tableName} {columnFamily}");
  }

  String master = args[0];
  String tableName = args[1];
  String columnFamily = args[2];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkPutExample");
  jsc.addJar("SparkHBase.jar");

  List<String> list = new ArrayList<String>();
  list.add("1," + columnFamily + ",a,1");
  list.add("2," + columnFamily + ",a,2");
  list.add("3," + columnFamily + ",a,3");
  list.add("4," + columnFamily + ",a,4");
  list.add("5," + columnFamily + ",a,5");

  JavaRDD<String> rdd = jsc.parallelize(list);

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.bulkPut(rdd, tableName, new PutFunction(), true);
}
Example 9
Source File: JavaHBaseDistributedScan.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseDistributedScan {master} {tableName}");
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseDistributedScan");
  jsc.addJar("SparkHBase.jar");

  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  Scan scan = new Scan();
  scan.setCaching(100);

  JavaRDD<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> javaRdd =
      hbaseContext.hbaseRDD(tableName, scan);

  List<Tuple2<byte[], List<Tuple3<byte[], byte[], byte[]>>>> results = javaRdd.collect();

  results.size();
}
Example 10
Source File: SparkSessionRollup.java From aerospike-hadoop with Apache License 2.0

public static void main(String[] args) {
  com.aerospike.client.Log.setCallback(new AerospikeLogger());
  com.aerospike.client.Log.setLevel(com.aerospike.client.Log.Level.DEBUG);

  SparkConf conf = new SparkConf()
      .setAppName(appName)
      .set("spark.executor.memory", "2g")
      .setMaster(master);
  JavaSparkContext sc = new JavaSparkContext(conf);
  sc.addJar("build/libs/spark_session_rollup.jar");

  JavaRDD<String> entries = sc.textFile("hdfs://localhost:54310/tmp/input");

  JavaPairRDD<Long, Iterable<Long>> userhits =
      entries.mapToPair(new ExtractHits()).groupByKey();

  JavaPairRDD<String, Session> sessions =
      userhits.flatMapToPair(new FindSessions());

  System.err.println(sessions.count());

  JobConf job = new JobConf();
  job.setOutputKeyClass(String.class);
  job.setOutputValueClass(Session.class);
  job.setOutputFormat(SessionOutputFormat.class);

  AerospikeConfigUtil.setOutputHost(job, "localhost");
  AerospikeConfigUtil.setOutputPort(job, 3000);
  AerospikeConfigUtil.setOutputNamespace(job, "test");
  AerospikeConfigUtil.setOutputSetName(job, "sessions3");

  sessions.saveAsHadoopDataset(job);
}
Example 11
Source File: GraknSparkComputer.java From grakn with GNU Affero General Public License v3.0

@Override
protected void loadJar(final Configuration hadoopConfiguration, final File file, final Object... params) {
  JavaSparkContext sparkContext = (JavaSparkContext) params[0];
  sparkContext.addJar(file.getAbsolutePath());
}
Example 12
Source File: SparkGraphComputer.java From tinkerpop with Apache License 2.0

@Override
protected void loadJar(final Configuration hadoopConfiguration, final File file, final Object... params) {
  final JavaSparkContext sparkContext = (JavaSparkContext) params[0];
  sparkContext.addJar(file.getAbsolutePath());
}
Example 13
Source File: JavaHBaseMapGetPutExample.java From learning-hadoop with Apache License 2.0

public static void main(String args[]) {
  if (args.length == 0) {
    System.out.println("JavaHBaseBulkGetExample {master} {tableName}");
  }

  String master = args[0];
  String tableName = args[1];

  JavaSparkContext jsc = new JavaSparkContext(master, "JavaHBaseBulkGetExample");
  jsc.addJar("SparkHBase.jar");

  List<byte[]> list = new ArrayList<byte[]>();
  list.add(Bytes.toBytes("1"));
  list.add(Bytes.toBytes("2"));
  list.add(Bytes.toBytes("3"));
  list.add(Bytes.toBytes("4"));
  list.add(Bytes.toBytes("5"));

  //All Spark
  JavaRDD<byte[]> rdd = jsc.parallelize(list);

  //All HBase
  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  //This is me
  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  //This is me
  hbaseContext.foreachPartition(rdd, null);

  hbaseContext.foreach(rdd, new VoidFunction<Tuple2<byte[], HConnection>>() {
    public void call(Tuple2<byte[], HConnection> t) throws Exception {
      HTableInterface table1 = t._2.getTable(Bytes.toBytes("Foo"));

      byte[] b = t._1;
      Result r = table1.get(new Get(b));
      if (r.getExists()) {
        table1.put(new Put(b));
      }
    }
  });
}