Java Code Examples for org.apache.spark.api.java.function.Function2
The following examples show how to use
org.apache.spark.api.java.function.Function2. They are extracted from open source projects;
the source project, file, and license are noted above each example.
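Function2<T1, T2, R> is the two-argument function interface of Spark's Java API, passed to operations such as reduce, reduceByKey, and mapPartitionsWithIndex. For orientation, here is a minimal sketch of the interface the examples below implement (see the Spark sources for the authoritative definition):

// Sketch of org.apache.spark.api.java.function.Function2
public interface Function2<T1, T2, R> extends java.io.Serializable {
    R call(T1 v1, T2 v2) throws Exception;
}

Because it has a single abstract method, it can be implemented either as an anonymous class, as most of the examples below do, or as a lambda.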
Example 1
Source Project: sparkResearch Source File: KafkaStreaming.java License: Apache License 2.0
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("KafkaWordCount").setMaster("local[2]");
    // Note: this creates a 10000-second batch interval
    JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(10000));
    // Set the checkpoint directory
    streamingContext.checkpoint("HDFS URL");
    Map<String, Integer> topicThread = new HashMap<>(1);
    topicThread.put(TOPIC, THREAD);
    JavaPairInputDStream<String, String> dStream =
            KafkaUtils.createStream(streamingContext, HOST, GROP, topicThread);

    JavaDStream<String> words = dStream.flatMap(
            (FlatMapFunction<Tuple2<String, String>, String>) stringStringTuple2 ->
                    Arrays.asList(SPACE.split(stringStringTuple2._2)).iterator());

    // Count the words
    JavaPairDStream<String, Integer> result = words
            .mapToPair((PairFunction<String, String, Integer>) s -> new Tuple2<>(s, 1))
            .reduceByKey((Function2<Integer, Integer, Integer>) (v1, v2) -> v1 + v2);

    try {
        result.print();
        streamingContext.start();
        streamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
Example 2
Source Project: kylin-on-parquet-v2 Source File: SparkCubingByLayer.java License: Apache License 2.0
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
    final ByteArray ONE = new ByteArray();
    Long count = rdd.mapValues(new Function<Object[], Long>() {
        @Override
        public Long call(Object[] objects) throws Exception {
            return (Long) objects[countMeasureIndex];
        }
    }).reduce(new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
        @Override
        public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> longTuple2,
                Tuple2<ByteArray, Long> longTuple22) throws Exception {
            return new Tuple2<>(ONE, longTuple2._2() + longTuple22._2());
        }
    })._2();
    return count;
}
Example 3
Source Project: kylin-on-parquet-v2 Source File: NGlobalDictionaryV2Test.java License: Apache License 2.0
private void runWithSparkBuildGlobalDict(NGlobalDictionaryV2 dict, List<String> stringSet) throws IOException {
    KylinConfig config = KylinConfig.getInstanceFromEnv();
    dict.prepareWrite();
    List<Row> rowList = Lists.newLinkedList();
    for (String str : stringSet) {
        rowList.add(RowFactory.create(str));
    }
    Dataset<Row> ds = ss.createDataFrame(rowList,
            new StructType(new StructField[] { DataTypes.createStructField("col1", DataTypes.StringType, true) }));
    ds.toJavaRDD().mapToPair((PairFunction<Row, String, String>) row -> {
        if (row.get(0) == null)
            return new Tuple2<>(null, null);
        return new Tuple2<>(row.get(0).toString(), null);
    }).sortByKey().partitionBy(new HashPartitioner(BUCKET_SIZE)).mapPartitionsWithIndex(
            (Function2<Integer, Iterator<Tuple2<String, String>>, Iterator<Object>>) (bucketId, tuple2Iterator) -> {
                NBucketDictionary bucketDict = dict.loadBucketDictionary(bucketId);
                while (tuple2Iterator.hasNext()) {
                    Tuple2<String, String> tuple2 = tuple2Iterator.next();
                    bucketDict.addRelativeValue(tuple2._1);
                }
                bucketDict.saveBucketDict(bucketId);
                return Lists.newArrayList().iterator();
            }, true).count();
    dict.writeMetaDict(BUCKET_SIZE, config.getGlobalDictV2MaxVersions(), config.getGlobalDictV2VersionTTL());
}
Example 4
Source Project: incubator-nemo Source File: ReduceTransform.java License: Apache License 2.0
/**
 * Reduce the iterator elements into a single object.
 *
 * @param elements the iterator of elements.
 * @param func     function to apply for reduction.
 * @param <T>      type of the elements.
 * @return the reduced element.
 */
@Nullable
public static <T> T reduceIterator(final Iterator<T> elements, final Function2<T, T, T> func) {
    if (!elements.hasNext()) { // nothing to be done
        return null;
    }

    T res = elements.next();
    while (elements.hasNext()) {
        try {
            res = func.call(res, elements.next());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return res;
}
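A hedged usage sketch of the helper above; the iterator and lambda are illustrative and not part of the original source:

// Hypothetical call site for reduceIterator; returns null for an empty iterator
Integer sum = reduceIterator(Arrays.asList(1, 2, 3).iterator(), (a, b) -> a + b); // 6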
Example 5
Source Project: SparkDemo Source File: Reduce.java License: MIT License
private static void reduce(JavaSparkContext sc) {
    List<Integer> numberList = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
    JavaRDD<Integer> javaRDD = sc.parallelize(numberList);

    /**
     * =====================================================
     * |                  Cumulative sum                   |
     * =====================================================
     */
    Integer num = javaRDD.reduce(new Function2<Integer, Integer, Integer>() {
        /**
         * @param num1 the result returned by the previous call
         * @param num2 the current value
         */
        @Override
        public Integer call(Integer num1, Integer num2) throws Exception {
            // System.out.println(num1 + "======" + num2);
            return num1 + num2;
        }
    });

    System.out.println(num);

    sc.close();
}
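Since Function2 has a single abstract method, the anonymous class above can be collapsed into a lambda. A minimal equivalent sketch:

// Equivalent cumulative sum written as a lambda (Java 8+)
Integer sum = javaRDD.reduce((num1, num2) -> num1 + num2);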
Example 6
Source Project: vn.vitk Source File: Tokenizer.java License: GNU General Public License v3.0
/**
 * Counts the number of non-space characters in this data set. This utility method
 * is used to check the tokenization result.
 * @param lines
 * @return number of characters
 */
int numCharacters(JavaRDD<String> lines) {
    JavaRDD<Integer> lengths = lines.map(new Function<String, Integer>() {
        private static final long serialVersionUID = -2189399343462982586L;

        @Override
        public Integer call(String line) throws Exception {
            line = line.replaceAll("[\\s_]+", "");
            return line.length();
        }
    });
    return lengths.reduce(new Function2<Integer, Integer, Integer>() {
        private static final long serialVersionUID = -8438072946884289401L;

        @Override
        public Integer call(Integer e0, Integer e1) throws Exception {
            return e0 + e1;
        }
    });
}
Example 7
Source Project: nemo Source File: ReduceTransform.java License: Apache License 2.0
/**
 * Reduce the iterator elements into a single object.
 * @param elements the iterator of elements.
 * @param func function to apply for reduction.
 * @param <T> type of the elements.
 * @return the reduced element.
 */
@Nullable
public static <T> T reduceIterator(final Iterator<T> elements, final Function2<T, T, T> func) {
    if (!elements.hasNext()) { // nothing to be done
        return null;
    }

    T res = elements.next();
    while (elements.hasNext()) {
        try {
            res = func.call(res, elements.next());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
    return res;
}
Example 8
Source Project: kylin Source File: SparkCubingByLayer.java License: Apache License 2.0
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
    final ByteArray ONE = new ByteArray();
    Long count = rdd.mapValues(new Function<Object[], Long>() {
        @Override
        public Long call(Object[] objects) throws Exception {
            return (Long) objects[countMeasureIndex];
        }
    }).reduce(new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
        @Override
        public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> longTuple2,
                Tuple2<ByteArray, Long> longTuple22) throws Exception {
            return new Tuple2<>(ONE, longTuple2._2() + longTuple22._2());
        }
    })._2();
    return count;
}
Example 9
Source Project: incubator-retired-blur Source File: BlurBulkLoadSparkProcessor.java License: Apache License 2.0
@Override
protected Function2<JavaPairRDD<String, RowMutation>, Time, Void> getFunction() {
    return new Function2<JavaPairRDD<String, RowMutation>, Time, Void>() {
        // Blur Thrift Client
        @Override
        public Void call(JavaPairRDD<String, RowMutation> rdd, Time time) throws Exception {
            Iface client = getBlurClient();
            for (Tuple2<String, RowMutation> tuple : rdd.collect()) {
                if (tuple != null) {
                    try {
                        RowMutation rm = tuple._2;
                        // Index using enqueue mutate call
                        client.enqueueMutate(rm);
                    } catch (Exception ex) {
                        LOG.error("Unknown error while trying to call enqueueMutate.", ex);
                        throw ex;
                    }
                }
            }
            return null;
        }
    };
}
Example 10
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public SparkJavaPairRDD<K, V> reduceByKey(final Function2<V, V, V> func) {
    // Explicit conversion
    final PairRDDFunctions<K, V> pairRdd = RDD.rddToPairRDDFunctions(
            rdd, ClassTag$.MODULE$.apply(Object.class), ClassTag$.MODULE$.apply(Object.class), null);
    final RDD<Tuple2<K, V>> reducedRdd = pairRdd.reduceByKey(func);
    return SparkJavaPairRDD.fromRDD(reducedRdd);
}
Example 11
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <C> SparkJavaPairRDD<K, C> combineByKey(final Function<V, C> createCombiner,
                                               final Function2<C, V, C> mergeValue,
                                               final Function2<C, C, C> mergeCombiners,
                                               final Partitioner partitioner,
                                               final boolean mapSideCombine,
                                               final Serializer serializer) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 12
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <C> SparkJavaPairRDD<K, C> combineByKey(final Function<V, C> createCombiner,
                                               final Function2<C, V, C> mergeValue,
                                               final Function2<C, C, C> mergeCombiners,
                                               final Partitioner partitioner) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 13
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <C> SparkJavaPairRDD<K, C> combineByKey(final Function<V, C> createCombiner,
                                               final Function2<C, V, C> mergeValue,
                                               final Function2<C, C, C> mergeCombiners,
                                               final int numPartitions) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 14
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <U> SparkJavaPairRDD<K, U> aggregateByKey(final U zeroValue,
                                                 final Partitioner partitioner,
                                                 final Function2<U, V, U> seqFunc,
                                                 final Function2<U, U, U> combFunc) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 15
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <U> SparkJavaPairRDD<K, U> aggregateByKey(final U zeroValue,
                                                 final int numPartitions,
                                                 final Function2<U, V, U> seqFunc,
                                                 final Function2<U, U, U> combFunc) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 16
Source Project: SparkDemo Source File: JavaLogQuery.java License: MIT License
public static void main(String[] args) {
    SparkSession spark = SparkSession
            .builder()
            .appName("JavaLogQuery")
            .getOrCreate();

    JavaSparkContext jsc = new JavaSparkContext(spark.sparkContext());

    JavaRDD<String> dataSet = (args.length == 1) ? jsc.textFile(args[0]) : jsc.parallelize(exampleApacheLogs);

    JavaPairRDD<Tuple3<String, String, String>, Stats> extracted =
            dataSet.mapToPair(new PairFunction<String, Tuple3<String, String, String>, Stats>() {
                @Override
                public Tuple2<Tuple3<String, String, String>, Stats> call(String s) {
                    return new Tuple2<>(extractKey(s), extractStats(s));
                }
            });

    JavaPairRDD<Tuple3<String, String, String>, Stats> counts =
            extracted.reduceByKey(new Function2<Stats, Stats, Stats>() {
                @Override
                public Stats call(Stats stats, Stats stats2) {
                    return stats.merge(stats2);
                }
            });

    List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
    for (Tuple2<?, ?> t : output) {
        System.out.println(t._1() + "\t" + t._2());
    }
    spark.stop();
}
Example 17
Source Project: SparkDemo Source File: JavaCustomReceiver.java License: MIT License
public static void main(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
        System.exit(1);
    }

    StreamingExamples.setStreamingLogLevels();

    // Create the context with a 1 second batch size
    SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

    // Create an input stream with the custom receiver on target ip:port and count the
    // words in input stream of \n delimited text (eg. generated by 'nc')
    JavaReceiverInputDStream<String> lines = ssc.receiverStream(
            new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(SPACE.split(x)).iterator();
        }
    });
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
                @Override
                public Integer call(Integer i1, Integer i2) {
                    return i1 + i2;
                }
            });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
}
Example 18
Source Project: SparkDemo Source File: MapPartitionsWithIndex.java License: MIT License
private static void mapPartitionsWithIndex(JavaSparkContext sc) {
    List<String> names = Arrays.asList("张三1", "李四1", "王五1", "张三2", "李四2",
            "王五2", "张三3", "李四3", "王五3", "张三4");

    // Initialize the RDD with 3 partitions
    JavaRDD<String> namesRDD = sc.parallelize(names, 3);

    JavaRDD<String> mapPartitionsWithIndexRDD = namesRDD
            .mapPartitionsWithIndex(new Function2<Integer, Iterator<String>, Iterator<String>>() {
                private static final long serialVersionUID = 1L;

                public Iterator<String> call(Integer v1, Iterator<String> v2) throws Exception {
                    List<String> list = new ArrayList<String>();
                    while (v2.hasNext()) {
                        list.add("partition index:" + v1 + "\t" + v2.next());
                    }
                    return list.iterator();
                }
            }, true);

    // Collect the data from the cluster into local memory
    List<String> result = mapPartitionsWithIndexRDD.collect();
    for (String s : result) {
        System.out.println(s);
    }

    sc.close();
}
Example 19
Source Project: rheem Source File: FunctionCompiler.java License: Apache License 2.0
/**
 * Create an appropriate {@link Function} for deploying the given {@link ReduceDescriptor}
 * on Apache Spark.
 */
public <T> Function2<T, T, T> compile(ReduceDescriptor<T> descriptor,
                                      SparkExecutionOperator operator,
                                      OptimizationContext.OperatorContext operatorContext,
                                      ChannelInstance[] inputs) {
    final BinaryOperator<T> javaImplementation = descriptor.getJavaImplementation();
    if (javaImplementation instanceof FunctionDescriptor.ExtendedSerializableBinaryOperator) {
        return new ExtendedBinaryOperatorAdapter<>(
                (FunctionDescriptor.ExtendedSerializableBinaryOperator<T>) javaImplementation,
                new SparkExecutionContext(operator, inputs,
                        operatorContext.getOptimizationContext().getIterationNumber())
        );
    } else {
        return new BinaryOperatorAdapter<>(javaImplementation);
    }
}
Example 20
Source Project: spark-streaming-direct-kafka Source File: Functions.java License: Apache License 2.0
/**
 * @return a function that returns the second of two values
 * @param <T> element type
 */
public static <T> Function2<T, T, T> last() {
    return new Function2<T, T, T>() {
        @Override
        public T call(T current, T next) {
            return next;
        }
    };
}
Example 21
Source Project: nemo Source File: JavaPairRDD.java License: Apache License 2.0
@Override
public JavaPairRDD<K, V> reduceByKey(final Function2<V, V, V> func) {
    final DAGBuilder<IRVertex, IREdge> builder = new DAGBuilder<>(dag);

    final IRVertex reduceByKeyVertex = new OperatorVertex(new ReduceByKeyTransform<K, V>(func));
    builder.addVertex(reduceByKeyVertex, loopVertexStack);

    final IREdge newEdge = new IREdge(getEdgeCommunicationPattern(lastVertex, reduceByKeyVertex),
            lastVertex, reduceByKeyVertex, new SparkCoder(serializer));
    newEdge.setProperty(KeyExtractorProperty.of(new SparkKeyExtractor()));
    builder.connectVertices(newEdge);

    return new JavaPairRDD<>(this.sparkContext, builder.buildWithoutSourceSinkCheck(), reduceByKeyVertex);
}
Example 22
Source Project: DDF Source File: MLSupporter.java License: Apache License 2.0
@Override
public long[][] getConfusionMatrix(IModel model, double threshold) throws DDFException {
    SparkDDF ddf = (SparkDDF) this.getDDF();
    SparkDDF predictions = (SparkDDF) ddf.ML.applyModel(model, true, false);

    // Now get the underlying RDD to compute
    JavaRDD<double[]> yTrueYPred = (JavaRDD<double[]>) predictions.getJavaRDD(double[].class);
    final double threshold1 = threshold;
    long[] cm = yTrueYPred.map(new Function<double[], long[]>() {
        @Override
        public long[] call(double[] params) {
            byte isPos = toByte(params[0] > threshold1);
            byte predPos = toByte(params[1] > threshold1);
            long[] result = new long[] { 0L, 0L, 0L, 0L };
            result[isPos << 1 | predPos] = 1L;
            return result;
        }
    }).reduce(new Function2<long[], long[], long[]>() {
        @Override
        public long[] call(long[] a, long[] b) {
            return new long[] { a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3] };
        }
    });

    return new long[][] { new long[] { cm[3], cm[2] }, new long[] { cm[1], cm[0] } };
}
Example 23
Source Project: examples Source File: CountLines.java License: Apache License 2.0
@SuppressWarnings("serial")
public static void main(String[] args) {
    SparkConf sparkConf = new SparkConf().setAppName("JavaHBaseBulkGetExample ").setMaster("local[2]");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);
    JavaRDD<String> textFile = jsc.textFile("hdfs://localhost/user/cloudera/data.txt");
    JavaPairRDD<String, Integer> pairs = textFile.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String s) {
            return new Tuple2<String, Integer>(s.substring(0, s.indexOf("|")), 1);
        }
    });
    JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    });
    System.out.println("We have generated " + counts.count() + " users");
    jsc.close();
}
Example 24
Source Project: BigDataArchitect Source File: WordCountJava.java License: Apache License 2.0
public static void main(String[] args) throws FileNotFoundException {
    SparkConf conf = new SparkConf();
    conf.setAppName("java-wordcount");
    conf.setMaster("local");

    JavaSparkContext jsc = new JavaSparkContext(conf);

    JavaRDD<String> fileRDD = jsc.textFile("bigdata-spark/data/testdata.txt");

    JavaRDD<String> words = fileRDD.flatMap(new FlatMapFunction<String, String>() {
        public Iterator<String> call(String line) throws Exception {
            return Arrays.asList(line.split(" ")).iterator();
        }
    });

    JavaPairRDD<String, Integer> pairWord = words.mapToPair(new PairFunction<String, String, Integer>() {
        public Tuple2<String, Integer> call(String word) throws Exception {
            return new Tuple2<String, Integer>(word, 1);
        }
    });

    JavaPairRDD<String, Integer> res = pairWord.reduceByKey(new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer oldV, Integer v) throws Exception {
            return oldV + v;
        }
    });

    res.foreach(new VoidFunction<Tuple2<String, Integer>>() {
        public void call(Tuple2<String, Integer> value) throws Exception {
            System.out.println(value._1 + "\t" + value._2);
        }
    });
}
Example 25
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public SparkJavaPairRDD<K, V> reduceByKey(final Partitioner partitioner, final Function2<V, V, V> func) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 26
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public Map<K, V> reduceByKeyLocally(final Function2<V, V, V> func) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 27
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public <U> SparkJavaPairRDD<K, U> aggregateByKey(final U zeroValue,
                                                 final Function2<U, V, U> seqFunc,
                                                 final Function2<U, U, U> combFunc) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 28
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public SparkJavaPairRDD<K, V> foldByKey(final V zeroValue,
                                        final Partitioner partitioner,
                                        final Function2<V, V, V> func) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 29
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public SparkJavaPairRDD<K, V> foldByKey(final V zeroValue,
                                        final int numPartitions,
                                        final Function2<V, V, V> func) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}
Example 30
Source Project: incubator-nemo Source File: SparkJavaPairRDD.java License: Apache License 2.0
@Override
public SparkJavaPairRDD<K, V> foldByKey(final V zeroValue, final Function2<V, V, V> func) {
    throw new UnsupportedOperationException(NOT_YET_SUPPORTED);
}