package spark; import org.apache.commons.lang3.StringUtils; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.FlatMapFunction; import org.apache.spark.api.java.function.Function; import java.util.Arrays; import java.util.Iterator; import java.util.regex.Pattern; /** * Created by 張燿峰 * Spark常见案例 * * @author 孤 * @date 2019/3/15 * @Varsion 1.0 */ public class Chapter4 { private static final Pattern PATTERN = Pattern.compile(" "); /** * 计算RDD中各值的平方 */ public void map(JavaSparkContext sparkContext) { JavaRDD<Integer> num = sparkContext.parallelize(Arrays.asList(1, 2, 3, 4, 5, 6)); //新生成的RDD元素 JavaRDD<Integer> result = num.map(new Function<Integer, Integer>() { @Override public Integer call(Integer v1) throws Exception { return v1 * v1; } }); System.out.println(StringUtils.join(result.collect(),",")); } /** * flatMap分割字符串 */ public void flatMap(JavaSparkContext sparkContext){ JavaRDD<String> lines = sparkContext.parallelize(Arrays.asList("hello world", "hi")); JavaRDD<String> flatMapResult = lines.flatMap(new FlatMapFunction<String, String>() { @Override public Iterator<String> call(String s) throws Exception { return Arrays.asList(PATTERN.split(s)).iterator(); } }); flatMapResult.first(); //结果:hello } public void reduce(JavaSparkContext sparkContext){ JavaRDD<Integer> lines = sparkContext.parallelize(Arrays.asList(1,2,3,4)); JavaRDD<Integer> toLines = sparkContext.parallelize(Arrays.asList(1,2,3,4)); } }