import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.PairFunction; import scala.Tuple2; /** * Created by 張燿峰 * SeqnenceFile操作案例 * * @author 孤 * @date 2019/3/22 * @Varsion 1.0 */ public class SequenceFile { protected static void run(JavaSparkContext sparkContext) { JavaPairRDD<Text, IntWritable> javaPairRDD = sparkContext.sequenceFile("url", Text.class, IntWritable.class); JavaPairRDD<String, Integer> pairRDD = javaPairRDD.mapToPair(new sequenceToConvert()); //写 pairRDD.saveAsHadoopFile("url",Text.class,IntWritable.class,SequenceFileOutputFormat.class); } static class sequenceToConvert implements PairFunction<Tuple2<Text, IntWritable>, String, Integer> { @Override public Tuple2<String, Integer> call(Tuple2<Text, IntWritable> textIntWritableTuple2) { return new Tuple2<>(textIntWritableTuple2._1.toString(), textIntWritableTuple2._2.get()); } } }