Java Code Examples for org.apache.spark.streaming.api.java.JavaDStream#print()

The following examples show how to use org.apache.spark.streaming.api.java.JavaDStream#print() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SparkStreamDemo.java    From sparkResearch with Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) {
    //创建两个核心的本地线程,批处理的间隔为1秒
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("sparkStreamIng");
    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(conf, Durations.seconds(1));
    //创建一个连接到IP:localhost,PORT:8080的DStream
    JavaReceiverInputDStream<String> dStream = javaStreamingContext.socketTextStream("localhost", 8080);
    JavaDStream<String> errorLine = dStream.filter(new Function<String, Boolean>() {
        @Override
        public Boolean call(String v1) throws Exception {
            return v1.contains("error");
        }
    });
    //打印包含error的行
    errorLine.print();
    try {
        //开始计算
        javaStreamingContext.start();
        //等待计算完成
        javaStreamingContext.awaitTermination();
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
 
Example 2
Source File: StreamingIngestionFileSystemTextFileApp.java    From net.jgp.labs.spark with Apache License 2.0 6 votes vote down vote up
private void start() {
  // Create a local StreamingContext with two working thread and batch
  // interval of
  // 1 second
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "NetworkWordCount");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
      .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
      .getInputDirectory());
  msgDataStream.print();

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example 3
Source File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java    From net.jgp.labs.spark with Apache License 2.0 6 votes vote down vote up
private void start() {
  // Create a local StreamingContext with two working thread and batch
  // interval of
  // 1 second
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
      .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
      .getInputDirectory());

  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new RowProcessor());

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example 4
Source File: FileStreamingEx.java    From Apache-Spark-2x-for-Java-Developers with MIT License 5 votes vote down vote up
public static void main(String[] args) {
   	//Window Specific property if Hadoop is not instaalled or HADOOP_HOME is not set
	 System.setProperty("hadoop.home.dir", "E:\\hadoop");
   	//Logger rootLogger = LogManager.getRootLogger();
  		//rootLogger.setLevel(Level.WARN); 
       SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
       String inputDirectory="E:\\hadoop\\streamFolder\\";
    
       JavaSparkContext sc = new JavaSparkContext(conf);
       JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
      // streamingContext.checkpoint("E:\\hadoop\\checkpoint");
       Logger rootLogger = LogManager.getRootLogger();
  		rootLogger.setLevel(Level.WARN); 
  		
  		JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
  		streamfile.print();
  		streamfile.foreachRDD(rdd-> rdd.foreach(x -> System.out.println(x)));
  		
  			   		
  		JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
  	 streamedFile.print();
  		
  	 streamingContext.start();
  	 

       try {
		streamingContext.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Example 5
Source File: SparkStreamingFromNetworkExample.java    From SparkOnALog with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
  if (args.length < 3) {
    System.err.println("Usage: NetworkWordCount <master> <hostname> <port>\n" +
        "In local mode, <master> should be 'local[n]' with n > 1");
    System.exit(1);
  }

  // Create the context with a 1 second batch size
  JavaStreamingContext ssc = new JavaStreamingContext(args[0], "NetworkWordCount",
          new Duration(5000), System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));

  // Create a NetworkInputDStream on target ip:port and count the
  // words in input stream of \n delimited test (eg. generated by 'nc')
  JavaDStream<String> lines = ssc.socketTextStream(args[1], Integer.parseInt(args[2]));
  
  lines.map(new Function<String, String> () {

@Override
public String call(String arg0) throws Exception {
	System.out.println("arg0" + arg0);
	return arg0;
}}).print();
  
  lines.print();
  ssc.start();


}
 
Example 6
Source File: SparkMLTrainingAndScoringOnline.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License 4 votes vote down vote up
public static void main(String[] args) throws InterruptedException {

                System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

                final SparkConf conf = new SparkConf()
                    .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
                    .setAppName(APPLICATION_NAME)
                    .set("spark.sql.caseSensitive", CASE_SENSITIVE);                               

                JavaStreamingContext streamingContext = new JavaStreamingContext(conf,
                    new Duration(BATCH_DURATION_INTERVAL_MS));
                
                JavaInputDStream<ConsumerRecord<String, String>> meetupStream = 
                    KafkaUtils.createDirectStream(
                                streamingContext, 
				LocationStrategies.PreferConsistent(),
                                ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
                    );

                JavaDStream<String> meetupStreamValues = 
		    meetupStream.map(v -> {                     
                        return v.value();
                    });

                // Prepare the training data as strings of type: (y,[x1,x2,x3,...,xn])
                // Where n is the number of features, y is a binary label, 
                // and n must be the same for train and test.
                // e.g. "(response, [group_lat, group_long])";
                JavaDStream<String> trainData = meetupStreamValues.map(e -> {
                        
                        JSONParser jsonParser = new JSONParser();
                        JSONObject json = (JSONObject)jsonParser.parse(e);

                        String result = "(" 
                            + (String.valueOf(json.get("response")).equals("yes") ? "1.0,[":"0.0,[") 
                            + ((JSONObject)json.get("group")).get("group_lat") + "," 
                            + ((JSONObject)json.get("group")).get("group_lon")
                            + "])";
                        
                        return result;
                });

                trainData.print();

                JavaDStream<LabeledPoint> labeledPoints = trainData.map(LabeledPoint::parse);
        
                StreamingLogisticRegressionWithSGD streamingLogisticRegressionWithSGD 
			= new StreamingLogisticRegressionWithSGD()
                            .setInitialWeights(Vectors.zeros(2));

                streamingLogisticRegressionWithSGD.trainOn(labeledPoints);

                JavaPairDStream<Double, Vector> values = 
			labeledPoints.mapToPair(f -> new Tuple2<>(f.label(), f.features()));

                streamingLogisticRegressionWithSGD.predictOnValues(values).print();

                // some time later, after outputs have completed
                meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {        
                    OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();            

                ((CanCommitOffsets) meetupStream.inputDStream())
                    .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
                });

                streamingContext.start();
                streamingContext.awaitTermination();
        }
 
Example 7
Source File: JavaStreamingTestExample.java    From SparkDemo with MIT License 4 votes vote down vote up
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: JavaStreamingTestExample " +
      "<dataDir> <batchDuration> <numBatchesTimeout>");
      System.exit(1);
  }

  String dataDir = args[0];
  Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
  int numBatchesTimeout = Integer.parseInt(args[2]);

  SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
  JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

  ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

  // $example on$
  JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
    new Function<String, BinarySample>() {
      @Override
      public BinarySample call(String line) {
        String[] ts = line.split(",");
        boolean label = Boolean.parseBoolean(ts[0]);
        double value = Double.parseDouble(ts[1]);
        return new BinarySample(label, value);
      }
    });

  StreamingTest streamingTest = new StreamingTest()
    .setPeacePeriod(0)
    .setWindowSize(0)
    .setTestMethod("welch");

  JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
  out.print();
  // $example off$

  // Stop processing if test becomes significant or we time out
  timeoutCounter = numBatchesTimeout;

  out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
    @Override
    public void call(JavaRDD<StreamingTestResult> rdd) {
      timeoutCounter -= 1;

      boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
        @Override
        public Boolean call(StreamingTestResult v) {
          return v.pValue() < 0.05;
        }
      }).isEmpty();

      if (timeoutCounter <= 0 || anySignificant) {
        rdd.context().stop();
      }
    }
  });

  ssc.start();
  ssc.awaitTermination();
}
 
Example 8
Source File: StreamingIngestionFileSystemTextFileToDataframeApp.java    From net.jgp.labs.spark with Apache License 2.0 4 votes vote down vote up
private void start() {
  // Create a local StreamingContext with two working thread and batch
  // interval of
  // 1 second
  SparkConf conf = new SparkConf().setMaster("local[2]").setAppName(
      "Streaming Ingestion File System Text File to Dataframe");
  JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations
      .seconds(5));

  JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils
      .getInputDirectory());

  msgDataStream.print();
  // Create JavaRDD<Row>
  msgDataStream.foreachRDD(new VoidFunction<JavaRDD<String>>() {
    private static final long serialVersionUID = -590010339928376829L;

    @Override
    public void call(JavaRDD<String> rdd) {
      JavaRDD<Row> rowRDD = rdd.map(new Function<String, Row>() {
        private static final long serialVersionUID = 5167089361335095997L;

        @Override
        public Row call(String msg) {
          Row row = RowFactory.create(msg);
          return row;
        }
      });
      // Create Schema
      StructType schema = DataTypes.createStructType(
          new StructField[] { DataTypes.createStructField("Message",
              DataTypes.StringType, true) });

      // Get Spark 2.0 session
      SparkSession spark = JavaSparkSessionSingleton.getInstance(rdd.context()
          .getConf());
      Dataset<Row> msgDataFrame = spark.createDataFrame(rowRDD, schema);
      msgDataFrame.show();
    }
  });

  jssc.start();
  try {
    jssc.awaitTermination();
  } catch (InterruptedException e) {
    // TODO Auto-generated catch block
    e.printStackTrace();
  }
}
 
Example 9
Source File: Throughput.java    From flink-perf with Apache License 2.0 4 votes vote down vote up
public static void main(String[] args) {
	SparkConf conf = new SparkConf().setAppName("throughput").setMaster("local[8]");
	JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

	JavaReceiverInputDStream<Tuple4<Long, Integer, Long, byte[]>> source = ssc.receiverStream(new Source(StorageLevel.MEMORY_ONLY()));
	JavaPairDStream<Long, Tuple3<Integer, Long, byte[]>> kvsource = source.mapToPair(new PairFunction<Tuple4<Long, Integer, Long, byte[]>, Long, Tuple3<Integer, Long, byte[]>>() {
		@Override
		public Tuple2<Long, Tuple3<Integer, Long, byte[]>> call(Tuple4<Long, Integer, Long, byte[]> longIntegerLongTuple4) throws Exception {
			return new Tuple2<Long, Tuple3<Integer, Long, byte[]>>(longIntegerLongTuple4._1(),
					new Tuple3<Integer, Long, byte[]>(longIntegerLongTuple4._2(), longIntegerLongTuple4._3(), longIntegerLongTuple4._4()));
		}
	});
	JavaDStream<Long> res = kvsource.repartition(3).mapPartitions(new FlatMapFunction<Iterator<Tuple2<Long,Tuple3<Integer,Long,byte[]>>>, Long>() {
		@Override
		public Iterable<Long> call(Iterator<Tuple2<Long, Tuple3<Integer, Long, byte[]>>> tuple2Iterator) throws Exception {
			long start = System.currentTimeMillis();
			long received = 0;
			while(tuple2Iterator.hasNext()) {
				received++;
				Tuple2<Long, Tuple3<Integer, Long, byte[]>> el = tuple2Iterator.next();

				if (el._2()._2() != 0) {
					long lat = System.currentTimeMillis() - el._2()._2();
					System.out.println("Latency " + lat + " ms");
				}
			}
			long sinceMs = (System.currentTimeMillis() - start);

			System.out.println("Finished Batch. Processed "+received+" elements in "+sinceMs+" ms.");

			return new Iterable<Long>() {
				@Override
				public Iterator<Long> iterator() {
					return new Iterator<Long>() {
						@Override
						public boolean hasNext() {
							return false;
						}

						@Override
						public Long next() {
							return 1L;
						}

						@Override
						public void remove() {

						}
					};
				}
			};
		}


	/*	@Override
		public Long call(Tuple2<Long, Tuple3<Integer, Long, byte[]>> v1) throws Exception {
			//	System.out.println("Recevied " + v1);
			if (start == 0) {

			}
			received++;
			if (received % logfreq == 0) {

				if (sinceSec == 0) {
					System.out.println("received " + received + " elements since 0");
					return 0L;
				}
				System.out.println("Received " + received + " elements since " + sinceSec + ". " +
						"Elements per second " + received / sinceSec + ", GB received " + ((received * (8 + 4 + 12)) / 1024 / 1024 / 1024));
			}
			if (v1._2()._2() != 0) {
				long lat = System.currentTimeMillis() - v1._2()._2();
				System.out.println("Latency " + lat + " ms");
			}

			return received;
		} */
	});


	//res.print();
	/*res.foreachRDD(new Function2<JavaRDD<Long>, Time, Void>() {
		@Override
		public Void call(JavaRDD<Long> integerJavaRDD, Time t) throws Exception {
			integerJavaRDD.saveAsTextFile("/home/robert/flink-workdir/flink-perf/out/"+t.toString());
			return null;
		}
	}); */
	res.print();
//	res.print();

	ssc.start();
}