Java Code Examples for org.apache.spark.streaming.api.java.JavaStreamingContext

The following are top voted examples showing how to use org.apache.spark.streaming.api.java.JavaStreamingContext. These examples are extracted from open source projects. You can vote up the examples you like; your votes help us surface more useful examples.
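Before the project-specific examples, here is a minimal sketch of the JavaStreamingContext lifecycle that the examples below all follow: build the context from a SparkConf and a batch Duration, define the DStream transformations, then call start() and awaitTermination(). The app name, master URL, and socket source used here are illustrative placeholders, not taken from any of the projects below.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class MinimalStreamingApp {
    public static void main(String[] args) throws InterruptedException {
        // 1. Configure Spark and create the streaming context with a 1-second batch interval.
        SparkConf conf = new SparkConf().setAppName("minimal-streaming").setMaster("local[2]");
        JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));

        // 2. Define the input DStream and its transformations (a socket source is used here purely as an illustration).
        JavaDStream<String> lines = jssc.socketTextStream("localhost", 9999);
        lines.filter(line -> !line.isEmpty()).print();

        // 3. Start the computation and block until it is stopped or fails.
        jssc.start();
        jssc.awaitTermination();
    }
}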
Example 1
Project: kafka-sandbox   File: SparkStringConsumer.java   14 votes
public static void main(String[] args) {

        SparkConf conf = new SparkConf()
                .setAppName("kafka-sandbox")
                .setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(2000));

        Set<String> topics = Collections.singleton("mytopic");
        Map<String, String> kafkaParams = new HashMap<>();
        kafkaParams.put("metadata.broker.list", "localhost:9092");

        JavaPairInputDStream<String, String> directKafkaStream = KafkaUtils.createDirectStream(ssc,
                String.class, String.class, StringDecoder.class, StringDecoder.class, kafkaParams, topics);

        directKafkaStream.foreachRDD(rdd -> {
            System.out.println("--- New RDD with " + rdd.partitions().size()
                    + " partitions and " + rdd.count() + " records");
            rdd.foreach(record -> System.out.println(record._2));
        });

        ssc.start();
        ssc.awaitTermination();
    }
 
Example 2
Project: gcp   File: Spark4KafkaNew.java   11 votes
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Kafka-New");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT),
        Collections.singleton(EXAMPLE_TOPIC));

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Example 3
Project: gcp   File: Spark2Streaming.java   10 votes
public static void main(String[] args) throws InterruptedException {
  SparkConf sc = new SparkConf().setAppName("POC-Streaming");
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(2000))) {
    //JavaDStream<SampleXML> records = jsc.textFileStream("input/").map(new ParseXML());
    // textFileStream processes files line by line, so each XML document would have to fit on a single line; the queue-based alternative below avoids that limitation.

    JavaRDD<String> files = jsc.sparkContext().wholeTextFiles("input/").map(tuple -> tuple._2());
    Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
    rddQueue.add(files);
    JavaDStream<String> records = jsc.queueStream(rddQueue);

    records.foreachRDD(rdd -> System.out.printf("Amount of XMLs: %d\n", rdd.count()));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Example 4
Project: laughing-octo-sansa   File: TwitterSparkCrawler.java   9 votes
private void run(CompositeConfiguration conf) {
    // Spark conf
    SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
            .set("spark.serializer", conf.getString("spark.serializer"));
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(conf.getLong("stream.duration")));

    // Twitter4J
    // IMPORTANT: put keys in twitter4J.properties
    Configuration twitterConf = ConfigurationContext.getInstance();
    Authorization twitterAuth = AuthorizationFactory.getInstance(twitterConf);

    // Create twitter stream
    String[] filters = { "#Car" };
    TwitterUtils.createStream(jssc, twitterAuth, filters).print();
    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
 
Example 5
Project: gcp   File: Spark6BigQuery.java   8 votes
public static void main(String[] args) throws InterruptedException, IOException {
  SparkConf sc = new SparkConf().setAppName("POC-BigQuery");
  
  try(JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {
    JavaPairInputDStream<String, String> stream = KafkaUtils.createDirectStream(
        jsc, String.class, String.class, StringDecoder.class, StringDecoder.class,
        Collections.singletonMap("metadata.broker.list", KAFKA_HOST_PORT), Collections.singleton(EXAMPLE_TOPIC));

    Configuration conf = new Configuration();
    BigQueryConfiguration.configureBigQueryOutput(conf, BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA);
    conf.set("mapreduce.job.outputformat.class", BigQueryOutputFormat.class.getName());

    JavaDStream<ExampleXML> records = stream.map(t -> t._2()).map(new ParseXML());
    records.foreachRDD(rdd -> {
      System.out.printf("Amount of XMLs: %d\n", rdd.count());
      long time = System.currentTimeMillis();
      rdd.mapToPair(new PrepToBQ()).saveAsNewAPIHadoopDataset(conf);
      System.out.printf("Sent to BQ in %fs\n", (System.currentTimeMillis()-time)/1000f);
    });
    
    jsc.start();
    jsc.awaitTermination();
  }
}
 
Example 6
Project: movie-recommender   File: SparkModule.java   8 votes
@Provides
JavaInputDStream<ConsumerRecord<String, RawRating>> providesKafkaInputStream(JavaStreamingContext streamingContext) {
    Map<String, Object> kafkaParams = new HashMap<>();
    kafkaParams.put("bootstrap.servers", "localhost:9092");
    kafkaParams.put("key.deserializer", StringDeserializer.class);
    kafkaParams.put("value.deserializer", JsonDeserializer.class);
    kafkaParams.put("serializedClass", RawRating.class);
    kafkaParams.put("group.id", "rating_stream");
    kafkaParams.put("auto.offset.reset", "latest");
    kafkaParams.put("enable.auto.commit", false);
    Collection<String> topics = Arrays.asList("topicA", "topicB");

    return KafkaUtils.createDirectStream(
            streamingContext,
            LocationStrategies.PreferConsistent(),
            ConsumerStrategies.<String, RawRating>Subscribe(topics, kafkaParams)
    );
}
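
The provider above only builds the JavaInputDStream; a hypothetical consumer of that stream might extract the deserialized RawRating values like this (the variable names and the print action are illustrative assumptions, not part of the project):

// kafkaStream is the JavaInputDStream<ConsumerRecord<String, RawRating>> provided above (name assumed).
JavaDStream<RawRating> ratings = kafkaStream.map(ConsumerRecord::value);
ratings.foreachRDD(rdd -> rdd.foreach(rating -> System.out.println(rating)));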
 
Example 7
Project: beam   File: SparkRunnerStreamingContextFactory.java   8 votes
private void checkpoint(JavaStreamingContext jssc, CheckpointDir checkpointDir) {
  Path rootCheckpointPath = checkpointDir.getRootCheckpointDir();
  Path sparkCheckpointPath = checkpointDir.getSparkCheckpointDir();
  Path beamCheckpointPath = checkpointDir.getBeamCheckpointDir();

  try {
    FileSystem fileSystem =
        rootCheckpointPath.getFileSystem(jssc.sparkContext().hadoopConfiguration());
    if (!fileSystem.exists(rootCheckpointPath)) {
      fileSystem.mkdirs(rootCheckpointPath);
    }
    if (!fileSystem.exists(sparkCheckpointPath)) {
      fileSystem.mkdirs(sparkCheckpointPath);
    }
    if (!fileSystem.exists(beamCheckpointPath)) {
      fileSystem.mkdirs(beamCheckpointPath);
    }
  } catch (IOException e) {
    throw new RuntimeException("Failed to create checkpoint dir", e);
  }

  jssc.checkpoint(sparkCheckpointPath.toString());
}
 
Example 8
Project: Apache-Spark-2x-for-Java-Developers   File: WordCountRecoverableEx.java   7 votes
public static void main(String[] args) throws Exception {
	System.setProperty("hadoop.home.dir", "E:\\hadoop");

	final String ip = "10.0.75.1";
	final int port = Integer.parseInt("9000");
	final String checkpointDirectory = "E:\\hadoop\\checkpoint";
	// Function to create JavaStreamingContext without any output operations
	// (used to detect the new context)
	Function0<JavaStreamingContext> createContextFunc = new Function0<JavaStreamingContext>() {
		@Override
		public JavaStreamingContext call() {
			return createContext(ip, port, checkpointDirectory);
		}
	};

	JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(checkpointDirectory, createContextFunc);
	ssc.start();
	ssc.awaitTermination();
}
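
The createContext factory referenced above is not shown in this snippet. A minimal sketch of what such a helper might look like, assuming a socket-based word count (the word-count wiring, app name, and master setting are illustrative assumptions, not the project's actual code):

protected static JavaStreamingContext createContext(String ip, int port, String checkpointDirectory) {
	// Build a fresh context, register the checkpoint directory and define the stream;
	// getOrCreate only calls this when no checkpoint exists yet.
	SparkConf sparkConf = new SparkConf().setAppName("WordCountRecoverableEx").setMaster("local[*]");
	JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));
	ssc.checkpoint(checkpointDirectory);

	JavaReceiverInputDStream<String> lines = ssc.socketTextStream(ip, port);
	JavaPairDStream<String, Integer> wordCounts = lines
			.flatMap(line -> Arrays.asList(line.split(" ")).iterator())
			.mapToPair(word -> new Tuple2<>(word, 1))
			.reduceByKey((a, b) -> a + b);
	wordCounts.print();
	return ssc;
}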
 
Example 9
Project: SparkToParquet   File: AppMain.java   7 votes
public static void main(String[] args) throws IOException {
	Flags.setFromCommandLineArgs(THE_OPTIONS, args);

	// Initialize the Spark conf.
	SparkConf conf = new SparkConf().setAppName("A SECTONG Application: Apache Log Analysis with Spark");
	JavaSparkContext sc = new JavaSparkContext(conf);
	JavaStreamingContext jssc = new JavaStreamingContext(sc, Flags.getInstance().getSlideInterval());
	SQLContext sqlContext = new SQLContext(sc);

	// Initialize parameters
	HashSet<String> topicsSet = new HashSet<String>(Arrays.asList(Flags.getInstance().getKafka_topic().split(",")));
	HashMap<String, String> kafkaParams = new HashMap<String, String>();
	kafkaParams.put("metadata.broker.list", Flags.getInstance().getKafka_broker());

	// Read data from the Kafka stream
	JavaPairInputDStream<String, String> messages = KafkaUtils.createDirectStream(jssc, String.class, String.class,
			StringDecoder.class, StringDecoder.class, kafkaParams, topicsSet);

	JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
		private static final long serialVersionUID = 5266880065425088203L;

		public String call(Tuple2<String, String> tuple2) {
			return tuple2._2();
		}
	});

	JavaDStream<ApacheAccessLog> accessLogsDStream = lines.flatMap(line -> {
		List<ApacheAccessLog> list = new ArrayList<>();
		try {
			// Parse each line
			list.add(ApacheAccessLog.parseFromLogLine(line));
			return list;
		} catch (RuntimeException e) {
			return list;
		}
	}).cache();

	accessLogsDStream.foreachRDD(rdd -> {

		// rdd to DataFrame
		DataFrame df = sqlContext.createDataFrame(rdd, ApacheAccessLog.class);
		// Write out as Parquet files
		df.write().partitionBy("ipAddress", "method", "responseCode").mode(SaveMode.Append).parquet(Flags.getInstance().getParquetFile());

		return null;
	});

	// Start the streaming job
	jssc.start(); // start the computation
	jssc.awaitTermination(); // wait for termination
}
 
Example 10
Project: arks-api   File: WordCount.java   7 votes
public static void main(String[] args) 
{
 SparkConf conf = new SparkConf();
 conf.setAppName("Wordcount Background");
 conf.setMaster("local");
  
 
 JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(15));
 
 
 JavaDStream<String> lines = ssc.textFileStream("/home/rahul/DATASET");
 JavaDStream<String> words = lines.flatMap(WORDS_EXTRACTOR);
 JavaPairDStream<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairDStream<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
 
 counter.print();
 
 ssc.start();
 
 ssc.awaitTermination();
 

 /*JavaRDD<String> file = context.textFile("/home/rahul/Desktop/palestine.txt");
 JavaRDD<String> words = file.flatMap(WORDS_EXTRACTOR);
 JavaPairRDD<String, Integer> pairs = words.mapToPair(WORDS_MAPPER);
 JavaPairRDD<String, Integer> counter = pairs.reduceByKey(WORDS_REDUCER);
 counter.saveAsTextFile("/home/rahul/Desktop/wc"); 
 context.close();*/
}
 
Example 11
Project: incubator-blur   File: BlurLoadSparkProcessor.java   7 votes
public void run() throws IOException {
  SparkConf conf = new SparkConf();
  conf.setAppName(getAppName());
  conf.set(SPARK_SERIALIZER, ORG_APACHE_SPARK_SERIALIZER_KRYO_SERIALIZER);
  JavaSparkUtil.packProjectJars(conf);
  setupSparkConf(conf);

  JavaStreamingContext ssc = new JavaStreamingContext(conf, getDuration());
  List<JavaDStream<T>> streamsList = getStreamsList(ssc);

  // Union all the streams if there is more than 1 stream
  JavaDStream<T> streams = unionStreams(ssc, streamsList);

  JavaPairDStream<String, RowMutation> pairDStream = streams.mapToPair(new PairFunction<T, String, RowMutation>() {
    public Tuple2<String, RowMutation> call(T t) {
      RowMutation rowMutation = convert(t);
      return new Tuple2<String, RowMutation>(rowMutation.getRowId(), rowMutation);
    }
  });

  pairDStream.foreachRDD(getFunction());

  ssc.start();
  ssc.awaitTermination();
}
 
Example 12
Project: Apache-Spark-2x-for-Java-Developers   File: StateFulProcessingExample.java   6 votes
public static void main(String[] args) throws InterruptedException {

		System.setProperty("hadoop.home.dir", "C:\\softwares\\Winutils");

		SparkSession sparkSession = SparkSession.builder().master("local[*]").appName("Stateful Streaming Example")
				.config("spark.sql.warehouse.dir", "file:////C:/Users/sgulati/spark-warehouse").getOrCreate();

		JavaStreamingContext jssc= new JavaStreamingContext(new JavaSparkContext(sparkSession.sparkContext()),
				Durations.milliseconds(1000));
		JavaReceiverInputDStream<String> inStream = jssc.socketTextStream("10.204.136.223", 9999);
		jssc.checkpoint("C:\\Users\\sgulati\\spark-checkpoint");

		JavaDStream<FlightDetails> flightDetailsStream = inStream.map(x -> {
			ObjectMapper mapper = new ObjectMapper();
			return mapper.readValue(x, FlightDetails.class);
		});
		
		

		JavaPairDStream<String, FlightDetails> flightDetailsPairStream = flightDetailsStream
				.mapToPair(f -> new Tuple2<String, FlightDetails>(f.getFlightId(), f));

		Function3<String, Optional<FlightDetails>, State<List<FlightDetails>>, Tuple2<String, Double>> mappingFunc = (
				flightId, curFlightDetail, state) -> {
			List<FlightDetails> details = state.exists() ? state.get() : new ArrayList<>();

			boolean isLanded = false;

			if (curFlightDetail.isPresent()) {
				details.add(curFlightDetail.get());
				if (curFlightDetail.get().isLanded()) {
					isLanded = true;
				}
			}
			Double avgSpeed = details.stream().mapToDouble(f -> f.getTemperature()).average().orElse(0.0);

			if (isLanded) {
				state.remove();
			} else {
				state.update(details);
			}
			return new Tuple2<String, Double>(flightId, avgSpeed);
		};

		JavaMapWithStateDStream<String, FlightDetails, List<FlightDetails>, Tuple2<String, Double>> streamWithState = flightDetailsPairStream
				.mapWithState(StateSpec.function(mappingFunc).timeout(Durations.minutes(5)));
		
		streamWithState.print();
		jssc.start();
		jssc.awaitTermination();
	}
 
Example 13
Project: enmasse-iot-demo   File: AMQPTemperature.java   6 votes
public static void main(String[] args) throws InterruptedException {

        String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
        if (messagingServiceHost != null) {
            host = messagingServiceHost;
        }
        LOG.info("host = {}", host);
        String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
        if (messagingServicePort != null) {
            port = Integer.valueOf(messagingServicePort);
        }
        LOG.info("port = {}", port);

        JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, AMQPTemperature::createStreamingContext);

        ssc.start();
        ssc.awaitTermination();
    }
 
Example 14
Project: gcp   File: Spark8Organized.java   6 votes
public static void main(String[] args) throws InterruptedException, IOException, JAXBException {
  SparkConf sc = new SparkConf().setAppName("Receiving-KafkaToBQ");

  try (JavaStreamingContext jsc = new JavaStreamingContext(sc, new Duration(60000))) {

    JavaPairDStream<String, String> stream = new KafkaInputWithOffsets(
        KAFKA_HOST_PORT, EXAMPLE_TOPIC, ZOOKEEPER_HOST, ZK_PATH).createResumableStream(jsc);

    stream.foreachRDD(IdleStop.create(jsc, 2, "XMLs count: %d\n"));

    stream
        .mapToPair(parseXml())
        .filter(t -> t != null)
        .mapToPair(prepToBq())
        .foreachRDD(BigQueryHelper.outputTo(BQ_EXAMPLE_TABLE, BQ_EXAMPLE_SCHEMA));

    jsc.start();
    jsc.awaitTermination();
  }
}
 
Example 15
Project: oryx2   File: BatchUpdateFunction.java   6 votes
BatchUpdateFunction(Config config,
                    Class<K> keyClass,
                    Class<M> messageClass,
                    Class<? extends Writable> keyWritableClass,
                    Class<? extends Writable> messageWritableClass,
                    String dataDirString,
                    String modelDirString,
                    BatchLayerUpdate<K,M,U> updateInstance,
                    JavaStreamingContext streamingContext) {
  this.keyClass = keyClass;
  this.messageClass = messageClass;
  this.keyWritableClass = keyWritableClass;
  this.messageWritableClass = messageWritableClass;
  this.dataDirString = dataDirString;
  this.modelDirString = modelDirString;
  this.updateBroker = ConfigUtils.getOptionalString(config, "oryx.update-topic.broker");
  this.updateTopic = ConfigUtils.getOptionalString(config, "oryx.update-topic.message.topic");
  this.updateInstance = updateInstance;
  this.sparkContext = streamingContext.sparkContext();
}
 
Example 16
Project: enmasse-workshop   File: TemperatureAnalyzer.java   6 votes
public static void main(String[] args) throws InterruptedException {

        // getting AMQP messaging service connection information
        String messagingServiceHost = System.getenv("MESSAGING_SERVICE_HOST");
        if (messagingServiceHost != null) {
            host = messagingServiceHost;
        }
        String messagingServicePort = System.getenv("MESSAGING_SERVICE_PORT");
        if (messagingServicePort != null) {
            port = Integer.valueOf(messagingServicePort);
        }
        log.info("AMQP messaging service hostname {}:{}", host, port);

        // getting credentials for authentication
        username = System.getenv("SPARK_DRIVER_USERNAME");
        password = System.getenv("SPARK_DRIVER_PASSWORD");
        log.info("Credentials {}/{}", username, password);

        JavaStreamingContext ssc = JavaStreamingContext.getOrCreate(CHECKPOINT_DIR, TemperatureAnalyzer::createStreamingContext);

        ssc.start();
        ssc.awaitTermination();
    }
 
Example 17
Project: splice-community-sample-code   File: SparkStreamingMQTT.java   6 votes
/**
 * This will start the spark stream that is reading from the MQTT queue
 *
 * @param broker     - MQTT broker url
 * @param topic      - MQTT topic name
 * @param numSeconds - batch interval, in seconds
 */
public void processMQTT(final String broker, final String topic, final int numSeconds) {

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT start");

    // Create the spark application and set the name to MQTT
    SparkConf sparkConf = new SparkConf().setAppName("MQTT");

    // Create the spark streaming context with a 'numSeconds' second batch size
    jssc = new JavaStreamingContext(sparkConf, Durations.seconds(numSeconds));
    jssc.checkpoint(checkpointDirectory);

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to read the MQTTUtils.createStream");
    //2. MQTTUtils to collect MQTT messages
    JavaReceiverInputDStream<String> messages = MQTTUtils.createStream(jssc, broker, topic);

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT about to do foreachRDD");
    //process the messages on the queue and save them to the database
    messages.foreachRDD(new SaveRDD());

    LOG.info("************ SparkStreamingMQTTOutside.processMQTT prior to context.strt");
    // Start the context
    jssc.start();
    jssc.awaitTermination();
}
 
Example 18
Project: beam   File: TrackStreamingSourcesTest.java   6 votes
@Test
public void testTrackSingle() {
  options.setRunner(SparkRunner.class);
  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc = new JavaStreamingContext(jsc,
      new org.apache.spark.streaming.Duration(options.getBatchIntervalMillis()));

  Pipeline p = Pipeline.create(options);

  CreateStream<Integer> emptyStream =
      CreateStream.of(
          VarIntCoder.of(),
          Duration.millis(options.getBatchIntervalMillis())).emptyBatch();

  p.apply(emptyStream).apply(ParDo.of(new PassthroughFn<>()));

  p.traverseTopologically(new StreamingSourceTracker(jssc, p, ParDo.MultiOutput.class,  0));
  assertThat(StreamingSourceTracker.numAssertions, equalTo(1));
}
 
Example 19
Project: net.jgp.labs.spark   File: StreamingIngestionFileSystemTextFileApp.java   6 votes
private void start() {
	// Create a local StreamingContext with two working threads and a batch interval
	// of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("NetworkWordCount");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());
	msgDataStream.print();

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Example 20
Project: net.jgp.labs.spark   File: StreamingIngestionFileSystemTextFileToDataframeMultipleClassesApp.java   6 votes
private void start() {
	// Create a local StreamingContext with two working threads and a batch interval
	// of 5 seconds
	SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Streaming Ingestion File System Text File to Dataframe");
	JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

	JavaDStream<String> msgDataStream = jssc.textFileStream(StreamingUtils.getInputDirectory());

	msgDataStream.print();
	// Create JavaRDD<Row>
	msgDataStream.foreachRDD(new RowProcessor());	

	jssc.start();
	try {
		jssc.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}
 
Example 21
Project: spark_log_data   File: LogDataWebinar.java   6 votes
private static JavaDStream<String> createDStream(JavaStreamingContext javaStreamingContext, String hostName, int port) {
        
        JavaReceiverInputDStream<SparkFlumeEvent> flumeEventStream = FlumeUtils.createStream(javaStreamingContext, hostName, port);
        
        // Set different storage level 
//        flumeEventStream.persist(StorageLevel.MEMORY_AND_DISK_SER());
        
        JavaDStream<String> dStream = flumeEventStream.map(new Function<SparkFlumeEvent, String>() {

            @Override
            public String call(SparkFlumeEvent sparkFlumeEvent) throws Exception {

                byte[] bodyArray = sparkFlumeEvent.event().getBody().array();
                String logTxt = new String(bodyArray, "UTF-8");
                logger.info(logTxt);

                return logTxt;
            }
        });
        // dStream.print();
        
        return dStream;
    }
 
Example 22
Project: nats-connector-spark   File: StandardNatsToSparkConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalPropertiesAndSubjects() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	properties.setProperty(PROP_URL, NATS_SERVER_URL);
	final JavaReceiverInputDStream<String> messages =  
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withProperties(properties)
				.withSubjects(DEFAULT_SUBJECT)
				.asStreamOf(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 23
Project: nats-connector-spark   File: StandardNatsToSparkConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalPropertiesAndMultipleSubjects() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	final JavaReceiverInputDStream<String> messages = 
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withNatsURL(NATS_SERVER_URL)
				.withProperties(properties)
				.withSubjects(DEFAULT_SUBJECT, "EXTRA_SUBJECT")
				.asStreamOf(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 24
Project: nats-connector-spark   File: StandardNatsToSparkConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalProperties() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	properties.setProperty(PROP_SUBJECTS, "sub1,"+DEFAULT_SUBJECT+" , sub2");
	properties.setProperty(PROP_URL, NATS_SERVER_URL);
	final JavaReceiverInputDStream<String> messages = 
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withProperties(properties)
				.asStreamOf(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 25
Project: nats-connector-spark   File: StandardNatsToSparkKeyValueConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalPropertiesAndSubjects() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	properties.setProperty(PROP_URL, NATS_SERVER_URL);

	final JavaPairDStream<String, String> messages = 
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withProperties(properties)
				.withSubjects(DEFAULT_SUBJECT)
				.asStreamOfKeyValue(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 26
Project: nats-connector-spark   File: StandardNatsToSparkKeyValueConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalPropertiesAndMultipleSubjects() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	final JavaPairDStream<String, String> messages = 
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withNatsURL(NATS_SERVER_URL)
				.withProperties(properties)
				.withSubjects(DEFAULT_SUBJECT, "EXTRA_SUBJECT")
				.asStreamOfKeyValue(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 27
Project: nats-connector-spark   File: StandardNatsToSparkKeyValueConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalProperties() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	properties.setProperty(PROP_SUBJECTS, "sub1,"+DEFAULT_SUBJECT+" , sub2");
	properties.setProperty(PROP_URL, NATS_SERVER_URL);
	final JavaPairDStream<String, String> messages = 
			NatsToSparkConnector
				.receiveFromNats(String.class, StorageLevel.MEMORY_ONLY())
				.withProperties(properties)
				.asStreamOfKeyValue(ssc);

	validateTheReceptionOfMessages(ssc, messages);
}
 
Example 28
Project: nats-connector-spark   File: IntegerNatsToSparkConnectorTest.java   6 votes
@Test(timeout=6000)
public void testNatsToSparkConnectorWithAdditionalPropertiesAndSubjects() throws InterruptedException {
	
	JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(200));

	final Properties properties = new Properties();
	properties.setProperty(PROP_URL, NATS_SERVER_URL);
	final JavaReceiverInputDStream<Integer> messages =  
			NatsToSparkConnector
				.receiveFromNats(Integer.class, StorageLevel.MEMORY_ONLY())
				.withProperties(properties)
				.withSubjects(DEFAULT_SUBJECT)
				.asStreamOf(ssc);

	validateTheReceptionOfIntegerMessages(ssc, messages);
}
 
Example 29
Project: spark-dataflow   File: StreamingTransformTranslator.java   6 votes
private static <K, V> TransformEvaluator<KafkaIO.Read.Unbound<K, V>> kafka() {
  return new TransformEvaluator<KafkaIO.Read.Unbound<K, V>>() {
    @Override
    public void evaluate(KafkaIO.Read.Unbound<K, V> transform, EvaluationContext context) {
      StreamingEvaluationContext sec = (StreamingEvaluationContext) context;
      JavaStreamingContext jssc = sec.getStreamingContext();
      Class<K> keyClazz = transform.getKeyClass();
      Class<V> valueClazz = transform.getValueClass();
      Class<? extends Decoder<K>> keyDecoderClazz = transform.getKeyDecoderClass();
      Class<? extends Decoder<V>> valueDecoderClazz = transform.getValueDecoderClass();
      Map<String, String> kafkaParams = transform.getKafkaParams();
      Set<String> topics = transform.getTopics();
      JavaPairInputDStream<K, V> inputPairStream = KafkaUtils.createDirectStream(jssc, keyClazz,
              valueClazz, keyDecoderClazz, valueDecoderClazz, kafkaParams, topics);
      JavaDStream<WindowedValue<KV<K, V>>> inputStream =
          inputPairStream.map(new Function<Tuple2<K, V>, KV<K, V>>() {
        @Override
        public KV<K, V> call(Tuple2<K, V> t2) throws Exception {
          return KV.of(t2._1(), t2._2());
        }
      }).map(WindowingHelpers.<KV<K, V>>windowFunction());
      sec.setStream(transform, inputStream);
    }
  };
}
 
Example 30
Project: datacollector   File: MapRStreamingBinding.java   6 votes
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
  sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
  JavaStreamingContext result = new JavaStreamingContext(sparkConf, new Duration(duration));
  Map<String, String> props = new HashMap<>();
  if (!autoOffsetValue.isEmpty()) {
    props.put(AbstractStreamingBinding.AUTO_OFFSET_RESET, autoOffsetValue);
  }
  logMessage("topic list " + topic, isRunningInMesos);
  logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
  props.putAll(extraKafkaConfigs);
  for (Map.Entry<String, String> map : props.entrySet()) {
    logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
  }
  props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
  JavaPairInputDStream<byte[], byte[]> dStream;
  if (offsetHelper.isSDCCheckPointing()) {
    JavaInputDStream stream =
        KafkaUtils.createDirectStream(
            result,
            byte[].class,
            byte[].class,
            Tuple2.class,
            props,
            MaprStreamsOffsetManagerImpl.get().getOffsetForDStream(topic, numberOfPartitions),
            MESSAGE_HANDLER_FUNCTION
        );
    ClassTag<byte[]> byteClassTag = scala.reflect.ClassTag$.MODULE$.apply(byte[].class);
    dStream = JavaPairInputDStream.fromInputDStream(stream.inputDStream(), byteClassTag, byteClassTag);
  } else {
    dStream =
        KafkaUtils.createDirectStream(result, byte[].class, byte[].class,
            props, new HashSet<>(Arrays.asList(topic.split(","))));
  }
  Driver$.MODULE$.foreach(dStream.dstream(), MaprStreamsOffsetManagerImpl.get());
  return result;
}
 
Example 31
Project: laughing-octo-sansa   File: TestSparkKafkaReceiverApproach.java   6 votes
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
    // Create the context with a 2 second batch interval
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));
    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic : topics) {
        topicMap.put(topic, numThreads);
    }
    JavaPairReceiverInputDStream<String, String> messages = KafkaUtils.createStream(jssc, args[0], args[1],
            topicMap);
    JavaDStream<String> lines = messages.map(tuple2 -> tuple2._2());
    JavaDStream<String> words = lines.flatMap(x -> Lists.newArrayList(SPACE.split(x)));
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(s -> new Tuple2<String, Integer>(s, 1)).reduceByKey(
            (i1, i2) -> i1 + i2);
    wordCounts.print();
    jssc.start();
    jssc.awaitTermination();
}
 
Example 32
Project: laughing-octo-sansa   File: FacebookSparkCrawler.java   6 votes
private void run(CompositeConfiguration conf) {
    // Spark conf
    SparkConf sparkConf = new SparkConf().setAppName("TwitterSparkCrawler").setMaster(conf.getString("spark.master"))
            .set("spark.serializer", conf.getString("spark.serializer"))
            .registerKryoClasses(new Class<?>[] { Parameter.class, BatchRequestBuilder.class, BatchRequest.class });
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(conf.getLong("stream.duration")));

    // Create facebook stream
    Parameter typeParam = Parameter.with("type", "event");
    FacebookUtils
            .createStream(jssc, conf.getString("access.token"),
                    new BatchRequestBuilder[] {
                            new BatchRequestBuilder("search").parameters(new Parameter[] { Parameter.with("q", "car"), typeParam }) })
            .print();

    // Start the computation
    jssc.start();
    jssc.awaitTermination();
}
 
Example 33
Project: WiseCrowdRec   File: SparkTwitterStreaming.java   6 votes
public void sparkInit() {
  PropertyConfigurator.configure(SparkTwitterStreaming.class.getClassLoader().getResource("log4j.properties"));
  //		note: import org.apache.log4j.Logger;
  //		note: import org.apache.log4j.Level;
  //		Logger.getLogger("org").setLevel(Level.WARN);
  //		Logger.getLogger("akka").setLevel(Level.WARN);
  // Set spark streaming info
  ssc = new JavaStreamingContext(
    "local[2]", "SparkTwitterStreamingJava", 
    new Duration(1000), System.getenv("SPARK_HOME"), 
    JavaStreamingContext.jarOfClass(SparkTwitterStreaming.class));

  //	HDFS directory for checkpointing
  /*
   * checkpoint saves the RDD to an HDFS file
   * http://apache-spark-user-list.1001560.n3.nabble.com/checkpoint-and-not-running-out-of-disk-space-td1525.html
   * dfs.namenode.checkpoint.dir -> hdfs-site.xml
   */
  //		String checkpointDir = TutorialHelper.getHdfsUrl() + "/checkpoint/";

  String checkpointDir = "file:///Users/feiyu/workspace/Hadoop/hdfs/namesecondary/checkpoint";
  ssc.checkpoint(checkpointDir);
}
 
Example 34
Project: learning-spark-examples   File: KafkaInput.java   6 votes
public static void main(String[] args) throws Exception {
   String zkQuorum = args[0];
   String group = args[1];
   SparkConf conf = new SparkConf().setAppName("KafkaInput");
   // Create a StreamingContext with a 1 second batch size
   JavaStreamingContext jssc = new JavaStreamingContext(conf, new Duration(1000));
   Map<String, Integer> topics = new HashMap<String, Integer>();
   topics.put("pandas", 1);
   JavaPairDStream<String, String> input = KafkaUtils.createStream(jssc, zkQuorum, group, topics);
   input.print();
   // start our streaming context and wait for it to "finish"
   jssc.start();
   // Wait for 10 seconds then exit. To run forever call without a timeout
   jssc.awaitTermination(10000);
   // Stop the streaming context
   jssc.stop();
}
 
Example 35
Project: learning-spark-examples   File: StreamingLogInput.java   6 votes
public static void main(String[] args) throws Exception {
	String master = args[0];
	JavaSparkContext sc = new JavaSparkContext(master, "StreamingLogInput");
   // Create a StreamingContext with a 1 second batch size
   JavaStreamingContext jssc = new JavaStreamingContext(sc, new Duration(1000));
   // Create a DStream from all the input on port 7777
   JavaDStream<String> lines = jssc.socketTextStream("localhost", 7777);
   // Filter our DStream for lines with "error"
   JavaDStream<String> errorLines = lines.filter(new Function<String, Boolean>() {
       public Boolean call(String line) {
         return line.contains("error");
       }});
   // Print out the lines with errors, which causes this DStream to be evaluated
   errorLines.print();
   // start our streaming context and wait for it to "finish"
   jssc.start();
   // Wait for 10 seconds then exit. To run forever call without a timeout
   jssc.awaitTermination(10000);
   // Stop the streaming context
   jssc.stop();
}
 
Example 36
Project: searchanalytics-bigdata   File: SparkStreamServiceImpl.java   6 votes
@Override
public void setup() {
	// Create a StreamingContext with a SparkConf configuration
	SparkConf sparkConf = new SparkConf(false)
			.setAppName("JaiSpark")
			.setSparkHome("target/sparkhome")
			.setMaster("local")
			.set("spark.executor.memory", "128m")
			.set("spark.local.dir",
					new File("target/sparkhome/tmp").getAbsolutePath())
			.set("spark.cores.max", "2").set("spark.akka.threads", "2")
			.set("spark.akka.timeout", "60").set("spark.logConf", "true")
			.set("spark.cleaner.delay", "3700")
			.set("spark.cleaner.ttl", "86400")
			.set("spark.shuffle.spill", "false")
			.set("spark.driver.host", "localhost")
			.set("spark.driver.port", "43214");
	jssc = new JavaStreamingContext(sparkConf, new Duration(5000));

	String checkpointDir = hadoopClusterService.getHDFSUri()
			+ "/sparkcheckpoint";
	jssc.checkpoint(checkpointDir);
	startFlumeStream();
}
 
Example 37
Project: Decision   File: StreamingContextConfiguration.java   6 votes
private JavaStreamingContext create(String streamingContextName, int port, long streamingBatchTime, String sparkHost) {
    SparkConf conf = new SparkConf();
    conf.set("spark.ui.port", String.valueOf(port));
    conf.setAppName(streamingContextName);
    conf.setJars(JavaStreamingContext.jarOfClass(StreamingEngine.class));
    conf.setMaster(sparkHost);

    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.registerKryoClasses(new Class[] { StratioStreamingMessage.class, InsertMessage.class, ColumnType.class,
            Action.class});


    HashMap<String, String> tuningProperties = configurationContext.getSparkTunningProperties();
    if (tuningProperties != null && tuningProperties.size() > 0) {
        tuningProperties.forEach( (key, value) ->  conf.set(key, value));
    }

    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(streamingBatchTime));

    return streamingContext;
}
 
Example 38
Project: Apache-Spark-2x-for-Java-Developers   File: WordCountTransformOpEx.java   5 votes
public static void main(String[] args) throws Exception {
  
      System.setProperty("hadoop.home.dir", "E:\\hadoop");
	
   SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
   JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
   Logger rootLogger = LogManager.getRootLogger();
 		rootLogger.setLevel(Level.WARN); 
   List<Tuple2<String, Integer>> tuples = Arrays.asList(new Tuple2<>("hello", 10), new Tuple2<>("world", 10));
   JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
	    

   JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
   
   JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
  
   JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
  
   wordCounts.print();
   
JavaPairDStream<String, Integer> joinedDstream = wordCounts
		.transformToPair(new Function<JavaPairRDD<String, Integer>, JavaPairRDD<String, Integer>>() {
			@Override
			public JavaPairRDD<String, Integer> call(JavaPairRDD<String, Integer> rdd) throws Exception {
				JavaPairRDD<String, Integer> modRDD = rdd.join(initialRDD).mapToPair(
						new PairFunction<Tuple2<String, Tuple2<Integer, Integer>>, String, Integer>() {
							@Override
							public Tuple2<String, Integer> call(
									Tuple2<String, Tuple2<Integer, Integer>> joinedTuple) throws Exception {
								return new Tuple2<>(joinedTuple._1(),(joinedTuple._2()._1() + joinedTuple._2()._2()));
							}
						});
				return modRDD;
			}
		});

   joinedDstream.print();
   streamingContext.start();
   streamingContext.awaitTermination();
 }
 
Example 39
Project: Apache-Spark-2x-for-Java-Developers   File: WordCountSocketStateful.java   5 votes
public static void main(String[] args) throws Exception {
 System.setProperty("hadoop.home.dir", "E:\\hadoop");

   SparkConf sparkConf = new SparkConf().setAppName("WordCountSocketEx").setMaster("local[*]");
   JavaStreamingContext streamingContext = new JavaStreamingContext(sparkConf, Durations.seconds(1));
   streamingContext.checkpoint("E:\\hadoop\\checkpoint");
// Initial state RDD input to mapWithState
   @SuppressWarnings("unchecked")
   List<Tuple2<String, Integer>> tuples =Arrays.asList(new Tuple2<>("hello", 1), new Tuple2<>("world", 1));
   JavaPairRDD<String, Integer> initialRDD = streamingContext.sparkContext().parallelizePairs(tuples);
   
   JavaReceiverInputDStream<String> StreamingLines = streamingContext.socketTextStream( "10.0.75.1", Integer.parseInt("9000"), StorageLevels.MEMORY_AND_DISK_SER);
   
   JavaDStream<String> words = StreamingLines.flatMap( str -> Arrays.asList(str.split(" ")).iterator() );
  
   JavaPairDStream<String, Integer> wordCounts = words.mapToPair(str-> new Tuple2<>(str, 1)).reduceByKey((count1,count2) ->count1+count2 );
  


  // Update the cumulative count function
  Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>> mappingFunc =
      new Function3<String, Optional<Integer>, State<Integer>, Tuple2<String, Integer>>() {
        @Override
        public Tuple2<String, Integer> call(String word, Optional<Integer> one,
            State<Integer> state) {
          int sum = one.orElse(0) + (state.exists() ? state.get() : 0);
          Tuple2<String, Integer> output = new Tuple2<>(word, sum);
          state.update(sum);
          return output;
        }
      };

  // DStream made of get cumulative counts that get updated in every batch
  JavaMapWithStateDStream<String, Integer, Integer, Tuple2<String, Integer>> stateDstream = wordCounts.mapWithState(StateSpec.function(mappingFunc).initialState(initialRDD));

  stateDstream.print();
  streamingContext.start();
  streamingContext.awaitTermination();
}
 
Example 40
Project: Apache-Spark-2x-for-Java-Developers   File: FileStreamingEx.java   5 votes
public static void main(String[] args) {
   	//Window-specific property if Hadoop is not installed or HADOOP_HOME is not set
	 System.setProperty("hadoop.home.dir", "E:\\hadoop");
   	//Logger rootLogger = LogManager.getRootLogger();
  		//rootLogger.setLevel(Level.WARN); 
       SparkConf conf = new SparkConf().setAppName("KafkaExample").setMaster("local[*]");
       String inputDirectory="E:\\hadoop\\streamFolder\\";
    
       JavaSparkContext sc = new JavaSparkContext(conf);
       JavaStreamingContext streamingContext = new JavaStreamingContext(sc, Durations.seconds(1));
      // streamingContext.checkpoint("E:\\hadoop\\checkpoint");
       Logger rootLogger = LogManager.getRootLogger();
  		rootLogger.setLevel(Level.WARN); 
  		
  		JavaDStream<String> streamfile = streamingContext.textFileStream(inputDirectory);
  		streamfile.print();
  		streamfile.foreachRDD(rdd-> rdd.foreach(x -> System.out.println(x)));
  		
  			   		
  		JavaPairDStream<LongWritable, Text> streamedFile = streamingContext.fileStream(inputDirectory, LongWritable.class, Text.class, TextInputFormat.class);
  	 streamedFile.print();
  		
  	 streamingContext.start();
  	 

       try {
		streamingContext.awaitTermination();
	} catch (InterruptedException e) {
		// TODO Auto-generated catch block
		e.printStackTrace();
	}
}