Java Code Examples for org.apache.spark.streaming.api.java.JavaStreamingContext#addStreamingListener()

The following examples show how to use org.apache.spark.streaming.api.java.JavaStreamingContext#addStreamingListener(). The examples are drawn from several open-source projects; the source file, originating project, and license are noted above each one.
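addStreamingListener() accepts an org.apache.spark.streaming.scheduler.StreamingListener. One common pattern in Java (used by the Beam runner in Example 2 below) is to extend JavaStreamingListener and wrap it in a JavaStreamingListenerWrapper before registering it. Below is a minimal sketch of that pattern; the BatchTimeListener class is illustrative and not part of any of the projects below:

import org.apache.spark.streaming.api.java.JavaStreamingListener;
import org.apache.spark.streaming.api.java.JavaStreamingListenerBatchCompleted;
import org.apache.spark.streaming.api.java.JavaStreamingListenerWrapper;

// Illustrative listener: logs the processing delay of each completed batch.
class BatchTimeListener extends JavaStreamingListener {
    @Override
    public void onBatchCompleted(JavaStreamingListenerBatchCompleted batchCompleted) {
        System.out.println("Batch completed; processing delay: "
            + batchCompleted.batchInfo().processingDelay() + " ms");
    }
}

// Registration on an existing JavaStreamingContext:
// jssc.addStreamingListener(new JavaStreamingListenerWrapper(new BatchTimeListener()));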
Example 1
Source File: StreamingService.java    From cxf with Apache License 2.0
private void processStream(AsyncResponse async, List<String> inputStrings) {
    try {
        SparkConf sparkConf = new SparkConf().setMaster("local[*]")
            .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId());
        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

        SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
        SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
        jssc.addStreamingListener(sparkListener);

        JavaDStream<String> receiverStream = null;
        if ("queue".equals(receiverType)) {
            Queue<JavaRDD<String>> rddQueue = new LinkedList<>();
            for (int i = 0; i < 30; i++) {
                rddQueue.add(jssc.sparkContext().parallelize(inputStrings));
            }
            receiverStream = jssc.queueStream(rddQueue);
        } else {
            receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings));
        }

        JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false);
        wordCounts.foreachRDD(new OutputFunction(streamOut));
        jssc.start();

        executor.execute(new SparkJob(async, sparkListener));
    } catch (Exception ex) {
        // the compiler does not allow catching SparkException directly,
        // since the try block does not declare it as thrown
        if (ex instanceof SparkException) {
            async.cancel(60);
        } else {
            async.resume(new WebApplicationException(ex));
        }
    }
}
 
Example 2
Source File: SparkRunner.java    From beam with Apache License 2.0
@Override
public SparkPipelineResult run(final Pipeline pipeline) {
  LOG.info("Executing pipeline using the SparkRunner.");

  final SparkPipelineResult result;
  final Future<?> startPipeline;

  final SparkPipelineTranslator translator;

  final ExecutorService executorService = Executors.newSingleThreadExecutor();

  MetricsEnvironment.setMetricsSupported(true);

  // visit the pipeline to determine the translation mode
  detectTranslationMode(pipeline);

  pipeline.replaceAll(SparkTransformOverrides.getDefaultOverrides(mOptions.isStreaming()));

  prepareFilesToStage(mOptions);

  if (mOptions.isStreaming()) {
    CheckpointDir checkpointDir = new CheckpointDir(mOptions.getCheckpointDir());
    SparkRunnerStreamingContextFactory streamingContextFactory =
        new SparkRunnerStreamingContextFactory(pipeline, mOptions, checkpointDir);
    final JavaStreamingContext jssc =
        JavaStreamingContext.getOrCreate(
            checkpointDir.getSparkCheckpointDir().toString(), streamingContextFactory);

    // Checkpoint aggregator/metrics values
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new AggregatorsAccumulator.AccumulatorCheckpointingSparkListener()));
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(
            new MetricsAccumulator.AccumulatorCheckpointingSparkListener()));

    // register user-defined listeners.
    for (JavaStreamingListener listener : mOptions.as(SparkContextOptions.class).getListeners()) {
      LOG.info("Registered listener {}." + listener.getClass().getSimpleName());
      jssc.addStreamingListener(new JavaStreamingListenerWrapper(listener));
    }

    // register Watermarks listener to broadcast the advanced WMs.
    jssc.addStreamingListener(
        new JavaStreamingListenerWrapper(new WatermarkAdvancingStreamingListener()));

    // initAccumulators is also called in SparkRunnerStreamingContextFactory, but the factory
    // is not invoked when resuming from a checkpoint, so it must be called here as well.
    // (When not resuming from a checkpoint, initAccumulators runs twice, which is fine
    // because it is idempotent.)
    initAccumulators(mOptions, jssc.sparkContext());

    startPipeline =
        executorService.submit(
            () -> {
              LOG.info("Starting streaming pipeline execution.");
              jssc.start();
            });
    executorService.shutdown();

    result = new SparkPipelineResult.StreamingMode(startPipeline, jssc);
  } else {
    // create the evaluation context
    final JavaSparkContext jsc = SparkContextFactory.getSparkContext(mOptions);
    final EvaluationContext evaluationContext = new EvaluationContext(jsc, pipeline, mOptions);
    translator = new TransformTranslator.Translator();

    // update the cache candidates
    updateCacheCandidates(pipeline, translator, evaluationContext);

    initAccumulators(mOptions, jsc);
    startPipeline =
        executorService.submit(
            () -> {
              pipeline.traverseTopologically(new Evaluator(translator, evaluationContext));
              evaluationContext.computeOutputs();
              LOG.info("Batch pipeline execution complete.");
            });
    executorService.shutdown();

    result = new SparkPipelineResult.BatchMode(startPipeline, jsc);
  }

  if (mOptions.getEnableSparkMetricSinks()) {
    registerMetricsSource(mOptions.getAppName());
  }

  // It would have been better to create MetricsPusher from runner-core, but we need the
  // runner-specific MetricsContainerStepMap.
  MetricsPusher metricsPusher =
      new MetricsPusher(
          MetricsAccumulator.getInstance().value(), mOptions.as(MetricsOptions.class), result);
  metricsPusher.start();
  return result;
}
 
Example 3
Source File: Server.java    From cxf with Apache License 2.0
protected Server(String[] args) throws Exception {
    ServerSocket sparkServerSocket = new ServerSocket(9999);
    ServerSocket jaxrsResponseServerSocket = new ServerSocket(10000);
    Socket jaxrsResponseClientSocket = new Socket("localhost", 10000);

    SparkConf sparkConf = new SparkConf().setMaster("local[*]")
        .setAppName("JAX-RS Spark Socket Connect");
    JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1));

    SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc);
    SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut);
    jssc.addStreamingListener(sparkListener);

    JavaDStream<String> receiverStream = jssc.socketTextStream(
        "localhost", 9999, StorageLevels.MEMORY_ONLY);

    JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, true);
    PrintStream sparkResponseOutputStream = new PrintStream(jaxrsResponseClientSocket.getOutputStream(), true);
    wordCounts.foreachRDD(new SocketOutputFunction(sparkResponseOutputStream));

    jssc.start();

    Socket receiverClientSocket = sparkServerSocket.accept();
    PrintStream sparkOutputStream = new PrintStream(receiverClientSocket.getOutputStream(), true);
    BufferedReader sparkInputStream =
        new BufferedReader(new InputStreamReader(jaxrsResponseServerSocket.accept().getInputStream()));

    JAXRSServerFactoryBean sf = new JAXRSServerFactoryBean();
    sf.setResourceClasses(StreamingService.class);
    sf.setResourceProvider(StreamingService.class,
        new SingletonResourceProvider(new StreamingService(sparkInputStream, sparkOutputStream)));
    sf.setAddress("http://localhost:9000/spark");
    sf.create();

    jssc.awaitTermination();
    sparkServerSocket.close();
    jaxrsResponseServerSocket.close();
    jaxrsResponseClientSocket.close();
}
 
Example 4
Source File: ReceiverLauncher.java    From kafka-spark-consumer with Apache License 2.0
private static <E> JavaDStream<MessageAndMetadata<E>> createStream(
        JavaStreamingContext jsc, Properties props, int numberOfReceivers, StorageLevel storageLevel,
        KafkaMessageHandler<E> messageHandler) {

    List<JavaDStream<MessageAndMetadata<E>>> streamsList =
            new ArrayList<>();
    JavaDStream<MessageAndMetadata<E>> unionStreams;
    KafkaConfig globalConfig = new KafkaConfig(props);
    _zkPath = (String) globalConfig.brokerZkPath;
    String[] topicList = props.getProperty(Config.KAFKA_TOPIC).split(",");
    int totalPartitions = 0;
    Map<String, KafkaConfig> topicConfigMap = new HashMap<>();

    for (String topic : topicList) {
        Properties property = new Properties();
        property.putAll(props);
        property.replace(Config.KAFKA_TOPIC, topic.trim());
        KafkaConfig kafkaConfig = new KafkaConfig(property);
        ZkState zkState = new ZkState(kafkaConfig);
        int numberOfPartition = getNumPartitions(zkState, topic.trim());
        totalPartitions = totalPartitions + numberOfPartition;
        zkState.close();
        topicConfigMap.put(topic + ":" + numberOfPartition, kafkaConfig);
    }

    for (Map.Entry<String, KafkaConfig> entry : topicConfigMap.entrySet()) {
        String[] tp = entry.getKey().split(":");
        int partitions = Integer.parseInt(tp[1]);
        KafkaConfig config = entry.getValue();
        int assignedReceivers = (int) Math.round((partitions / (double) totalPartitions) * numberOfReceivers);
        if (assignedReceivers == 0) {
            assignedReceivers = 1;
        }

        assignReceiversToPartitions(assignedReceivers, partitions, streamsList, config,
                storageLevel, messageHandler, jsc);
    }

    // Union all the streams if there is more than 1 stream
    if (streamsList.size() > 1) {
        unionStreams =
                jsc.union(
                        streamsList.get(0), streamsList.subList(1, streamsList.size()));
    } else {
        // Otherwise, just use the 1 stream
        unionStreams = streamsList.get(0);
    }
    final long batchDuration = jsc.ssc().graph().batchDuration().milliseconds();
    ReceiverStreamListener listener = new ReceiverStreamListener(globalConfig, batchDuration);
    jsc.addStreamingListener(listener);
    // Reset the fetch size
    Utils.setFetchRate(globalConfig, globalConfig._pollRecords);
    return unionStreams;
}
 
Example 5
Source File: AbstractJavaEsSparkStreamingTest.java    From elasticsearch-hadoop with Apache License 2.0
// Registers this test helper as a streaming listener on the given context
// and returns it for fluent chaining.
public ExpectingToThrow from(JavaStreamingContext ssc) {
    ssc.addStreamingListener(this);
    return this;
}
 