org.apache.spark.streaming.Seconds Java Examples

The following examples show how to use org.apache.spark.streaming.Seconds. Each example is taken from an open source project; the source file, project, and license are listed above the code.
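
As a minimal sketch of the common pattern (the class name and settings below are illustrative, not taken from any of the projects listed), Seconds.apply(n) produces a Duration of n seconds that is typically passed as the batch interval of a streaming context; org.apache.spark.streaming.Durations.seconds(n) is the equivalent Java-friendly helper:

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Seconds;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class SecondsExample {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("SecondsExample");
    // Seconds.apply(10) builds a 10-second Duration used as the batch interval.
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Seconds.apply(10));
    // ... define input DStreams and output operations here before starting ...
    jssc.start();
    jssc.awaitTermination();
  }
}
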
Example #1
Source File: JobHelper.java    From sylph with Apache License 2.0
static Serializable build1xJob(String jobId, EtlFlow flow, URLClassLoader jobClassLoader, ConnectorStore connectorStore)
        throws Exception
{
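    // Build the job once in "compile" mode inside a forked JVM (local[*]) to validate the flow;
    // after validation succeeds, isCompile is flipped to false so the returned supplier
    // creates the real StreamingContext when executed on the cluster.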
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        // TODO: 5s is the default batch duration and is hard-coded here; consider making it configurable
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext spark = new StreamingContext(sparkSession.sparkContext(), Seconds.apply(5));

        Bean bean = binder -> binder.bind(StreamingContext.class, spark);
        StreamNodeLoader loader = new StreamNodeLoader(connectorStore, IocFactory.create(bean));
        buildGraph(loader, flow);
        return spark;
    };

    JVMLauncher<Integer> launcher = JVMLaunchers.<Integer>newJvm()
            .setCallable(() -> {
                appGetter.get();
                return 1;
            })
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .addUserURLClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .setClassLoader(jobClassLoader)
            .build();
    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
 
Example #2
Source File: CloudPubSubStreamingWordCount.java    From spark-on-k8s-gcp-examples with Apache License 2.0
public static void main(String[] args) throws InterruptedException {
  if (args.length != 4) {
    System.err.println("Usage: CloudPubSubStreamingWordCount <GCP project ID> " +
        "<Cloud PubSub subscription> <GCS output dir path> <job duration in seconds>");
    System.exit(1);
  }

  Preconditions.checkArgument(
      !Strings.isNullOrEmpty(args[0]), "GCP project ID must not be null or empty");
  Preconditions.checkArgument(
      !Strings.isNullOrEmpty(args[1]), "Cloud PubSub subscription must not be null or empty");

  JavaStreamingContext jsc = new JavaStreamingContext(
      new SparkConf().setAppName("Cloud PubSub Spark Streaming Word Count"),
      Seconds.apply(30) // Batch duration
  );

  Configuration hadoopConf = jsc.sparkContext().hadoopConfiguration();
  // Use service account for authentication. The service account key file is located at the path
  // specified by the configuration property google.cloud.auth.service.account.json.keyfile.
  hadoopConf.set(
      EntriesCredentialConfiguration.BASE_KEY_PREFIX +
          EntriesCredentialConfiguration.ENABLE_SERVICE_ACCOUNTS_SUFFIX,
      "true");
  // Use the service account Json key file shared with the GCS connector.
  String serviceAccountJsonKeyFilePath = hadoopConf.get(
      EntriesCredentialConfiguration.BASE_KEY_PREFIX +
          EntriesCredentialConfiguration.JSON_KEYFILE_SUFFIX);
  Preconditions.checkArgument(!Strings.isNullOrEmpty(serviceAccountJsonKeyFilePath),
      "Service account Json key file path must be specified");

  // Create an input stream that reads messages from the given Cloud PubSub subscription.
  JavaReceiverInputDStream<SparkPubsubMessage> pubSubStream = PubsubUtils.createStream(
      jsc,
      args[0], // GCP project ID
      args[1], // Cloud PubSub subscription
      new SparkGCPCredentials.Builder()
          .jsonServiceAccount(serviceAccountJsonKeyFilePath)
          .build(),
      StorageLevel.MEMORY_AND_DISK_SER());

  JavaPairDStream<String, Long> wordCounts = pubSubStream
      .mapToPair(message -> new Tuple2<>(new String(message.getData()), 1L))
      .reduceByKey((count1, count2) -> count1 + count2);

  final String gcsFilePathTemplate = args[2] + "/batch-%d";
  wordCounts
      .mapToPair(tuple -> new Tuple2<>(new Text(tuple._1), new LongWritable(tuple._2)))
      .foreachRDD(rdd -> rdd
          .saveAsNewAPIHadoopFile(String.format(gcsFilePathTemplate, rdd.id()),
              Text.class,
              LongWritable.class,
              TextOutputFormat.class));

  try {
    jsc.start();
    // Let the job run for the given duration and then terminate it.
    jsc.awaitTerminationOrTimeout(TimeUnit.SECONDS.toMillis(Long.parseLong(args[3])));
  } finally {
    jsc.stop(true, true);
  }
}
 
Example #3
Source File: JavaStreamingTestExample.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: JavaStreamingTestExample " +
      "<dataDir> <batchDuration> <numBatchesTimeout>");
    System.exit(1);
  }

  String dataDir = args[0];
  Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
  int numBatchesTimeout = Integer.parseInt(args[2]);

  SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
  JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

  ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

  // $example on$
  JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
    new Function<String, BinarySample>() {
      @Override
      public BinarySample call(String line) {
        String[] ts = line.split(",");
        boolean label = Boolean.parseBoolean(ts[0]);
        double value = Double.parseDouble(ts[1]);
        return new BinarySample(label, value);
      }
    });

  StreamingTest streamingTest = new StreamingTest()
    .setPeacePeriod(0)
    .setWindowSize(0)
    .setTestMethod("welch");

  JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
  out.print();
  // $example off$

  // Stop processing if the test becomes significant or we time out.
  // timeoutCounter is a static field of the enclosing class (declared outside this
  // snippet), so it can be updated from inside the anonymous VoidFunction below.
  timeoutCounter = numBatchesTimeout;

  out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
    @Override
    public void call(JavaRDD<StreamingTestResult> rdd) {
      timeoutCounter -= 1;

      boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
        @Override
        public Boolean call(StreamingTestResult v) {
          return v.pValue() < 0.05;
        }
      }).isEmpty();

      if (timeoutCounter <= 0 || anySignificant) {
        rdd.context().stop();
      }
    }
  });

  ssc.start();
  ssc.awaitTermination();
}
 
Example #4
Source File: AbstractJavaEsSparkStreamingTest.java    From elasticsearch-hadoop with Apache License 2.0
@Before
public void createStreamingContext() throws Exception {
    ssc = new JavaStreamingContext(sc, Seconds.apply(1));
}
 