org.apache.spark.streaming.Duration Java Examples

The following examples show how to use org.apache.spark.streaming.Duration. Each example is taken from an open-source project; the source file, project, and license are noted above the code.
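Before the individual examples, here is a minimal, self-contained sketch of the three places a Duration typically appears: the batch interval of a JavaStreamingContext, and the window length and slide interval of a windowed operation. The class name, master URL, host, port, and checkpoint directory below are placeholder values, not taken from any of the projects listed.

import org.apache.spark.SparkConf;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.api.java.JavaDStream;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

public class DurationSketch {
  public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("DurationSketch");

    // Duration is specified in milliseconds: here, one batch every 2 seconds.
    JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

    // countByWindow keeps windowed state, so a checkpoint directory is required.
    ssc.checkpoint("/tmp/spark-checkpoint");

    JavaDStream<String> lines = ssc.socketTextStream("localhost", 9999);

    // Window length 30 s, slide interval 10 s; both must be multiples of the batch interval.
    JavaDStream<Long> counts = lines.countByWindow(new Duration(30000), new Duration(10000));
    counts.print();

    ssc.start();
    ssc.awaitTermination();
  }
}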
Example #1
Source File: StreamingRsvpsDStreamCountWindow.java    From -Data-Stream-Development-with-Apache-Spark-Kafka-and-Spring-Boot with MIT License
public static void main(String[] args) throws InterruptedException {

        System.setProperty("hadoop.home.dir", HADOOP_HOME_DIR_VALUE);

        final SparkConf conf = new SparkConf()
                .setMaster(RUN_LOCAL_WITH_AVAILABLE_CORES)
                .setAppName(APPLICATION_NAME)
                .set("spark.mongodb.output.uri", MONGODB_OUTPUT_URI)
                .set("spark.streaming.kafka.consumer.cache.enabled", "false");

        final JavaStreamingContext streamingContext
                = new JavaStreamingContext(conf, new Duration(BATCH_DURATION_INTERVAL_MS));

        streamingContext.checkpoint(CHECKPOINT_FOLDER);

        final JavaInputDStream<ConsumerRecord<String, String>> meetupStream =
                KafkaUtils.createDirectStream(
                        streamingContext,
                        LocationStrategies.PreferConsistent(),
                        ConsumerStrategies.<String, String>Subscribe(TOPICS, KAFKA_CONSUMER_PROPERTIES)
                );
                
        // transformations, streaming algorithms, etc
        JavaDStream<Long> countStream  
            = meetupStream.countByWindow(
                 new Duration(WINDOW_LENGTH_MS), 
                 new Duration(SLIDING_INTERVAL_MS));        

        countStream.foreachRDD((JavaRDD<Long> countRDD) -> {                
            MongoSpark.save(        
                    countRDD.map(
                        r -> Document.parse("{\"rsvps_count\":\"" + String.valueOf(r) + "\"}")
                    )
            );            
        });
        
        // commit the consumed Kafka offsets once the outputs above have completed
        meetupStream.foreachRDD((JavaRDD<ConsumerRecord<String, String>> meetupRDD) -> {        
            OffsetRange[] offsetRanges = ((HasOffsetRanges) meetupRDD.rdd()).offsetRanges();            

            ((CanCommitOffsets) meetupStream.inputDStream())
                .commitAsync(offsetRanges, new MeetupOffsetCommitCallback());
        });
        
        streamingContext.start();
        streamingContext.awaitTermination();    
    }
 
Example #2
Source File: SparkStreamingBinding.java    From datacollector with Apache License 2.0
@Override
@SuppressWarnings("unchecked")
public JavaStreamingContext create() {
  sparkConf.set("spark.streaming.kafka.maxRatePerPartition", String.valueOf(maxRatePerPartition));
  // Use our classpath first, since we ship a newer version of Jackson and possibly other deps in the future.
  sparkConf.set("spark.driver.userClassPathFirst", "true");
  sparkConf.set("spark.executor.userClassPathFirst", "true");

  session = SparkSession.builder().config(sparkConf).getOrCreate();

  JavaStreamingContext result =
      new JavaStreamingContext(new JavaSparkContext(session.sparkContext()), new Duration(duration));
  Map<String, Object> props = new HashMap<>();

  props.put("group.id", groupId);
  props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
  for (Map.Entry<String, Object> map : props.entrySet()) {
    logMessage(Utils.format("Adding extra kafka config, {}:{}", map.getKey(), map.getValue()), isRunningInMesos);
  }

  logMessage("Meta data broker list " + metaDataBrokerList, isRunningInMesos);
  logMessage("Topic is " + topic, isRunningInMesos);
  logMessage("Auto offset reset is set to " + autoOffsetValue, isRunningInMesos);
  return createDStream(result, props);
}
 
Example #3
Source File: Flags.java    From SparkToParquet with Apache License 2.0
public static void setFromCommandLineArgs(Options options, String[] args) {
	CommandLineParser parser = new PosixParser();
	try {
		CommandLine cl = parser.parse(options, args);
		// Default values for the command-line parameters
		THE_INSTANCE.windowLength = new Duration(
				Integer.parseInt(cl.getOptionValue(AppMain.WINDOW_LENGTH, "30")) * 1000);
		THE_INSTANCE.slideInterval = new Duration(
				Integer.parseInt(cl.getOptionValue(AppMain.SLIDE_INTERVAL, "5")) * 1000);
		THE_INSTANCE.kafka_broker = cl.getOptionValue(AppMain.KAFKA_BROKER, "kafka:9092");
		THE_INSTANCE.kafka_topic = cl.getOptionValue(AppMain.KAFKA_TOPIC, "apache");
		THE_INSTANCE.parquet_file = cl.getOptionValue(AppMain.PARQUET_FILE, "/user/spark/");
		THE_INSTANCE.initialized = true;
	} catch (ParseException e) {
		THE_INSTANCE.initialized = false;
		System.err.println("Parsing failed.  Reason: " + e.getMessage());
	}
}
 
Example #4
Source File: StreamingContextConfiguration.java    From Decision with Apache License 2.0
private JavaStreamingContext create(String streamingContextName, int port, long streamingBatchTime, String sparkHost) {
    SparkConf conf = new SparkConf();
    conf.set("spark.ui.port", String.valueOf(port));
    conf.setAppName(streamingContextName);
    conf.setJars(JavaStreamingContext.jarOfClass(StreamingEngine.class));
    conf.setMaster(sparkHost);

    conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
    conf.registerKryoClasses(new Class[] { StratioStreamingMessage.class, InsertMessage.class, ColumnType.class,
            Action.class});


    HashMap<String, String> tuningProperties = configurationContext.getSparkTunningProperties();
    if (tuningProperties != null && tuningProperties.size() > 0) {
        tuningProperties.forEach( (key, value) ->  conf.set(key, value));
    }

    JavaStreamingContext streamingContext = new JavaStreamingContext(conf, new Duration(streamingBatchTime));

    return streamingContext;
}
 
Example #5
Source File: SparkStreamServiceImpl.java    From searchanalytics-bigdata with MIT License
@Override
public void setup() {
	// Create a StreamingContext with a SparkConf configuration
	SparkConf sparkConf = new SparkConf(false)
			.setAppName("JaiSpark")
			.setSparkHome("target/sparkhome")
			.setMaster("local")
			.set("spark.executor.memory", "128m")
			.set("spark.local.dir",
					new File("target/sparkhome/tmp").getAbsolutePath())
			.set("spark.cores.max", "2").set("spark.akka.threads", "2")
			.set("spark.akka.timeout", "60").set("spark.logConf", "true")
			.set("spark.cleaner.delay", "3700")
			.set("spark.cleaner.ttl", "86400")
			.set("spark.shuffle.spill", "false")
			.set("spark.driver.host", "localhost")
			.set("spark.driver.port", "43214");
	jssc = new JavaStreamingContext(sparkConf, new Duration(5000));

	String checkpointDir = hadoopClusterService.getHDFSUri()
			+ "/sparkcheckpoint";
	jssc.checkpoint(checkpointDir);
	startFlumeStream();
}
 
Example #6
Source File: AbstractSparkLayer.java    From oryx with Apache License 2.0
protected final JavaStreamingContext buildStreamingContext() {
  log.info("Starting SparkContext with interval {} seconds", generationIntervalSec);

  SparkConf sparkConf = new SparkConf();

  // Only for tests, really
  if (sparkConf.getOption("spark.master").isEmpty()) {
    log.info("Overriding master to {} for tests", streamingMaster);
    sparkConf.setMaster(streamingMaster);
  }
  // Only for tests, really
  if (sparkConf.getOption("spark.app.name").isEmpty()) {
    String appName = "Oryx" + getLayerName();
    if (id != null) {
      appName = appName + '-' + id;
    }
    log.info("Overriding app name to {} for tests", appName);
    sparkConf.setAppName(appName);
  }
  extraSparkConfig.forEach((key, value) -> sparkConf.setIfMissing(key, value.toString()));

  // Turn this down to prevent long blocking at shutdown
  sparkConf.setIfMissing(
      "spark.streaming.gracefulStopTimeout",
      Long.toString(TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS)));
  sparkConf.setIfMissing("spark.cleaner.ttl", Integer.toString(20 * generationIntervalSec));
  long generationIntervalMS =
      TimeUnit.MILLISECONDS.convert(generationIntervalSec, TimeUnit.SECONDS);

  JavaSparkContext jsc = JavaSparkContext.fromSparkContext(SparkContext.getOrCreate(sparkConf));
  return new JavaStreamingContext(jsc, new Duration(generationIntervalMS));
}
 
Example #7
Source File: JavaHBaseStreamingBulkPutExample.java    From learning-hadoop with Apache License 2.0
public static void main(String args[]) {
  if (args.length < 5) {
    System.out
        .println("JavaHBaseStreamingBulkPutExample {master} {host} {port} {tableName} {columnFamily}");
    System.exit(1);
  }

  String master = args[0];
  String host = args[1];
  String port = args[2];
  String tableName = args[3];
  String columnFamily = args[4];

  System.out.println("master:" + master);
  System.out.println("host:" + host);
  System.out.println("port:" + Integer.parseInt(port));
  System.out.println("tableName:" + tableName);
  System.out.println("columnFamily:" + columnFamily);
  
  SparkConf sparkConf = new SparkConf();
  sparkConf.set("spark.cleaner.ttl", "120000");
  
  JavaSparkContext jsc = new JavaSparkContext(master,
      "JavaHBaseBulkPutExample");
  jsc.addJar("SparkHBase.jar");
  
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, new Duration(1000));

  JavaReceiverInputDStream<String> javaDstream = jssc.socketTextStream(host, Integer.parseInt(port));
  
  Configuration conf = HBaseConfiguration.create();
  conf.addResource(new Path("/etc/hbase/conf/core-site.xml"));
  conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

  JavaHBaseContext hbaseContext = new JavaHBaseContext(jsc, conf);

  hbaseContext.streamBulkPut(javaDstream, tableName, new PutFunction(), true);
}
 
Example #8
Source File: SparkRunnerStreamingContextFactory.java    From beam with Apache License 2.0
@Override
public JavaStreamingContext call() throws Exception {
  LOG.info("Creating a new Spark Streaming Context");
  // validate unbounded read properties.
  checkArgument(
      options.getMinReadTimeMillis() < options.getBatchIntervalMillis(),
      "Minimum read time has to be less than batch time.");
  checkArgument(
      options.getReadTimePercentage() > 0 && options.getReadTimePercentage() < 1,
      "Read time percentage is bound to (0, 1).");

  SparkPipelineTranslator translator =
      new StreamingTransformTranslator.Translator(new TransformTranslator.Translator());
  Duration batchDuration = new Duration(options.getBatchIntervalMillis());
  LOG.info("Setting Spark streaming batchDuration to {} msec", batchDuration.milliseconds());

  JavaSparkContext jsc = SparkContextFactory.getSparkContext(options);
  JavaStreamingContext jssc = new JavaStreamingContext(jsc, batchDuration);

  // We must first init accumulators since translators expect them to be instantiated.
  SparkRunner.initAccumulators(options, jsc);
  // no need to create a MetricsPusher instance here because it is called in SparkRunner.run()

  EvaluationContext ctxt = new EvaluationContext(jsc, pipeline, options, jssc);
  // update cache candidates
  SparkRunner.updateCacheCandidates(pipeline, translator, ctxt);
  pipeline.traverseTopologically(new SparkRunner.Evaluator(translator, ctxt));
  ctxt.computeOutputs();

  checkpoint(jssc, checkpointDir);

  return jssc;
}
 
Example #9
Source File: SparkGroupAlsoByWindowViaWindowSet.java    From beam with Apache License 2.0
private static void checkpointIfNeeded(
    final DStream<Tuple2<ByteArray, Tuple2<StateAndTimers, List<byte[]>>>> firedStream,
    final SerializablePipelineOptions options) {

  final Long checkpointDurationMillis = getBatchDuration(options);

  if (checkpointDurationMillis > 0) {
    firedStream.checkpoint(new Duration(checkpointDurationMillis));
  }
}
 
Example #10
Source File: KafkaInput.java    From envelope with Apache License 2.0
@Override
public JavaDStream<?> getDStream() throws Exception {
  if (dStream == null) {
    JavaStreamingContext jssc = Contexts.getJavaStreamingContext();
    Map<TopicPartition, Long> lastOffsets = null;
    if (doesRecordProgress(config) && !usingKafkaManagedOffsets(config)) {
      lastOffsets = getLastOffsets();
    }

    if (lastOffsets != null) {
      dStream = KafkaUtils.createDirectStream(jssc, LocationStrategies.PreferConsistent(),
          ConsumerStrategies.Subscribe(topics, kafkaParams, lastOffsets));
    } else {
      dStream = KafkaUtils.createDirectStream(jssc, LocationStrategies.PreferConsistent(),
          ConsumerStrategies.Subscribe(topics, kafkaParams));
    }

    if (ConfigUtils.getOrElse(config, WINDOW_ENABLED_CONFIG, false)) {
      int windowDuration = config.getInt(WINDOW_MILLISECONDS_CONFIG);
      if (config.hasPath(WINDOW_SLIDE_MILLISECONDS_CONFIG)) {
        int slideDuration = config.getInt(WINDOW_SLIDE_MILLISECONDS_CONFIG);
        dStream = dStream.window(new Duration(windowDuration), new Duration(slideDuration));
      } else {
        dStream = dStream.window(new Duration(windowDuration));
      }
    }
  }

  return dStream;
}
 
Example #11
Source File: SparkStreamingFromNetworkExample.java    From SparkOnALog with Apache License 2.0
public static void main(String[] args) {
  if (args.length < 3) {
    System.err.println("Usage: NetworkWordCount <master> <hostname> <port>\n" +
        "In local mode, <master> should be 'local[n]' with n > 1");
    System.exit(1);
  }

  // Create the context with a 5 second batch size
  JavaStreamingContext ssc = new JavaStreamingContext(args[0], "NetworkWordCount",
          new Duration(5000), System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));

  // Create a socket text stream on the target ip:port and print each line
  // of \n delimited text (e.g. generated by 'nc')
  JavaDStream<String> lines = ssc.socketTextStream(args[1], Integer.parseInt(args[2]));
  
  lines.map(new Function<String, String>() {
    @Override
    public String call(String arg0) throws Exception {
      System.out.println("arg0" + arg0);
      return arg0;
    }
  }).print();
  
  lines.print();
  ssc.start();


}
 
Example #12
Source File: JavaCustomReceiver.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  if (args.length < 2) {
    System.err.println("Usage: JavaCustomReceiver <hostname> <port>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();

  // Create the context with a 1 second batch size
  SparkConf sparkConf = new SparkConf().setAppName("JavaCustomReceiver");
  JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(1000));

  // Create an input stream with the custom receiver on target ip:port and count the
  // words in input stream of \n delimited text (eg. generated by 'nc')
  JavaReceiverInputDStream<String> lines = ssc.receiverStream(
    new JavaCustomReceiver(args[0], Integer.parseInt(args[1])));
  JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    @Override
    public Iterator<String> call(String x) {
      return Arrays.asList(SPACE.split(x)).iterator();
    }
  });
  JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
    new PairFunction<String, String, Integer>() {
      @Override public Tuple2<String, Integer> call(String s) {
        return new Tuple2<>(s, 1);
      }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) {
        return i1 + i2;
      }
    });

  wordCounts.print();
  ssc.start();
  ssc.awaitTermination();
}
 
Example #13
Source File: SparkStreamingJob.java    From zipkin-sparkstreaming with Apache License 2.0
@Memoized
JavaStreamingContext jsc() {
  SparkConf conf = new SparkConf(true)
      .setMaster(master())
      .setAppName(getClass().getName());
  if (!jars().isEmpty()) conf.setJars(jars().toArray(new String[0]));
  for (Map.Entry<String, String> entry : conf().entrySet()) {
    conf.set(entry.getKey(), entry.getValue());
  }
  return new JavaStreamingContext(conf, new Duration(batchDuration()));
}
 
Example #14
Source File: SparkStreamingSqlEngine.java    From sylph with Apache License 2.0
private static Serializable compile(String jobId, SqlFlow sqlFlow, ConnectorStore connectorStore, SparkJobConfig sparkJobConfig, URLClassLoader jobClassLoader)
        throws JVMException
{
    int batchDuration = sparkJobConfig.getSparkStreamingBatchDuration();
    final AtomicBoolean isCompile = new AtomicBoolean(true);
    final Supplier<StreamingContext> appGetter = (Supplier<StreamingContext> & Serializable) () -> {
        logger.info("========create spark StreamingContext mode isCompile = " + isCompile.get() + "============");
        SparkConf sparkConf = isCompile.get() ?
                new SparkConf().setMaster("local[*]").setAppName("sparkCompile")
                : new SparkConf();
        SparkSession sparkSession = SparkSession.builder().config(sparkConf).getOrCreate();
        StreamingContext ssc = new StreamingContext(sparkSession.sparkContext(), Duration.apply(batchDuration));

        //build sql
        SqlAnalyse analyse = new SparkStreamingSqlAnalyse(ssc, connectorStore, isCompile.get());
        try {
            buildSql(analyse, jobId, sqlFlow);
        }
        catch (Exception e) {
            throwsException(e);
        }
        return ssc;
    };

    JVMLauncher<Boolean> launcher = JVMLaunchers.<Boolean>newJvm()
            .setConsole((line) -> System.out.println(new Ansi().fg(YELLOW).a("[" + jobId + "] ").fg(GREEN).a(line).reset()))
            .setCallable(() -> {
                System.out.println("************ job start ***************");
                appGetter.get();
                return true;
            })
            .addUserURLClassLoader(jobClassLoader)
            .setClassLoader(jobClassLoader)
            .notDepThisJvmClassPath()
            .build();

    launcher.startAndGet();
    isCompile.set(false);
    return (Serializable) appGetter;
}
 
Example #15
Source File: SparkScheduler.java    From oodt with Apache License 2.0
public SparkScheduler(JobQueue queue) {
    SparkConf conf = new SparkConf();
    conf.setMaster(System.getProperty("resource.runner.spark.host","local"));
    conf.setAppName("OODT Spark Job");

    URL location = SparkScheduler.class.getResource('/'+SparkScheduler.class.getName().replace('.', '/')+".class");
    conf.setJars(new String[]{"../lib/cas-resource-0.8-SNAPSHOT.jar"});
    sc = new SparkContext(conf);
    ssc = new StreamingContext(sc,new Duration(10000));
    this.queue = queue;
}
 
Example #16
Source File: ReaderWriterExample.java    From spliceengine with GNU Affero General Public License v3.0
public static void main(String[] args) throws Exception {

        final String dbUrl = args[0];
        final String hostname = args[1];
        final String port = args[2];
        final String inTargetSchema = args[3];
        final String inTargetTable = args[4];

        SparkConf conf = new SparkConf();

        JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(500));

        JavaReceiverInputDStream<String> stream = ssc.socketTextStream(hostname, Integer.parseInt(port));

        SparkSession spark = SparkSession.builder().getOrCreate();

        // Create a SplicemachineContext based on the provided DB connection
        SplicemachineContext splicemachineContext = new SplicemachineContext(dbUrl);

        // Set target tablename and schemaname
        final String table = inTargetSchema + "." + inTargetTable;

        stream.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
            JavaRDD<Row> rowRDD = rdd.map((Function<String, Row>) s -> RowFactory.create(s));
            Dataset<Row> df = spark.createDataFrame(rowRDD, splicemachineContext.getSchema(table));

            splicemachineContext.insert(df, table);
        });

        ssc.start();
        ssc.awaitTermination();
    }
 
Example #17
Source File: ReaderWriterExample.java    From spliceengine with GNU Affero General Public License v3.0
public static void main(String[] args) throws Exception {

        final String dbUrl = args[0];
        final String hostname = args[1];
        final String port = args[2];
        final String inTargetSchema = args[3];
        final String inTargetTable = args[4];

        SparkConf conf = new SparkConf();

        JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(500));
        SpliceSpark.setContext(ssc.sparkContext());

        SparkSession spark = SpliceSpark.getSessionUnsafe();

        JavaReceiverInputDStream<String> stream = ssc.socketTextStream(hostname, Integer.parseInt(port));

        // Create a SplicemachineContext based on the provided DB connection
        SplicemachineContext splicemachineContext = new SplicemachineContext(dbUrl);

        // Set target tablename and schemaname
        final String table = inTargetSchema + "." + inTargetTable;

        stream.foreachRDD((VoidFunction<JavaRDD<String>>) rdd -> {
            JavaRDD<Row> rowRDD = rdd.map((Function<String, Row>) s -> RowFactory.create(s));
            Dataset<Row> df = spark.createDataFrame(rowRDD, splicemachineContext.getSchema(table));

            splicemachineContext.insert(df, table);
        });

        ssc.start();
        ssc.awaitTermination();
    }
 
Example #18
Source File: SparkUnboundedSource.java    From beam with Apache License 2.0
private static void checkpointStream(JavaDStream<?> dStream, SparkPipelineOptions options) {
  long checkpointDurationMillis = options.getCheckpointDurationMillis();
  if (checkpointDurationMillis > 0) {
    dStream.checkpoint(new Duration(checkpointDurationMillis));
  }
}
 
Example #19
Source File: KafkaStreamRestHandler.java    From elasticsearch-rest-command with The Unlicense
@Override
protected void handleRequest(RestRequest request, RestChannel channel, Client client)
		throws Exception {
	final String topic = request.param("topic", "");
	final boolean schema = request.paramAsBoolean("schema", false);
	final String master = request.param("masterAddress", "local");
	final String hdfs =  request.param("hdfs", "hdfs://localhost:50070");
	final String memory =  request.param("memory", "2g");
	final String appName = request.param("appName", "appName-"+topic);
	final int duration = request.paramAsInt("duration", 1000);
	
	Thread exec = new Thread(new Runnable(){

		@Override
		public void run() {
		
			SparkConf sparkConf = new SparkConf().setAppName(appName).setMaster(master).set("spark.executor.memory", memory);
			JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(duration));
			
			Map<String, Integer> topicMap = new HashMap<String, Integer>();
			topicMap.put(topic, 3);
			
			JavaPairReceiverInputDStream<String, byte[]> kafkaStream = KafkaUtils.createStream(jssc, String.class, byte[].class, 
						kafka.serializer.StringDecoder.class, kafka.serializer.DefaultDecoder.class, null, 
						topicMap,  StorageLevel.MEMORY_ONLY());
	
			//JobConf confHadoop = new JobConf();
			//confHadoop.set("mapred.output.compress", "true");
			//confHadoop.set("mapred.output.compression.codec", "com.hadoop.compression.lzo.LzopCodec");
	
			kafkaStream.saveAsHadoopFiles(hdfs, "seq", Text.class, BytesWritable.class, KafkaStreamSeqOutputFormat.class);
			
			topicContextMap.put(topic, jssc);
			jssc.start();		
			jssc.awaitTermination();
			
		}
	});
	
	exec.start();
	
	channel.sendResponse(new BytesRestResponse(RestStatus.OK, String.format("{\"topic\":\"%s\"}",  topic)));
	
	
}
 
Example #20
Source File: Throughput.java    From flink-perf with Apache License 2.0
public static void main(String[] args) {
	SparkConf conf = new SparkConf().setAppName("throughput").setMaster("local[8]");
	JavaStreamingContext ssc = new JavaStreamingContext(conf, new Duration(2000));

	JavaReceiverInputDStream<Tuple4<Long, Integer, Long, byte[]>> source = ssc.receiverStream(new Source(StorageLevel.MEMORY_ONLY()));
	JavaPairDStream<Long, Tuple3<Integer, Long, byte[]>> kvsource = source.mapToPair(new PairFunction<Tuple4<Long, Integer, Long, byte[]>, Long, Tuple3<Integer, Long, byte[]>>() {
		@Override
		public Tuple2<Long, Tuple3<Integer, Long, byte[]>> call(Tuple4<Long, Integer, Long, byte[]> longIntegerLongTuple4) throws Exception {
			return new Tuple2<Long, Tuple3<Integer, Long, byte[]>>(longIntegerLongTuple4._1(),
					new Tuple3<Integer, Long, byte[]>(longIntegerLongTuple4._2(), longIntegerLongTuple4._3(), longIntegerLongTuple4._4()));
		}
	});
	JavaDStream<Long> res = kvsource.repartition(3).mapPartitions(new FlatMapFunction<Iterator<Tuple2<Long,Tuple3<Integer,Long,byte[]>>>, Long>() {
		@Override
		public Iterable<Long> call(Iterator<Tuple2<Long, Tuple3<Integer, Long, byte[]>>> tuple2Iterator) throws Exception {
			long start = System.currentTimeMillis();
			long received = 0;
			while(tuple2Iterator.hasNext()) {
				received++;
				Tuple2<Long, Tuple3<Integer, Long, byte[]>> el = tuple2Iterator.next();

				if (el._2()._2() != 0) {
					long lat = System.currentTimeMillis() - el._2()._2();
					System.out.println("Latency " + lat + " ms");
				}
			}
			long sinceMs = (System.currentTimeMillis() - start);

			System.out.println("Finished Batch. Processed "+received+" elements in "+sinceMs+" ms.");

			return new Iterable<Long>() {
				@Override
				public Iterator<Long> iterator() {
					return new Iterator<Long>() {
						@Override
						public boolean hasNext() {
							return false;
						}

						@Override
						public Long next() {
							return 1L;
						}

						@Override
						public void remove() {

						}
					};
				}
			};
		}


	/*	@Override
		public Long call(Tuple2<Long, Tuple3<Integer, Long, byte[]>> v1) throws Exception {
			//	System.out.println("Recevied " + v1);
			if (start == 0) {

			}
			received++;
			if (received % logfreq == 0) {

				if (sinceSec == 0) {
					System.out.println("received " + received + " elements since 0");
					return 0L;
				}
				System.out.println("Received " + received + " elements since " + sinceSec + ". " +
						"Elements per second " + received / sinceSec + ", GB received " + ((received * (8 + 4 + 12)) / 1024 / 1024 / 1024));
			}
			if (v1._2()._2() != 0) {
				long lat = System.currentTimeMillis() - v1._2()._2();
				System.out.println("Latency " + lat + " ms");
			}

			return received;
		} */
	});


	//res.print();
	/*res.foreachRDD(new Function2<JavaRDD<Long>, Time, Void>() {
		@Override
		public Void call(JavaRDD<Long> integerJavaRDD, Time t) throws Exception {
			integerJavaRDD.saveAsTextFile("/home/robert/flink-workdir/flink-perf/out/"+t.toString());
			return null;
		}
	}); */
	res.print();
//	res.print();

	ssc.start();
}
 
Example #21
Source File: SparkStreamingFromFlumeToHBaseExample.java    From SparkOnALog with Apache License 2.0
public static void main(String[] args) {
	if (args.length < 5) {
		System.err
				.println("Usage: SparkStreamingFromFlumeToHBaseExample {master} {host} {port} {table} {columnFamily}");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String tableName = args[3];
	String columnFamily = args[4];
	
	Duration batchInterval = new Duration(2000);

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");
	
	final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
	final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
	
	//JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);
	
	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);
	
	JavaPairDStream<String, Integer> lastCounts = flumeStream
			.flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {

				@Override
				public Iterable<String> call(SparkFlumeEvent event)
						throws Exception {
					String bodyString = new String(event.event().getBody()
							.array(), "UTF-8");
					return Arrays.asList(bodyString.split(" "));
				}
			}).map(new PairFunction<String, String, Integer>() {
				@Override
				public Tuple2<String, Integer> call(String str)
						throws Exception {
					return new Tuple2(str, 1);
				}
			}).reduceByKey(new Function2<Integer, Integer, Integer>() {

				@Override
				public Integer call(Integer x, Integer y) throws Exception {
					// TODO Auto-generated method stub
					return x.intValue() + y.intValue();
				}
			});
			
			
			lastCounts.foreach(new Function2<JavaPairRDD<String,Integer>, Time, Void>() {

				@Override
				public Void call(JavaPairRDD<String, Integer> values,
						Time time) throws Exception {
					
					values.foreach(new VoidFunction<Tuple2<String, Integer>> () {

						@Override
						public void call(Tuple2<String, Integer> tuple)
								throws Exception {
							HBaseCounterIncrementor incrementor = 
									HBaseCounterIncrementor.getInstance(broadcastTableName.value(), broadcastColumnFamily.value());
							incrementor.incerment("Counter", tuple._1(), tuple._2());
							System.out.println("Counter:" + tuple._1() + "," + tuple._2());
							
						}} );
					
					return null;
				}});
	
	

	sc.start();

}
 
Example #22
Source File: SparkStreamingFromFlumeToHBaseWindowingExample.java    From SparkOnALog with Apache License 2.0
public static void main(String[] args) {
	if (args.length < 7) {
		System.err
				.println("Usage: SparkStreamingFromFlumeToHBaseWindowingExample {master} {host} {port} {table} {columnFamily} {windowInSeconds} {slideInSeconds}");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String tableName = args[3];
	String columnFamily = args[4];
	int windowInSeconds = Integer.parseInt(args[5]);
	int slideInSeconds = Integer.parseInt(args[6]);
	
	Duration batchInterval = new Duration(2000);
	Duration windowInterval = new Duration(windowInSeconds * 1000);
	Duration slideInterval = new Duration(slideInSeconds * 1000);

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");
	
	final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
	final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
	
	//JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);
	
	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);
	
	
	JavaPairDStream<String, Integer> lastCounts = flumeStream
			.flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {

				@Override
				public Iterable<String> call(SparkFlumeEvent event)
						throws Exception {
					String bodyString = new String(event.event().getBody()
							.array(), "UTF-8");
					return Arrays.asList(bodyString.split(" "));
				}
			}).map(new PairFunction<String, String, Integer>() {
				@Override
				public Tuple2<String, Integer> call(String str)
						throws Exception {
					return new Tuple2(str, 1);
				}
			}).reduceByKeyAndWindow(new Function2<Integer, Integer, Integer>() {

				@Override
				public Integer call(Integer x, Integer y) throws Exception {
					// TODO Auto-generated method stub
					return x.intValue() + y.intValue();
				}
			}, windowInterval, slideInterval);
			
			
			lastCounts.foreach(new Function2<JavaPairRDD<String,Integer>, Time, Void>() {

				@Override
				public Void call(JavaPairRDD<String, Integer> values,
						Time time) throws Exception {
					
					values.foreach(new VoidFunction<Tuple2<String, Integer>> () {

						@Override
						public void call(Tuple2<String, Integer> tuple)
								throws Exception {
							HBaseCounterIncrementor incrementor = 
									HBaseCounterIncrementor.getInstance(broadcastTableName.value(), broadcastColumnFamily.value());
							incrementor.incerment("Counter", tuple._1(), tuple._2());
							System.out.println("Counter:" + tuple._1() + "," + tuple._2());
							
						}} );
					
					return null;
				}});
	
	

	sc.start();

}
 
Example #23
Source File: SparkUnboundedSource.java    From beam with Apache License 2.0 4 votes vote down vote up
@Override
public Duration slideDuration() {
  return parent.slideDuration();
}
 
Example #24
Source File: SparkRunner.java    From jaeger-analytics-java with Apache License 2.0
public static void main(String []args) throws InterruptedException, IOException {
  HTTPServer server = new HTTPServer(Integer.valueOf(getPropOrEnv("PROMETHEUS_PORT", "9111")));

  SparkConf sparkConf = new SparkConf()
      .setAppName("Trace DSL")
      .setMaster(getPropOrEnv("SPARK_MASTER","local[*]"));

  JavaSparkContext sc = new JavaSparkContext(sparkConf);
  JavaStreamingContext ssc = new JavaStreamingContext(sc, new Duration(Integer.parseInt(getPropOrEnv("SPARK_STREAMING_BATCH_DURATION", "5000"))));

  Set<String> topics = Collections.singleton(getPropOrEnv("KAFKA_JAEGER_TOPIC", "jaeger-spans"));
  Map<String, Object> kafkaParams = new HashMap<>();
  kafkaParams.put("bootstrap.servers", getPropOrEnv("KAFKA_BOOTSTRAP_SERVER", "localhost:9092"));
  kafkaParams.put("key.deserializer", StringDeserializer.class);
  kafkaParams.put("value.deserializer", ProtoSpanDeserializer.class);
  // hack to start always from beginning
  kafkaParams.put("group.id", "jaeger-trace-aggregation-" + System.currentTimeMillis());

  if (Boolean.parseBoolean(getPropOrEnv("KAFKA_START_FROM_BEGINNING", "true"))) {
    kafkaParams.put("auto.offset.reset", "earliest");
    kafkaParams.put("enable.auto.commit", false);
    kafkaParams.put("startingOffsets", "earliest");
  }

  JavaInputDStream<ConsumerRecord<String, Span>> messages =
      KafkaUtils.createDirectStream(
          ssc,
          LocationStrategies.PreferConsistent(),
          ConsumerStrategies.Subscribe(topics, kafkaParams));

  JavaPairDStream<String, Span> traceIdSpanTuple = messages.mapToPair(record -> {
    return new Tuple2<>(record.value().traceId, record.value());
  });

 JavaDStream<Trace> tracesStream = traceIdSpanTuple.groupByKey().map(traceIdSpans -> {
   System.out.printf("traceID: %s\n", traceIdSpans._1);
    Iterable<Span> spans = traceIdSpans._2();
    Trace trace = new Trace();
    trace.traceId = traceIdSpans._1();
    trace.spans = StreamSupport.stream(spans.spliterator(), false)
        .collect(Collectors.toList());
    return trace;
  });

  MinimumClientVersion minimumClientVersion = MinimumClientVersion.builder()
      .withJavaVersion(getPropOrEnv("TRACE_QUALITY_JAVA_VERSION", "1.0.0"))
      .withGoVersion(getPropOrEnv("TRACE_QUALITY_GO_VERSION", "2.22.0"))
      .withNodeVersion(getPropOrEnv("TRACE_QUALITY_NODE_VERSION", "3.17.1"))
      .withPythonVersion(getPropOrEnv("TRACE_QUALITY_PYTHON_VERSION", "4.0.0"))
      .build();

  List<ModelRunner> modelRunner = Arrays.asList(
      new TraceHeight(),
      new ServiceDepth(),
      new ServiceHeight(),
      new NetworkLatency(),
      new NumberOfErrors(),
      new DirectDependencies(),
      // trace quality
      minimumClientVersion,
      new HasClientServerSpans(),
      new UniqueSpanId());

  tracesStream.foreachRDD((traceRDD, time) -> {
    traceRDD.foreach(trace -> {
      Graph graph = GraphCreator.create(trace);

      for (ModelRunner model: modelRunner) {
        model.runWithMetrics(graph);
      }
    });
  });

  ssc.start();
  ssc.awaitTermination();
}
 
Example #25
Source File: Flags.java    From SparkToParquet with Apache License 2.0
public Duration getSlideInterval() {
	return slideInterval;
}
 
Example #26
Source File: Flags.java    From SparkToParquet with Apache License 2.0
public Duration getWindowLength() {
	return windowLength;
}
 
Example #27
Source File: KafkaReceiverWordCountJava.java    From Building-Data-Streaming-Applications-with-Apache-Kafka with MIT License
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
 
Example #28
Source File: JavaKafkaWordCount.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  if (args.length < 4) {
    System.err.println("Usage: JavaKafkaWordCount <zkQuorum> <group> <topics> <numThreads>");
    System.exit(1);
  }

  StreamingExamples.setStreamingLogLevels();
  SparkConf sparkConf = new SparkConf().setAppName("JavaKafkaWordCount");
  // Create the context with 2 seconds batch size
  JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, new Duration(2000));

  int numThreads = Integer.parseInt(args[3]);
  Map<String, Integer> topicMap = new HashMap<>();
  String[] topics = args[2].split(",");
  for (String topic: topics) {
    topicMap.put(topic, numThreads);
  }

  JavaPairReceiverInputDStream<String, String> messages =
          KafkaUtils.createStream(jssc, args[0], args[1], topicMap);

  JavaDStream<String> lines = messages.map(new Function<Tuple2<String, String>, String>() {
    @Override
    public String call(Tuple2<String, String> tuple2) {
      return tuple2._2();
    }
  });

  JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
    @Override
    public Iterator<String> call(String x) {
      return Arrays.asList(SPACE.split(x)).iterator();
    }
  });

  JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
    new PairFunction<String, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(String s) {
        return new Tuple2<>(s, 1);
      }
    }).reduceByKey(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer i1, Integer i2) {
        return i1 + i2;
      }
    });

  wordCounts.print();
  jssc.start();
  jssc.awaitTermination();
}
 
Example #29
Source File: JavaStreamingTestExample.java    From SparkDemo with MIT License
public static void main(String[] args) throws Exception {
  if (args.length != 3) {
    System.err.println("Usage: JavaStreamingTestExample " +
      "<dataDir> <batchDuration> <numBatchesTimeout>");
    System.exit(1);
  }

  String dataDir = args[0];
  Duration batchDuration = Seconds.apply(Long.parseLong(args[1]));
  int numBatchesTimeout = Integer.parseInt(args[2]);

  SparkConf conf = new SparkConf().setMaster("local").setAppName("StreamingTestExample");
  JavaStreamingContext ssc = new JavaStreamingContext(conf, batchDuration);

  ssc.checkpoint(Utils.createTempDir(System.getProperty("java.io.tmpdir"), "spark").toString());

  // $example on$
  JavaDStream<BinarySample> data = ssc.textFileStream(dataDir).map(
    new Function<String, BinarySample>() {
      @Override
      public BinarySample call(String line) {
        String[] ts = line.split(",");
        boolean label = Boolean.parseBoolean(ts[0]);
        double value = Double.parseDouble(ts[1]);
        return new BinarySample(label, value);
      }
    });

  StreamingTest streamingTest = new StreamingTest()
    .setPeacePeriod(0)
    .setWindowSize(0)
    .setTestMethod("welch");

  JavaDStream<StreamingTestResult> out = streamingTest.registerStream(data);
  out.print();
  // $example off$

  // Stop processing if test becomes significant or we time out
  timeoutCounter = numBatchesTimeout;

  out.foreachRDD(new VoidFunction<JavaRDD<StreamingTestResult>>() {
    @Override
    public void call(JavaRDD<StreamingTestResult> rdd) {
      timeoutCounter -= 1;

      boolean anySignificant = !rdd.filter(new Function<StreamingTestResult, Boolean>() {
        @Override
        public Boolean call(StreamingTestResult v) {
          return v.pValue() < 0.05;
        }
      }).isEmpty();

      if (timeoutCounter <= 0 || anySignificant) {
        rdd.context().stop();
      }
    }
  });

  ssc.start();
  ssc.awaitTermination();
}
 
Example #30
Source File: SylphKafkaOffset.java    From sylph with Apache License 2.0
@Override
public Duration slideDuration()
{
    return parent.slideDuration();
}