Java Code Examples for org.apache.spark.streaming.api.java.JavaDStream#filter()
The following examples show how to use
org.apache.spark.streaming.api.java.JavaDStream#filter().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: RealtimeTrafficDataProcessor.java From lambda-arch with Apache License 2.0 | 5 votes |
/** * Method to get the vehicles which are in radius of POI and their distance from POI. * * @param nonFilteredIotDataStream original IoT data stream * @param broadcastPOIValues variable containing POI coordinates, route and vehicle types to monitor. */ public void processPOIData( JavaDStream<IoTData> nonFilteredIotDataStream, Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues ) { // Filter by routeId,vehicleType and in POI range JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2()) && iot.getVehicleType().contains(broadcastPOIValues.value()._3()) && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()), Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(), broadcastPOIValues.value()._1().getLongitude(), broadcastPOIValues.value()._1().getRadius()))); // pair with poi JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered.mapToPair( iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1()) ); // Transform to dstream of POITrafficData JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc); // Map Cassandra table column Map<String, String> columnNameMappings = new HashMap<String, String>(); columnNameMappings.put("vehicleId", "vehicleid"); columnNameMappings.put("distance", "distance"); columnNameMappings.put("vehicleType", "vehicletype"); columnNameMappings.put("timeStamp", "timestamp"); // call CassandraStreamingJavaUtil function to save in DB javaFunctions(trafficDStream) .writerBuilder( "traffickeyspace", "poi_traffic", CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings) ) .withConstantTTL(120)//keeping data for 2 minutes .saveToCassandra(); }
Example 2
Source File: IoTTrafficDataProcessor.java From iot-traffic-monitor with Apache License 2.0 | 5 votes |
/** * Method to get the vehicles which are in radius of POI and their distance from POI. * * @param nonFilteredIotDataStream original IoT data stream * @param broadcastPOIValues variable containing POI coordinates, route and vehicle types to monitor. */ public void processPOIData(JavaDStream<IoTData> nonFilteredIotDataStream,Broadcast<Tuple3<POIData, String, String>> broadcastPOIValues) { // Filter by routeId,vehicleType and in POI range JavaDStream<IoTData> iotDataStreamFiltered = nonFilteredIotDataStream .filter(iot -> (iot.getRouteId().equals(broadcastPOIValues.value()._2()) && iot.getVehicleType().contains(broadcastPOIValues.value()._3()) && GeoDistanceCalculator.isInPOIRadius(Double.valueOf(iot.getLatitude()), Double.valueOf(iot.getLongitude()), broadcastPOIValues.value()._1().getLatitude(), broadcastPOIValues.value()._1().getLongitude(), broadcastPOIValues.value()._1().getRadius()))); // pair with poi JavaPairDStream<IoTData, POIData> poiDStreamPair = iotDataStreamFiltered .mapToPair(iot -> new Tuple2<>(iot, broadcastPOIValues.value()._1())); // Transform to dstream of POITrafficData JavaDStream<POITrafficData> trafficDStream = poiDStreamPair.map(poiTrafficDataFunc); // Map Cassandra table column Map<String, String> columnNameMappings = new HashMap<String, String>(); columnNameMappings.put("vehicleId", "vehicleid"); columnNameMappings.put("distance", "distance"); columnNameMappings.put("vehicleType", "vehicletype"); columnNameMappings.put("timeStamp", "timestamp"); // call CassandraStreamingJavaUtil function to save in DB javaFunctions(trafficDStream) .writerBuilder("traffickeyspace", "poi_traffic",CassandraJavaUtil.mapToRow(POITrafficData.class, columnNameMappings)) .withConstantTTL(120)//keeping data for 2 minutes .saveToCassandra(); }
Example 3
Source File: ComputeStreamingResponse.java From incubator-retired-pirk with Apache License 2.0 | 4 votes |
/** * Method to read in the data from an allowed input format, filter, and return a RDD of MapWritable data elements */ @SuppressWarnings("unchecked") public JavaDStream<MapWritable> readData() throws IOException, PIRException { logger.info("Reading data "); Job job = Job.getInstance(); String baseQuery = SystemConfiguration.getProperty("pir.baseQuery"); String jobName = "pirSpark_base_" + baseQuery + "_" + System.currentTimeMillis(); job.setJobName(jobName); job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true); job.getConfiguration().set("query", baseQuery); job.getConfiguration().set("dataSchemaName", qSchema.getDataSchemaName()); job.getConfiguration().set("data.schemas", SystemConfiguration.getProperty("data.schemas")); // Set the inputFormatClass based upon the baseInputFormat property String classString = SystemConfiguration.getProperty("pir.baseInputFormat"); Class<? extends BaseInputFormat<Text,MapWritable>> inputClass; try { inputClass = (Class<? 
extends BaseInputFormat<Text,MapWritable>>) Class.forName(classString); } catch (ClassNotFoundException | ClassCastException e) { throw new PIRException(classString + " cannot be instantiated or does not extend BaseInputFormat", e); } job.setInputFormatClass(inputClass); FileInputFormat.setInputPaths(job, inputData); // Read data from hdfs logger.info("useQueueStream = " + useQueueStream); JavaDStream<MapWritable> mwStream; if (useQueueStream) { Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>(); JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), inputClass, Text.class, MapWritable.class).values() .coalesce(numDataPartitions); rddQueue.add(rddIn); mwStream = jssc.queueStream(rddQueue); } else { JavaPairInputDStream<Text,MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, inputClass); mwStream = inputRDD.transform(new Function<JavaPairRDD<Text,MapWritable>,JavaRDD<MapWritable>>() { private static final long serialVersionUID = 1L; @Override public JavaRDD<MapWritable> call(JavaPairRDD<Text,MapWritable> pair) throws Exception { return pair.values(); } }).repartition(numDataPartitions); } // Filter out by the provided stopListFile entries if (qSchema.getFilter() != null) { return mwStream.filter(new FilterData(accum, bVars)); } return mwStream; }
Example 4
Source File: ComputeStreamingResponse.java From incubator-retired-pirk with Apache License 2.0 | 4 votes |
/** * Method to read in the data from elasticsearch, filter, and return a RDD of MapWritable data elements */ @SuppressWarnings("unchecked") public JavaDStream<MapWritable> readDataES() throws IOException { logger.info("Reading data "); Job job = Job.getInstance(); String jobName = "pirSpark_ES_" + esQuery + "_" + System.currentTimeMillis(); job.setJobName(jobName); job.getConfiguration().set("es.nodes", SystemConfiguration.getProperty("es.nodes")); job.getConfiguration().set("es.port", SystemConfiguration.getProperty("es.port")); job.getConfiguration().set("es.resource", esResource); job.getConfiguration().set("es.query", esQuery); // Read data from hdfs JavaDStream<MapWritable> mwStream; if (useQueueStream) { Queue<JavaRDD<MapWritable>> rddQueue = new LinkedList<>(); JavaRDD<MapWritable> rddIn = jssc.sparkContext().newAPIHadoopRDD(job.getConfiguration(), EsInputFormat.class, Text.class, MapWritable.class).values() .coalesce(numDataPartitions); rddQueue.add(rddIn); mwStream = jssc.queueStream(rddQueue); } else { JavaPairInputDStream<Text,MapWritable> inputRDD = jssc.fileStream(inputData, Text.class, MapWritable.class, EsInputFormat.class); mwStream = inputRDD.transform(new Function<JavaPairRDD<Text,MapWritable>,JavaRDD<MapWritable>>() { private static final long serialVersionUID = 1L; @Override public JavaRDD<MapWritable> call(JavaPairRDD<Text,MapWritable> pair) throws Exception { return pair.values(); } }).repartition(numDataPartitions); } // Filter out by the provided stopListFile entries if (qSchema.getFilter() != null) { return mwStream.filter(new FilterData(accum, bVars)); } else { return mwStream; } }