Java Code Examples for org.apache.spark.api.java.function.Function2

The following are top voted examples showing how to use org.apache.spark.api.java.function.Function2. They are extracted from open source projects and listed by vote count.
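Function2<T1, T2, R> declares a single method, R call(T1 v1, T2 v2), and is the Java-friendly way to pass two-argument functions to operations such as reduce, fold, reduceByKey, reduceByKeyAndWindow and aggregateByKey, all of which appear in the examples below. As a quick orientation, here is a minimal, self-contained sketch (not taken from any of the projects below; the local master and sample data are illustrative assumptions) showing the anonymous-class form and the Java 8 lambda form side by side:

import java.util.Arrays;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;

public class Function2Sketch {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("Function2Sketch").setMaster("local[*]");
        JavaSparkContext sc = new JavaSparkContext(conf);
        JavaRDD<Integer> numbers = sc.parallelize(Arrays.asList(1, 2, 3, 4));

        // Anonymous class: implement call(v1, v2) explicitly.
        int sum1 = numbers.reduce(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer a, Integer b) {
                return a + b;
            }
        });

        // Java 8 lambda: Function2 has a single abstract method, so a lambda
        // (or a cast to Function2, as in Example 1) can be used instead.
        int sum2 = numbers.reduce((a, b) -> a + b);

        System.out.println(sum1 + " == " + sum2); // prints "10 == 10"
        sc.stop();
    }
}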
Example 1
Project: big-data-benchmark   File: SparkWordCount.java   (13 votes)
public static void main(String[] args) {
    if (args.length != 2) {
        System.err.println("Usage:");
        System.err.println("  SparkWordCount <sourceFile> <targetFile>");
        System.exit(1);
    }

    SparkConf conf = new SparkConf()
            .setAppName("Word Count");
    JavaSparkContext sc = new JavaSparkContext(conf);
    JavaRDD<String> textFile = sc.textFile(args[0]);
    JavaRDD<String> words = textFile.flatMap(LineIterator::new);
    JavaPairRDD<String, Long> pairs =
            words.mapToPair(s -> new Tuple2<>(s, 1L));
    JavaPairRDD<String, Long> counts =
            pairs.reduceByKey((Function2<Long, Long, Long>) (a, b) -> a + b);

    System.out.println("Starting task..");
    long t = System.currentTimeMillis();
    counts.saveAsTextFile(args[1] + "_" + t);
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}
 
Example 2
Project: incubator-sdap-mudrod   File: CrawlerDetection.java   (6 votes)
void checkByRateInParallel() throws InterruptedException, IOException {

    JavaRDD<String> userRDD = getUserRDD(this.httpType);
    LOG.info("Original User count: {}", userRDD.count());

    int userCount = 0;
    userCount = userRDD.mapPartitions((FlatMapFunction<Iterator<String>, Integer>) iterator -> {
      ESDriver tmpES = new ESDriver(props);
      tmpES.createBulkProcessor();
      List<Integer> realUserNums = new ArrayList<>();
      while (iterator.hasNext()) {
        String s = iterator.next();
        Integer realUser = checkByRate(tmpES, s);
        realUserNums.add(realUser);
      }
      tmpES.destroyBulkProcessor();
      tmpES.close();
      return realUserNums.iterator();
    }).reduce((Function2<Integer, Integer, Integer>) (a, b) -> a + b);

    LOG.info("User count: {}", Integer.toString(userCount));
  }
 
Example 3
Project: Sparkathon   File: PassingFunctions.java   (6 votes)
public static void main(String[] args) {
    SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local");
    JavaSparkContext sc = new JavaSparkContext(conf);

    class GetLength implements Function<String, Integer> {
        public Integer call(String s) {
            return s.length();
        }
    }

    class Sum implements Function2<Integer, Integer, Integer> {
        public Integer call(Integer a, Integer b) {
            return a + b;
        }
    }

    JavaRDD<String> lines = sc.textFile("src/main/resources/compressed.gz");
    JavaRDD<Integer> lineLengths = lines.map(new GetLength());
    // Printing an RDD
    lineLengths.foreach(x-> System.out.println(x));

    int totalLength = lineLengths.reduce(new Sum());

    System.out.println(totalLength);
}
 
Example 4
Project: mudrod   File: CrawlerDetection.java   (6 votes)
void checkByRateInParallel() throws InterruptedException, IOException {

    JavaRDD<String> userRDD = getUserRDD(this.httpType);
    LOG.info("Original User count: {}", userRDD.count());

    int userCount = 0;
    userCount = userRDD.mapPartitions((FlatMapFunction<Iterator<String>, Integer>) iterator -> {
      ESDriver tmpES = new ESDriver(props);
      tmpES.createBulkProcessor();
      List<Integer> realUserNums = new ArrayList<>();
      while (iterator.hasNext()) {
        String s = iterator.next();
        Integer realUser = checkByRate(tmpES, s);
        realUserNums.add(realUser);
      }
      tmpES.destroyBulkProcessor();
      tmpES.close();
      return realUserNums.iterator();
    }).reduce((Function2<Integer, Integer, Integer>) (a, b) -> a + b);

    LOG.info("User count: {}", Integer.toString(userCount));
  }
 
Example 5
Project: spark-newsreel-recommender   File: JavaMain.java   (6 votes)
/**
 * Starts the Spark context given a valid configuration and runs a test
 * map-reduce job so that all Spark workers can fetch their dependencies in advance.
 */
private static void startContext(int numOfWorkers) {
    JavaSparkContext sc = SharedService.getContext();

    for (int i=0; i<numOfWorkers;i++) {
        final int threadnumber = i;
        new Thread(){
            @Override
            public void run() {
                ImmutableList<Integer> range =
                        ContiguousSet.create(Range.closed(1, 5), DiscreteDomain.integers()).asList();

                JavaRDD<Integer> data = sc.parallelize(range).repartition(numOfWorkers);
                Integer result = data.reduce((Function2<Integer, Integer, Integer>)
                        (v1, v2) -> v1 + v2);
                if (result == 15)
                    log.info("successfully tested worker"+threadnumber);
                else
                    log.warn("worker "+threadnumber+" yielded a false result: "
                            +result+" (should be 15)");
            }
        }.start();
    }
}
 
Example 6
Project: vn.vitk   File: Tokenizer.java   (6 votes)
/**
 * Counts the number of non-space characters in this data set. This utility method 
 * is used to check the tokenization result.
 * @param lines
 * @return number of characters
 */
int numCharacters(JavaRDD<String> lines) {
	JavaRDD<Integer> lengths = lines.map(new Function<String, Integer>() {
		private static final long serialVersionUID = -2189399343462982586L;
		@Override
		public Integer call(String line) throws Exception {
			line = line.replaceAll("[\\s_]+", "");
			return line.length();
		}
	});
	return lengths.reduce(new Function2<Integer, Integer, Integer>() {
		private static final long serialVersionUID = -8438072946884289401L;

		@Override
		public Integer call(Integer e0, Integer e1) throws Exception {
			return e0 + e1;
		}
	});
}
 
Example 7
Project: kylin   File: SparkCubingByLayer.java   (6 votes)
private Long getRDDCountSum(JavaPairRDD<ByteArray, Object[]> rdd, final int countMeasureIndex) {
    final ByteArray ONE = new ByteArray();
    Long count = rdd.mapValues(new Function<Object[], Long>() {
        @Override
        public Long call(Object[] objects) throws Exception {
            return (Long) objects[countMeasureIndex];
        }
    }).reduce(new Function2<Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>, Tuple2<ByteArray, Long>>() {
        @Override
        public Tuple2<ByteArray, Long> call(Tuple2<ByteArray, Long> longTuple2, Tuple2<ByteArray, Long> longTuple22)
                throws Exception {
            return new Tuple2<>(ONE, longTuple2._2() + longTuple22._2());
        }
    })._2();
    return count;
}
 
Example 8
Project: SHMACK   File: WordCount.java   (6 votes)
@SuppressWarnings("serial")
@Override
public SortedCounts<String> execute(final JavaSparkContext spark) {
	final JavaRDD<String> textFile = spark.textFile(inputFile);
	final JavaRDD<String> words = textFile.flatMap(new FlatMapFunction<String, String>() {
		@Override
		public Iterable<String> call(final String rawJSON) throws TwitterException {
			final Status tweet = TwitterObjectFactory.createStatus(rawJSON);
			String text = tweet.getText();
			return Arrays.asList(text.split(" "));
		}
	});
	final JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
		@Override
		public Tuple2<String, Integer> call(final String s) {
			return new Tuple2<String, Integer>(s.toLowerCase(), 1);
		}
	});
	final JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
		@Override
		public Integer call(final Integer a, final Integer b) {
			return a + b;
		}
	});
	return SortedCounts.create(counts);
}
 
Example 9
Project: GeoSpark   File: PolygonRDD.java   (6 votes)
/**
 * Polygon union.
 *
 * @return the polygon
 */
public Polygon PolygonUnion() {
	Polygon result = this.rawSpatialRDD.reduce(new Function2<Polygon, Polygon, Polygon>() {
        public Polygon call(Polygon v1, Polygon v2) {
            //Reduce precision in JTS to avoid TopologyException
            PrecisionModel pModel = new PrecisionModel();
            GeometryPrecisionReducer pReducer = new GeometryPrecisionReducer(pModel);
            Geometry p1 = pReducer.reduce(v1);
            Geometry p2 = pReducer.reduce(v2);
            //Union two polygons
            Geometry polygonGeom = p1.union(p2);
            Coordinate[] coordinates = polygonGeom.getCoordinates();
            ArrayList<Coordinate> coordinateList = new ArrayList<Coordinate>(Arrays.asList(coordinates));
            Coordinate lastCoordinate = coordinateList.get(0);
            coordinateList.add(lastCoordinate);
            Coordinate[] coordinatesClosed = new Coordinate[coordinateList.size()];
            coordinatesClosed = coordinateList.toArray(coordinatesClosed);
            GeometryFactory fact = new GeometryFactory();
            LinearRing linear = new GeometryFactory().createLinearRing(coordinatesClosed);
            Polygon polygon = new Polygon(linear, null, fact);
            //Return the two polygon union result
            return polygon;
        }
    });
    return result;
}
 
Example 10
Project: GeoSpark   File: JoinQuery.java   (6 votes)
private static <U extends Geometry, T extends Geometry> JavaPairRDD<U, Long> countGeometriesByKey(JavaPairRDD<U, T> input) {
    return input.aggregateByKey(
        0L,
        new Function2<Long, T, Long>() {

            @Override
            public Long call(Long count, T t) throws Exception {
                return count + 1;
            }
        },
        new Function2<Long, Long, Long>() {

            @Override
            public Long call(Long count1, Long count2) throws Exception {
                return count1 + count2;
            }
        });
}
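The aggregateByKey call above takes two Function2 instances with different roles: a sequence function that folds one value into the running per-partition accumulator, and a combiner that merges two accumulators produced by different partitions. Here is a hedged, generic sketch of the same pattern (it assumes an existing JavaSparkContext sc plus the usual scala.Tuple2 and Spark Java API imports; the keys and values are illustrative):

// Count values per key with a Long accumulator, mirroring countGeometriesByKey above.
JavaPairRDD<String, String> pairs = sc.parallelizePairs(Arrays.asList(
        new Tuple2<>("a", "x"), new Tuple2<>("a", "y"), new Tuple2<>("b", "z")));

JavaPairRDD<String, Long> countsPerKey = pairs.aggregateByKey(
        0L,
        // sequence function: fold one value into the per-partition accumulator
        new Function2<Long, String, Long>() {
            @Override
            public Long call(Long count, String value) {
                return count + 1;
            }
        },
        // combiner: merge accumulators coming from different partitions
        new Function2<Long, Long, Long>() {
            @Override
            public Long call(Long c1, Long c2) {
                return c1 + c2;
            }
        });
// countsPerKey.collectAsMap() would yield {a=2, b=1}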
 
Example 11
Project: GeoSpark   File: ShapefileRDD.java   (6 votes)
/**
 * Reads and merges the bounding boxes of all shapefiles under the given input path; returns null if none are found.
 */
public BoundBox getBoundBox(JavaSparkContext sc, String inputPath){
    // read bound boxes into memory
    JavaPairRDD<Long, BoundBox>  bounds = sc.newAPIHadoopFile(
            inputPath,
            BoundaryInputFormat.class,
            Long.class,
            BoundBox.class,
            sc.hadoopConfiguration()
    );
    // merge all into one
    bounds = bounds.reduceByKey(new Function2<BoundBox, BoundBox, BoundBox>(){
        @Override
        public BoundBox call(BoundBox box1, BoundBox box2) throws Exception {
            return BoundBox.mergeBoundBox(box1, box2);
        }
    });
    // if any bound boxes were read, return the merged result
    if(bounds.count() > 0){
        return new BoundBox(bounds.collect().get(0)._2());
    }else return null;
}
 
Example 12
Project: GeoSpark   File: ShapefileReader.java   (6 votes)
/**
 * Reads and merges the bounding boxes of all shapefiles under the given input path; returns null if none are found.
 */
public static BoundBox readBoundBox(JavaSparkContext sc, String inputPath){
    // read bound boxes into memory
    JavaPairRDD<Long, BoundBox>  bounds = sc.newAPIHadoopFile(
            inputPath,
            BoundaryInputFormat.class,
            Long.class,
            BoundBox.class,
            sc.hadoopConfiguration()
    );
    // merge all into one
    bounds = bounds.reduceByKey(new Function2<BoundBox, BoundBox, BoundBox>(){
        @Override
        public BoundBox call(BoundBox box1, BoundBox box2) throws Exception {
            return BoundBox.mergeBoundBox(box1, box2);
        }
    });
    // if any bound boxes were read, return the merged result
    if(bounds.count() > 0){
        return new BoundBox(bounds.collect().get(0)._2());
    }else return null;
}
 
Example 13
Project: stratio-connector-deep   File: QueryExecutorTest.java   (6 votes)
@Before
public void before() throws Exception {

    queryExecutor = new QueryExecutor(deepContext, deepConnectionHandler);

    // Stubs
    when(deepConnectionHandler.getConnection(CLUSTERNAME_CONSTANT.getName())).thenReturn(deepConnection);
    when(deepConnection.getExtractorConfig()).thenReturn(extractorConfig);
    when(extractorConfig.clone()).thenReturn(extractorConfig);
    when(deepContext.createJavaRDD(any(ExtractorConfig.class))).thenReturn(singleRdd);
    when(deepContext.createHDFSRDD(any(ExtractorConfig.class))).thenReturn(rdd);
    when(rdd.toJavaRDD()).thenReturn(singleRdd);
    when(singleRdd.collect()).thenReturn(generateListOfCells(3));
    when(singleRdd.filter(any(Function.class))).thenReturn(singleRdd);
    when(singleRdd.map(any(FilterColumns.class))).thenReturn(singleRdd);
    when(singleRdd.mapToPair(any(PairFunction.class))).thenReturn(pairRdd);
    when(singleRdd.keyBy(any(Function.class))).thenReturn(pairRdd);
    when(pairRdd.join(pairRdd)).thenReturn(joinedRdd);
    when(pairRdd.reduceByKey(any(Function2.class))).thenReturn(pairRdd);
    when(pairRdd.map(any(Function.class))).thenReturn(singleRdd);
    when(joinedRdd.map(any(JoinCells.class))).thenReturn(singleRdd);


}
 
Example 14
Project: incubator-blur   File: BlurBulkLoadSparkProcessor.java   (6 votes)
@Override
protected Function2<JavaPairRDD<String, RowMutation>, Time, Void> getFunction() {
  return new Function2<JavaPairRDD<String, RowMutation>, Time, Void>() {
    // Blur Thrift Client
    @Override
    public Void call(JavaPairRDD<String, RowMutation> rdd, Time time) throws Exception {
      Iface client = getBlurClient();
      for (Tuple2<String, RowMutation> tuple : rdd.collect()) {
        if (tuple != null) {
          try {
            RowMutation rm = tuple._2;
            // Index using enqueue mutate call
            client.enqueueMutate(rm);
          } catch (Exception ex) {
            LOG.error("Unknown error while trying to call enqueueMutate.", ex);
            throw ex;
          }
        }
      }
      return null;
    }
  };
}
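The Function2<JavaPairRDD<String, RowMutation>, Time, Void> returned above matches the foreachRDD signature of older Spark Streaming releases (1.x), which expected a Void-returning Function2. Newer releases expose foreachRDD with VoidFunction2 instead, so ported code takes roughly the following hedged shape (the stream type and body are illustrative assumptions):

import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.function.VoidFunction2;
import org.apache.spark.streaming.Time;
import org.apache.spark.streaming.api.java.JavaDStream;

class ForeachRddSketch {
    // Hedged sketch: VoidFunction2<JavaRDD<T>, Time> replaces
    // Function2<JavaRDD<T>, Time, Void> for foreachRDD in newer Spark versions.
    static void attach(JavaDStream<String> stream) {
        stream.foreachRDD(new VoidFunction2<JavaRDD<String>, Time>() {
            @Override
            public void call(JavaRDD<String> rdd, Time time) {
                System.out.println("Batch at " + time + " contains " + rdd.count() + " records");
            }
        });
    }
}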
 
Example 15
Project: AbstractRendering   File: GlyphsetRDD.java   (6 votes)
@Override public Rectangle2D bounds() {
	final JavaRDD<Rectangle2D> rects;
	if (partitions) {
		rects = base.mapPartitions(
			new FlatMapFunction<Iterator<Glyph<G,I>>,Rectangle2D>() {
				public Iterable<Rectangle2D> call(Iterator<Glyph<G, I>> glyphs) throws Exception {
					ArrayList<Glyph<G,I>> glyphList = Lists.newArrayList(new IterableIterator<>(glyphs));
					return Arrays.asList(Util.bounds(glyphList));
				}});
	} else {
		rects = base.map(new Function<Glyph<G,I>,Rectangle2D>() {
			public Rectangle2D call(Glyph<G,I> glyph) throws Exception {
				return Util.boundOne(glyph.shape());
			}});
	}
	
	return rects.reduce(new Function2<Rectangle2D, Rectangle2D,Rectangle2D>() {
		public Rectangle2D call(Rectangle2D left, Rectangle2D right) throws Exception {
			return Util.bounds(left, right);
		}
	});

}
 
Example 16
Project: incubator-sdap-mudrod   File: SessionExtractor.java   (5 votes)
/**
 * bulidDataQueryRDD: converts a click stream list into (dataset short name, query list) pairs.
 *
 * @param clickstreamRDD:
 *          click stream data
 * @param downloadWeight:
 *          weight of download behavior
 * @return JavaPairRDD, key is short name of data set, and values are queries
 */
public JavaPairRDD<String, List<String>> bulidDataQueryRDD(JavaRDD<ClickStream> clickstreamRDD, int downloadWeight) {
  return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, List<String>> call(ClickStream click) throws Exception {
      List<String> query = new ArrayList<>();
      // important! download behavior is given a higher weight than viewing behavior
      boolean download = click.isDownload();
      int weight = 1;
      if (download) {
        weight = downloadWeight;
      }
      for (int i = 0; i < weight; i++) {
        query.add(click.getKeyWords());
      }

      return new Tuple2<>(click.getViewDataset(), query);
    }
  }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public List<String> call(List<String> v1, List<String> v2) throws Exception {
      List<String> list = new ArrayList<>();
      list.addAll(v1);
      list.addAll(v2);
      return list;
    }
  });
}
 
Example 17
Project: incubator-sdap-mudrod   File: SessionExtractor.java   (5 votes)
public JavaPairRDD<String, Double> bulidUserItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
  return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, Double>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, Double> call(ClickStream click) throws Exception {
      double rate = 1;
      boolean download = click.isDownload();
      if (download) {
        rate = 2;
      }

      String sessionID = click.getSessionID();
      String user = sessionID.split("@")[0];

      return new Tuple2<>(user + "," + click.getViewDataset(), rate);
    }
  }).reduceByKey(new Function2<Double, Double, Double>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Double call(Double v1, Double v2) throws Exception {
      return v1 >= v2 ? v1 : v2;

    }
  });
}
 
Example 18
Project: incubator-sdap-mudrod   File: SessionGenerator.java   (5 votes)
public void genSessionByRefererInParallel(int timeThres) throws InterruptedException, IOException {

    JavaRDD<String> userRDD = getUserRDD(this.cleanupType);

    int sessionCount = 0;
    sessionCount = userRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
      /**
       *
       */
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();
        List<Integer> sessionNums = new ArrayList<>();
        while (arg0.hasNext()) {
          String s = arg0.next();
          Integer sessionNum = genSessionByReferer(tmpES, s, timeThres);
          sessionNums.add(sessionNum);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return sessionNums.iterator();
      }
    }).reduce(new Function2<Integer, Integer, Integer>() {
      /**
       *
       */
      private static final long serialVersionUID = 1L;

      @Override
      public Integer call(Integer a, Integer b) {
        return a + b;
      }
    });

    LOG.info("Initial Session count: {}", Integer.toString(sessionCount));
  }
 
Example 19
Project: incubator-sdap-mudrod   File: SessionStatistic.java   (5 votes)
public void processSessionInParallel() throws InterruptedException, IOException {

    List<String> sessions = this.getSessions();
    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);

    int sessionCount = 0;
    sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
      @Override
      public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();
        List<Integer> sessionNums = new ArrayList<Integer>();
        sessionNums.add(0);
        while (arg0.hasNext()) {
          String s = arg0.next();
          Integer sessionNum = processSession(tmpES, s);
          sessionNums.add(sessionNum);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return sessionNums.iterator();
      }
    }).reduce(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer a, Integer b) {
        return a + b;
      }
    });

    LOG.info("Final Session count: {}", Integer.toString(sessionCount));
  }
 
Example 20
Project: incubator-sdap-mudrod   File: MetadataExtractor.java   (5 votes)
/**
 * buildMetadataRDD: Convert metadata list to JavaPairRDD
 *
 * @param es        an Elasticsearch client node instance
 * @param sc        spark context
 * @param index     index name of log processing application
 * @param metadatas metadata list
 * @return PairRDD, in each pair key is metadata short name and value is term
 * list extracted from metadata variables.
 */
protected JavaPairRDD<String, List<String>> buildMetadataRDD(ESDriver es, JavaSparkContext sc, String index, List<PODAACMetadata> metadatas) {
  JavaRDD<PODAACMetadata> metadataRDD = sc.parallelize(metadatas);
  JavaPairRDD<String, List<String>> metadataTermsRDD = metadataRDD.mapToPair(new PairFunction<PODAACMetadata, String, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, List<String>> call(PODAACMetadata metadata) throws Exception {
      return new Tuple2<String, List<String>>(metadata.getShortName(), metadata.getAllTermList());
    }
  }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public List<String> call(List<String> v1, List<String> v2) throws Exception {
      List<String> list = new ArrayList<String>();
      list.addAll(v1);
      list.addAll(v2);
      return list;
    }
  });

  return metadataTermsRDD;
}
 
Example 21
Project: spark-streaming-direct-kafka   File: Functions.java   (5 votes)
/**
 * @return a function that returns the second of two values
 * @param <T> element type
 */
public static <T> Function2<T,T,T> last() {
    return new Function2<T,T,T>() {
        @Override
        public T call(T current, T next) {
            return next;
        }
    };
}
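A helper like last() is typically handed to reduceByKey when only one value per key should survive, for example the most recently seen record. A hedged usage sketch (it assumes an existing JavaSparkContext sc and the Functions class above; note that which element is "last" depends on how Spark orders and combines partitions, so this only makes sense when that order is controlled):

JavaPairRDD<String, String> events = sc.parallelizePairs(Arrays.asList(
        new Tuple2<>("sensor-1", "old"),
        new Tuple2<>("sensor-1", "new"),
        new Tuple2<>("sensor-2", "only")));

// Keep a single value per key by always preferring the second argument.
JavaPairRDD<String, String> latest = events.reduceByKey(Functions.<String>last());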
 
Example 22
Project: gspark   File: JavaLogQuery.java   (5 votes)
public static void main(String[] args) {

    SparkConf sparkConf = new SparkConf().setAppName("JavaLogQuery");
    JavaSparkContext jsc = new JavaSparkContext(sparkConf);

    JavaRDD<String> dataSet = (args.length == 1) ? jsc.textFile(args[0]) : jsc.parallelize(exampleApacheLogs);

    JavaPairRDD<Tuple3<String, String, String>, Stats> extracted = dataSet.mapToPair(new PairFunction<String, Tuple3<String, String, String>, Stats>() {
      @Override
      public Tuple2<Tuple3<String, String, String>, Stats> call(String s) {
        return new Tuple2<Tuple3<String, String, String>, Stats>(extractKey(s), extractStats(s));
      }
    });

    JavaPairRDD<Tuple3<String, String, String>, Stats> counts = extracted.reduceByKey(new Function2<Stats, Stats, Stats>() {
      @Override
      public Stats call(Stats stats, Stats stats2) {
        return stats.merge(stats2);
      }
    });

    List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
    for (Tuple2<?,?> t : output) {
      System.out.println(t._1() + "\t" + t._2());
    }
    jsc.stop();
  }
 
Example 23
Project: mudrod   File: SessionExtractor.java   (5 votes)
/**
 * bulidDataQueryRDD: converts a click stream list into (dataset short name, query list) pairs.
 *
 * @param clickstreamRDD:
 *          click stream data
 * @param downloadWeight:
 *          weight of download behavior
 * @return JavaPairRDD, key is short name of data set, and values are queries
 */
public JavaPairRDD<String, List<String>> bulidDataQueryRDD(JavaRDD<ClickStream> clickstreamRDD, int downloadWeight) {
  return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, List<String>> call(ClickStream click) throws Exception {
      List<String> query = new ArrayList<>();
      // important! download behavior is given a higher weight than viewing behavior
      boolean download = click.isDownload();
      int weight = 1;
      if (download) {
        weight = downloadWeight;
      }
      for (int i = 0; i < weight; i++) {
        query.add(click.getKeyWords());
      }

      return new Tuple2<>(click.getViewDataset(), query);
    }
  }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public List<String> call(List<String> v1, List<String> v2) throws Exception {
      List<String> list = new ArrayList<>();
      list.addAll(v1);
      list.addAll(v2);
      return list;
    }
  });
}
 
Example 24
Project: mudrod   File: SessionExtractor.java   (5 votes)
public JavaPairRDD<String, Double> bulidUserItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
  return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, Double>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, Double> call(ClickStream click) throws Exception {
      double rate = 1;
      boolean download = click.isDownload();
      if (download) {
        rate = 2;
      }

      String sessionID = click.getSessionID();
      String user = sessionID.split("@")[0];

      return new Tuple2<>(user + "," + click.getViewDataset(), rate);
    }
  }).reduceByKey(new Function2<Double, Double, Double>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Double call(Double v1, Double v2) throws Exception {
      return v1 >= v2 ? v1 : v2;

    }
  });
}
 
Example 25
Project: mudrod   File: SessionGenerator.java   (5 votes)
public void genSessionByRefererInParallel(int timeThres) throws InterruptedException, IOException {

    JavaRDD<String> userRDD = getUserRDD(this.cleanupType);

    int sessionCount = 0;
    sessionCount = userRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
      /**
       *
       */
      private static final long serialVersionUID = 1L;

      @Override
      public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();
        List<Integer> sessionNums = new ArrayList<>();
        while (arg0.hasNext()) {
          String s = arg0.next();
          Integer sessionNum = genSessionByReferer(tmpES, s, timeThres);
          sessionNums.add(sessionNum);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return sessionNums.iterator();
      }
    }).reduce(new Function2<Integer, Integer, Integer>() {
      /**
       *
       */
      private static final long serialVersionUID = 1L;

      @Override
      public Integer call(Integer a, Integer b) {
        return a + b;
      }
    });

    LOG.info("Initial Session count: {}", Integer.toString(sessionCount));
  }
 
Example 26
Project: mudrod   File: MetadataExtractor.java   (5 votes)
/**
 * buildMetadataRDD: Convert metadata list to JavaPairRDD
 *
 * @param es        an Elasticsearch client node instance
 * @param sc        spark context
 * @param index     index name of log processing application
 * @param metadatas metadata list
 * @return PairRDD, in each pair key is metadata short name and value is term
 * list extracted from metadata variables.
 */
protected JavaPairRDD<String, List<String>> buildMetadataRDD(ESDriver es, JavaSparkContext sc, String index, List<PODAACMetadata> metadatas) {
  JavaRDD<PODAACMetadata> metadataRDD = sc.parallelize(metadatas);
  JavaPairRDD<String, List<String>> metadataTermsRDD = metadataRDD.mapToPair(new PairFunction<PODAACMetadata, String, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public Tuple2<String, List<String>> call(PODAACMetadata metadata) throws Exception {
      return new Tuple2<String, List<String>>(metadata.getShortName(), metadata.getAllTermList());
    }
  }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
    /**
     *
     */
    private static final long serialVersionUID = 1L;

    @Override
    public List<String> call(List<String> v1, List<String> v2) throws Exception {
      List<String> list = new ArrayList<String>();
      list.addAll(v1);
      list.addAll(v2);
      return list;
    }
  });

  return metadataTermsRDD;
}
 
Example 27
Project: incubator-pulsar   File: SparkStreamingPulsarReceiverExample.java   (5 votes)
public static void main(String[] args) throws InterruptedException {
    SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("pulsar-spark");
    JavaStreamingContext jssc = new JavaStreamingContext(conf, Durations.seconds(5));

    ClientConfiguration clientConf = new ClientConfiguration();
    ConsumerConfiguration consConf = new ConsumerConfiguration();
    String url = "pulsar://localhost:6650/";
    String topic = "persistent://sample/standalone/ns1/topic1";
    String subs = "sub1";

    JavaReceiverInputDStream<byte[]> msgs = jssc
            .receiverStream(new SparkStreamingPulsarReceiver(clientConf, consConf, url, topic, subs));

    JavaDStream<Integer> isContainingPulsar = msgs.flatMap(new FlatMapFunction<byte[], Integer>() {
        @Override
        public Iterator<Integer> call(byte[] msg) {
            return Arrays.asList(((new String(msg)).indexOf("Pulsar") != -1) ? 1 : 0).iterator();
        }
    });

    JavaDStream<Integer> numOfPulsar = isContainingPulsar.reduce(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    numOfPulsar.print();

    jssc.start();
    jssc.awaitTermination();
}
 
Example 28
Project: rheem   File: FunctionCompiler.java   (5 votes)
/**
 * Creates an appropriate {@link Function2} for deploying the given {@link ReduceDescriptor}
 * on Apache Spark.
 */
public <T> Function2<T, T, T> compile(ReduceDescriptor<T> descriptor,
                                      SparkExecutionOperator operator,
                                      OptimizationContext.OperatorContext operatorContext,
                                      ChannelInstance[] inputs) {
    final BinaryOperator<T> javaImplementation = descriptor.getJavaImplementation();
    if (javaImplementation instanceof FunctionDescriptor.ExtendedSerializableBinaryOperator) {
        return new ExtendedBinaryOperatorAdapter<>(
                (FunctionDescriptor.ExtendedSerializableBinaryOperator<T>) javaImplementation,
                new SparkExecutionContext(operator, inputs, operatorContext.getOptimizationContext().getIterationNumber())
        );
    } else {
        return new BinaryOperatorAdapter<>(javaImplementation);
    }
}
 
Example 29
Project: StreamBench   File: StreamKMeans.java   (5 votes)
public static void main(String[] args) {

//        String inputFile = StreamKMeans.class.getClassLoader().getResource("centroids.txt").getFile();
        SparkConf sparkConf = new SparkConf().setMaster("spark://master:7077").setAppName("JavaKMeans");

        JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.milliseconds(1000));

        HashSet<String> topicsSet = new HashSet<>();
        topicsSet.add("KMeans");
        HashMap<String, String> kafkaParams = new HashMap<>();
//        kafkaParams.put("metadata.broker.list", "kafka1:9092,kafka2:9092,kafka3:9092");
        kafkaParams.put("metadata.broker.list", "localhost:9092");
        kafkaParams.put("auto.offset.reset", "largest");
        kafkaParams.put("zookeeper.connect", "zoo1:2181");
        kafkaParams.put("group.id", "spark");

        // Create direct kafka stream with brokers and topics
        JavaPairInputDStream<String, String> lines = KafkaUtils.createDirectStream(
                jssc,
                String.class,
                String.class,
                StringDecoder.class,
                StringDecoder.class,
                kafkaParams,
                topicsSet
        );

        JavaDStream<Vector> points = lines.map(new ParseKafkaString()).map(new ParsePoint());

        Vector[] initCentroids = loadInitCentroids();
        double[] weights = new double[96];
        for (int i = 0; i < 96; i++) {
            weights[i] = 1.0 / 96;
        }

        final StreamingKMeans model = new StreamingKMeans()
                .setK(96)
                .setDecayFactor(0)
                .setInitialCenters(initCentroids, weights);

        model.trainOn(points);

        points.foreachRDD(new Function2<JavaRDD<Vector>, Time, Void>() {
            @Override
            public Void call(JavaRDD<Vector> vectorJavaRDD, Time time) throws Exception {
                Vector[] vector = model.latestModel().clusterCenters();
                for (int i = 0; i < vector.length; i++) {
                    logger.warn(vector[i].toArray()[0] + "\t" + vector[i].toArray()[1]);
                }
                return null;
            }
        });

        jssc.addStreamingListener(new PerformanceStreamingListener());
        jssc.start();
        jssc.awaitTermination();
    }
 
Example 30
Project: kite-apps   File: SparkDatasets.java   (5 votes)
/**
 * Save all RDDs in the given DStream to the given view.
 * @param dstream
 * @param view
 */
public static <T> void save(JavaDStream<T> dstream, final View<T> view) {

  final String uri = view.getUri().toString();

  dstream.foreachRDD(new Function2<JavaRDD<T>, Time, Void>() {
    @Override
    public Void call(JavaRDD<T> rdd, Time time) throws Exception {

      save(rdd, uri);

      return null;
    }
  });
}
 
Example 31
Project: learning-spark   File: SimpleAggregation.java   (5 votes)
public static void main(String[] args) {
  SparkConf sparkConf = new SparkConf().setAppName("Ad Provider Aggregation");
  JavaSparkContext sparkContext = new JavaSparkContext(sparkConf);


  // Read the source file
  JavaRDD<String> adInput = sparkContext.textFile(args[0]);

  // Parse each CSV line and map it to an (ad provider, 1) pair
  JavaPairRDD<String, Integer> adsRDD = adInput.mapToPair(new PairFunction<String, String, Integer>() {
    @Override
    public Tuple2<String, Integer> call(String s) {
      CSVReader csvReader = new CSVReader(new StringReader(s));
      // let's skip error handling here for simplicity
      try {
        String[] adDetails = csvReader.readNext();
        return new Tuple2<String, Integer>(adDetails[1], 1);
      } catch (IOException e) {
        e.printStackTrace();
        // noop
      }
      // Need to explore more on error handling
      return new Tuple2<String, Integer>("-1", 1);
    }
  });

  JavaPairRDD<String, Integer> adsAggregated = adsRDD.reduceByKey(new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer integer, Integer integer2) throws Exception {
      return integer + integer2;
    }
  });

  adsAggregated.saveAsTextFile("./output/ads-aggregated-provider");
}
 
Example 32
Project: adam-plugins   File: JavaCountAlignments.java   (5 votes)
@Override
public JavaRDD<Tuple2<String, Integer>> run(final JavaADAMContext ac, final JavaRDD<AlignmentRecord> recs, final String args) {

    JavaRDD<String> contigNames = recs.map(new Function<AlignmentRecord, String>() {
            @Override
            public String call(final AlignmentRecord rec) {
                return rec.getReadMapped() ? rec.getContigName() : "unmapped";
            }
        });

    JavaPairRDD<String, Integer> counts = contigNames.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(final String contigName) {
                return new Tuple2<String, Integer>(contigName, Integer.valueOf(1));
            }
        });

    JavaPairRDD<String, Integer> reducedCounts = counts.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(final Integer value0, final Integer value1) {
                return Integer.valueOf(value0.intValue() + value1.intValue());
            }
        });

    // todo:  seems like there should be a more direct way
    return JavaRDD.fromRDD(reducedCounts.rdd(), null);
}
 
Example 33
Project: adam-plugins   File: JavaCountAlignmentsPerRead.java   (5 votes)
@Override
public JavaRDD<Tuple2<String, Integer>> run(final JavaADAMContext ac, final JavaRDD<AlignmentRecord> recs, final String args) {

    JavaRDD<String> contigNames = recs.map(new Function<AlignmentRecord, String>() {
            @Override
            public String call(final AlignmentRecord rec) {
                return rec.getReadMapped() ? rec.getReadName() : "unmapped";
            }
        });

    JavaPairRDD<String, Integer> counts = contigNames.mapToPair(new PairFunction<String, String, Integer>() {
            @Override
            public Tuple2<String, Integer> call(final String readName) {
                return new Tuple2<String, Integer>(readName, Integer.valueOf(1));
            }
        });

    JavaPairRDD<String, Integer> reducedCounts = counts.reduceByKey(new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(final Integer value0, final Integer value1) {
                return Integer.valueOf(value0.intValue() + value1.intValue());
            }
        });

    // todo:  seems like there should be a more direct way
    return JavaRDD.fromRDD(reducedCounts.rdd(), null);
}
 
Example 34
Project: DDF   File: MLSupporter.java   (5 votes)
@Override
public long[][] getConfusionMatrix(IModel model, double threshold) throws DDFException {
  SparkDDF ddf = (SparkDDF) this.getDDF();
  SparkDDF predictions = (SparkDDF) ddf.ML.applyModel(model, true, false);

  // Now get the underlying RDD to compute
  JavaRDD<double[]> yTrueYPred = (JavaRDD<double[]>) predictions.getJavaRDD(double[].class);
  final double threshold1 = threshold;
  long[] cm = yTrueYPred.map(new Function<double[], long[]>() {
    @Override
    public long[] call(double[] params) {
      byte isPos = toByte(params[0] > threshold1);
      byte predPos = toByte(params[1] > threshold1);

      long[] result = new long[] { 0L, 0L, 0L, 0L };
      result[isPos << 1 | predPos] = 1L;
      return result;
    }
  }).reduce(new Function2<long[], long[], long[]>() {
    @Override
    public long[] call(long[] a, long[] b) {
      return new long[] { a[0] + b[0], a[1] + b[1], a[2] + b[2], a[3] + b[3] };
    }
  });

  return new long[][] { new long[] { cm[3], cm[2] }, new long[] { cm[1], cm[0] } };
}
 
Example 35
Project: learning-spark-examples   File: BasicSum.java   (5 votes)
public static void main(String[] args) throws Exception {
    String master;
    if (args.length > 0) {
        master = args[0];
    } else {
        master = "local";
    }
    JavaSparkContext sc = new JavaSparkContext(
            master, "basicmap", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    Integer result = rdd.fold(0, new Function2<Integer, Integer, Integer>() {
        public Integer call(Integer x, Integer y) { return x + y; }
    });
    System.out.println(result);
}
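fold differs from reduce only in taking an explicit zero value, and that zero is applied once per partition and once more when the partition results are merged, so it must be an identity element for the Function2 being used. A hedged sketch under the same assumptions as the example above (sc already created, Java 8 lambdas available):

JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
Integer viaFold = rdd.fold(0, (x, y) -> x + y);    // 10: 0 is the identity for +
Integer viaReduce = rdd.reduce((x, y) -> x + y);   // 10: same result, no zero needed
// A non-identity zero such as 1 would be added once per partition plus once
// when merging, so fold(1, ...) generally does NOT return sum + 1.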
 
Example 36
Project: Test_Projects   File: Streaming101.java   (5 votes)
public static void main(String[] args) {

    // Create the context with a 10 second batch interval
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("Streaming102");
	//SparkConf sparkConf = new SparkConf().setMaster("spark://10.204.100.206:7077").setAppName("Streaming102");
	sparkConf.setJars(new String[] { "target\\original-TestProjects-1.0-SNAPSHOT.jar" });
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));

    String folder = "./stream/";
    if(args.length == 1){
    	folder = args[0];
    }

    JavaDStream<String> lines = ssc.textFileStream(folder);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
    	  System.out.println(x);
    	  return Lists.newArrayList(SPACE.split(x));
      }
    });
    
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
 
Example 37
Project: Test_Projects   File: Streaming102.java   (5 votes)
public static void main(String[] args) {

    // Create the context with a 10 second batch interval
    SparkConf sparkConf = new SparkConf().setMaster("local[*]").setAppName("Streaming101");
    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, Durations.seconds(10));


    JavaReceiverInputDStream<String> lines = ssc.socketTextStream("localhost",9999, StorageLevels.MEMORY_AND_DISK_SER);
    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
      @Override
      public Iterable<String> call(String x) {
    	  System.out.println(x);
    	  return Lists.newArrayList(SPACE.split(x));
      }
    });
    
    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
      new PairFunction<String, String, Integer>() {
        @Override
        public Tuple2<String, Integer> call(String s) {
          return new Tuple2<String, Integer>(s, 1);
        }
      }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
          return i1 + i2;
        }
    });

    wordCounts.print();
    ssc.start();
    ssc.awaitTermination();
  }
 
Example 38
Project: stratio-connector-deep   File: QueryExecutorTest.java   (5 votes)
@Test
public void simpleProjectAndSelectWithGroupByQueryTest() throws UnsupportedException, ExecutionException
         {

    // Input data
    List<LogicalStep> stepList = new ArrayList<>();
    Project project = createProject(CLUSTERNAME_CONSTANT, TABLE1_CONSTANT);

    GroupBy groupBy = createGroupBy();
    project.setNextStep(groupBy);

    groupBy.setNextStep(createSelect());

    // One single initial step
    stepList.add(project);

    LogicalWorkflow logicalWorkflow = new LogicalWorkflow(stepList);

    // Execution
    queryExecutor.executeWorkFlow(logicalWorkflow);

    // Assertions
    verify(deepContext, times(1)).createJavaRDD(any(ExtractorConfig.class));
    verify(singleRdd, times(0)).filter(any(Function.class));
    verify(singleRdd, times(0)).mapToPair(any(MapKeyForJoin.class));
    verify(singleRdd, times(0)).mapToPair(any(MapKeyForJoin.class));
    verify(pairRdd, times(0)).join(pairRdd);
    verify(joinedRdd, times(0)).map(any(JoinCells.class));
    verify(singleRdd, times(1)).map(any(Function.class));
    verify(joinedRdd, times(0)).map(any(Function.class));
    verify(singleRdd, times(1)).keyBy(any(Function.class));
    verify(pairRdd, times(1)).reduceByKey(any(Function2.class));
    verify(pairRdd, times(1)).map(any(Function.class));

}
 
Example 39
Project: MFIBlocking   File: JavaLogQuery.java   (5 votes)
public static void main(String[] args) throws Exception {
  if (args.length == 0) {
    System.err.println("Usage: JavaLogQuery <master> [logFile]");
    System.exit(1);
  }

  JavaSparkContext jsc = new JavaSparkContext(args[0], "JavaLogQuery",
    System.getenv("SPARK_HOME"), System.getenv("SPARK_EXAMPLES_JAR"));

  JavaRDD<String> dataSet = (args.length == 2) ? jsc.textFile(args[1]) : jsc.parallelize(exampleApacheLogs);

  JavaPairRDD<Tuple3<String, String, String>, Stats> extracted = dataSet.map(new PairFunction<String, Tuple3<String, String, String>, Stats>() {
    @Override
    public Tuple2<Tuple3<String, String, String>, Stats> call(String s) throws Exception {
      return new Tuple2<Tuple3<String, String, String>, Stats>(extractKey(s), extractStats(s));
    }
  });

  JavaPairRDD<Tuple3<String, String, String>, Stats> counts = extracted.reduceByKey(new Function2<Stats, Stats, Stats>() {
    @Override
    public Stats call(Stats stats, Stats stats2) throws Exception {
      return stats.merge(stats2);
    }
  });

  List<Tuple2<Tuple3<String, String, String>, Stats>> output = counts.collect();
  for (Tuple2 t : output) {
    System.out.println(t._1 + "\t" + t._2);
  }
  System.exit(0);
}
 
Example 40
Project: Building-Data-Streaming-Applications-with-Apache-Kafka   File: KafkaReceiverWordCountJava.java   (4 votes)
public static void main(String[] args) throws Exception {
    String zkQuorum = "localhost:2181";
    String groupName = "stream";
    int numThreads = 3;
    String topicsName = "test1";
    SparkConf sparkConf = new SparkConf().setAppName("WordCountKafkaStream");

    JavaStreamingContext javaStreamingContext = new JavaStreamingContext(sparkConf, new Duration(5000));

    Map<String, Integer> topicToBeUsedBySpark = new HashMap<>();
    String[] topics = topicsName.split(",");
    for (String topic : topics) {
        topicToBeUsedBySpark.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> streamMessages =
            KafkaUtils.createStream(javaStreamingContext, zkQuorum, groupName, topicToBeUsedBySpark);

    JavaDStream<String> lines = streamMessages.map(new Function<Tuple2<String, String>, String>() {
        @Override
        public String call(Tuple2<String, String> tuple2) {
            return tuple2._2();
        }
    });

    JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
        @Override
        public Iterator<String> call(String x) {
            return Arrays.asList(WORD_DELIMETER.split(x)).iterator();
        }
    });

    JavaPairDStream<String, Integer> wordCounts = words.mapToPair(
            new PairFunction<String, String, Integer>() {
                @Override
                public Tuple2<String, Integer> call(String s) {
                    return new Tuple2<>(s, 1);
                }
            }).reduceByKey(new Function2<Integer, Integer, Integer>() {
        @Override
        public Integer call(Integer i1, Integer i2) {
            return i1 + i2;
        }
    });

    wordCounts.print();
    javaStreamingContext.start();
    javaStreamingContext.awaitTermination();
}
 
Example 41
Project: mizo   File: MizoBuilder.java   (4 votes)
public MizoBuilder parseVertexProperty(Function2<MizoVertex, String, Boolean> predicate) {
    this.parseVertexProperty = predicate;

    return this;
}
 
Example 42
Project: mizo   File: MizoBuilder.java   (4 votes)
public MizoBuilder parseEdgeProperty(Function2<MizoEdge, String, Boolean> predicate) {
    this.parseEdgeProperty = predicate;

    return this;
}
 
Example 43
Project: mizo   File: MizoBuilder.java   (4 votes)
public MizoBuilder parseInEdge(Function2<MizoVertex, String, Boolean> predicate) {
    this.parseInEdge = predicate;

    return this;
}
 
Example 44
Project: mizo   File: MizoBuilder.java   (4 votes)
public MizoBuilder parseOutEdge(Function2<MizoVertex, String, Boolean> predicate) {
    this.parseOutEdge = predicate;

    return this;
}
 
Example 45
Project: mizo   File: MizoBuilder.java   (4 votes)
@Override
public Function2<MizoVertex, String, Boolean> parseVertexProperty() {
    return parseVertexProperty;
}
 
Example 46
Project: mizo   File: MizoBuilder.java   (4 votes)
@Override
public Function2<MizoVertex, String, Boolean> parseInEdge() {
    return parseInEdge;
}
 
Example 47
Project: mizo   File: MizoBuilder.java   (4 votes)
@Override
public Function2<MizoVertex, String, Boolean> parseOutEdge() {
    return parseOutEdge;
}
 
Example 48
Project: mizo   File: MizoBuilder.java   (4 votes)
@Override
public Function2<MizoEdge, String, Boolean> parseEdgeProperty() {
    return parseEdgeProperty;
}
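Examples 41 through 48 show Function2 used outside Spark operators, as a stored two-argument callback on a builder. Since Function2 has a single abstract method, the setters above also accept lambdas; a hedged sketch, given an existing MizoBuilder instance builder (the filtering rules below are made-up illustrations, not MizoBuilder defaults):

builder.parseVertexProperty((vertex, propertyName) -> !propertyName.startsWith("_"))
       .parseEdgeProperty((edge, propertyName) -> true)
       .parseInEdge((vertex, edgeLabel) -> true)
       .parseOutEdge((vertex, edgeLabel) -> "knows".equals(edgeLabel));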
 
Example 49
Project: Sparkathon   File: Windowstream.java   (4 votes)
public static void main(String[] args) throws Exception {

        final Pattern SPACE = Pattern.compile(" ");

        SparkConf conf = new SparkConf().setAppName("Big Apple").setMaster("local[2]");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));

        JavaDStream<String> lines = ssc.textFileStream("src/main/resources/stream");
        lines.print();

        JavaDStream<String> words = lines.flatMap(new FlatMapFunction<String, String>() {
            @Override
            public Iterator<String> call(String x) {
                return Lists.newArrayList(SPACE.split(x)).iterator();
            }
        });

        JavaPairDStream<String, Integer> wordsDstream = words.mapToPair(
                new PairFunction<String, String, Integer>() {
                    @Override
                    public Tuple2<String, Integer> call(String s) {
                        return new Tuple2<String, Integer>(s, 1);
                    }
                });

        wordsDstream.print();

        Function2<Integer, Integer, Integer> reduceFunc = new Function2<Integer, Integer, Integer>() {
            @Override
            public Integer call(Integer i1, Integer i2) {
                return i1 + i2;
            }
        };

        JavaPairDStream<String, Integer> windowedWordCounts = wordsDstream.reduceByKeyAndWindow(reduceFunc, Durations.seconds(30), Durations.seconds(10));

        windowedWordCounts.print();


        ssc.start();
        ssc.awaitTermination();

    }
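reduceByKeyAndWindow as used above recomputes the reduction over the whole window on every slide. JavaPairDStream also has an overload that takes a second, inverse Function2 so that Spark can incrementally subtract the batch sliding out of the window; a hedged sketch reusing reduceFunc and wordsDstream from the example above (the checkpoint directory is an illustrative assumption, and checkpointing is required by this overload):

Function2<Integer, Integer, Integer> invReduceFunc = new Function2<Integer, Integer, Integer>() {
    @Override
    public Integer call(Integer i1, Integer i2) {
        return i1 - i2;   // subtract the counts of the batch leaving the window
    }
};

ssc.checkpoint("checkpoint-dir");   // required when an inverse function is used

JavaPairDStream<String, Integer> windowedCounts = wordsDstream.reduceByKeyAndWindow(
        reduceFunc, invReduceFunc, Durations.seconds(30), Durations.seconds(10));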
 
Example 50
Project: mudrod   File: SessionStatistic.java   (4 votes)
public void processSessionInParallel() throws InterruptedException, IOException {

    List<String> sessions = this.getSessions();
    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessions, partition);

    int sessionCount = 0;
    sessionCount = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, Integer>() {
      @Override
      public Iterator<Integer> call(Iterator<String> arg0) throws Exception {
        ESDriver tmpES = new ESDriver(props);
        tmpES.createBulkProcessor();
        List<Integer> sessionNums = new ArrayList<Integer>();
        sessionNums.add(0);
        while (arg0.hasNext()) {
          String s = arg0.next();
          Integer sessionNum = processSession(tmpES, s);
          sessionNums.add(sessionNum);
        }
        tmpES.destroyBulkProcessor();
        tmpES.close();
        return sessionNums.iterator();
      }
    }).reduce(new Function2<Integer, Integer, Integer>() {
      @Override
      public Integer call(Integer a, Integer b) {
        return a + b;
      }
    });

    LOG.info("Final Session count: {}", Integer.toString(sessionCount));
  }
 
Example 51
Project: vn.vitk   File: Tokenizer.java   (4 votes)
/**
 * Tokenizes an RDD of text lines and returns an RDD of results.
 * @param input
 * @return an RDD of tokenized text lines.
 */
public JavaRDD<String> tokenize(JavaRDD<String> input) {
	if (verbose) {
		// print some basic statistics about the input, including
		// max line length, min line length, average line length in syllables
		JavaRDD<Integer> wordCount = input.map(new Function<String, Integer>() {
			private static final long serialVersionUID = 7214093453452927565L;
			@Override
			public Integer call(String line) throws Exception {
				return line.split("\\s+").length;
			}
			
		});
		Comparator<Integer> comp = new IntegerComparator();
		System.out.println("Max line length (in syllables) = " + wordCount.max(comp));
		System.out.println("Min line length (in syllables) = " + wordCount.min(comp));
		float totalCount = wordCount.reduce(new Function2<Integer, Integer, Integer>() {
			private static final long serialVersionUID = 1L;
			@Override
			public Integer call(Integer v1, Integer v2) throws Exception {
				return v1 + v2;
			}
		});
		System.out.println("Avg line length (in syllables) = " + (totalCount) / input.count());
	}
	
	JavaRDD<String> output = null;
	if (classifier == null) {
		// use phrase graph approach (shortest paths and bigram model)
		// to segment phrases
		output = input.map(new SegmentationFunction());
	} else {
		// use logistic regression approach to segment phrases
		JavaRDD<String> s = input.map(new SegmentationFunction());
		// make sure that the preceding lazy computation has been evaluated
		// so that whitespace contexts have been properly accumulated
		System.out.println("Number of text lines = " + s.count());
		System.out.println("Number of contexts = " + contexts.value().size());
		// use whitespace classification approach (logistic regression model)
		JavaRDD<WhitespaceContext> jrdd = jsc.parallelize(contexts.value());
		DataFrame df0 = (new SQLContext(jsc)).createDataFrame(jrdd, WhitespaceContext.class);
		DataFrame df1 = model.transform(df0);
		prediction = jsc.broadcast(df1.select("prediction").collect());
		if (df1.count() > 0) {
			output = s.map(new WhitespaceClassificationFunction());
		}
		else { 
			System.err.println("Empty data frame!");
		}
	}
	if (verbose) {
		// print number of non-space characters of the input and output dataset
		System.out.println("#(non-space characters of input) = " + numCharacters(input));
		if (output != null) {
			System.out.println("#(non-space characters of output) = " + numCharacters(output));
		}
	}
	return output;
}
 
Example 52
Project: beam   File: SparkGroupAlsoByWindowViaWindowSet.java   (4 votes)
private static <K, InputT> PairDStreamFunctions<ByteArray, byte[]> buildPairDStream(
    final JavaDStream<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> inputDStream,
    final Coder<K> keyCoder,
    final Coder<WindowedValue<InputT>> wvCoder) {

  // we have to switch to Scala API to avoid Optional in the Java API, see: SPARK-4819.
  // we also have a broader API for Scala (access to the actual key and entire iterator).
  // we use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle and be in serialized form
  // for checkpointing.
  // for readability, we add comments with actual type next to byte[].
  // to shorten line length, we use:
  //---- WV: WindowedValue
  //---- Iterable: Itr
  //---- AccumT: A
  //---- InputT: I
  final DStream<Tuple2<ByteArray, byte[]>> tupleDStream =
      inputDStream
          .transformToPair(
              new Function2<
                  JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>>, Time,
                  JavaPairRDD<ByteArray, byte[]>>() {

                // we use mapPartitions with the RDD API because its the only available API
                // that allows to preserve partitioning.
                @Override
                public JavaPairRDD<ByteArray, byte[]> call(
                    final JavaRDD<WindowedValue<KV<K, Iterable<WindowedValue<InputT>>>>> rdd,
                    final Time time)
                    throws Exception {
                  return rdd.mapPartitions(
                          TranslationUtils.functionToFlatMapFunction(
                              WindowingHelpers
                                  .<KV<K, Iterable<WindowedValue<InputT>>>>unwindowFunction()),
                          true)
                      .mapPartitionsToPair(
                          TranslationUtils
                              .<K, Iterable<WindowedValue<InputT>>>toPairFlatMapFunction(),
                          true)
                      .mapValues(
                          new Function<
                              Iterable<WindowedValue<InputT>>,
                              KV<Long, Iterable<WindowedValue<InputT>>>>() {

                            @Override
                            public KV<Long, Iterable<WindowedValue<InputT>>> call(
                                final Iterable<WindowedValue<InputT>> values) throws Exception {
                              // add the batch timestamp for visibility (e.g., debugging)
                              return KV.of(time.milliseconds(), values);
                            }
                          })
                      // move to bytes representation and use coders for deserialization
                      // because of checkpointing.
                      .mapPartitionsToPair(
                          TranslationUtils.pairFunctionToPairFlatMapFunction(
                              CoderHelpers.toByteFunction(
                                  keyCoder,
                                  KvCoder.of(VarLongCoder.of(), IterableCoder.of(wvCoder)))),
                          true);
                }
              })
          .dstream();

  return DStream.toPairDStreamFunctions(
      tupleDStream,
      JavaSparkContext$.MODULE$.<ByteArray>fakeClassTag(),
      JavaSparkContext$.MODULE$.<byte[]>fakeClassTag(),
      null);
}
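The comments in the example above spell out its central trick: keys and values are pushed through their Beam Coders so that the shuffle and checkpointing only ever see byte arrays. A minimal sketch of that round trip, reusing the CoderHelpers calls from the same runner (the helper method and its name are illustrative; keyCoder is the Coder<K> from the method signature):

  // Illustrative helper: encode with the coder so the value can cross the network or be
  // checkpointed in serialized form, then decode it again where the actual object is needed.
  static <K> K coderRoundTrip(K key, Coder<K> keyCoder) {
    byte[] keyBytes = CoderHelpers.toByteArray(key, keyCoder);
    return CoderHelpers.fromByteArray(keyBytes, keyCoder);
  }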
 
Example 53
Project: beam   File: GroupCombineFunctions.java   Source Code and License 4 votes vote down vote up
/**
 * Apply a composite {@link org.apache.beam.sdk.transforms.Combine.Globally} transformation.
 */
public static <InputT, AccumT> Optional<Iterable<WindowedValue<AccumT>>> combineGlobally(
    JavaRDD<WindowedValue<InputT>> rdd,
    final SparkGlobalCombineFn<InputT, AccumT, ?> sparkCombineFn,
    final Coder<InputT> iCoder,
    final Coder<AccumT> aCoder,
    final WindowingStrategy<?, ?> windowingStrategy) {
  // coders.
  final WindowedValue.FullWindowedValueCoder<InputT> wviCoder =
      WindowedValue.FullWindowedValueCoder.of(iCoder,
          windowingStrategy.getWindowFn().windowCoder());
  final WindowedValue.FullWindowedValueCoder<AccumT> wvaCoder =
      WindowedValue.FullWindowedValueCoder.of(aCoder,
          windowingStrategy.getWindowFn().windowCoder());
  final IterableCoder<WindowedValue<AccumT>> iterAccumCoder = IterableCoder.of(wvaCoder);

  // Use coders to convert objects in the PCollection to byte arrays, so they
  // can be transferred over the network for the shuffle.
  // for readability, we add comments with actual type next to byte[].
  // to shorten line length, we use:
  //---- WV: WindowedValue
  //---- Iterable: Itr
  //---- AccumT: A
  //---- InputT: I
  JavaRDD<byte[]> inputRDDBytes = rdd.map(CoderHelpers.toByteFunction(wviCoder));

  if (inputRDDBytes.isEmpty()) {
    return Optional.absent();
  }

  /*Itr<WV<A>>*/ byte[] accumulatedBytes = inputRDDBytes.aggregate(
      CoderHelpers.toByteArray(sparkCombineFn.zeroValue(), iterAccumCoder),
      new Function2</*Itr<WV<A>>*/ byte[], /*WV<I>*/ byte[], /*Itr<WV<A>>*/ byte[]>() {
        @Override
        public /*Itr<WV<A>>*/ byte[] call(/*Itr<WV<A>>*/ byte[] ab, /*WV<I>*/ byte[] ib)
            throws Exception {
          Iterable<WindowedValue<AccumT>> a = CoderHelpers.fromByteArray(ab, iterAccumCoder);
          WindowedValue<InputT> i = CoderHelpers.fromByteArray(ib, wviCoder);
          return CoderHelpers.toByteArray(sparkCombineFn.seqOp(a, i), iterAccumCoder);
        }
      },
      new Function2</*Itr<WV<A>>*/ byte[], /*Itr<WV<A>>*/ byte[], /*Itr<WV<A>>*/ byte[]>() {
        @Override
        public /*Itr<WV<A>>*/ byte[] call(/*Itr<WV<A>>*/ byte[] a1b, /*Itr<WV<A>>*/ byte[] a2b)
            throws Exception {
          Iterable<WindowedValue<AccumT>> a1 = CoderHelpers.fromByteArray(a1b, iterAccumCoder);
          Iterable<WindowedValue<AccumT>> a2 = CoderHelpers.fromByteArray(a2b, iterAccumCoder);
          Iterable<WindowedValue<AccumT>> merged = sparkCombineFn.combOp(a1, a2);
          return CoderHelpers.toByteArray(merged, iterAccumCoder);
        }
      }
  );

  return Optional.of(CoderHelpers.fromByteArray(accumulatedBytes, iterAccumCoder));
}
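For readers unfamiliar with aggregate, this is roughly the contract of the two Function2 arguments above: every partition folds its elements into a copy of the zero value with the first function (seqOp), and the per-partition accumulators are then merged with the second (combOp). A local, purely illustrative sketch of that contract, not Spark's actual implementation:

  // Illustrative only: how seqOp and combOp cooperate in rdd.aggregate(zero, seqOp, combOp).
  static byte[] aggregateLocally(byte[] zero,
                                 List<List<byte[]>> partitions,
                                 Function2<byte[], byte[], byte[]> seqOp,
                                 Function2<byte[], byte[], byte[]> combOp) throws Exception {
    byte[] total = zero;
    for (List<byte[]> partition : partitions) {
      byte[] acc = zero;                   // each partition starts from the zero value
      for (byte[] element : partition) {
        acc = seqOp.call(acc, element);    // fold one input into the partition accumulator
      }
      total = combOp.call(total, acc);     // merge partition results, as Spark does on the driver
    }
    return total;
  }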
 
Example 54
Project: StreamBench   File: StreamingWordCount.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) throws Exception {
        SparkConf conf = new SparkConf().setMaster("local[2]").setAppName("Stateful Network Word Count");
        JavaStreamingContext ssc = new JavaStreamingContext(conf, Durations.seconds(1));
        ssc.checkpoint("checkpoint");

        ssc.addStreamingListener(new PerformanceStreamingListener());


        JavaReceiverInputDStream<String> lines = ssc.socketTextStream("127.0.0.1", 9999);

        JavaPairDStream<String, Long> wordCounts = lines.flatMap(new FlatMapFunction<String, String>() {
            public Iterable<String> call(String l) throws Exception {
                return Arrays.asList(l.split(" "));
            }
        }).mapToPair(new PairFunction<String, String, Long>() {
            public Tuple2<String, Long> call(String w) throws Exception {
                return new Tuple2<>(w, 1L);
            }
        })
                .reduceByKey(new Function2<Long, Long, Long>() {
                    @Override
                    public Long call(Long aLong, Long aLong2) throws Exception {
                        return aLong + aLong2;
                    }
                })
                .updateStateByKey(new Function2<List<Long>, Optional<Long>, Optional<Long>>() {
                    public Optional<Long> call(List<Long> values, Optional<Long> state) throws Exception {
                        if (values == null || values.isEmpty()) {
                            return state;
                        }
                        long sum = 0L;
                        for (Long v : values) {
                            sum += v;
                        }

                        return Optional.of(state.or(0L) + sum);
                    }
                });
//                .updateStateByKey(new Function2<List<Iterable<Long>>, Optional<Long>, Optional<Long>>() {
//                    @Override
//                    public Optional<Long> call(List<Iterable<Long>> iterables, Optional<Long> longOptional) throws Exception {
//                        if (iterables == null || iterables.isEmpty()) {
//                            return longOptional;
//                        }
//                        long sum = 0L;
//                        for (Iterable<Long> iterable : iterables) {
//                            for(Long l : iterable)
//                                sum += l;
//                        }
//                        return Optional.of(longOptional.or(0L) + sum);
//                    }
//                });

        wordCounts.print();
        wordCounts.foreach(new Function2<JavaPairRDD<String, Long>, Time, Void>() {
            @Override
            public Void call(JavaPairRDD<String, Long> stringLongJavaPairRDD, Time time) throws Exception {
                return null;
            }
        });
        ssc.start();
        ssc.awaitTermination();
    }
 
Example 55
Project: kafka-examples   File: StreamingAvg.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) {
    if (args.length < 4) {
        System.err.println("Usage: StreamingAvg <zkQuorum> <group> <topics> <numThreads>");
        System.exit(1);
    }

    //Configure the Streaming Context
    SparkConf sparkConf = new SparkConf().setAppName("StreamingAvg");

    JavaStreamingContext ssc = new JavaStreamingContext(sparkConf, new Duration(10000));

    int numThreads = Integer.parseInt(args[3]);
    Map<String, Integer> topicMap = new HashMap<String, Integer>();
    String[] topics = args[2].split(",");
    for (String topic: topics) {
        topicMap.put(topic, numThreads);
    }

    JavaPairReceiverInputDStream<String, String> messages =
            KafkaUtils.createStream(ssc, args[0], args[1], topicMap);


    System.out.println("Got my DStream! connecting to zookeeper "+ args[0] + " group " + args[1] + " topics" +
    topicMap);



    JavaPairDStream<Integer,Integer> nums = messages.mapToPair(new PairFunction<Tuple2<String,String>, Integer, Integer>()
    {
        @Override
        public Tuple2<Integer,Integer> call(Tuple2<String, String> tuple2) {
            return new Tuple2<Integer,Integer>(1,Integer.parseInt(tuple2._2()));
        }
    });

    JavaDStream<Tuple2<Integer,Integer>> countAndSum = nums.reduce(new Function2<Tuple2<Integer,Integer>, Tuple2<Integer,Integer>, Tuple2<Integer,Integer>>() {
        @Override
        public Tuple2<Integer, Integer> call(Tuple2<Integer, Integer> a, Tuple2<Integer, Integer> b) {
            return new Tuple2<Integer, Integer>(a._1() + b._1(), a._2() + b._2());
        }
    });

    countAndSum.foreachRDD(new Function<JavaRDD<Tuple2<Integer, Integer>>, Void>() {
        @Override
        public Void call(JavaRDD<Tuple2<Integer, Integer>> tuple2JavaRDD) throws Exception {
            if (tuple2JavaRDD.count() > 0) {
                System.out.println("Current avg: " + tuple2JavaRDD.first()._2() / tuple2JavaRDD.first()._1());
            } else {
                System.out.println("Got no data in this window");
            }
            return null;
        }
    });

    ssc.start();
    ssc.awaitTermination();

}
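One caveat in the foreachRDD block above: first()._2() / first()._1() divides two Integers, so the printed average is truncated to a whole number. A hedged variant of that print statement that keeps the fractional part (same Tuple2 of (count, sum); countAndSumValue is just an illustrative local name):

    // Illustrative replacement for the "Current avg" line; only the expression changes.
    Tuple2<Integer, Integer> countAndSumValue = tuple2JavaRDD.first();
    double avg = countAndSumValue._2().doubleValue() / countAndSumValue._1();
    System.out.println("Current avg: " + avg);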
 
Example 56
Project: gatk-protected   File: CoverageModelEMWorkspace.java   Source Code and License 4 votes vote down vote up
/**
 * A generic function for broadcasting an object to all compute blocks
 *
 * If Spark is enabled:
 *
 *      A {@link Broadcast} will be created from {@code obj} and will be "received" by the compute nodes by calling
 *      {@code pusher}. A reference to the updated RDD will replace the old RDD.
 *
 * If Spark is disabled:
 *
 *      The {@code pusher} function will be called together with {@code obj} and {@link #localComputeBlock}
 *
 * @param obj the object to broadcast
 * @param pusher a map from (V, {@link CoverageModelEMComputeBlock}) -> {@link CoverageModelEMComputeBlock} that
 *               updates the compute block with the broadcasted value
 * @param <V> the type of the broadcasted object
 */
@UpdatesRDD
private <V> void pushToWorkers(@Nonnull final V obj,
                               @Nonnull final Function2<V, CoverageModelEMComputeBlock, CoverageModelEMComputeBlock> pusher) {
    if (sparkContextIsAvailable) {
        final Broadcast<V> broadcastedObj = ctx.broadcast(obj);
        final Function<CoverageModelEMComputeBlock, CoverageModelEMComputeBlock> mapper =
                cb -> pusher.call(broadcastedObj.value(), cb);
        mapWorkers(mapper);
    } else {
        try {
            localComputeBlock = pusher.call(obj, localComputeBlock);
        } catch (final Exception ex) {
            throw new RuntimeException("Can not apply the map function to the local compute block", ex);
        }
    }
}
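A hedged usage sketch for pushToWorkers; only the Function2 shape is taken from the signature above, while the value being pushed and the compute-block method it invokes are hypothetical stand-ins:

    // Hypothetical call site (illustration only): push a freshly computed array to every compute block.
    // cloneWithUpdatedValue is an assumed CoverageModelEMComputeBlock method, not taken from the source.
    final double[] newValue = new double[] { 0.0, 1.0 };
    pushToWorkers(newValue, (value, block) -> block.cloneWithUpdatedValue(value));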
 
Example 57
Project: spark-dataflow   File: TransformTranslator.java   Source Code and License 4 votes vote down vote up
private static <I, A, O> TransformEvaluator<Combine.Globally<I, O>> combineGlobally() {
  return new TransformEvaluator<Combine.Globally<I, O>>() {

    @Override
    public void evaluate(Combine.Globally<I, O> transform, EvaluationContext context) {
      final Combine.CombineFn<I, A, O> globally = COMBINE_GLOBALLY_FG.get("fn", transform);

      @SuppressWarnings("unchecked")
      JavaRDDLike<WindowedValue<I>, ?> inRdd =
          (JavaRDDLike<WindowedValue<I>, ?>) context.getInputRDD(transform);

      final Coder<I> iCoder = context.getInput(transform).getCoder();
      final Coder<A> aCoder;
      try {
        aCoder = globally.getAccumulatorCoder(
            context.getPipeline().getCoderRegistry(), iCoder);
      } catch (CannotProvideCoderException e) {
        throw new IllegalStateException("Could not determine coder for accumulator", e);
      }

      // Use coders to convert objects in the PCollection to byte arrays, so they
      // can be transferred over the network for the shuffle.
      JavaRDD<byte[]> inRddBytes = inRdd
          .map(WindowingHelpers.<I>unwindowFunction())
          .map(CoderHelpers.toByteFunction(iCoder));

      /*A*/ byte[] acc = inRddBytes.aggregate(
          CoderHelpers.toByteArray(globally.createAccumulator(), aCoder),
          new Function2</*A*/ byte[], /*I*/ byte[], /*A*/ byte[]>() {
            @Override
            public /*A*/ byte[] call(/*A*/ byte[] ab, /*I*/ byte[] ib) throws Exception {
              A a = CoderHelpers.fromByteArray(ab, aCoder);
              I i = CoderHelpers.fromByteArray(ib, iCoder);
              return CoderHelpers.toByteArray(globally.addInput(a, i), aCoder);
            }
          },
          new Function2</*A*/ byte[], /*A*/ byte[], /*A*/ byte[]>() {
            @Override
            public /*A*/ byte[] call(/*A*/ byte[] a1b, /*A*/ byte[] a2b) throws Exception {
              A a1 = CoderHelpers.fromByteArray(a1b, aCoder);
              A a2 = CoderHelpers.fromByteArray(a2b, aCoder);
              // don't use Guava's ImmutableList.of as values may be null
              List<A> accumulators = Collections.unmodifiableList(Arrays.asList(a1, a2));
              A merged = globally.mergeAccumulators(accumulators);
              return CoderHelpers.toByteArray(merged, aCoder);
            }
          }
      );
      O output = globally.extractOutput(CoderHelpers.fromByteArray(acc, aCoder));

      Coder<O> coder = context.getOutput(transform).getCoder();
      JavaRDD<byte[]> outRdd = context.getSparkContext().parallelize(
          // don't use Guava's ImmutableList.of as output may be null
          CoderHelpers.toByteArrays(Collections.singleton(output), coder));
      context.setOutputRDD(transform, outRdd.map(CoderHelpers.fromByteFunction(coder))
          .map(WindowingHelpers.<O>windowFunction()));
    }
  };
}
 
Example 58
Project: iis   File: IisAffMatchResultWriterTest.java   Source Code and License 4 votes vote down vote up
private void assertDuplicateMatchedOrgsReduceFunction(Function2<MatchedOrganization, MatchedOrganization, MatchedOrganization> function) throws Exception {
    
    // given
    
    MatchedOrganization matchedOrg1 = mock(MatchedOrganization.class);
    MatchedOrganization matchedOrg2 = mock(MatchedOrganization.class);
    MatchedOrganization newMatchedOrg = mock(MatchedOrganization.class);
    
    when(duplicateMatchedOrgStrengthRecalculator.recalculateStrength(matchedOrg1, matchedOrg2)).thenReturn(newMatchedOrg);
    
    
    // execute
    
    MatchedOrganization retMatchedOrg = function.call(matchedOrg1, matchedOrg2);
    
    
    // assert
    
    assertNotNull(retMatchedOrg);
    assertTrue(retMatchedOrg == newMatchedOrg);
}
 
Example 59
Project: iis   File: DocClassificationReportGeneratorTest.java   Source Code and License 4 votes vote down vote up
private void assertReduceFunction(Function2<Long, Long, Long> function) throws Exception {
    
    // execute & assert
    assertEquals(4L, function.call(1L, 3L).longValue());
    assertEquals(7L, function.call(4L, 3L).longValue());
}
 
Example 60
Project: SparkOnALog   File: SparkStreamingFromFlumeToHBaseExample.java   Source Code and License 4 votes vote down vote up
public static void main(String[] args) {
	if (args.length < 5) {
		System.err
				.println("Usage: SparkStreamingFromFlumeToHBaseExample {master} {host} {port} {table} {columnFamily}");
		System.exit(1);
	}

	String master = args[0];
	String host = args[1];
	int port = Integer.parseInt(args[2]);
	String tableName = args[3];
	String columnFamily = args[4];
	
	Duration batchInterval = new Duration(2000);

	JavaStreamingContext sc = new JavaStreamingContext(master,
			"FlumeEventCount", batchInterval,
			System.getenv("SPARK_HOME"), "/home/cloudera/SparkOnALog.jar");
	
	final Broadcast<String> broadcastTableName = sc.sparkContext().broadcast(tableName);
	final Broadcast<String> broadcastColumnFamily = sc.sparkContext().broadcast(columnFamily);
	
	//JavaDStream<SparkFlumeEvent> flumeStream = sc.flumeStream(host, port);
	
	JavaDStream<SparkFlumeEvent> flumeStream = FlumeUtils.createStream(sc, host, port);
	
	JavaPairDStream<String, Integer> lastCounts = flumeStream
			.flatMap(new FlatMapFunction<SparkFlumeEvent, String>() {

				@Override
				public Iterable<String> call(SparkFlumeEvent event)
						throws Exception {
					String bodyString = new String(event.event().getBody()
							.array(), "UTF-8");
					return Arrays.asList(bodyString.split(" "));
				}
			}).map(new PairFunction<String, String, Integer>() {
				@Override
				public Tuple2<String, Integer> call(String str)
						throws Exception {
					return new Tuple2<String, Integer>(str, 1);
				}
			}).reduceByKey(new Function2<Integer, Integer, Integer>() {

				@Override
				public Integer call(Integer x, Integer y) throws Exception {
					return x.intValue() + y.intValue();
				}
			});
			
			
			lastCounts.foreach(new Function2<JavaPairRDD<String,Integer>, Time, Void>() {

				@Override
				public Void call(JavaPairRDD<String, Integer> values,
						Time time) throws Exception {
					
					values.foreach(new VoidFunction<Tuple2<String, Integer>> () {

						@Override
						public void call(Tuple2<String, Integer> tuple)
								throws Exception {
							HBaseCounterIncrementor incrementor = 
									HBaseCounterIncrementor.getInstance(broadcastTableName.value(), broadcastColumnFamily.value());
							incrementor.incerment("Counter", tuple._1(), tuple._2());
							System.out.println("Counter:" + tuple._1() + "," + tuple._2());
							
						}} );
					
					return null;
				}});
	
	

	sc.start();

}