Java Code Examples for org.apache.flink.api.java.tuple.Tuple2

The following are top voted examples showing how to use org.apache.flink.api.java.tuple.Tuple2. These examples are extracted from open source projects.
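For orientation before the examples: Tuple2 is Flink's generated two-field tuple type with public, mutable fields f0 and f1. Below is a minimal, self-contained sketch of the core API (constructor, static factory, field access, swap); it assumes only the flink-core artifact on the classpath.

import org.apache.flink.api.java.tuple.Tuple2;

public class Tuple2Basics {
    public static void main(String[] args) {
        // create via constructor or the static factory
        Tuple2<String, Integer> t = new Tuple2<>("flink", 1);
        Tuple2<String, Integer> u = Tuple2.of("streaming", 2);

        // fields are public and mutable
        t.f1 = t.f1 + 41;
        System.out.println(t.f0 + " -> " + t.f1);   // flink -> 42

        // positional access and swapping the two fields
        String word = u.getField(0);                 // same as u.f0
        Tuple2<Integer, String> swapped = u.swap();  // (2,streaming)
        System.out.println(word + " " + swapped);
    }
}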
Example 1
Project: flink-examples   File: TopTweet.java   (6 votes)
@Override
public void flatMap(String tweetJsonStr, Collector<Tuple2<String, Integer>> collector) throws Exception {
    JsonNode tweetJson = mapper.readTree(tweetJsonStr);
    JsonNode entities = tweetJson.get("entities");
    if (entities == null) return;

    JsonNode hashtags = entities.get("hashtags");
    if (hashtags == null) return;

    // Jackson 1.x API: in Jackson 2.x these calls are elements() and textValue()
    for (Iterator<JsonNode> iter = hashtags.getElements(); iter.hasNext();) {
        JsonNode node = iter.next();
        String hashtag = node.get("text").getTextValue();

        if (hashtag.matches("\\w+")) {
            collector.collect(new Tuple2<>(hashtag, 1));
        }
    }
}
 
Example 2
Project: flink-connectors   File: IntSequenceExactlyOnceValidator.java   (6 votes)
@Override
public void restoreState(List<Tuple2<Integer, BitSet>> state) throws Exception {
    if (state.isEmpty()) {
        Assert.fail("Function was restored without state - no checkpoint completed before.");
    }

    if (state.size() > 1) {
        Assert.fail("Function was restored with multiple states. unexpected scale-in");
    }

    Tuple2<Integer, BitSet> s = state.get(0);
    this.numElementsSoFar = s.f0;
    this.duplicateChecker.clear();
    this.duplicateChecker.or(s.f1);

    log.debug("IntSequenceExactlyOnceValidator was restored with {} elements", numElementsSoFar);
}
 
Example 3
Project: streaming-engines-benchmark   File: StatefulMap.java   (6 votes)
@Override
public StreamState map(Tuple2<String, Message> input) throws Exception {

    StreamState currentState = internalState.value();

    if (currentState == null) {
        currentState = new StreamState(input.f1.getId());
    }

    currentState.appendMessage(input.f1);

    StreamState state = null;

    if (currentState.size() >= input.f1.getValues()) {
        currentState.isReadyForReduction = true;
        state = currentState;
        internalState.clear();
    } else {
        internalState.update(currentState);
    }

    return state;
}
 
Example 4
Project: pravega-samples   File: EventStateMachine.java   (6 votes)
public Tuple2<Event.EventType, State> randomTransition(Random rnd) {
	if (transitions.isEmpty()) {
		throw new RuntimeException("Cannot transition from state " + name);
	}
	float p = rnd.nextFloat();
	float mass = 0.0f;
	Transition transition = null;

	// pick the transition whose cumulative probability interval contains p
	// (assumes the transition probabilities sum to 1)
	for (Transition t : transitions) {
		mass += t.prob;
		if (transition == null && p <= mass) {
			transition = t;
		}
	}
	return new Tuple2<>(transition.event, transition.targetState);
}
 
Example 5
Project: flink-java-project   File: WordCount.java   (6 votes)
public static void main(String[] args) throws Exception {

	// set up the execution environment
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// get input data
	DataSet<String> text = env.fromElements(
			"To be, or not to be,--that is the question:--",
			"Whether 'tis nobler in the mind to suffer",
			"The slings and arrows of outrageous fortune",
			"Or to take arms against a sea of troubles,");

	DataSet<Tuple2<String, Integer>> counts =
			// split up the lines into pairs (2-tuples) containing: (word, 1)
			text.flatMap(new LineSplitter())
			// group by the tuple field "0" and sum up tuple field "1"
			.groupBy(0)
			.sum(1);

	// execute and print result
	counts.print();
}
 
Example 6
Project: flink-java-project   File: PoupularPlacesMain.java   (6 votes)
@Override
public Tuple2<Integer, Boolean> map(TaxiRide taxiRide) throws Exception {
	float lon;
	float lat;
	final boolean isStart = taxiRide.isStart;
	if (isStart) {
		lon = taxiRide.startLon;
		lat = taxiRide.startLat;
	} else {
		lon = taxiRide.endLon;
		lat = taxiRide.endLat;
	}

	int gridId = GeoUtils.mapToGridCell(lon, lat);
	return Tuple2.of(gridId, isStart);
}
 
Example 7
Project: nautilus-samples   File: IntSequenceExactlyOnceValidator.java   (6 votes)
@Override
public List<Tuple2<Integer, BitSet>> snapshotState(long checkpointId, long timestamp) throws Exception {
	log.info("IntSequenceExactlyOnceValidator - at checkpoint {} having {} elements, waitUntil {}",
			checkpointId, numElementsSoFar, waitUntil);

	// after we are done, we need to wait for two more checkpoints to complete
	// before finishing the program - that is to be on the safe side that
	// the sink also got the "commit" notification for all relevant checkpoints
	// and committed the data to pravega
	if (numElementsSoFar == numElementsTotal) {
		waitUntil--;
		if (waitUntil == 0) {
			throw new SuccessException();
		}
	}
	return Collections.singletonList(new Tuple2<>(numElementsSoFar, duplicateChecker));
}
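For context: the snapshotState/restoreState pair in Examples 7 and 8 are the two methods of Flink's ListCheckpointed interface, whose list elements can be redistributed across subtasks on restore. Below is a minimal sketch of a function checkpointed the same way; the class and field names are illustrative, not from the original project.

import java.util.BitSet;
import java.util.Collections;
import java.util.List;

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.checkpoint.ListCheckpointed;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

// illustrative skeleton: counts elements and checkpoints (count, seen-bits) as a single Tuple2
public class CountingValidator extends RichSinkFunction<Integer>
        implements ListCheckpointed<Tuple2<Integer, BitSet>> {

    private int numElementsSoFar;
    private final BitSet duplicateChecker = new BitSet();

    @Override
    public void invoke(Integer value) {
        duplicateChecker.set(value);
        numElementsSoFar++;
    }

    @Override
    public List<Tuple2<Integer, BitSet>> snapshotState(long checkpointId, long timestamp) {
        // one list element; Flink may redistribute list elements on rescaling
        return Collections.singletonList(new Tuple2<>(numElementsSoFar, duplicateChecker));
    }

    @Override
    public void restoreState(List<Tuple2<Integer, BitSet>> state) {
        for (Tuple2<Integer, BitSet> s : state) {
            numElementsSoFar += s.f0;
            duplicateChecker.or(s.f1);
        }
    }
}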
 
Example 8
Project: nautilus-samples   File: IntSequenceExactlyOnceValidator.java   (6 votes)
@Override
public void restoreState(List<Tuple2<Integer, BitSet>> state) throws Exception {
	if (state.isEmpty()) {
		Assert.fail("Function was restored without state - no checkpoint completed before.");
	}

	if (state.size() > 1) {
		Assert.fail("Function was restored with multiple states. unexpected scale-in");
	}

	Tuple2<Integer, BitSet> s = state.get(0);
	this.numElementsSoFar = s.f0;
	this.duplicateChecker.clear();
	this.duplicateChecker.or(s.f1);

	log.info("IntSequenceExactlyOnceValidator was restored with {} elements", numElementsSoFar);
}
 
Example 9
Project: nautilus-samples   File: EventStateMachine.java   (6 votes)
public Tuple2<Event.EventType, State> randomTransition(Random rnd) {
	if (transitions.isEmpty()) {
		throw new RuntimeException("Cannot transition from state " + name);
	}
	float p = rnd.nextFloat();
	float mass = 0.0f;
	Transition transition = null;

	for (Transition t : transitions) {
		mass += t.prob;
		if (transition == null && p <= mass) {
			transition = t;
		}
	}
	return new Tuple2<>(transition.event, transition.targetState);
}
 
Example 10
Project: flink   File: CassandraTupleSinkExample.java   (6 votes)
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<String, Integer>> source = env.fromCollection(collection);

	CassandraSink.addSink(source)
		.setQuery(INSERT)
		.setClusterBuilder(new ClusterBuilder() {
			@Override
			protected Cluster buildCluster(Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	env.execute("WriteTupleIntoCassandra");
}
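INSERT and collection here are static fields of the example class that are not shown in the snippet. A hypothetical pair that would satisfy the code follows; the actual query and data in the Flink example may differ.

import java.util.ArrayList;
import java.util.List;
import org.apache.flink.api.java.tuple.Tuple2;

public class CassandraTupleSinkExampleFields {
    // hypothetical stand-ins for the elided static fields of the example class
    private static final String INSERT =
            "INSERT INTO example.wordcount (word, count) VALUES (?, ?);";

    private static final List<Tuple2<String, Integer>> collection = new ArrayList<>();
    static {
        for (int i = 0; i < 20; i++) {
            collection.add(new Tuple2<>("word-" + i, i));
        }
    }
}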
 
Example 11
Project: erad2016-streamprocessing   File: PositiveScoreFunction.java   (6 votes)
@Override
public void flatMap(Tuple2<Long, String> tweet, Collector<Tuple3<Long, String, Float>> out) throws Exception {

    String text = tweet.f1;
    Set<String> posWords = PositiveWords.getWords();
    String[] words = text.split(" ");
    int numWords = words.length;
    int numPosWords = 0;
    for (String word : words) {
        if (posWords.contains(word)) {
            numPosWords++;
        }
    }

    out.collect(new Tuple3<>(
            tweet.f0,
            tweet.f1,
            (float) numPosWords / numWords));

}
 
Example 12
Project: flink   File: CrossITCase.java   (6 votes)
@Test
public void testCorrectnessOfCrossWithHuge() throws Exception {
	/*
	 * check correctness of crossWithHuge (only correctness of result -> should be the same as with normal cross)
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds = CollectionDataSets.getSmall5TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env);
	DataSet<Tuple2<Integer, String>> crossDs = ds.crossWithHuge(ds2).with(new Tuple5Cross());

	List<Tuple2<Integer, String>> result = crossDs.collect();

	String expected = "0,HalloHallo\n" +
			"1,HalloHallo Welt\n" +
			"2,HalloHallo Welt wie\n" +
			"1,Hallo WeltHallo\n" +
			"2,Hallo WeltHallo Welt\n" +
			"3,Hallo WeltHallo Welt wie\n" +
			"2,Hallo Welt wieHallo\n" +
			"3,Hallo Welt wieHallo Welt\n" +
			"4,Hallo Welt wieHallo Welt wie\n";

	compareResultAsTuples(result, expected);
}
 
Example 13
Project: flink   File: SideOutputExample.java   (6 votes)
@Override
public void processElement(
		String value,
		Context ctx,
		Collector<Tuple2<String, Integer>> out) throws Exception {
	// normalize and split the line
	String[] tokens = value.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 5) {
			ctx.output(rejectedWordsTag, token);
		} else if (token.length() > 0) {
			out.collect(new Tuple2<>(token, 1));
		}
	}

}
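The rejectedWordsTag referenced above is declared outside the snippet. Below is a self-contained sketch of the pattern, with Tokenizer as an illustrative name for the ProcessFunction shown above:

import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.ProcessFunction;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;

public class SideOutputSketch {

    // declared as an anonymous subclass so the element type survives erasure
    static final OutputTag<String> rejectedWordsTag = new OutputTag<String>("rejected") {};

    static class Tokenizer extends ProcessFunction<String, Tuple2<String, Integer>> {
        @Override
        public void processElement(String value, Context ctx, Collector<Tuple2<String, Integer>> out) {
            for (String token : value.toLowerCase().split("\\W+")) {
                if (token.length() > 5) {
                    ctx.output(rejectedWordsTag, token);  // long words go to the side output
                } else if (token.length() > 0) {
                    out.collect(new Tuple2<>(token, 1));
                }
            }
        }
    }

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        SingleOutputStreamOperator<Tuple2<String, Integer>> tokenized =
                env.fromElements("to be or not to be", "outrageous fortune")
                   .process(new Tokenizer());

        // the main output and the side output are consumed independently
        DataStream<String> rejectedWords = tokenized.getSideOutput(rejectedWordsTag);
        rejectedWords.print();
        tokenized.keyBy(0).sum(1).print();

        env.execute("side output sketch");
    }
}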
 
Example 14
Project: flink   File: SelectorFunctionKeysTest.java   (6 votes)
@Test
public void testAreCompatible4() throws Keys.IncompatibleKeysException {
	TypeInformation<Tuple3<String, Long, Integer>> t1 = new TupleTypeInfo<>(
		BasicTypeInfo.STRING_TYPE_INFO,
		BasicTypeInfo.LONG_TYPE_INFO,
		BasicTypeInfo.INT_TYPE_INFO
	);
	TypeInformation<PojoWithMultiplePojos> t2 = TypeExtractor.getForClass(PojoWithMultiplePojos.class);

	Keys.ExpressionKeys<Tuple3<String, Long, Integer>> ek1 = new Keys.ExpressionKeys<>(new int[]{2,0}, t1);
	Keys<PojoWithMultiplePojos> sk2 = new Keys.SelectorFunctionKeys<>(
		new KeySelector3(),
		t2,
		new TupleTypeInfo<Tuple2<Integer, String>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO)
	);

	Assert.assertTrue(sk2.areCompatible(ek1));
}
 
Example 15
Project: flink   File: ContinuousFileProcessingTest.java   (6 votes)
/**
 * Create a file with a pre-determined String format of the form:
 * {@code fileIdx + ": " + sampleLine + " " + lineNo}.
 */
private static Tuple2<org.apache.hadoop.fs.Path, String> createFileAndFillWithData(
			String base, String fileName, int fileIdx, String sampleLine) throws IOException {

	assert (hdfs != null);

	final String fileRandSuffix = UUID.randomUUID().toString();

	org.apache.hadoop.fs.Path file = new org.apache.hadoop.fs.Path(base + "/" + fileName + fileRandSuffix);
	Assert.assertFalse(hdfs.exists(file));

	org.apache.hadoop.fs.Path tmp = new org.apache.hadoop.fs.Path(base + "/." + fileName + fileRandSuffix);
	FSDataOutputStream stream = hdfs.create(tmp);
	StringBuilder str = new StringBuilder();
	for (int i = 0; i < LINES_PER_FILE; i++) {
		String line = fileIdx + ": " + sampleLine + " " + i + "\n";
		str.append(line);
		stream.write(line.getBytes(ConfigConstants.DEFAULT_CHARSET));
	}
	stream.close();

	hdfs.rename(tmp, file);

	Assert.assertTrue("No result file present", hdfs.exists(file));
	return new Tuple2<>(file, str.toString());
}
 
Example 16
Project: flink   File: ReduceOnNeighborMethodsITCase.java   (6 votes)
@Test
public void testSumOfInNeighborsNoValue() throws Exception {
	/*
	 * Get the sum of in-neighbor values
	 * times the edge weights for each vertex
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Tuple2<Long, Long>> verticesWithSum =
		graph.groupReduceOnNeighbors(new SumInNeighborsNoValue(), EdgeDirection.IN);
	List<Tuple2<Long, Long>> result = verticesWithSum.collect();

	expectedResult = "1,255\n" +
		"2,12\n" +
		"3,59\n" +
		"4,102\n" +
		"5,285\n";

	compareResultAsTuples(result, expectedResult);
}
 
Example 17
Project: flink   File: GraphMetrics.java   (6 votes)
@SuppressWarnings("serial")
private static DataSet<Edge<Long, NullValue>> getEdgesDataSet(ExecutionEnvironment env) {
	if (fileOutput) {
		return env.readCsvFile(edgesInputPath)
				.lineDelimiter("\n").fieldDelimiter("\t")
				.types(Long.class, Long.class).map(
						new MapFunction<Tuple2<Long, Long>, Edge<Long, NullValue>>() {

							public Edge<Long, NullValue> map(Tuple2<Long, Long> value) {
								return new Edge<Long, NullValue>(value.f0, value.f1, 
										NullValue.getInstance());
							}
				});
	} else {
		return ExampleUtils.getRandomEdges(env, NUM_VERTICES);
	}
}
 
Example 18
Project: flink   File: ContinuousFileProcessingCheckpointITCase.java   (6 votes)
/**
 * Fill the file with content and put the content in the {@code hdPathContents} list.
 */
private Tuple2<Path, String> fillWithData(
	String base, String fileName, int fileIdx, String sampleLine) throws IOException, InterruptedException {

	assert (localFs != null);

	org.apache.hadoop.fs.Path tmp =
		new org.apache.hadoop.fs.Path(base + "/." + fileName + fileIdx);

	FSDataOutputStream stream = localFs.create(tmp);
	StringBuilder str = new StringBuilder();
	for (int i = 0; i < LINES_PER_FILE; i++) {
		String line = fileIdx + ": " + sampleLine + " " + i + "\n";
		str.append(line);
		stream.write(line.getBytes(ConfigConstants.DEFAULT_CHARSET));
	}
	stream.close();
	return new Tuple2<>(tmp, str.toString());
}
 
Example 19
Project: osm-flink-tools   File: TestMapStringTools.java   (6 votes)
/**
 * Writes tuple values containing commas and quote characters as CSV
 * with string quoting enabled.
 */
public void testWriteValuesWithComma() throws Exception {

	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	ArrayList<Tuple2<String, String>> al = new ArrayList<>();
	al.add(new Tuple2<String, String>("hello", "1"));
	al.add(new Tuple2<String, String>("toto,tiri,\"", "1"));
	DataSource<Tuple2<String, String>> d = env.fromCollection(al);

	CsvOutputFormat<Tuple2<String, String>> f = new CsvOutputFormat<>(new Path("testcsv.csv"));
	f.setQuoteStrings(true);

	d.write(f, "testcsv.csv");
	env.execute();
}
 
Example 20
Project: flink   File: ReduceOnNeighborMethodsITCase.java   (6 votes)
@Test
public void testSumOfOAllNeighbors() throws Exception {
	/*
	 * Get the sum of all neighbor values
	 * including own vertex value
	 * for each vertex
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	Graph<Long, Long, Long> graph = Graph.fromDataSet(TestGraphUtils.getLongLongVertexData(env),
		TestGraphUtils.getLongLongEdgeData(env), env);

	DataSet<Tuple2<Long, Long>> verticesWithSumOfOutNeighborValues =
		graph.groupReduceOnNeighbors(new SumAllNeighbors(), EdgeDirection.ALL);
	List<Tuple2<Long, Long>> result = verticesWithSumOfOutNeighborValues.collect();

	expectedResult = "1,11\n" +
		"2,6\n" +
		"3,15\n" +
		"4,12\n" +
		"5,13\n";

	compareResultAsTuples(result, expectedResult);
}
 
Example 21
Project: flink   File: WindowOperatorFrom11MigrationTest.java   (6 votes)
@Override
public void apply(String key,
		W window,
		Iterable<Tuple2<String, Integer>> input,
		Collector<Tuple2<String, Integer>> out) throws Exception {

	if (!openCalled) {
		fail("Open was not called");
	}
	int sum = 0;

	for (Tuple2<String, Integer> t: input) {
		sum += t.f1;
	}
	out.collect(new Tuple2<>(key, sum));

}
 
Example 22
Project: flink   File: EuclideanGraphWeighing.java   (6 votes)
private static DataSet<Edge<Long, Double>> getEdgesDataSet(ExecutionEnvironment env) {
	if (fileOutput) {
		return env.readCsvFile(edgesInputPath)
				.lineDelimiter("\n")
				.types(Long.class, Long.class)
				.map(new MapFunction<Tuple2<Long, Long>, Edge<Long, Double>>() {

					@Override
					public Edge<Long, Double> map(Tuple2<Long, Long> tuple2) throws Exception {
						return new Edge<>(tuple2.f0, tuple2.f1, 0.0);
					}
				});
	} else {
		return EuclideanGraphData.getDefaultEdgeDataSet(env);
	}
}
 
Example 23
Project: flink   File: ElasticsearchSinkITCase.java   (6 votes)
@Test(expected = JobExecutionException.class)
public void testTransportClientFails() throws Exception {
	// this checks whether the TransportClient fails early when there is no cluster to
	// connect to. There isn't a similar test for the Node Client version since that
	// one will block and wait for a cluster to come online

	final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new TestSourceFunction());

	Map<String, String> config = new HashMap<>();
	// This instructs the sink to emit after every element, otherwise they would be buffered
	config.put(ElasticsearchSink.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
	config.put("cluster.name", "my-node-client-cluster");

	List<InetSocketAddress> transports = new ArrayList<>();
	transports.add(new InetSocketAddress(InetAddress.getByName("127.0.0.1"), 9300));

	source.addSink(new ElasticsearchSink<>(config, transports, new TestElasticsearchSinkFunction()));

	env.execute("Elasticsearch Node Client Test");
}
 
Example 24
Project: flink   File: CollectionDataSets.java   (6 votes)
public static DataSet<Tuple2<byte[], Integer>> getTuple2WithByteArrayDataSet(ExecutionEnvironment env) {
	List<Tuple2<byte[], Integer>> data = new ArrayList<>();
	data.add(new Tuple2<>(new byte[]{0, 4}, 1));
	data.add(new Tuple2<>(new byte[]{2, 0}, 1));
	data.add(new Tuple2<>(new byte[]{2, 0, 4}, 4));
	data.add(new Tuple2<>(new byte[]{2, 1}, 3));
	data.add(new Tuple2<>(new byte[]{0}, 0));
	data.add(new Tuple2<>(new byte[]{2, 0}, 1));
			
	TupleTypeInfo<Tuple2<byte[], Integer>> type = new TupleTypeInfo<>(
			PrimitiveArrayTypeInfo.BYTE_PRIMITIVE_ARRAY_TYPE_INFO,
			BasicTypeInfo.INT_TYPE_INFO
	);
	
	return env.fromCollection(data, type);
}
 
Example 25
Project: flink   File: SortPartitionITCase.java   (6 votes)
@Test
public void testSortPartitionWithKeySelector2() throws Exception {
	/*
	 * Test sort partition on an extracted key
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(4);

	DataSet<Tuple3<Integer, Long, String>> ds = CollectionDataSets.get3TupleDataSet(env);
	List<Tuple1<Boolean>> result = ds
		.map(new IdMapper<Tuple3<Integer, Long, String>>()).setParallelism(4) // parallelize input
		.sortPartition(new KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>>() {
			@Override
			public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> value) throws Exception {
				return new Tuple2<>(value.f0, value.f1);
			}
		}, Order.DESCENDING)
		.mapPartition(new OrderCheckMapper<>(new Tuple3Checker()))
		.distinct().collect();

	String expected = "(true)\n";

	compareResultAsText(result, expected);
}
 
Example 26
Project: flink   File: JoinITCase.java   (6 votes)
@Test
public void testDefaultJoinOnTuples() throws Exception {
	/*
	 * Default Join on tuples
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.get5TupleDataSet(env);
	DataSet<Tuple2<Tuple3<Integer, Long, String>, Tuple5<Integer, Long, Integer, String, Long>>> joinDs =
			ds1.join(ds2)
					.where(0)
					.equalTo(2);

	List<Tuple2<Tuple3<Integer, Long, String>, Tuple5<Integer, Long, Integer, String, Long>>> result = joinDs.collect();

	String expected = "(1,1,Hi),(2,2,1,Hallo Welt,2)\n" +
			"(2,2,Hello),(2,3,2,Hallo Welt wie,1)\n" +
			"(3,2,Hello world),(3,4,3,Hallo Welt wie gehts?,2)\n";

	compareResultAsTuples(result, expected);

}
 
Example 27
Project: flink   File: OuterJoinITCase.java   (6 votes)
@Test
public void testJoinWithTupleReturningKeySelectors() throws Exception {
	/*
	 * UDF Join on tuples with tuple-returning key selectors
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<Integer, Long, String>> ds1 = CollectionDataSets.getSmall3TupleDataSet(env);
	DataSet<Tuple5<Integer, Long, Integer, String, Long>> ds2 = CollectionDataSets.getSmall5TupleDataSet(env);
	DataSet<Tuple2<String, String>> joinDs =
			ds1.fullOuterJoin(ds2)
					.where(new KeySelector3()) //0, 1
					.equalTo(new KeySelector4()) // 0, 4
					.with(new T3T5FlatJoin());

	List<Tuple2<String, String>> result = joinDs.collect();

	String expected = "Hi,Hallo\n" +
			"Hello,Hallo Welt\n" +
			"Hello world,null\n" +
			"null,Hallo Welt wie\n";

	compareResultAsTuples(result, expected);
}
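KeySelector3, KeySelector4 and T3T5FlatJoin above are test helpers from the Flink test sources and are not shown; the inline comments (//0, 1 and // 0, 4) record which fields they extract. A hypothetical reconstruction of the two tuple-returning key selectors, for illustration only:

import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;

// hypothetical stand-ins: composite keys are returned as Tuple2 instances
class KeySelector3 implements KeySelector<Tuple3<Integer, Long, String>, Tuple2<Integer, Long>> {
    @Override
    public Tuple2<Integer, Long> getKey(Tuple3<Integer, Long, String> v) {
        return new Tuple2<>(v.f0, v.f1);  // fields 0 and 1
    }
}

class KeySelector4 implements KeySelector<Tuple5<Integer, Long, Integer, String, Long>, Tuple2<Integer, Long>> {
    @Override
    public Tuple2<Integer, Long> getKey(Tuple5<Integer, Long, Integer, String, Long> v) {
        return new Tuple2<>(v.f0, v.f4);  // fields 0 and 4
    }
}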
 
Example 28
Project: flink-examples   File: RatingsDistribution.java   (5 votes)
@Override
public void flatMap(String s, Collector<Tuple2<IntValue, Integer>> collector) throws Exception {
    // Every line contains comma-separated values:
    // user id, item id, rating, timestamp
    String[] split = s.split(",");
    String ratingStr = split[2];

    if (!ratingStr.equals("rating")) {
        int rating = (int) Double.parseDouble(ratingStr);
        ratingValue.setValue(rating);

        collector.collect(result);
    }
}
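ratingValue and result above are fields of the function that are reused across calls, an object-reuse pattern that avoids a per-record allocation. Hypothetical declarations consistent with the snippet:

// hypothetical fields of the enclosing FlatMapFunction (object-reuse pattern):
// result.f0 aliases ratingValue, so ratingValue.setValue(...) updates the
// tuple instance that is collected on every call
private final IntValue ratingValue = new IntValue();
private final Tuple2<IntValue, Integer> result = new Tuple2<>(ratingValue, 1);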
 
Example 29
Project: flink-examples   File: RatingsDistribution.java   (5 votes)
@Override
public void reduce(Iterable<Tuple2<IntValue, Integer>> iterable, Collector<Tuple2<IntValue, Integer>> collector) throws Exception {
    IntValue rating = null;
    int ratingsCount = 0;
    for (Tuple2<IntValue, Integer> tuple : iterable) {
        rating = tuple.f0;
        ratingsCount += tuple.f1;
    }

    collector.collect(new Tuple2<>(rating, ratingsCount));
}
 
Example 30
Project: flink-examples   File: Java8WordCount.java   (5 votes)
public static void main(String[] args) throws Exception {
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    DataSource<String> lines = env.fromElements(
        "Apache Flink is a community-driven open source framework for distributed big data analytics,",
        "like Hadoop and Spark. The core of Apache Flink is a distributed streaming dataflow engine written",
        " in Java and Scala.[1][2] It aims to bridge the gap between MapReduce-like systems and shared-nothing",
        "parallel database systems. Therefore, Flink executes arbitrary dataflow programs in a data-parallel and",
        "pipelined manner.[3] Flink's pipelined runtime system enables the execution of bulk/batch and stream",
        "processing programs.[4][5] Furthermore, Flink's runtime supports the execution of iterative algorithms natively.[6]"
    );

    lines.flatMap((line, out) -> {
        String[] words = line.split("\\W+");
        for (String word : words) {
            out.collect(new Tuple2<>(word, 1));
        }
    })
    .returns(new TupleTypeInfo<Tuple2<String, Integer>>(TypeInformation.of(String.class), TypeInformation.of(Integer.class)))
    .groupBy(0)
    .sum(1)
    .print();
}
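A note on the returns(...) call in this example: the lambda's Tuple2 type parameters are erased at compile time, so Flink cannot extract the flatMap output type and it has to be declared explicitly. Below is a sketch using the Types shortcut, assuming a Flink version that ships org.apache.flink.api.common.typeinfo.Types (1.3+); an anonymous new TypeHint<Tuple2<String, Integer>>() {} passed to returns() works the same way.

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.util.Collector;

public class ReturnsHintSketch {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        DataSet<String> lines = env.fromElements("to be or not to be");

        lines.flatMap((String line, Collector<Tuple2<String, Integer>> out) -> {
                for (String word : line.toLowerCase().split("\\W+")) {
                    out.collect(new Tuple2<>(word, 1));
                }
            })
            // declare the erased lambda output type explicitly
            .returns(Types.TUPLE(Types.STRING, Types.INT))
            .groupBy(0)
            .sum(1)
            .print();
    }
}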
 
Example 31
Project: flink-examples   File: TopTweet.java   (5 votes)
@Override
public void apply(TimeWindow window, Iterable<Tuple2<String, Integer>> hashTags, Collector<TweetsCount> out) throws Exception {
    Tuple2<String, Integer> topHashTag = new Tuple2<>("", 0);
    for (Tuple2<String, Integer> hashTag : hashTags) {
        if (hashTag.f1 > topHashTag.f1) {
            topHashTag = hashTag;
        }
    }

    out.collect(new TweetsCount(window.getStart(), window.getEnd(), topHashTag.f0, topHashTag.f1));
}
 
Example 32
Project: flink-connectors   File: EventTimeOrderingOperatorTest.java   (5 votes)
@Before
public void before() throws Exception {
    operator = new EventTimeOrderingOperator<>();
    operator.setInputType(TypeInformation.of(new TypeHint<Tuple2<String, Long>>() {
    }), new ExecutionConfig());
    testHarness = new KeyedOneInputStreamOperatorTestHarness<>(
            operator, in -> in.f0, TypeInformation.of(String.class));
    testHarness.setTimeCharacteristic(TimeCharacteristic.EventTime);
    testHarness.open();
}
 
Example 33
Project: flink-connectors   File: EventTimeOrderingOperatorTest.java   (5 votes)
@Test
public void testProcessingTime() throws Exception {
    testHarness.setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    testHarness.processElement(new StreamRecord<>(new Tuple2<>(K1, 0L)));
    Queue<Object> actual = testHarness.getOutput();
    Queue<Object> expected = new ConcurrentLinkedQueue<>();
    expected.add(new StreamRecord<>(new Tuple2<>(K1, 0L)));
    TestHarnessUtil.assertOutputEquals("Unexpected output", expected, actual);
}
 
Example 34
Project: aliyun-log-flink-connector   File: FlinkLogConsumer.java   (5 votes)
public void initializeState(FunctionInitializationContext context) throws Exception {
    LOG.info("initializeState...");
    TypeInformation<Tuple2<LogstoreShardMeta, String>> shardsStateTypeInfo = new TupleTypeInfo<Tuple2<LogstoreShardMeta, String>>(
            TypeInformation.of(LogstoreShardMeta.class),
            TypeInformation.of(String.class));

    cursorStateForCheckpoint = context.getOperatorStateStore().getUnionListState(
            new ListStateDescriptor<>(curcorStateStoreName, shardsStateTypeInfo));

    if (context.isRestored()) {
        if (cursorsToRestore == null) {
            cursorsToRestore = new HashMap<LogstoreShardMeta, String>();
            for (Tuple2<LogstoreShardMeta, String> cursor : cursorStateForCheckpoint.get()) {
                LOG.info("initializeState, project: {}, logstore: {}, shard: {}, checkpoint: {}", logProject, logStore, cursor.f0.toString(), cursor.f1);
                cursorsToRestore.put(cursor.f0, cursor.f1);
                if (consumerGroupName != null && logClient != null) {
                    logClient.updateCheckpoint(logProject, logStore, consumerGroupName, "flinkTask-" + getRuntimeContext().getIndexOfThisSubtask() + "Of" + getRuntimeContext().getNumberOfParallelSubtasks(), cursor.f0.getShardId(), cursor.f1);
                }
            }

            LOG.info("Setting restore state in the FlinkLogConsumer. Using the following offsets: {}",
                    cursorsToRestore);
        }
    }
    else {
        LOG.info("No restore state for FlinkLogConsumer.");
    }
}
 
Example 35
Project: Mastering-Apache-Flink   File: Splitter.java   (5 votes)
@Override
public void flatMap(String value, Collector<Tuple2<String, Double>> out) throws Exception {

	if (null != value && value.contains(",")) {
		String[] parts = value.split(",");
		out.collect(new Tuple2<String, Double>(parts[2], Double.parseDouble(parts[1])));
	}
}
 
Example 36
Project: flink-java-project   File: SocketTextStreamWordCount.java   (5 votes)
public static void main(String[] args) throws Exception {

	if (args.length != 2) {
		System.err.println("USAGE:\nSocketTextStreamWordCount <hostname> <port>");
		return;
	}

	String hostName = args[0];
	Integer port = Integer.parseInt(args[1]);

	// set up the execution environment
	final StreamExecutionEnvironment env = StreamExecutionEnvironment
			.getExecutionEnvironment();

	// get input data
	DataStream<String> text = env.socketTextStream(hostName, port);

	DataStream<Tuple2<String, Integer>> counts =
			// split up the lines into pairs (2-tuples) containing: (word, 1)
			text.flatMap(new LineSplitter())
			// group by the tuple field "0" and sum up tuple field "1"
			.keyBy(0)
			.sum(1);

	counts.print();

	// execute program
	env.execute("Java WordCount from SocketTextStream Example");
}
 
Example 37
Project: flink-java-project   File: SocketTextStreamWordCount.java   (5 votes)
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
	// normalize and split the line
	String[] tokens = value.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 0) {
			out.collect(new Tuple2<String, Integer>(token, 1));
		}
	}
}
 
Example 38
Project: flink-java-project   File: WordCount.java   (5 votes)
@Override
public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
	// normalize and split the line
	String[] tokens = value.toLowerCase().split("\\W+");

	// emit the pairs
	for (String token : tokens) {
		if (token.length() > 0) {
			out.collect(new Tuple2<String, Integer>(token, 1));
		}
	}
}
 
Example 39
Project: flink-java-project   File: PoupularPlacesMain.java   (5 votes)
@Override
public void apply(Tuple key, TimeWindow timeWindow, Iterable<Tuple2<Integer, Boolean>> events,
		Collector<Tuple4<Integer, Long, Boolean, Integer>> collector) throws Exception {

	Tuple2<Integer, Boolean> castedKey = (Tuple2<Integer, Boolean>)key;
	int gridId = castedKey.f0;
	boolean isStart = castedKey.f1;
	long windowTime = timeWindow.getEnd();
	int rideCounter = Iterables.size(events);

	collector.collect(Tuple4.of(gridId, windowTime, isStart, rideCounter));
}
 
Example 40
Project: big-data-benchmark   File: FlinkWordCount.java   (5 votes)
public static void main(String[] args) throws Exception {
    String inputPath = args[0];
    String outputPath = args[1] + "_" + System.currentTimeMillis();

    // set up the execution environment
    final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    // get input data
    DataSet<String> text = env.readTextFile(inputPath);
    DataSet<Tuple2<String, Long>> counts = text
            .<Tuple2<String, Long>>flatMap((line, out) -> {
                StringTokenizer tokenizer = new StringTokenizer(line);
                while (tokenizer.hasMoreTokens()) {
                    out.collect(new Tuple2<>(tokenizer.nextToken(), 1L));
                }
            })
            .returns(new TypeHint<Tuple2<String, Long>>() {
            })
            // group by the tuple field "0" and sum up tuple field "1"
            .groupBy(0)
            .sum(1);

    // emit result
    counts.writeAsCsv(outputPath);
    // execute program
    long t = System.currentTimeMillis();
    env.execute("Streaming WordCount Example");
    System.out.println("Time=" + (System.currentTimeMillis() - t));
}