Java Code Examples for org.apache.flink.api.java.tuple.Tuple2

The following examples show how to use org.apache.flink.api.java.tuple.Tuple2. They are extracted from open source projects; each example notes its source project, source file, and license.
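Before the project examples, here is a minimal, self-contained sketch of Tuple2 itself: construction, positional field access via the public f0/f1 fields, and in-place mutation. The Tuple2 methods shown (of, setFields, copy, swap) are part of Flink's public API; the surrounding Tuple2Basics class is illustrative only.

import org.apache.flink.api.java.tuple.Tuple2;

public class Tuple2Basics {

	public static void main(String[] args) {
		// construct via the static factory or the constructor
		Tuple2<String, Integer> a = Tuple2.of("hello", 1);
		Tuple2<String, Integer> b = new Tuple2<>("world", 2);

		// fields are public and addressed positionally as f0 and f1
		String word = a.f0;
		int count = a.f1;

		// tuples are mutable, which is why several examples below reuse one instance per operator
		b.setFields("hello", 3);
		b.f1 += 1;

		// copy() returns a shallow copy; swap() returns a new tuple with the fields reversed
		Tuple2<String, Integer> copy = a.copy();
		Tuple2<Integer, String> swapped = a.swap();

		System.out.println(word + ":" + count + " " + b + " " + copy + " " + swapped);
	}
}

The same f0/f1 naming also drives Flink's key selection: expressions such as groupBy("f1") or sortGroup("f0.f0", ...) in Example 1 address tuple fields, including nested tuple fields, by these positional names.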
Example 1
Source Project: Flink-CEPplus   Source File: GroupReduceITCase.java    License: Apache License 2.0
@Test
public void testIntBasedDefinitionOnGroupSortForPartialNestedTuple() throws Exception {
	/*
	 * Test int-based definition on group sort, for (partial) nested Tuple ASC
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
	// f0.f0 is first integer
	DataSet<String> reduceDs = ds.groupBy("f1")
			.sortGroup("f0.f0", Order.ASCENDING)
			.sortGroup("f0.f1", Order.ASCENDING)
			.reduceGroup(new NestedTupleReducer());
	List<String> result = reduceDs.collect();

	String expected = "a--(1,2)-(1,3)-(2,1)-\n" +
			"b--(2,2)-\n" +
			"c--(3,3)-(3,6)-(4,9)-\n";

	compareResultAsText(result, expected);
}
 
Example 2
Source Project: flink   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testFoldEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple3<String, String, Integer>> window1 = source
			.keyBy(0)
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.fold(new Tuple3<>("", "", 1), new DummyFolder());

	OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof FoldingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 3
Source Project: flink   Source File: ScatterGatherIteration.java    License: Apache License 2.0
@Override
public void coGroup(Iterable<Edge<K, EV>> edges, Iterable<Vertex<K, Tuple3<VV, LongValue, LongValue>>> state,
					Collector<Tuple2<K, Message>> out) throws Exception {

	final Iterator<Vertex<K, Tuple3<VV, LongValue, LongValue>>> stateIter = state.iterator();

	if (stateIter.hasNext()) {
		Vertex<K, Tuple3<VV, LongValue, LongValue>> vertexWithDegrees = stateIter.next();

		nextVertex.f0 = vertexWithDegrees.f0;
		nextVertex.f1 = vertexWithDegrees.f1.f0;

		scatterFunction.setInDegree(vertexWithDegrees.f1.f1.getValue());
		scatterFunction.setOutDegree(vertexWithDegrees.f1.f2.getValue());

		scatterFunction.set(edges.iterator(), out, vertexWithDegrees.getId());
		scatterFunction.sendMessages(nextVertex);
	}
}
 
Example 4
Source Project: flink   Source File: SkipListSerializerTest.java    License: Apache License 2.0
private void testSkipListKeySerializer(int delta) throws IOException {
	String key = "key-abcdedg" + delta;
	String namespace = "namespace-dfsfdafd" + delta;

	byte[] skipListKey = skipListKeySerializer.serialize(key, namespace);
	int offset = 10;
	byte[] data = new byte[offset + skipListKey.length];
	System.arraycopy(skipListKey, 0, data, offset, skipListKey.length);
	MemorySegment skipListKeySegment = MemorySegmentFactory.wrap(data);
	assertEquals(key, skipListKeySerializer.deserializeKey(skipListKeySegment, offset, skipListKey.length));
	assertEquals(namespace, skipListKeySerializer.deserializeNamespace(skipListKeySegment, offset, skipListKey.length));

	Tuple2<byte[], byte[]> serializedKeyAndNamespace =
		skipListKeySerializer.getSerializedKeyAndNamespace(skipListKeySegment, offset);
	assertEquals(key, deserialize(keySerializer, serializedKeyAndNamespace.f0));
	assertEquals(namespace, deserialize(namespaceSerializer, serializedKeyAndNamespace.f1));

	byte[] serializedNamespace = skipListKeySerializer.serializeNamespace(namespace);
	assertEquals(namespace, deserialize(namespaceSerializer, serializedNamespace));
}
 
Example 5
Source Project: flink   Source File: CollectionDataSets.java    License: Apache License 2.0
public static DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> getGroupSortedNestedTupleDataSet2(ExecutionEnvironment env) {

		List<Tuple3<Tuple2<Integer, Integer>, String, Integer>> data = new ArrayList<>();
		data.add(new Tuple3<>(new Tuple2<>(1, 3), "a", 2));
		data.add(new Tuple3<>(new Tuple2<>(1, 2), "a", 1));
		data.add(new Tuple3<>(new Tuple2<>(2, 1), "a", 3));
		data.add(new Tuple3<>(new Tuple2<>(2, 2), "b", 4));
		data.add(new Tuple3<>(new Tuple2<>(3, 3), "c", 5));
		data.add(new Tuple3<>(new Tuple2<>(3, 6), "c", 6));
		data.add(new Tuple3<>(new Tuple2<>(4, 9), "c", 7));

		TupleTypeInfo<Tuple3<Tuple2<Integer, Integer>, String, Integer>> type = new TupleTypeInfo<>(
				new TupleTypeInfo<Tuple2<Integer, Integer>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
				BasicTypeInfo.STRING_TYPE_INFO,
				BasicTypeInfo.INT_TYPE_INFO
		);

		return env.fromCollection(data, type);
	}
 
Example 6
Source Project: flink-learning   Source File: ExplainingTable.java    License: Apache License 2.0
public static void main(String[] args) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

    DataStream<Tuple2<Integer, String>> stream1 = env.fromElements(new Tuple2<>(1, "hello"));
    DataStream<Tuple2<Integer, String>> stream2 = env.fromElements(new Tuple2<>(1, "hello"));

    Table table1 = tEnv.fromDataStream(stream1, "count, word");
    Table table2 = tEnv.fromDataStream(stream2, "count, word");
    Table table = table1
            .where("LIKE(word, 'F%')")
            .unionAll(table2);

    String explanation = tEnv.explain(table);
    System.out.println(explanation);
}
 
Example 7
Source Project: Flink-CEPplus   Source File: AggregateITCase.java    License: Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Grouped Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example 8
Source Project: flink   Source File: AbstractOuterJoinTaskTest.java    License: Apache License 2.0
private void testSortBothOuterJoinTask(int keyCnt1, int valCnt1, int keyCnt2, int valCnt2) throws Exception {
	setOutput(this.outList, this.serializer);
	addDriverComparator(this.comparator1);
	addDriverComparator(this.comparator2);
	getTaskConfig().setDriverPairComparator(new RuntimePairComparatorFactory());
	getTaskConfig().setDriverStrategy(this.getSortDriverStrategy());
	getTaskConfig().setRelativeMemoryDriver(this.bnljn_frac);
	setNumFileHandlesForSort(4);
	
	final AbstractOuterJoinDriver<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testTask = getOuterJoinDriver();
	
	addInputSorted(new UniformIntTupleGenerator(keyCnt1, valCnt1, false), this.serializer, this.comparator1.duplicate());
	addInputSorted(new UniformIntTupleGenerator(keyCnt2, valCnt2, false), this.serializer, this.comparator2.duplicate());
	testDriver(testTask, MockJoinStub.class);
	
	final int expCnt = calculateExpectedCount(keyCnt1, valCnt1, keyCnt2, valCnt2);
	
	Assert.assertTrue("Result set size was " + this.outList.size() + ". Expected was " + expCnt, this.outList.size() == expCnt);
	
	this.outList.clear();
}
 
Example 9
Source Project: flink   Source File: RocksFullSnapshotStrategy.java    License: Apache License 2.0
private void writeSnapshotToOutputStream(
	@Nonnull CheckpointStreamWithResultProvider checkpointStreamWithResultProvider,
	@Nonnull KeyGroupRangeOffsets keyGroupRangeOffsets) throws IOException, InterruptedException {

	final List<Tuple2<RocksIteratorWrapper, Integer>> kvStateIterators =
		new ArrayList<>(metaData.size());
	final DataOutputView outputView =
		new DataOutputViewStreamWrapper(checkpointStreamWithResultProvider.getCheckpointOutputStream());
	final ReadOptions readOptions = new ReadOptions();
	try {
		readOptions.setSnapshot(snapshot);
		writeKVStateMetaData(kvStateIterators, readOptions, outputView);
		writeKVStateData(kvStateIterators, checkpointStreamWithResultProvider, keyGroupRangeOffsets);
	} finally {

		for (Tuple2<RocksIteratorWrapper, Integer> kvStateIterator : kvStateIterators) {
			IOUtils.closeQuietly(kvStateIterator.f0);
		}

		IOUtils.closeQuietly(readOptions);
	}
}
 
Example 10
Source Project: flink   Source File: CassandraTupleWriteAheadSinkExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(1000);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

	CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
		.setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
		.enableWriteAheadLog()
		.setClusterBuilder(new ClusterBuilder() {

			private static final long serialVersionUID = 2793938419775311824L;

			@Override
			public Cluster buildCluster(Cluster.Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

	env.execute();
}
 
Example 11
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num) throws Exception {
	Map<Integer, Collection<String>> map = new HashMap<>();
	Tuple2<Integer, String> pair = new Tuple2<>();
	
	for (int i = 0; i < num; i++) {
		iter.next(pair);
		int key = pair.f0;
		
		if (!map.containsKey(key)) {
			map.put(key, new ArrayList<String>());
		}

		Collection<String> values = map.get(key);
		values.add(pair.f1);
	}
	return map;
}
 
Example 12
Source Project: flink   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceProcessingTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingProcessingTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 13
Source Project: flink   Source File: RestServerEndpointITCase.java    License: Apache License 2.0
@Test
public void testEndpointsMustBeUnique() throws Exception {
	final RestServerEndpointConfiguration serverConfig = RestServerEndpointConfiguration.fromConfiguration(config);

	final List<Tuple2<RestHandlerSpecification, ChannelInboundHandler>> handlers = Arrays.asList(
		Tuple2.of(new TestHeaders(), testHandler),
		Tuple2.of(new TestHeaders(), testUploadHandler)
	);

	assertThrows("REST handler registration",
		FlinkRuntimeException.class,
		() -> {
			try (TestRestServerEndpoint restServerEndpoint = new TestRestServerEndpoint(serverConfig, handlers)) {
				restServerEndpoint.start();
				return null;
			}
		});
}
 
Example 14
Source Project: flink-siddhi   Source File: SiddhiStream.java    License: Apache License 2.0
/**
 * Siddhi Continuous Query Language (CQL)
 *
 * @return ExecutionSiddhiStream context
 */
public ExecutionSiddhiStream cql(DataStream<ControlEvent> controlStream) {
    DataStream<Tuple2<StreamRoute, Object>> unionStream = controlStream
        .map(new NamedControlStream(ControlEvent.DEFAULT_INTERNAL_CONTROL_STREAM))
        .broadcast()
        .union(this.toDataStream())
        .transform("add route transform",
            SiddhiTypeFactory.getStreamTupleTypeInformation(TypeInformation.of(Object.class)),
            new AddRouteOperator(getCepEnvironment().getDataStreamSchemas()));

    DataStream<Tuple2<StreamRoute, Object>> partitionedStream = new DataStream<>(
        unionStream.getExecutionEnvironment(),
        new PartitionTransformation<>(unionStream.getTransformation(),
        new DynamicPartitioner()));
    return new ExecutionSiddhiStream(partitionedStream, null, getCepEnvironment());
}
 
Example 15
Source Project: flink   Source File: SimpleTupleJoinFunction.java    License: Apache License 2.0
@Override
public void join(Tuple2<String, String> first, Tuple2<String, Integer> second, Collector<Tuple4<String, String, String, Object>> out) throws Exception {
	if (first == null) {
		out.collect(new Tuple4<String, String, String, Object>(null, null, second.f0, second.f1));
	} else if (second == null) {
		out.collect(new Tuple4<String, String, String, Object>(first.f0, first.f1, null, null));
	} else {
		out.collect(new Tuple4<String, String, String, Object>(first.f0, first.f1, second.f0, second.f1));
	}
}
 
Example 16
Source Project: Flink-CEPplus   Source File: ReduceOnEdgesMethodsITCase.java    License: Apache License 2.0
@Override
public void iterateEdges(Iterable<Tuple2<Long, Edge<Long, Long>>> edges,
		Collector<Tuple2<Long, Long>> out) throws Exception {

	for (Tuple2<Long, Edge<Long, Long>> edge : edges) {
		if (edge.f0 != 5) {
			out.collect(new Tuple2<>(edge.f0, edge.f1.getTarget()));
		}
	}
}
 
Example 17
Source Project: Flink-CEPplus   Source File: HITS.java    License: Apache License 2.0
@Override
public Tuple2<T, T> map(Edge<T, ET> value)
		throws Exception {
	output.f0 = value.f0;
	output.f1 = value.f1;
	return output;
}
 
Example 18
Source Project: flink   Source File: AllWindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithProcessWindowFunctionProcessingTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple3<String, String, Integer>> window = source
			.windowAll(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.reduce(new DummyReducer(), new ProcessAllWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple3<String, String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(new Tuple3<>(in.f0, in.f0, in.f1));
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 19
Source Project: flink   Source File: PlanRightUnwrappingCoGroupOperator.java    License: Apache License 2.0
@Override
public void coGroup(
		Iterable<I1> records1,
		Iterable<Tuple2<K, I2>> records2,
		Collector<OUT> out) throws Exception {

	iter2.set(records2.iterator());
	this.wrappedFunction.coGroup(records1, iter2, out);
}
 
Example 20
Source Project: flink-simple-tutorial   Source File: BroadcastExample.java    License: Apache License 2.0
public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(4);

        // create the (name, age) data set that will be broadcast
        Tuple2<String, Integer> john = new Tuple2<>("john", 23);
        Tuple2<String, Integer> tom = new Tuple2<>("tom", 24);
        Tuple2<String, Integer> shiny = new Tuple2<>("shiny", 22);
        DataSource<Tuple2<String, Integer>> broadcastData = env.fromElements(john, tom, shiny);

        // create a new dataset d1 with parallelism 4;
        // d1 cannot read broadcastData directly, because the two datasets may sit on different nodes or slots, so Flink forbids direct access
        DataSet<String> d1 = env.fromElements("john", "tom", "shiny").setParallelism(4);

        // use a RichMapFunction and fetch the broadcast variable in open()
        d1.map(new RichMapFunction<String, String>() {
            List<Tuple2<String, Integer>> bc;
            HashMap<String, Integer> map = new HashMap<>();
            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                this.bc = getRuntimeContext().getBroadcastVariable("broadcastData");
                for (Tuple2<String, Integer> tp : bc) {
                    this.map.put(tp.f0, tp.f1);
                }
            }
            @Override
            public String map(String s) throws Exception {
                Integer age = this.map.get(s);
                return s + "->" + age;
            }
        }).withBroadcastSet(broadcastData, "broadcastData").print();

        // print() triggers execution eagerly, so an explicit env.execute() is not needed
        // env.execute("Broadcast Example");
    }
 
Example 21
Source Project: flink   Source File: DataSetUtilsITCase.java    License: Apache License 2.0
@Test
public void testCountElementsPerPartition() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> ds = DataSetUtils.countElementsPerPartition(numbers);

	Assert.assertEquals(env.getParallelism(), ds.count());
	Assert.assertEquals(expectedSize, ds.sum(1).collect().get(0).f1.longValue());
}
 
Example 22
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example 23
Source Project: flink   Source File: WindowTranslationTest.java    License: Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.trigger(CountTrigger.of(1))
			.process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(in);
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example 24
@Override
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
    // save the buffered data into state to guarantee exactly-once semantics
    localPvStatListState.clear();
    for (Map.Entry<String, Long> appIdPv : localPvStat.entrySet()) {
        localPvStatListState.add(Tuple2.of(appIdPv.getKey(), appIdPv.getValue()));
        log.info("snapshot   subtask: {}    appId: {}   pv: {}", subtaskIndex, appIdPv.getKey(), appIdPv.getValue());
    }
}
 
Example 25
Source Project: flink   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test
public void testRangePartitionWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple2<Integer, String>> ds = getTupleDataSet(env);
	ds.partitionByRange(0).withOrders(Order.ASCENDING);
}
 
Example 26
Source Project: flink   Source File: EdgeTargetDegreesTest.java    License: Apache License 2.0
@Test
public void testWithEmptyGraphWithoutVertices() throws Exception {
	DataSet<Edge<LongValue, Tuple2<NullValue, Degrees>>> targetDegrees = emptyGraphWithoutVertices
		.run(new EdgeTargetDegrees<>());

	assertEquals(0, targetDegrees.collect().size());
}
 
Example 27
Source Project: flink   Source File: AdditionalOperatorsTest.java    License: Apache License 2.0
@Test
public void testCrossWithSmall() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0,1);
	DataSet<Long> set2 = env.generateSequence(0,1);

	set1.crossWithTiny(set2).name("Cross")
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	try {
		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileWithStats(plan);
		OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
		
		DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
		Channel in1 = crossPlanNode.getInput1();
		Channel in2 = crossPlanNode.getInput2();
		
		assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
		assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The Flink optimizer is unable to compile this plan correctly.");
	}
}
 
Example 28
public void run() {
	try {
		for (int i = 0; i < NO_OF_FILES; i++) {
			Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
			long modTime;
			do {

				// give it some time so that the files have
				// different modification timestamps.
				Thread.sleep(50);

				tmpFile = fillWithData(localFsURI, "file", i, "This is test line.");

				modTime = localFs.getFileStatus(tmpFile.f0).getModificationTime();
				if (modTime <= lastCreatedModTime) {
					// delete the last created file to recreate it with a different timestamp
					localFs.delete(tmpFile.f0, false);
				}
			} while (modTime <= lastCreatedModTime);
			lastCreatedModTime = modTime;

			// rename the file
			org.apache.hadoop.fs.Path file =
				new org.apache.hadoop.fs.Path(localFsURI + "/file" + i);
			localFs.rename(tmpFile.f0, file);
			Assert.assertTrue(localFs.exists(file));

			filesCreated.add(file);
			fileContents.put(i, tmpFile.f1);
		}
	} catch (IOException | InterruptedException e) {
		e.printStackTrace();
	}
}
 
Example 29
Source Project: Flink-CEPplus   Source File: TestUtils.java    License: Apache License 2.0
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	final BucketAssigner<Tuple2<String, Integer>, String> bucketer = new TupleToStringBucketer();

	final Encoder<Tuple2<String, Integer>> encoder = (element, stream) -> {
		stream.write((element.f0 + '@' + element.f1).getBytes(StandardCharsets.UTF_8));
		stream.write('\n');
	};

	return createCustomRescalingTestSink(
			outDir,
			totalParallelism,
			taskIdx,
			10L,
			bucketer,
			encoder,
			rollingPolicy,
			new DefaultBucketFactoryImpl<>());
}
 
Example 30
Source Project: flink   Source File: PartitionOperatorTest.java    License: Apache License 2.0
@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}