org.apache.flink.api.java.tuple.Tuple2 Java Examples

The following examples show how to use org.apache.flink.api.java.tuple.Tuple2. Each example is drawn from an open-source project; the source file and originating project are noted above the code.
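Before the project examples, here is a minimal, self-contained sketch of the Tuple2 basics they all rely on: a tuple is created with the constructor or the Tuple2.of factory, and its fields are public, positional, and mutable (f0 is the first field, f1 the second). The class name Tuple2Basics is only for illustration.

import org.apache.flink.api.java.tuple.Tuple2;

public class Tuple2Basics {
	public static void main(String[] args) {
		// construct via the constructor or the static factory
		Tuple2<String, Integer> a = new Tuple2<>("hello", 1);
		Tuple2<String, Integer> b = Tuple2.of("world", 2);

		// fields are public and accessed positionally
		String word = a.f0;
		int count = a.f1;

		// tuples are mutable, which lets operators reuse a single instance
		b.f0 = word;
		b.f1 = count + 1;

		System.out.println(a + " " + b); // prints (hello,1) (hello,2)
	}
}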
Example #1
Source File: GroupReduceITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testIntBasedDefinitionOnGroupSortForPartialNestedTuple() throws Exception {
	/*
	 * Test int-based definition on group sort, for (partial) nested Tuple ASC
	 */
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);

	DataSet<Tuple2<Tuple2<Integer, Integer>, String>> ds = CollectionDataSets.getGroupSortedNestedTupleDataSet(env);
	// f0.f0 is first integer
	DataSet<String> reduceDs = ds.groupBy("f1")
			.sortGroup("f0.f0", Order.ASCENDING)
			.sortGroup("f0.f1", Order.ASCENDING)
			.reduceGroup(new NestedTupleReducer());
	List<String> result = reduceDs.collect();

	String expected = "a--(1,2)-(1,3)-(2,1)-\n" +
			"b--(2,2)-\n" +
			"c--(3,3)-(3,6)-(4,9)-\n";

	compareResultAsText(result, expected);
}
 
Example #2
Source File: WindowTranslationTest.java    From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testFoldEventTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple3<String, String, Integer>> window1 = source
			.keyBy(0)
			.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.fold(new Tuple3<>("", "", 1), new DummyFolder());

	OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof FoldingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #3
Source File: ScatterGatherIteration.java    From flink with Apache License 2.0
@Override
public void coGroup(Iterable<Edge<K, EV>> edges, Iterable<Vertex<K, Tuple3<VV, LongValue, LongValue>>> state,
					Collector<Tuple2<K, Message>> out) throws Exception {

	final Iterator<Vertex<K, Tuple3<VV, LongValue, LongValue>>> stateIter = state.iterator();

	if (stateIter.hasNext()) {
		Vertex<K, Tuple3<VV, LongValue, LongValue>> vertexWithDegrees = stateIter.next();

		nextVertex.f0 = vertexWithDegrees.f0;
		nextVertex.f1 = vertexWithDegrees.f1.f0;

		scatterFunction.setInDegree(vertexWithDegrees.f1.f1.getValue());
		scatterFunction.setOutDegree(vertexWithDegrees.f1.f2.getValue());

		scatterFunction.set(edges.iterator(), out, vertexWithDegrees.getId());
		scatterFunction.sendMessages(nextVertex);
	}
}
 
Example #4
Source File: SkipListSerializerTest.java    From flink with Apache License 2.0
private void testSkipListKeySerializer(int delta) throws IOException {
	String key = "key-abcdedg" + delta;
	String namespace = "namespace-dfsfdafd" + delta;

	byte[] skipListKey = skipListKeySerializer.serialize(key, namespace);
	int offset = 10;
	byte[] data = new byte[10 + skipListKey.length];
	System.arraycopy(skipListKey, 0, data, offset, skipListKey.length);
	MemorySegment skipListKeySegment = MemorySegmentFactory.wrap(data);
	assertEquals(key, skipListKeySerializer.deserializeKey(skipListKeySegment, offset, skipListKey.length));
	assertEquals(namespace, skipListKeySerializer.deserializeNamespace(skipListKeySegment, offset, skipListKey.length));

	Tuple2<byte[], byte[]> serializedKeyAndNamespace =
		skipListKeySerializer.getSerializedKeyAndNamespace(skipListKeySegment, offset);
	assertEquals(key, deserialize(keySerializer, serializedKeyAndNamespace.f0));
	assertEquals(namespace, deserialize(namespaceSerializer, serializedKeyAndNamespace.f1));

	byte[] serializedNamespace = skipListKeySerializer.serializeNamespace(namespace);
	assertEquals(namespace, deserialize(namespaceSerializer, serializedNamespace));
}
 
Example #5
Source File: CollectionDataSets.java    From flink with Apache License 2.0
public static DataSet<Tuple3<Tuple2<Integer, Integer>, String, Integer>> getGroupSortedNestedTupleDataSet2(ExecutionEnvironment env) {

	List<Tuple3<Tuple2<Integer, Integer>, String, Integer>> data = new ArrayList<>();
	data.add(new Tuple3<>(new Tuple2<>(1, 3), "a", 2));
	data.add(new Tuple3<>(new Tuple2<>(1, 2), "a", 1));
	data.add(new Tuple3<>(new Tuple2<>(2, 1), "a", 3));
	data.add(new Tuple3<>(new Tuple2<>(2, 2), "b", 4));
	data.add(new Tuple3<>(new Tuple2<>(3, 3), "c", 5));
	data.add(new Tuple3<>(new Tuple2<>(3, 6), "c", 6));
	data.add(new Tuple3<>(new Tuple2<>(4, 9), "c", 7));

	TupleTypeInfo<Tuple3<Tuple2<Integer, Integer>, String, Integer>> type = new TupleTypeInfo<>(
			new TupleTypeInfo<Tuple2<Integer, Integer>>(BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO),
			BasicTypeInfo.STRING_TYPE_INFO,
			BasicTypeInfo.INT_TYPE_INFO
	);

	return env.fromCollection(data, type);
}
 
Example #6
Source File: ExplainingTable.java    From flink-learning with Apache License 2.0
public static void main(String[] args) {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = StreamTableEnvironment.create(env);

    DataStream<Tuple2<Integer, String>> stream1 = env.fromElements(new Tuple2<>(1, "hello"));
    DataStream<Tuple2<Integer, String>> stream2 = env.fromElements(new Tuple2<>(1, "hello"));

    Table table1 = tEnv.fromDataStream(stream1, "count, word");
    Table table2 = tEnv.fromDataStream(stream2, "count, word");
    Table table = table1
            .where("LIKE(word, 'F%')")
            .unionAll(table2);

    String explanation = tEnv.explain(table);
    System.out.println(explanation);
}
 
Example #7
Source File: AggregateITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testGroupedAggregateOfMutableValueTypes() throws Exception {
	/*
	 * Grouped Aggregate of mutable value types
	 */

	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	DataSet<Tuple3<IntValue, LongValue, StringValue>> ds = ValueCollectionDataSets.get3TupleDataSet(env);
	DataSet<Tuple2<IntValue, LongValue>> aggregateDs = ds.groupBy(1)
			.aggregate(Aggregations.SUM, 0)
			.project(1, 0);

	List<Tuple2<IntValue, LongValue>> result = aggregateDs.collect();

	String expected = "1,1\n" +
			"2,5\n" +
			"3,15\n" +
			"4,34\n" +
			"5,65\n" +
			"6,111\n";

	compareResultAsTuples(result, expected);
}
 
Example #8
Source File: AbstractOuterJoinTaskTest.java    From flink with Apache License 2.0
private void testSortBothOuterJoinTask(int keyCnt1, int valCnt1, int keyCnt2, int valCnt2) throws Exception {
	setOutput(this.outList, this.serializer);
	addDriverComparator(this.comparator1);
	addDriverComparator(this.comparator2);
	getTaskConfig().setDriverPairComparator(new RuntimePairComparatorFactory());
	getTaskConfig().setDriverStrategy(this.getSortDriverStrategy());
	getTaskConfig().setRelativeMemoryDriver(this.bnljn_frac);
	setNumFileHandlesForSort(4);
	
	final AbstractOuterJoinDriver<Tuple2<Integer, Integer>, Tuple2<Integer, Integer>, Tuple2<Integer, Integer>> testTask = getOuterJoinDriver();
	
	addInputSorted(new UniformIntTupleGenerator(keyCnt1, valCnt1, false), this.serializer, this.comparator1.duplicate());
	addInputSorted(new UniformIntTupleGenerator(keyCnt2, valCnt2, false), this.serializer, this.comparator2.duplicate());
	testDriver(testTask, MockJoinStub.class);
	
	final int expCnt = calculateExpectedCount(keyCnt1, valCnt1, keyCnt2, valCnt2);
	
	Assert.assertTrue("Result set size was " + this.outList.size() + ". Expected was " + expCnt, this.outList.size() == expCnt);
	
	this.outList.clear();
}
 
Example #9
Source File: RocksFullSnapshotStrategy.java    From flink with Apache License 2.0
private void writeSnapshotToOutputStream(
	@Nonnull CheckpointStreamWithResultProvider checkpointStreamWithResultProvider,
	@Nonnull KeyGroupRangeOffsets keyGroupRangeOffsets) throws IOException, InterruptedException {

	final List<Tuple2<RocksIteratorWrapper, Integer>> kvStateIterators =
		new ArrayList<>(metaData.size());
	final DataOutputView outputView =
		new DataOutputViewStreamWrapper(checkpointStreamWithResultProvider.getCheckpointOutputStream());
	final ReadOptions readOptions = new ReadOptions();
	try {
		readOptions.setSnapshot(snapshot);
		writeKVStateMetaData(kvStateIterators, readOptions, outputView);
		writeKVStateData(kvStateIterators, checkpointStreamWithResultProvider, keyGroupRangeOffsets);
	} finally {

		for (Tuple2<RocksIteratorWrapper, Integer> kvStateIterator : kvStateIterators) {
			IOUtils.closeQuietly(kvStateIterator.f0);
		}

		IOUtils.closeQuietly(readOptions);
	}
}
 
Example #10
Source File: CassandraTupleWriteAheadSinkExample.java    From flink with Apache License 2.0
public static void main(String[] args) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(1);
	env.enableCheckpointing(1000);
	env.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 1000));
	env.setStateBackend(new FsStateBackend("file:///" + System.getProperty("java.io.tmpdir") + "/flink/backend"));

	CassandraSink<Tuple2<String, Integer>> sink = CassandraSink.addSink(env.addSource(new MySource()))
		.setQuery("INSERT INTO example.values (id, counter) values (?, ?);")
		.enableWriteAheadLog()
		.setClusterBuilder(new ClusterBuilder() {

			private static final long serialVersionUID = 2793938419775311824L;

			@Override
			public Cluster buildCluster(Cluster.Builder builder) {
				return builder.addContactPoint("127.0.0.1").build();
			}
		})
		.build();

	sink.name("Cassandra Sink").disableChaining().setParallelism(1).uid("hello");

	env.execute();
}
 
Example #11
Source File: ReusingSortMergeCoGroupIteratorITCase.java    From flink with Apache License 2.0
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num) throws Exception {
	Map<Integer, Collection<String>> map = new HashMap<>();
	Tuple2<Integer, String> pair = new Tuple2<>();
	
	for (int i = 0; i < num; i++) {
		iter.next(pair);
		int key = pair.f0;
		
		if (!map.containsKey(key)) {
			map.put(key, new ArrayList<String>());
		}

		Collection<String> values = map.get(key);
		values.add(pair.f1);
	}
	return map;
}
 
Example #12
Source File: WindowTranslationTest.java    From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceProcessingTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(SlidingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS)))
			.reduce(new DummyReducer());

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingProcessingTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #13
Source File: RestServerEndpointITCase.java    From flink with Apache License 2.0
@Test
public void testEndpointsMustBeUnique() throws Exception {
	final RestServerEndpointConfiguration serverConfig = RestServerEndpointConfiguration.fromConfiguration(config);

	final List<Tuple2<RestHandlerSpecification, ChannelInboundHandler>> handlers = Arrays.asList(
		Tuple2.of(new TestHeaders(), testHandler),
		Tuple2.of(new TestHeaders(), testUploadHandler)
	);

	assertThrows("REST handler registration",
		FlinkRuntimeException.class,
		() -> {
			try (TestRestServerEndpoint restServerEndpoint = new TestRestServerEndpoint(serverConfig, handlers)) {
				restServerEndpoint.start();
				return null;
			}
		});
}
 
Example #14
Source File: SiddhiStream.java    From flink-siddhi with Apache License 2.0
/**
 * Connects a control stream to this Siddhi stream and returns a context for executing
 * Siddhi Continuous Query Language (CQL) statements.
 *
 * @param controlStream stream of control events
 * @return ExecutionSiddhiStream context
 */
public ExecutionSiddhiStream cql(DataStream<ControlEvent> controlStream) {
    DataStream<Tuple2<StreamRoute, Object>> unionStream = controlStream
        .map(new NamedControlStream(ControlEvent.DEFAULT_INTERNAL_CONTROL_STREAM))
        .broadcast()
        .union(this.toDataStream())
        .transform("add route transform",
            SiddhiTypeFactory.getStreamTupleTypeInformation(TypeInformation.of(Object.class)),
            new AddRouteOperator(getCepEnvironment().getDataStreamSchemas()));

    DataStream<Tuple2<StreamRoute, Object>> partitionedStream = new DataStream<>(
        unionStream.getExecutionEnvironment(),
        new PartitionTransformation<>(unionStream.getTransformation(),
        new DynamicPartitioner()));
    return new ExecutionSiddhiStream(partitionedStream, null, getCepEnvironment());
}
 
Example #15
Source File: SimpleTupleJoinFunction.java    From flink with Apache License 2.0
@Override
public void join(Tuple2<String, String> first, Tuple2<String, Integer> second, Collector<Tuple4<String, String, String, Object>> out) throws Exception {
	if (first == null) {
		out.collect(new Tuple4<String, String, String, Object>(null, null, second.f0, second.f1));
	} else if (second == null) {
		out.collect(new Tuple4<String, String, String, Object>(first.f0, first.f1, null, null));
	} else {
		out.collect(new Tuple4<String, String, String, Object>(first.f0, first.f1, second.f0, second.f1));
	}
}
 
Example #16
Source File: ReduceOnEdgesMethodsITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
public void iterateEdges(Iterable<Tuple2<Long, Edge<Long, Long>>> edges,
		Collector<Tuple2<Long, Long>> out) throws Exception {

	for (Tuple2<Long, Edge<Long, Long>> edge : edges) {
		if (edge.f0 != 5) {
			out.collect(new Tuple2<>(edge.f0, edge.f1.getTarget()));
		}
	}
}
 
Example #17
Source File: HITS.java    From Flink-CEPplus with Apache License 2.0
@Override
public Tuple2<T, T> map(Edge<T, ET> value)
		throws Exception {
	output.f0 = value.f0;
	output.f1 = value.f1;
	return output;
}
 
Example #18
Source File: AllWindowTranslationTest.java    From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testReduceWithProcessWindowFunctionProcessingTime() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple3<String, String, Integer>> window = source
			.windowAll(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.reduce(new DummyReducer(), new ProcessAllWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple3<String, String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(new Tuple3<>(in.f0, in.f0, in.f1));
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform =
			(OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor);

	processElementAndEnsureOutput(operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #19
Source File: PlanRightUnwrappingCoGroupOperator.java    From flink with Apache License 2.0
@Override
public void coGroup(
		Iterable<I1> records1,
		Iterable<Tuple2<K, I2>> records2,
		Collector<OUT> out) throws Exception {

	iter2.set(records2.iterator());
	this.wrappedFunction.coGroup(records1, iter2, out);
}
 
Example #20
Source File: BroadcastExample.java    From flink-simple-tutorial with Apache License 2.0
public static void main(String[] args) throws Exception {

    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    // create the (name, age) data set that will be broadcast
    Tuple2<String, Integer> john = new Tuple2<>("john", 23);
    Tuple2<String, Integer> tom = new Tuple2<>("tom", 24);
    Tuple2<String, Integer> shiny = new Tuple2<>("shiny", 22);
    DataSource<Tuple2<String, Integer>> broadcastData = env.fromElements(john, tom, shiny);

    // create a new DataSet d1 with parallelism 4;
    // d1 cannot read broadcastData directly, because the two data sets may not
    // sit on the same node or slot, so Flink forbids direct access between them
    DataSet<String> d1 = env.fromElements("john", "tom", "shiny").setParallelism(4);

    // use a RichMapFunction and fetch the broadcast variable in open()
    d1.map(new RichMapFunction<String, String>() {
        List<Tuple2<String, Integer>> bc;
        HashMap<String, Integer> map = new HashMap<>();

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            this.bc = getRuntimeContext().getBroadcastVariable("broadcastData");
            for (Tuple2<String, Integer> tp : bc) {
                this.map.put(tp.f0, tp.f1);
            }
        }

        @Override
        public String map(String s) throws Exception {
            Integer age = this.map.get(s);
            return s + "->" + age;
        }
    }).withBroadcastSet(broadcastData, "broadcastData").print();

    // print() already triggers execution for DataSet programs, so execute() is not needed here
    // env.execute("Broadcast Example");
}
 
Example #21
Source File: DataSetUtilsITCase.java    From flink with Apache License 2.0
@Test
public void testCountElementsPerPartition() throws Exception {
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	long expectedSize = 100L;
	DataSet<Long> numbers = env.generateSequence(0, expectedSize - 1);

	DataSet<Tuple2<Integer, Long>> ds = DataSetUtils.countElementsPerPartition(numbers);

	Assert.assertEquals(env.getParallelism(), ds.count());
	Assert.assertEquals(expectedSize, ds.sum(1).collect().get(0).f1.longValue());
}
 
Example #22
Source File: ConnectedComponentsWithObjectMapITCase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected void testProgram() throws Exception {
	// set up execution environment
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	// read vertex and edge data
	DataSet<Tuple1<Long>> vertices = env.readCsvFile(verticesPath).types(Long.class);

	DataSet<Tuple2<Long, Long>> edges = env.readCsvFile(edgesPath).fieldDelimiter(" ").types(Long.class, Long.class)
											.flatMap(new UndirectEdge());

	// assign the initial components (equal to the vertex id)
	DataSet<Tuple2<Long, Long>> verticesWithInitialId = vertices.map(new ConnectedComponentsITCase.DuplicateValue<Long>());

	// open a delta iteration
	DeltaIteration<Tuple2<Long, Long>, Tuple2<Long, Long>> iteration =
			verticesWithInitialId.iterateDelta(verticesWithInitialId, 100, 0);
	iteration.setSolutionSetUnManaged(true);

	// apply the step logic: join with the edges, select the minimum neighbor, update if the component of the candidate is smaller
	DataSet<Tuple2<Long, Long>> changes = iteration.getWorkset().join(edges).where(0).equalTo(0).with(new NeighborWithComponentIDJoin())
			.groupBy(0).aggregate(Aggregations.MIN, 1)
			.join(iteration.getSolutionSet()).where(0).equalTo(0)
			.with(new ComponentIdFilter());

	// close the delta iteration (delta and new workset are identical)
	DataSet<Tuple2<Long, Long>> result = iteration.closeWith(changes, changes);

	result.writeAsCsv(resultPath, "\n", " ");

	// execute program
	env.execute("Connected Components Example");
}
 
Example #23
Source File: WindowTranslationTest.java    From flink with Apache License 2.0
@Test
@SuppressWarnings("rawtypes")
public void testProcessWithCustomTrigger() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

	DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2));

	DataStream<Tuple2<String, Integer>> window1 = source
			.keyBy(new TupleKeySelector())
			.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS)))
			.trigger(CountTrigger.of(1))
			.process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
				private static final long serialVersionUID = 1L;

				@Override
				public void process(String key,
						Context ctx,
						Iterable<Tuple2<String, Integer>> values,
						Collector<Tuple2<String, Integer>> out) throws Exception {
					for (Tuple2<String, Integer> in : values) {
						out.collect(in);
					}
				}
			});

	OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation();
	OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator();
	Assert.assertTrue(operator instanceof WindowOperator);
	WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator;
	Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger);
	Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows);
	Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor);

	processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1));
}
 
Example #24
Source File: PvStatLocalKeyByExactlyOnce.java    From flink-learning with Apache License 2.0
@Override
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
    // save the buffered data into state to guarantee exactly-once semantics
    localPvStatListState.clear();
    for (Map.Entry<String, Long> appIdPv : localPvStat.entrySet()) {
        localPvStatListState.add(Tuple2.of(appIdPv.getKey(), appIdPv.getValue()));
        log.info("snapshot   subtask: {}    appId: {}   pv: {}", subtaskIndex, appIdPv.getKey(), appIdPv.getValue());
    }
}
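
For context, the restore side of this exactly-once pattern re-populates the local buffer in initializeState. The sketch below is not part of the original source file; it assumes the field names used above and that localPvStatListState is operator ListState of Tuple2<String, Long>.

// Hypothetical restore counterpart (assumed state name and field types)
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
    localPvStatListState = context.getOperatorStateStore().getListState(
            new ListStateDescriptor<>("localPvStat", Types.TUPLE(Types.STRING, Types.LONG)));
    localPvStat = new HashMap<>();
    if (context.isRestored()) {
        // merge restored counts back into the local map
        for (Tuple2<String, Long> appIdPv : localPvStatListState.get()) {
            localPvStat.put(appIdPv.f0, localPvStat.getOrDefault(appIdPv.f0, 0L) + appIdPv.f1);
        }
    }
}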
 
Example #25
Source File: PartitionOperatorTest.java    From flink with Apache License 2.0
@Test
public void testRangePartitionWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple2<Integer, String>> ds = getTupleDataSet(env);
	ds.partitionByRange(0).withOrders(Order.ASCENDING);
}
 
Example #26
Source File: EdgeTargetDegreesTest.java    From flink with Apache License 2.0
@Test
public void testWithEmptyGraphWithoutVertices() throws Exception {
	DataSet<Edge<LongValue, Tuple2<NullValue, Degrees>>> targetDegrees = emptyGraphWithoutVertices
		.run(new EdgeTargetDegrees<>());

	assertEquals(0, targetDegrees.collect().size());
}
 
Example #27
Source File: AdditionalOperatorsTest.java    From flink with Apache License 2.0
@Test
public void testCrossWithSmall() {
	// construct the plan
	ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(DEFAULT_PARALLELISM);
	DataSet<Long> set1 = env.generateSequence(0, 1);
	DataSet<Long> set2 = env.generateSequence(0, 1);

	set1.crossWithTiny(set2).name("Cross")
			.output(new DiscardingOutputFormat<Tuple2<Long, Long>>());

	try {
		Plan plan = env.createProgramPlan();
		OptimizedPlan oPlan = compileWithStats(plan);
		OptimizerPlanNodeResolver resolver = new OptimizerPlanNodeResolver(oPlan);
		
		DualInputPlanNode crossPlanNode = resolver.getNode("Cross");
		Channel in1 = crossPlanNode.getInput1();
		Channel in2 = crossPlanNode.getInput2();
		
		assertEquals(ShipStrategyType.FORWARD, in1.getShipStrategy());
		assertEquals(ShipStrategyType.BROADCAST, in2.getShipStrategy());
	} catch(CompilerException ce) {
		ce.printStackTrace();
		fail("The Flink optimizer is unable to compile this plan correctly.");
	}
}
 
Example #28
Source File: ContinuousFileProcessingCheckpointITCase.java    From flink with Apache License 2.0
public void run() {
	try {
		for (int i = 0; i < NO_OF_FILES; i++) {
			Tuple2<org.apache.hadoop.fs.Path, String> tmpFile;
			long modTime;
			do {

				// give it some time so that the files have
				// different modification timestamps.
				Thread.sleep(50);

				tmpFile = fillWithData(localFsURI, "file", i, "This is test line.");

				modTime = localFs.getFileStatus(tmpFile.f0).getModificationTime();
				if (modTime <= lastCreatedModTime) {
					// delete the last created file to recreate it with a different timestamp
					localFs.delete(tmpFile.f0, false);
				}
			} while (modTime <= lastCreatedModTime);
			lastCreatedModTime = modTime;

			// rename the file
			org.apache.hadoop.fs.Path file =
				new org.apache.hadoop.fs.Path(localFsURI + "/file" + i);
			localFs.rename(tmpFile.f0, file);
			Assert.assertTrue(localFs.exists(file));

			filesCreated.add(file);
			fileContents.put(i, tmpFile.f1);
		}
	} catch (IOException | InterruptedException e) {
		e.printStackTrace();
	}
}
 
Example #29
Source File: TestUtils.java    From Flink-CEPplus with Apache License 2.0
static OneInputStreamOperatorTestHarness<Tuple2<String, Integer>, Object> createRescalingTestSink(
		File outDir,
		int totalParallelism,
		int taskIdx,
		long inactivityInterval,
		long partMaxSize) throws Exception {

	final RollingPolicy<Tuple2<String, Integer>, String> rollingPolicy =
			DefaultRollingPolicy
					.create()
					.withMaxPartSize(partMaxSize)
					.withRolloverInterval(inactivityInterval)
					.withInactivityInterval(inactivityInterval)
					.build();

	final BucketAssigner<Tuple2<String, Integer>, String> bucketer = new TupleToStringBucketer();

	final Encoder<Tuple2<String, Integer>> encoder = (element, stream) -> {
		stream.write((element.f0 + '@' + element.f1).getBytes(StandardCharsets.UTF_8));
		stream.write('\n');
	};

	return createCustomRescalingTestSink(
			outDir,
			totalParallelism,
			taskIdx,
			10L,
			bucketer,
			encoder,
			rollingPolicy,
			new DefaultBucketFactoryImpl<>());
}
 
Example #30
Source File: PartitionOperatorTest.java    From flink with Apache License 2.0
@Test
public void testRangePartitionByComplexKeyWithOrders() throws Exception {
	final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

	final DataSource<Tuple2<Tuple2<Integer, Integer>, Integer>> ds = env.fromElements(
		new Tuple2<>(new Tuple2<>(1, 1), 1),
		new Tuple2<>(new Tuple2<>(2, 2), 2),
		new Tuple2<>(new Tuple2<>(2, 2), 2)
	);
	ds.partitionByRange(0, 1).withOrders(Order.ASCENDING, Order.DESCENDING);
}