Java Code Examples for org.apache.flink.api.common.functions.ReduceFunction

The following examples show how to use org.apache.flink.api.common.functions.ReduceFunction. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: HBaseConnectorITCase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the test table through {@code InputFormatForTestTable} with parallelism 4, folds all
 * records into one by adding their single field, and checks that exactly one record with the
 * value 360 is collected.
 */
@Test
public void testTableInputFormat() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(4);

	final DataSet<Tuple1<Integer>> summed = environment
		.createInput(new InputFormatForTestTable())
		.reduce(new ReduceFunction<Tuple1<Integer>>() {

			// adds the only field of both operands into a fresh tuple
			@Override
			public Tuple1<Integer> reduce(Tuple1<Integer> left, Tuple1<Integer> right) throws Exception {
				final int total = left.f0 + right.f0;
				return Tuple1.of(total);
			}
		});

	final List<Tuple1<Integer>> collected = summed.collect();

	assertEquals(1, collected.size());
	assertEquals(360, (int) collected.get(0).f0);
}
 
Example 2
/**
 * Builds a bulk iteration named "Loop" (at most 5 steps) over eight "1" strings whose step
 * function is an all-reduce that always keeps its first operand, then compares the collected
 * result of the closed iteration against {@code EXPECTED}.
 */
@Override
protected void testProgram() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();
	environment.setParallelism(4);

	final DataSet<String> ones = environment.fromElements("1", "1", "1", "1", "1", "1", "1", "1");
	final IterativeDataSet<String> loop = ones.iterate(5).name("Loop");

	// despite the operator name, the reducer simply returns its first argument
	final ReduceFunction<String> keepFirst = new ReduceFunction<String>() {
		@Override
		public String reduce(String first, String second) throws Exception {
			return first;
		}
	};
	final DataSet<String> stepResult = loop.reduce(keepFirst).name("Compute sum (Reduce)");

	final List<String> collected = loop.closeWith(stepResult).collect();
	compareResultAsText(collected, EXPECTED);
}
 
Example 3
Source Project: flink   Source File: DistinctOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Translates a selector-function-keyed distinct into a reduce plan: the key extractor is appended
 * to {@code input}, a {@code PlanUnwrappingReduceOperator} reduces the (key, value) pairs, and the
 * key remover is appended to the reducer's output.
 *
 * @param rawKeys the selector function keys; cast (unchecked) to the key type {@code K}
 * @param function the reduce function handed to the unwrapping reduce operator
 * @param outputType the type information handed to the unwrapping reduce operator
 * @param name the name handed to the unwrapping reduce operator
 * @param input the operator whose output is keyed and reduced
 * @param parallelism the parallelism set on the reduce operator
 * @param hint the combine hint set on the reduce operator
 * @return the operator produced by {@code KeyFunctions.appendKeyRemover}
 */
private static <IN, K> org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateSelectorFunctionDistinct(
		SelectorFunctionKeys<IN, ?> rawKeys,
		ReduceFunction<IN> function,
		TypeInformation<IN> outputType,
		String name,
		Operator<IN> input,
		int parallelism,
		CombineHint hint) {
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<IN, K> typedKeys = (SelectorFunctionKeys<IN, K>) rawKeys;

	final TypeInformation<Tuple2<K, IN>> keyedType = KeyFunctions.createTypeWithKey(typedKeys);
	final Operator<Tuple2<K, IN>> inputWithKey = KeyFunctions.appendKeyExtractor(input, typedKeys);

	final PlanUnwrappingReduceOperator<IN, K> reduceOperator =
			new PlanUnwrappingReduceOperator<>(function, typedKeys, name, outputType, keyedType);
	reduceOperator.setInput(inputWithKey);
	reduceOperator.setCombineHint(hint);
	reduceOperator.setParallelism(parallelism);

	return KeyFunctions.appendKeyRemover(reduceOperator, typedKeys);
}
 
Example 4
Source Project: Alink   Source File: DocHashCountVectorizerTrainBatchOp.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Wires this operator to its first input: every partition is passed through {@code HashingTF},
 * the per-partition tuples are merged into one (maps are merged by summing values per key, the
 * {@code f0} fields are added), and the merged tuple is flat-mapped by {@code BuildModel}.
 * The result becomes this operator's output, with the schema taken from
 * {@code DocHashCountVectorizerModelDataConverter}.
 *
 * @param inputs the upstream operators; only the first one is used
 * @return this operator
 */
@Override
public DocHashCountVectorizerTrainBatchOp linkFrom(BatchOperator<?>... inputs) {
    final BatchOperator<?> source = checkAndGetFirst(inputs);
    final int selectedColIndex =
        TableUtil.findColIndexWithAssertAndHint(source.getColNames(), this.getSelectedCol());

    // Folds the right-hand tuple into the left-hand one in place and returns the left-hand one.
    final ReduceFunction<Tuple2<Long, HashMap<Integer, Double>>> mergePartials =
        new ReduceFunction<Tuple2<Long, HashMap<Integer, Double>>>() {
            @Override
            public Tuple2<Long, HashMap<Integer, Double>> reduce(Tuple2<Long, HashMap<Integer, Double>> accumulated,
                                                                 Tuple2<Long, HashMap<Integer, Double>> incoming) {
                incoming.f1.forEach((key, value) -> accumulated.f1.merge(key, value, Double::sum));
                accumulated.f0 = accumulated.f0 + incoming.f0;
                return accumulated;
            }
        };

    final DataSet<Row> model = source
        .getDataSet()
        .mapPartition(new HashingTF(selectedColIndex, this.getNumFeatures()))
        .reduce(mergePartials)
        .flatMap(new BuildModel(getParams()));

    this.setOutput(model, new DocHashCountVectorizerModelDataConverter().getModelSchema());
    return this;
}
 
Example 5
Source Project: flink   Source File: KeyMap.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Puts a value into the map, or folds it into the value already stored for the key.
 *
 * <p>When no entry with an equal key exists, the given value is inserted as is. Otherwise the
 * stored value and the given value are combined with the given {@link ReduceFunction}, and the
 * combined value replaces the stored one.
 *
 * @param key The key under which the value is stored.
 * @param value The value to insert, or to combine with the value already stored.
 * @param aggregator The function that combines the stored value with the new value.
 *
 * @return The value held by the map for the key afterwards: the given value if it was inserted,
 *         otherwise the combined value.
 *
 * @throws java.lang.NullPointerException Thrown, if the key is null.
 * @throws Exception Exceptions thrown by the aggregation function are forwarded.
 */
public final V putOrAggregate(K key, V value, ReduceFunction<V> aggregator) throws Exception {
	final int keyHash = hash(key);
	final int bucket = indexOf(keyHash);

	// walk the collision chain that starts at the bucket
	Entry<K, V> current = table[bucket];
	while (current != null) {
		final boolean sameKey = current.hashCode == keyHash && current.key.equals(key);
		if (sameKey) {
			// key already present: combine and store the combined value
			current.value = aggregator.reduce(current.value, value);
			return current.value;
		}
		current = current.next;
	}

	// key not present: the given value goes in unchanged and is handed back
	insertNewEntry(keyHash, key, value, bucket);
	return value;
}
 
Example 6
Source Project: flink   Source File: DistinctOperator.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the plan for a distinct that is keyed by a selector function. The input is wrapped into
 * (key, value) pairs, reduced by a {@code PlanUnwrappingReduceOperator}, and unwrapped again by
 * the key remover.
 *
 * @param rawKeys the selector function keys; cast (unchecked) to the key type {@code K}
 * @param function the reduce function passed to the reduce operator
 * @param outputType the type information passed to the reduce operator
 * @param name the name passed to the reduce operator
 * @param input the operator that feeds the key extractor
 * @param parallelism the parallelism applied to the reduce operator
 * @param hint the combine hint applied to the reduce operator
 * @return the result of {@code KeyFunctions.appendKeyRemover} on the reduce operator
 */
private static <IN, K> org.apache.flink.api.common.operators.SingleInputOperator<?, IN, ?> translateSelectorFunctionDistinct(
		SelectorFunctionKeys<IN, ?> rawKeys,
		ReduceFunction<IN> function,
		TypeInformation<IN> outputType,
		String name,
		Operator<IN> input,
		int parallelism,
		CombineHint hint) {
	@SuppressWarnings("unchecked")
	final SelectorFunctionKeys<IN, K> selectorKeys = (SelectorFunctionKeys<IN, K>) rawKeys;

	// wrap: every record travels together with its extracted key
	final TypeInformation<Tuple2<K, IN>> pairType = KeyFunctions.createTypeWithKey(selectorKeys);
	final Operator<Tuple2<K, IN>> pairedInput = KeyFunctions.appendKeyExtractor(input, selectorKeys);

	final PlanUnwrappingReduceOperator<IN, K> unwrappingReduce =
			new PlanUnwrappingReduceOperator<>(function, selectorKeys, name, outputType, pairType);
	unwrappingReduce.setInput(pairedInput);
	unwrappingReduce.setCombineHint(hint);
	unwrappingReduce.setParallelism(parallelism);

	// unwrap: strip the key again behind the reducer
	return KeyFunctions.appendKeyRemover(unwrappingReduce, selectorKeys);
}
 
Example 7
Source Project: Alink   Source File: DataSetUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Counts the records of a dataset: each partition emits its own record count, and the
 * per-partition counts are then added up by a reduce.
 *
 * @param dataSet the dataset whose records are counted
 * @return a dataset produced by reducing the per-partition counts of {@code dataSet} with addition
 */
public static <T> DataSet<Long> count(DataSet<T> dataSet) {
    // emits exactly one value per partition: the number of records iterated
    final MapPartitionFunction<T, Long> countPartition = new MapPartitionFunction<T, Long>() {
        @Override
        public void mapPartition(Iterable<T> values, Collector<Long> out) throws Exception {
            long seen = 0L;
            for (T ignored : values) {
                seen += 1;
            }
            out.collect(seen);
        }
    };

    final ReduceFunction<Long> addCounts = new ReduceFunction<Long>() {
        @Override
        public Long reduce(Long left, Long right) throws Exception {
            return left + right;
        }
    };

    return dataSet
        .mapPartition(countPartition)
        .name("count_dataset")
        .returns(Types.LONG)
        .reduce(addCounts);
}
 
Example 8
Source Project: Flink-CEPplus   Source File: StateDescriptorPassingTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Registers a Kryo serializer for {@link File}, applies a reduce on a one-second all-window over
 * a stream of files, and passes the resulting operator to
 * {@code validateStateDescriptorConfigured}.
 */
@Test
public void testReduceWindowAllState() throws Exception {
	final StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	environment.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	environment.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

	final DataStream<File> files = environment.fromElements(new File("/"));

	final SingleOutputStreamOperator<?> reduced = files
			.timeWindowAll(Time.milliseconds(1000))
			.reduce(new ReduceFunction<File>() {

				// the job is never executed here, so the reducer result is irrelevant
				@Override
				public File reduce(File first, File second) {
					return null;
				}
			});

	validateStateDescriptorConfigured(reduced);
}
 
Example 9
Source Project: Flink-CEPplus   Source File: AllReduceDriverTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets up an {@code AllReduceDriver} with the {@code ALL_REDUCE} strategy on an empty input and a
 * discarding collector, and runs it. Any exception is printed and fails the test.
 */
@Test
public void testAllReduceDriverImmutableEmpty() {
	try {
		final TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext =
				new TestTaskContext<>();

		// the sample data is only used to derive the type information; the actual input is empty
		final List<Tuple2<String, Integer>> sample = DriverTestData.createReduceImmutableData();
		final TypeInformation<Tuple2<String, Integer>> tupleType = TypeExtractor.getForObject(sample.get(0));
		final MutableObjectIterator<Tuple2<String, Integer>> emptyInput = EmptyMutableObjectIterator.get();
		taskContext.setDriverStrategy(DriverStrategy.ALL_REDUCE);

		taskContext.setInput1(emptyInput, tupleType.createSerializer(new ExecutionConfig()));
		taskContext.setCollector(new DiscardingOutputCollector<Tuple2<String, Integer>>());

		final AllReduceDriver<Tuple2<String, Integer>> allReduce = new AllReduceDriver<>();
		allReduce.setup(taskContext);
		allReduce.prepare();
		allReduce.run();
	}
	catch (Exception failure) {
		System.err.println(failure.getMessage());
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 10
Source Project: flink   Source File: StateDescriptorPassingTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a reduce over a one-second all-window on a {@link File} stream (with a Kryo serializer
 * registered for {@link File}) and hands the resulting operator to
 * {@code validateStateDescriptorConfigured}.
 */
@Test
public void testReduceWindowAllState() throws Exception {
	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	streamEnv.registerTypeWithKryoSerializer(File.class, JavaSerializer.class);

	final DataStream<File> source = streamEnv.fromElements(new File("/"));

	final SingleOutputStreamOperator<?> windowedReduce = source
			.timeWindowAll(Time.milliseconds(1000))
			.reduce(new ReduceFunction<File>() {

				// placeholder reducer: the topology is only inspected, not run
				@Override
				public File reduce(File left, File right) {
					return null;
				}
			});

	validateStateDescriptorConfigured(windowedReduce);
}
 
Example 11
Source Project: flink   Source File: ReduceDriverTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code ReduceDriver} with the {@code SORTED_REDUCE} strategy on an empty input and
 * checks that the gathering collector received nothing. Any exception is printed and fails the
 * test.
 */
@Test
public void testReduceDriverImmutableEmpty() {
	try {
		final TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext =
				new TestTaskContext<>();

		// the sample data is only used to derive the type information; the actual input is empty
		final List<Tuple2<String, Integer>> sample = DriverTestData.createReduceImmutableData();
		final TupleTypeInfo<Tuple2<String, Integer>> tupleType =
				(TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(sample.get(0));
		final MutableObjectIterator<Tuple2<String, Integer>> emptyInput = EmptyMutableObjectIterator.get();
		taskContext.setDriverStrategy(DriverStrategy.SORTED_REDUCE);
		final TypeComparator<Tuple2<String, Integer>> keyComparator =
				tupleType.createComparator(new int[]{0}, new boolean[] {true}, 0, new ExecutionConfig());

		final GatheringCollector<Tuple2<String, Integer>> gathered =
				new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));

		taskContext.setInput1(emptyInput, tupleType.createSerializer(new ExecutionConfig()));
		taskContext.setComparator1(keyComparator);
		taskContext.setCollector(gathered);

		final ReduceDriver<Tuple2<String, Integer>> reduceDriver = new ReduceDriver<>();
		reduceDriver.setup(taskContext);
		reduceDriver.prepare();
		reduceDriver.run();

		Assert.assertEquals(0, gathered.getList().size());
	}
	catch (Exception failure) {
		System.err.println(failure.getMessage());
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 12
Source Project: flink   Source File: TimestampITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures the environment for processing time, then applies a tumbling event-time window to a
 * keyed stream and expects {@code execute()} to throw.
 */
@Test
public void testErrorOnEventTimeOverProcessingTime() {
	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	streamEnv.setParallelism(2);
	streamEnv.getConfig().disableSysoutLogging();
	streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);

	final DataStream<Tuple2<String, Integer>> pairs =
			streamEnv.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2));

	pairs
			.keyBy(0)
			.window(TumblingEventTimeWindows.of(Time.seconds(5)))
			.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
				// keeps the first operand; the result is never inspected
				@Override
				public Tuple2<String, Integer> reduce(Tuple2<String, Integer> first, Tuple2<String, Integer> second)  {
					return first;
				}
			})
			.print();

	try {
		streamEnv.execute();
		fail("this should fail with an exception");
	} catch (Exception expected) {
		// expected: execution must not succeed
	}
}
 
Example 13
Source Project: Flink-CEPplus   Source File: ReduceOperator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reduce over the whole data set (reduce-all), as opposed to a reduce per group:
 * no grouping and no combine hint are set.
 *
 * @param input the data set to reduce; its type is also used as the result type
 * @param function the reduce function stored on this operator
 * @param defaultName the default name stored on this operator
 */
public ReduceOperator(DataSet<IN> input, ReduceFunction<IN> function, String defaultName) {
	super(input, input.getType());

	// reduce-all: neither a grouping nor a combine hint applies
	this.grouper = null;
	this.hint = null;

	this.function = function;
	this.defaultName = defaultName;
}
 
Example 14
Source Project: Flink-CEPplus   Source File: ReduceOperator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a reduce per group. The operator reads from the grouping's input data set, keeps the
 * grouping, starts with the combine hint {@code CombineHint.OPTIMIZER_CHOOSES}, and finally has
 * the function analyzed by {@code UdfOperatorUtils.analyzeSingleInputUdf} with the grouping keys.
 *
 * @param input the grouping to reduce
 * @param function the reduce function stored on this operator
 * @param defaultName the default name stored on this operator and passed to the analyzer
 */
public ReduceOperator(Grouping<IN> input, ReduceFunction<IN> function, String defaultName) {
	super(input.getInputDataSet(), input.getInputDataSet().getType());

	this.grouper = input;
	this.hint = CombineHint.OPTIMIZER_CHOOSES;
	this.function = function;
	this.defaultName = defaultName;

	// runs last: the analyzer receives this operator with all fields already assigned
	UdfOperatorUtils.analyzeSingleInputUdf(this, ReduceFunction.class, defaultName, function, this.grouper.keys);
}
 
Example 15
Source Project: flink   Source File: ReduceCombineDriverTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a {@code ReduceCombineDriver} with the {@code SORTED_PARTIAL_REDUCE} strategy on an empty
 * input and checks that the gathering collector received nothing. Any exception is printed and
 * fails the test.
 */
@Test
public void testImmutableEmpty() {
	try {
		final TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> taskContext =
				new TestTaskContext<>(1024 * 1024);
		taskContext.getTaskConfig().setRelativeMemoryDriver(0.5);

		// the sample data is only used to derive the type information; the actual input is empty
		final List<Tuple2<String, Integer>> sample = DriverTestData.createReduceImmutableData();
		Collections.shuffle(sample);

		final TupleTypeInfo<Tuple2<String, Integer>> tupleType =
				(TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(sample.get(0));
		final MutableObjectIterator<Tuple2<String, Integer>> emptyInput = EmptyMutableObjectIterator.get();

		taskContext.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE);
		final TypeComparator<Tuple2<String, Integer>> keyComparator =
				tupleType.createComparator(new int[]{0}, new boolean[] {true}, 0, new ExecutionConfig());

		final GatheringCollector<Tuple2<String, Integer>> gathered =
				new GatheringCollector<>(tupleType.createSerializer(new ExecutionConfig()));

		taskContext.setInput1(emptyInput, tupleType.createSerializer(new ExecutionConfig()));
		taskContext.setComparator1(keyComparator);
		taskContext.setCollector(gathered);

		final ReduceCombineDriver<Tuple2<String, Integer>> combineDriver = new ReduceCombineDriver<>();
		combineDriver.setup(taskContext);
		combineDriver.prepare();
		combineDriver.run();

		Assert.assertEquals(0, gathered.getList().size());
	}
	catch (Exception failure) {
		System.err.println(failure.getMessage());
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 16
Source Project: flink   Source File: HBaseConnectorITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the test table through {@code InputFormatForTestTable}, adds up the single field of all
 * records with a reduce, and checks that exactly one record with the value 360 is collected.
 */
@Test
public void testTableInputFormat() throws Exception {
	final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple1<Integer>> summed = environment
		.createInput(new InputFormatForTestTable())
		.reduce((ReduceFunction<Tuple1<Integer>>) (left, right) -> {
			final int total = left.f0 + right.f0;
			return Tuple1.of(total);
		});

	final List<Tuple1<Integer>> collected = summed.collect();

	assertEquals(1, collected.size());
	assertEquals(360, (int) collected.get(0).f0);
}
 
Example 17
Source Project: Flink-CEPplus   Source File: UdfAnalyzerExamplesTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the analyzer result for {@code CentroidAccumulator}, analyzed as a
 * {@link ReduceFunction}, against its annotations. Both type arguments are
 * {@code Tuple3<Integer, Point, Long>} and the key is field "0".
 */
@Test
public void testKMeansExamplesCentroidAccumulator() {
	final TypeInformation<Tuple3<Integer, Point, Long>> firstType =
			TypeInformation.of(new TypeHint<Tuple3<Integer, Point, Long>>(){});
	final TypeInformation<Tuple3<Integer, Point, Long>> secondType =
			TypeInformation.of(new TypeHint<Tuple3<Integer, Point, Long>>(){});
	final String[] keys = new String[] { "0" };

	compareAnalyzerResultWithAnnotationsSingleInputWithKeys(
			ReduceFunction.class, CentroidAccumulator.class, firstType, secondType, keys);
}
 
Example 18
Source Project: Flink-CEPplus   Source File: UdfAnalyzerExamplesTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the analyzer result for {@code SumGradient}, analyzed as a {@link ReduceFunction},
 * against its annotations. Both type arguments are {@code Tuple1<Double>} and the key is
 * field "0".
 */
@Test
public void testLogisticRegressionExamplesSumGradient() {
	final TypeInformation<Tuple1<Double>> firstType =
			TypeInformation.of(new TypeHint<Tuple1<Double>>(){});
	final TypeInformation<Tuple1<Double>> secondType =
			TypeInformation.of(new TypeHint<Tuple1<Double>>(){});
	final String[] keys = new String[] { "0" };

	compareAnalyzerResultWithAnnotationsSingleInputWithKeys(
			ReduceFunction.class, SumGradient.class, firstType, secondType, keys);
}
 
Example 19
Source Project: Flink-CEPplus   Source File: UdfAnalyzerTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks the analyzer result for {@code Reduce2}, analyzed as a {@link ReduceFunction}, against
 * its annotations. Both type arguments are {@code MyPojo} and the key is the field "field".
 */
@Test
public void testForwardWithBranchingReduce() {
	final TypeInformation<MyPojo> firstType = TypeInformation.of(new TypeHint<MyPojo>(){});
	final TypeInformation<MyPojo> secondType = TypeInformation.of(new TypeHint<MyPojo>(){});
	final String[] keys = new String[] { "field" };

	compareAnalyzerResultWithAnnotationsSingleInputWithKeys(
		ReduceFunction.class, Reduce2.class, firstType, secondType, keys);
}
 
Example 20
Source Project: flink   Source File: PojoSerializerUpgradeTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Loads the POJO class through the user code class loader, makes its field {@code a} (and
 * {@code b}, if {@code hasBField} is set) accessible, and registers the state objects: value,
 * list and reducing state from the keyed state store when {@code keyed} is set, otherwise list
 * and union list state from the operator state store.
 *
 * @param context the context that provides the keyed and operator state stores
 * @throws Exception if the class or one of its fields cannot be resolved, or state access fails
 */
@SuppressWarnings("unchecked")
@Override
public void initializeState(FunctionInitializationContext context) throws Exception {
	pojoClass = getRuntimeContext().getUserCodeClassLoader().loadClass(POJO_NAME);

	fieldA = pojoClass.getDeclaredField("a");
	fieldA.setAccessible(true);

	if (hasBField) {
		fieldB = pojoClass.getDeclaredField("b");
		fieldB.setAccessible(true);
	}

	// the class is only known at runtime, so every descriptor is typed as Object
	final Class<Object> stateClass = (Class<Object>) pojoClass;

	if (!keyed) {
		partitionableListState = context.getOperatorStateStore().getListState(
			new ListStateDescriptor<>("partitionableListState", stateClass));
		unionListState = context.getOperatorStateStore().getUnionListState(
			new ListStateDescriptor<>("unionListState", stateClass));
		return;
	}

	keyedValueState = context.getKeyedStateStore().getState(
		new ValueStateDescriptor<>("keyedValueState", stateClass));
	keyedListState = context.getKeyedStateStore().getListState(
		new ListStateDescriptor<>("keyedListState", stateClass));

	final ReduceFunction<Object> firstValueReducer = new FirstValueReducer<>();
	keyedReducingState = context.getKeyedStateStore().getReducingState(
		new ReducingStateDescriptor<>("keyedReducingState", firstValueReducer, stateClass));
}
 
Example 21
Source Project: Flink-CEPplus   Source File: ReducingStateDescriptorTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies {@code hashCode()} and {@code equals()} of {@code ReducingStateDescriptor} for
 * descriptors created from a class and from a serializer, for a serialized copy, and after the
 * serializer has been initialized.
 */
@Test
public void testHashCodeEquals() throws Exception {
	final String descriptorName = "testName";
	final ReduceFunction<String> keepFirst = (a, b) -> a;

	final ReducingStateDescriptor<String> reference =
			new ReducingStateDescriptor<>(descriptorName, keepFirst, String.class);
	final ReducingStateDescriptor<String> byClass =
			new ReducingStateDescriptor<>(descriptorName, keepFirst, String.class);
	final ReducingStateDescriptor<String> bySerializer =
			new ReducingStateDescriptor<>(descriptorName, keepFirst, StringSerializer.INSTANCE);

	// hashCode() must agree whether or not the serializer is already initialized
	assertEquals(reference.hashCode(), byClass.hashCode());
	assertEquals(reference.hashCode(), bySerializer.hashCode());

	assertEquals(reference, byClass);
	assertEquals(reference, bySerializer);

	// a serialized copy is equal to its source
	final ReducingStateDescriptor<String> copy = CommonTestUtils.createCopySerializable(reference);
	assertEquals(reference, copy);

	// initializing the copy's serializer does not break equality
	copy.initializeSerializerUnlessSet(new ExecutionConfig());
	assertEquals(reference, copy);

	// nor does initializing the reference's serializer
	reference.initializeSerializerUnlessSet(new ExecutionConfig());
	assertEquals(reference, byClass);
}
 
Example 22
/**
 * Checks that reinterpreting a data stream as a keyed stream works as expected. The test runs
 * two jobs. The first job materializes the result of a keyBy into files, one file per partition.
 * The second job opens those files as sources (assigning files to partitions correctly) and
 * reinterprets the sources as keyed, which is valid because the first job partitioned the data
 * with a keyBy.
 */
@Test
public void testReinterpretAsKeyedStream() throws Exception {

	final int maxParallelism = 8;
	final int numEventsPerInstance = 100;
	final int parallelism = 3;
	final int numTotalEvents = numEventsPerInstance * parallelism;
	final int numUniqueKeys = 100;

	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
	streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);
	streamEnv.setMaxParallelism(maxParallelism);
	streamEnv.setParallelism(parallelism);
	streamEnv.enableCheckpointing(100);
	streamEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0L));

	// one temporary file per parallel partition
	final List<File> filesByPartition = new ArrayList<>(parallelism);
	for (int partition = 0; partition < parallelism; ++partition) {
		filesByPartition.add(partition, temporaryFolder.newFile());
	}

	// first job: write the keyed partitions to the files
	streamEnv.addSource(new RandomTupleSource(numEventsPerInstance, numUniqueKeys))
		.keyBy(0)
		.addSink(new ToPartitionFileSink(filesByPartition));

	streamEnv.execute();

	// second job: read the files back and treat the stream as already keyed
	DataStreamUtils.reinterpretAsKeyedStream(
		streamEnv.addSource(new FromPartitionFileSource(filesByPartition)),
		(KeySelector<Tuple2<Integer, Integer>, Integer>) record -> record.f0,
		TypeInformation.of(Integer.class))
		.timeWindow(Time.seconds(1)) // test that also timers and aggregated state work as expected
		.reduce((ReduceFunction<Tuple2<Integer, Integer>>) (left, right) ->
			new Tuple2<>(left.f0, left.f1 + right.f1))
		.addSink(new ValidatingSink(numTotalEvents)).setParallelism(1);

	streamEnv.execute();
}
 
Example 23
Source Project: flink   Source File: RocksDBReducingState.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new {@code RocksDBReducingState}.
 *
 * @param columnFamily The RocksDB column family that this state is associated with.
 * @param namespaceSerializer The serializer for the namespace.
 * @param valueSerializer The serializer for the state.
 * @param defaultValue The default value for the state.
 * @param reduceFunction The reduce function used for reducing state.
 * @param backend The backend to which this state is bound.
 */
private RocksDBReducingState(
		ColumnFamilyHandle columnFamily,
		TypeSerializer<N> namespaceSerializer,
		TypeSerializer<V> valueSerializer,
		V defaultValue,
		ReduceFunction<V> reduceFunction,
		RocksDBKeyedStateBackend<K> backend) {

	// everything except the reduce function is handled by the superclass
	super(columnFamily, namespaceSerializer, valueSerializer, defaultValue, backend);

	this.reduceFunction = reduceFunction;
}
 
Example 24
Source Project: Flink-CEPplus   Source File: InPlaceMutableHashTable.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a facade that stores the given reducer, output collector and object-reuse flag, obtains
 * a prober for the build side comparator (paired with a {@code SameTypePairComparator} over the
 * same comparator), and creates a reusable record instance from the build side serializer.
 *
 * @param reducer the reduce function stored on this facade
 * @param outputCollector the collector stored on this facade
 * @param objectReuseEnabled the object-reuse flag stored on this facade
 */
public ReduceFacade(ReduceFunction<T> reducer, Collector<T> outputCollector, boolean objectReuseEnabled) {
	// plain state handed in by the caller
	this.reducer = reducer;
	this.outputCollector = outputCollector;
	this.objectReuseEnabled = objectReuseEnabled;

	// helpers derived from the enclosing table's build side comparator and serializer
	this.prober = getProber(
			buildSideComparator,
			new SameTypePairComparator<>(buildSideComparator));
	this.reuse = buildSideSerializer.createInstance();
}
 
Example 25
Source Project: flink   Source File: TimestampITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Configures the environment for event time, applies a tumbling event-time window to a keyed
 * stream built with {@code fromElements}, and expects {@code execute()} to throw.
 */
@Test
public void testErrorOnEventTimeWithoutTimestamps() {
	final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();

	streamEnv.setParallelism(2);
	streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

	final DataStream<Tuple2<String, Integer>> pairs =
			streamEnv.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2));

	pairs
			.keyBy(0)
			.window(TumblingEventTimeWindows.of(Time.seconds(5)))
			.reduce(new ReduceFunction<Tuple2<String, Integer>>() {
				// keeps the first operand; the result is never inspected
				@Override
				public Tuple2<String, Integer> reduce(Tuple2<String, Integer> first, Tuple2<String, Integer> second)  {
					return first;
				}
			})
			.print();

	try {
		streamEnv.execute();
		fail("this should fail with an exception");
	} catch (Exception expected) {
		// expected: execution must not succeed
	}
}
 
Example 26
Source Project: Flink-CEPplus   Source File: ReduceCombineDriverTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Feeds an empty input to a {@code ReduceCombineDriver} configured with the
 * {@code SORTED_PARTIAL_REDUCE} strategy and asserts that nothing was collected. Any exception is
 * printed and fails the test.
 */
@Test
public void testImmutableEmpty() {
	try {
		final TestTaskContext<ReduceFunction<Tuple2<String, Integer>>, Tuple2<String, Integer>> ctx =
				new TestTaskContext<>(1024 * 1024);
		ctx.getTaskConfig().setRelativeMemoryDriver(0.5);

		// only the first element of the shuffled sample is used, to derive the type information
		final List<Tuple2<String, Integer>> referenceData = DriverTestData.createReduceImmutableData();
		Collections.shuffle(referenceData);

		final TupleTypeInfo<Tuple2<String, Integer>> recordType =
				(TupleTypeInfo<Tuple2<String, Integer>>) TypeExtractor.getForObject(referenceData.get(0));
		final MutableObjectIterator<Tuple2<String, Integer>> noRecords = EmptyMutableObjectIterator.get();

		ctx.setDriverStrategy(DriverStrategy.SORTED_PARTIAL_REDUCE);
		final TypeComparator<Tuple2<String, Integer>> firstFieldComparator =
				recordType.createComparator(new int[]{0}, new boolean[] {true}, 0, new ExecutionConfig());

		final GatheringCollector<Tuple2<String, Integer>> output =
				new GatheringCollector<>(recordType.createSerializer(new ExecutionConfig()));

		ctx.setInput1(noRecords, recordType.createSerializer(new ExecutionConfig()));
		ctx.setComparator1(firstFieldComparator);
		ctx.setCollector(output);

		final ReduceCombineDriver<Tuple2<String, Integer>> driverUnderTest = new ReduceCombineDriver<>();
		driverUnderTest.setup(ctx);
		driverUnderTest.prepare();
		driverUnderTest.run();

		Assert.assertEquals(0, output.getList().size());
	}
	catch (Exception failure) {
		System.err.println(failure.getMessage());
		failure.printStackTrace();
		Assert.fail(failure.getMessage());
	}
}
 
Example 27
Source Project: flink   Source File: ChainedReduceCombineDriver.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Stores the parent task, marks this driver as running, reads the driver strategy from the
 * config, and instantiates the user's {@link ReduceFunction}, which then receives the UDF
 * runtime context.
 *
 * @param parent the invokable stored as this driver's parent
 */
@Override
public void setup(AbstractInvokable parent) {
	this.parent = parent;
	this.running = true;

	this.strategy = this.config.getDriverStrategy();

	// the user function is loaded through the user code class loader
	this.reducer = BatchTask.instantiateUserCode(this.config, this.userCodeClassLoader, ReduceFunction.class);
	FunctionUtils.setFunctionRuntimeContext(this.reducer, getUdfRuntimeContext());
}
 
Example 28
Source Project: flink   Source File: HBaseConnectorITCase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the test table through {@code InputFormatForTestTable} (created from {@code getConf()}),
 * adds up the single field of all records with a reduce, and checks that exactly one record with
 * the value 360 is collected.
 */
@Test
public void testTableInputFormat() throws Exception {
	final ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();

	final DataSet<Tuple1<Integer>> total = batchEnv
		.createInput(new InputFormatForTestTable(getConf()))
		.reduce((ReduceFunction<Tuple1<Integer>>) (a, b) -> {
			final int sum = a.f0 + b.f0;
			return Tuple1.of(sum);
		});

	final List<Tuple1<Integer>> rows = total.collect();

	assertEquals(1, rows.size());
	assertEquals(360, (int) rows.get(0).f0);
}
 
Example 29
Source Project: flink   Source File: EventTimeWindowCheckpointingITCase.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Runs a checkpointed job (restart strategy: one fixed-delay restart, snapshot compression on)
 * that pre-aggregates keyed sliding event-time windows with a {@link ReduceFunction} and forwards
 * each window's result through a {@code RichWindowFunction} to a validating sink. Any exception
 * is printed and fails the test.
 */
@Test
public void testPreAggregatedSlidingTimeWindow() {
	final int elementsPerKey = numElementsPerKey();
	final int sizeMillis = windowSize();
	final int slideMillis = windowSlide();
	final int keyCount = numKeys();

	try {
		final StreamExecutionEnvironment streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
		streamEnv.setParallelism(PARALLELISM);
		streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
		streamEnv.enableCheckpointing(100);
		streamEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart(1, 0));
		streamEnv.setStateBackend(this.stateBackend);
		streamEnv.getConfig().setUseSnapshotCompression(true);

		streamEnv
				.addSource(new FailingSource(new KeyedEventTimeGenerator(keyCount, slideMillis), elementsPerKey))
				.rebalance()
				.keyBy(0)
				.timeWindow(Time.of(sizeMillis, MILLISECONDS), Time.of(slideMillis, MILLISECONDS))
				.reduce(
						new ReduceFunction<Tuple2<Long, IntType>>() {

							@Override
							public Tuple2<Long, IntType> reduce(
									Tuple2<Long, IntType> left,
									Tuple2<Long, IntType> right) {

								// keep the first operand's f0 and add up the wrapped int values
								return new Tuple2<>(left.f0, new IntType(left.f1.value + right.f1.value));
							}
						},
						new RichWindowFunction<Tuple2<Long, IntType>, Tuple4<Long, Long, Long, IntType>, Tuple, TimeWindow>() {

							private boolean opened = false;

							@Override
							public void open(Configuration parameters) {
								assertEquals(PARALLELISM, getRuntimeContext().getNumberOfParallelSubtasks());
								opened = true;
							}

							@Override
							public void apply(
									Tuple tuple,
									TimeWindow window,
									Iterable<Tuple2<Long, IntType>> input,
									Collector<Tuple4<Long, Long, Long, IntType>> out) {

								// validate that the function has been opened properly
								assertTrue(opened);

								// enrich every element with the bounds of its window
								for (Tuple2<Long, IntType> element : input) {
									out.collect(new Tuple4<>(
											element.f0,
											window.getStart(),
											window.getEnd(),
											element.f1));
								}
							}
						})
				.addSink(new ValidatingSink<>(
					new SinkValidatorUpdateFun(elementsPerKey),
					new SinkValidatorCheckFun(keyCount, elementsPerKey, slideMillis))).setParallelism(1);

		streamEnv.execute("Tumbling Window Test");
	}
	catch (Exception failure) {
		failure.printStackTrace();
		fail(failure.getMessage());
	}
}
 
Example 30
Source Project: gelly-streaming   Source File: SummaryTreeReduce.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Convenience constructor that delegates to the seven-argument constructor, passing
 * {@code null} as the third argument and {@code -1} as the last one.
 *
 * @param updateFun forwarded unchanged as the first argument
 * @param combineFun forwarded unchanged as the second argument
 * @param initialVal forwarded unchanged
 * @param timeMillis forwarded unchanged
 * @param transientState forwarded unchanged
 */
public SummaryTreeReduce(
		EdgesFold<K, EV, S> updateFun,
		ReduceFunction<S> combineFun,
		S initialVal,
		long timeMillis,
		boolean transientState) {
	this(updateFun, combineFun, null, initialVal, timeMillis, transientState, -1);
}