org.apache.flink.api.common.functions.util.CopyingListCollector Java Examples

The following examples show how to use org.apache.flink.api.common.functions.util.CopyingListCollector. Each example comes from an open-source project; the source file and originating project are noted above each snippet.
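
Before the project examples, here is a minimal standalone sketch of how the class itself is used: CopyingListCollector wraps a target List and a TypeSerializer and deep-copies every record handed to collect() before adding it to the list, which is why the collection-execution code below relies on it to keep results safe from later object reuse. The sketch assumes only the constructor shape visible in the examples (list plus serializer); the class name CopyingListCollectorSketch and the choice of StringSerializer are illustrative, not taken from the examples.

import java.util.ArrayList;
import java.util.List;

import org.apache.flink.api.common.functions.util.CopyingListCollector;
import org.apache.flink.api.common.typeutils.base.StringSerializer;
import org.apache.flink.util.Collector;

public class CopyingListCollectorSketch {

	public static void main(String[] args) {
		List<String> result = new ArrayList<>();

		// Every collected record is copied with the serializer before it is
		// added to the backing list, so mutating or reusing the emitted object
		// afterwards cannot corrupt already-collected results.
		Collector<String> collector =
				new CopyingListCollector<>(result, StringSerializer.INSTANCE);

		collector.collect("first");
		collector.collect("second");
		collector.close();

		System.out.println(result); // prints [first, second]
	}
}
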
Example #1
Source File: MapPartitionOperatorBase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, this.parameters);
	
	ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4);

	TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer);
	CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

	function.mapPartition(source, resultCollector);

	result.trimToSize();
	FunctionUtils.closeFunction(function);
	return result;
}
 
Example #2
Source File: FlatMapOperatorBase.java    From flink with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, parameters);

	ArrayList<OUT> result = new ArrayList<OUT>(input.size());

	TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

	for (IN element : input) {
		IN inCopy = inSerializer.copy(element);
		function.flatMap(inCopy, resultCollector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
 
Example #3
Source File: MapPartitionOperatorBase.java    From flink with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN> inputData, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	MapPartitionFunction<IN, OUT> function = this.userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, this.parameters);
	
	ArrayList<OUT> result = new ArrayList<OUT>(inputData.size() / 4);

	TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	CopyingIterator<IN> source = new CopyingIterator<IN>(inputData.iterator(), inSerializer);
	CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

	function.mapPartition(source, resultCollector);

	result.trimToSize();
	FunctionUtils.closeFunction(function);
	return result;
}
 
Example #4
Source File: FlatMapOperatorBase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN> input, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	FlatMapFunction<IN, OUT> function = userFunction.getUserCodeObject();
	
	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, parameters);

	ArrayList<OUT> result = new ArrayList<OUT>(input.size());

	TypeSerializer<IN> inSerializer = getOperatorInfo().getInputType().createSerializer(executionConfig);
	TypeSerializer<OUT> outSerializer = getOperatorInfo().getOutputType().createSerializer(executionConfig);

	CopyingListCollector<OUT> resultCollector = new CopyingListCollector<OUT>(result, outSerializer);

	for (IN element : input) {
		IN inCopy = inSerializer.copy(element);
		function.flatMap(inCopy, resultCollector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
 
Example #5
Source File: OuterJoinOperatorBase.java    From flink with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception {
	TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType();
	TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType();
	TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType();

	TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation);
	TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation);

	TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig);
	TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig);

	OuterJoinListIterator<IN1, IN2> outerJoinIterator =
			new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator,
					rightInput, rightSerializer, rightComparator, outerJoinType);

	// --------------------------------------------------------------------
	// Run UDF
	// --------------------------------------------------------------------
	FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(function, runtimeContext);
	FunctionUtils.openFunction(function, this.parameters);

	List<OUT> result = new ArrayList<>();
	Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig));

	while (outerJoinIterator.next()) {
		IN1 left = outerJoinIterator.getLeft();
		IN2 right = outerJoinIterator.getRight();
		function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
 
Example #6
Source File: OuterJoinOperatorBase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN1> leftInput, List<IN2> rightInput, RuntimeContext runtimeContext, ExecutionConfig executionConfig) throws Exception {
	TypeInformation<IN1> leftInformation = getOperatorInfo().getFirstInputType();
	TypeInformation<IN2> rightInformation = getOperatorInfo().getSecondInputType();
	TypeInformation<OUT> outInformation = getOperatorInfo().getOutputType();

	TypeComparator<IN1> leftComparator = buildComparatorFor(0, executionConfig, leftInformation);
	TypeComparator<IN2> rightComparator = buildComparatorFor(1, executionConfig, rightInformation);

	TypeSerializer<IN1> leftSerializer = leftInformation.createSerializer(executionConfig);
	TypeSerializer<IN2> rightSerializer = rightInformation.createSerializer(executionConfig);

	OuterJoinListIterator<IN1, IN2> outerJoinIterator =
			new OuterJoinListIterator<>(leftInput, leftSerializer, leftComparator,
					rightInput, rightSerializer, rightComparator, outerJoinType);

	// --------------------------------------------------------------------
	// Run UDF
	// --------------------------------------------------------------------
	FlatJoinFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(function, runtimeContext);
	FunctionUtils.openFunction(function, this.parameters);

	List<OUT> result = new ArrayList<>();
	Collector<OUT> collector = new CopyingListCollector<>(result, outInformation.createSerializer(executionConfig));

	while (outerJoinIterator.next()) {
		IN1 left = outerJoinIterator.getLeft();
		IN2 right = outerJoinIterator.getRight();
		function.join(left == null ? null : leftSerializer.copy(left), right == null ? null : rightSerializer.copy(right), collector);
	}

	FunctionUtils.closeFunction(function);

	return result;
}
 
Example #7
Source File: InPlaceMutableHashTableTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testWithLengthChangingReduceFunction() throws Exception {
	Random rnd = new Random(RANDOM_SEED);

	final int numKeys = 10000;
	final int numVals = 10;
	final int numRecords = numKeys * numVals;

	StringPairSerializer serializer = new StringPairSerializer();
	StringPairComparator comparator = new StringPairComparator();
	ReduceFunction<StringPair> reducer = new ConcatReducer();

	// Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output.
	List<StringPair> expectedOutput = new ArrayList<>();
	InPlaceMutableHashTableWithJavaHashMap<StringPair, String> reference = new InPlaceMutableHashTableWithJavaHashMap<>(
		serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer));

	// Create the InPlaceMutableHashTable to test
	final int numMemPages = numRecords * 10 / PAGE_SIZE;

	List<StringPair> actualOutput = new ArrayList<>();

	InPlaceMutableHashTable<StringPair> table =
		new InPlaceMutableHashTable<>(serializer, comparator, getMemory(numMemPages, PAGE_SIZE));
	InPlaceMutableHashTable<StringPair>.ReduceFacade reduceFacade =
		table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true);

	// The loop is for checking the feature that multiple open / close are possible.
	for(int j = 0; j < 3; j++) {
		table.open();

		// Test emit when table is empty
		reduceFacade.emit();

		// Process some manual stuff
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")), "foo");
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")), "foo");
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")), "alma");
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")));
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")));
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")));
		for (int i = 0; i < 5; i++) {
			reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")));
			reference.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")), "korte");
		}
		reference.emitAndReset();
		reduceFacade.emitAndReset();

		// Generate some input
		UniformStringPairGenerator gen = new UniformStringPairGenerator(numKeys, numVals, true);
		List<StringPair> input = new ArrayList<>();
		StringPair cur = new StringPair();
		while (gen.next(cur) != null) {
			input.add(serializer.copy(cur));
		}
		Collections.shuffle(input, rnd);

		// Process the generated input
		final int numIntermingledEmits = 5;
		for (StringPair record : input) {
			reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey());
			reduceFacade.updateTableEntryWithReduce(serializer.copy(record));
			if (rnd.nextDouble() < 1.0 / ((double) numRecords / numIntermingledEmits)) {
				// this will fire approx. numIntermingledEmits times
				reference.emitAndReset();
				reduceFacade.emitAndReset();
			}
		}
		reference.emitAndReset();
		reduceFacade.emit();
		table.close();

		// Check results

		assertEquals(expectedOutput.size(), actualOutput.size());

		String[] expectedValues = new String[expectedOutput.size()];
		for (int i = 0; i < expectedOutput.size(); i++) {
			expectedValues[i] = expectedOutput.get(i).getValue();
		}
		String[] actualValues = new String[actualOutput.size()];
		for (int i = 0; i < actualOutput.size(); i++) {
			actualValues[i] = actualOutput.get(i).getValue();
		}

		Arrays.sort(expectedValues, Ordering.<String>natural());
		Arrays.sort(actualValues, Ordering.<String>natural());
		assertArrayEquals(expectedValues, actualValues);

		expectedOutput.clear();
		actualOutput.clear();
	}
}
 
Example #8
Source File: CoGroupRawOperatorBase.java    From flink with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN1> input1, List<IN2> input2, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	// --------------------------------------------------------------------
	// Setup
	// --------------------------------------------------------------------
	TypeInformation<IN1> inputType1 = getOperatorInfo().getFirstInputType();
	TypeInformation<IN2> inputType2 = getOperatorInfo().getSecondInputType();

	int[] inputKeys1 = getKeyColumns(0);
	int[] inputKeys2 = getKeyColumns(1);

	boolean[] inputSortDirections1 = new boolean[inputKeys1.length];
	boolean[] inputSortDirections2 = new boolean[inputKeys2.length];

	Arrays.fill(inputSortDirections1, true);
	Arrays.fill(inputSortDirections2, true);

	final TypeSerializer<IN1> inputSerializer1 = inputType1.createSerializer(executionConfig);
	final TypeSerializer<IN2> inputSerializer2 = inputType2.createSerializer(executionConfig);

	final TypeComparator<IN1> inputComparator1 = getTypeComparator(executionConfig, inputType1, inputKeys1, inputSortDirections1);
	final TypeComparator<IN2> inputComparator2 = getTypeComparator(executionConfig, inputType2, inputKeys2, inputSortDirections2);

	SimpleListIterable<IN1> iterator1 = new SimpleListIterable<IN1>(input1, inputComparator1, inputSerializer1);
	SimpleListIterable<IN2> iterator2 = new SimpleListIterable<IN2>(input2, inputComparator2, inputSerializer2);

	// --------------------------------------------------------------------
	// Run UDF
	// --------------------------------------------------------------------
	CoGroupFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, parameters);

	List<OUT> result = new ArrayList<OUT>();
	Collector<OUT> resultCollector = new CopyingListCollector<OUT>(result, getOperatorInfo().getOutputType().createSerializer(executionConfig));

	function.coGroup(iterator1, iterator2, resultCollector);

	FunctionUtils.closeFunction(function);

	return result;
}
 
Example #9
Source File: InPlaceMutableHashTableTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testWithIntPair() throws Exception {
	Random rnd = new Random(RANDOM_SEED);

	// varying the keyRange between 1000 and 1000000 can make a 5x speed difference
	// (because of cache misses (also in the segment arrays))
	final int keyRange = 1000000;
	final int valueRange = 10;
	final int numRecords = 1000000;

	final IntPairSerializer serializer = new IntPairSerializer();
	final TypeComparator<IntPair> comparator = new IntPairComparator();
	final ReduceFunction<IntPair> reducer = new SumReducer();

	// Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output.
	List<IntPair> expectedOutput = new ArrayList<>();
	InPlaceMutableHashTableWithJavaHashMap<IntPair, Integer> reference = new InPlaceMutableHashTableWithJavaHashMap<>(
		serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer));

	// Create the InPlaceMutableHashTable to test
	final int numMemPages = keyRange * 32 / PAGE_SIZE; // memory use is proportional to the number of different keys
	List<IntPair> actualOutput = new ArrayList<>();

	InPlaceMutableHashTable<IntPair> table = new InPlaceMutableHashTable<>(
		serializer, comparator, getMemory(numMemPages, PAGE_SIZE));
	InPlaceMutableHashTable<IntPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer,
		new CopyingListCollector<>(actualOutput, serializer), true);
	table.open();

	// Generate some input
	final List<IntPair> input = new ArrayList<>();
	for(int i = 0; i < numRecords; i++) {
		input.add(new IntPair(rnd.nextInt(keyRange), rnd.nextInt(valueRange)));
	}

	//System.out.println("start");
	//long start = System.currentTimeMillis();

	// Process the generated input
	final int numIntermingledEmits = 5;
	for (IntPair record: input) {
		reduceFacade.updateTableEntryWithReduce(serializer.copy(record));
		reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey());
		if(rnd.nextDouble() < 1.0 / ((double)numRecords / numIntermingledEmits)) {
			// this will fire approx. numIntermingledEmits times
			reference.emitAndReset();
			reduceFacade.emitAndReset();
		}
	}
	reference.emitAndReset();
	reduceFacade.emit();
	table.close();

	//long end = System.currentTimeMillis();
	//System.out.println("stop, time: " + (end - start));

	// Check results

	assertEquals(expectedOutput.size(), actualOutput.size());

	Integer[] expectedValues = new Integer[expectedOutput.size()];
	for (int i = 0; i < expectedOutput.size(); i++) {
		expectedValues[i] = expectedOutput.get(i).getValue();
	}
	Integer[] actualValues = new Integer[actualOutput.size()];
	for (int i = 0; i < actualOutput.size(); i++) {
		actualValues[i] = actualOutput.get(i).getValue();
	}

	Arrays.sort(expectedValues, Ordering.<Integer>natural());
	Arrays.sort(actualValues, Ordering.<Integer>natural());
	assertArrayEquals(expectedValues, actualValues);
}
 
Example #10
Source File: InPlaceMutableHashTableTest.java    From flink with Apache License 2.0
@Test
public void testWithIntPair() throws Exception {
	Random rnd = new Random(RANDOM_SEED);

	// varying the keyRange between 1000 and 1000000 can make a 5x speed difference
	// (because of cache misses (also in the segment arrays))
	final int keyRange = 1000000;
	final int valueRange = 10;
	final int numRecords = 1000000;

	final IntPairSerializer serializer = new IntPairSerializer();
	final TypeComparator<IntPair> comparator = new IntPairComparator();
	final ReduceFunction<IntPair> reducer = new SumReducer();

	// Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output.
	List<IntPair> expectedOutput = new ArrayList<>();
	InPlaceMutableHashTableWithJavaHashMap<IntPair, Integer> reference = new InPlaceMutableHashTableWithJavaHashMap<>(
		serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer));

	// Create the InPlaceMutableHashTable to test
	final int numMemPages = keyRange * 32 / PAGE_SIZE; // memory use is proportional to the number of different keys
	List<IntPair> actualOutput = new ArrayList<>();

	InPlaceMutableHashTable<IntPair> table = new InPlaceMutableHashTable<>(
		serializer, comparator, getMemory(numMemPages, PAGE_SIZE));
	InPlaceMutableHashTable<IntPair>.ReduceFacade reduceFacade = table.new ReduceFacade(reducer,
		new CopyingListCollector<>(actualOutput, serializer), true);
	table.open();

	// Generate some input
	final List<IntPair> input = new ArrayList<>();
	for(int i = 0; i < numRecords; i++) {
		input.add(new IntPair(rnd.nextInt(keyRange), rnd.nextInt(valueRange)));
	}

	//System.out.println("start");
	//long start = System.currentTimeMillis();

	// Process the generated input
	final int numIntermingledEmits = 5;
	for (IntPair record: input) {
		reduceFacade.updateTableEntryWithReduce(serializer.copy(record));
		reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey());
		if(rnd.nextDouble() < 1.0 / ((double)numRecords / numIntermingledEmits)) {
			// this will fire approx. numIntermingledEmits times
			reference.emitAndReset();
			reduceFacade.emitAndReset();
		}
	}
	reference.emitAndReset();
	reduceFacade.emit();
	table.close();

	//long end = System.currentTimeMillis();
	//System.out.println("stop, time: " + (end - start));

	// Check results

	assertEquals(expectedOutput.size(), actualOutput.size());

	Integer[] expectedValues = new Integer[expectedOutput.size()];
	for (int i = 0; i < expectedOutput.size(); i++) {
		expectedValues[i] = expectedOutput.get(i).getValue();
	}
	Integer[] actualValues = new Integer[actualOutput.size()];
	for (int i = 0; i < actualOutput.size(); i++) {
		actualValues[i] = actualOutput.get(i).getValue();
	}

	Arrays.sort(expectedValues, Ordering.<Integer>natural());
	Arrays.sort(actualValues, Ordering.<Integer>natural());
	assertArrayEquals(expectedValues, actualValues);
}
 
Example #11
Source File: InPlaceMutableHashTableTest.java    From flink with Apache License 2.0
@Test
public void testWithLengthChangingReduceFunction() throws Exception {
	Random rnd = new Random(RANDOM_SEED);

	final int numKeys = 10000;
	final int numVals = 10;
	final int numRecords = numKeys * numVals;

	StringPairSerializer serializer = new StringPairSerializer();
	StringPairComparator comparator = new StringPairComparator();
	ReduceFunction<StringPair> reducer = new ConcatReducer();

	// Create the InPlaceMutableHashTableWithJavaHashMap, which will provide the correct output.
	List<StringPair> expectedOutput = new ArrayList<>();
	InPlaceMutableHashTableWithJavaHashMap<StringPair, String> reference = new InPlaceMutableHashTableWithJavaHashMap<>(
		serializer, comparator, reducer, new CopyingListCollector<>(expectedOutput, serializer));

	// Create the InPlaceMutableHashTable to test
	final int numMemPages = numRecords * 10 / PAGE_SIZE;

	List<StringPair> actualOutput = new ArrayList<>();

	InPlaceMutableHashTable<StringPair> table =
		new InPlaceMutableHashTable<>(serializer, comparator, getMemory(numMemPages, PAGE_SIZE));
	InPlaceMutableHashTable<StringPair>.ReduceFacade reduceFacade =
		table.new ReduceFacade(reducer, new CopyingListCollector<>(actualOutput, serializer), true);

	// The loop is for checking the feature that multiple open / close are possible.
	for(int j = 0; j < 3; j++) {
		table.open();

		// Test emit when table is empty
		reduceFacade.emit();

		// Process some manual stuff
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")), "foo");
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")), "foo");
		reference.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")), "alma");
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "bar")));
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("foo", "baz")));
		reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("alma", "xyz")));
		for (int i = 0; i < 5; i++) {
			reduceFacade.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")));
			reference.updateTableEntryWithReduce(serializer.copy(new StringPair("korte", "abc")), "korte");
		}
		reference.emitAndReset();
		reduceFacade.emitAndReset();

		// Generate some input
		UniformStringPairGenerator gen = new UniformStringPairGenerator(numKeys, numVals, true);
		List<StringPair> input = new ArrayList<>();
		StringPair cur = new StringPair();
		while (gen.next(cur) != null) {
			input.add(serializer.copy(cur));
		}
		Collections.shuffle(input, rnd);

		// Process the generated input
		final int numIntermingledEmits = 5;
		for (StringPair record : input) {
			reference.updateTableEntryWithReduce(serializer.copy(record), record.getKey());
			reduceFacade.updateTableEntryWithReduce(serializer.copy(record));
			if (rnd.nextDouble() < 1.0 / ((double) numRecords / numIntermingledEmits)) {
				// this will fire approx. numIntermingledEmits times
				reference.emitAndReset();
				reduceFacade.emitAndReset();
			}
		}
		reference.emitAndReset();
		reduceFacade.emit();
		table.close();

		// Check results

		assertEquals(expectedOutput.size(), actualOutput.size());

		String[] expectedValues = new String[expectedOutput.size()];
		for (int i = 0; i < expectedOutput.size(); i++) {
			expectedValues[i] = expectedOutput.get(i).getValue();
		}
		String[] actualValues = new String[actualOutput.size()];
		for (int i = 0; i < actualOutput.size(); i++) {
			actualValues[i] = actualOutput.get(i).getValue();
		}

		Arrays.sort(expectedValues, Ordering.<String>natural());
		Arrays.sort(actualValues, Ordering.<String>natural());
		assertArrayEquals(expectedValues, actualValues);

		expectedOutput.clear();
		actualOutput.clear();
	}
}
 
Example #12
Source File: CoGroupRawOperatorBase.java    From Flink-CEPplus with Apache License 2.0
@Override
protected List<OUT> executeOnCollections(List<IN1> input1, List<IN2> input2, RuntimeContext ctx, ExecutionConfig executionConfig) throws Exception {
	// --------------------------------------------------------------------
	// Setup
	// --------------------------------------------------------------------
	TypeInformation<IN1> inputType1 = getOperatorInfo().getFirstInputType();
	TypeInformation<IN2> inputType2 = getOperatorInfo().getSecondInputType();

	int[] inputKeys1 = getKeyColumns(0);
	int[] inputKeys2 = getKeyColumns(1);

	boolean[] inputSortDirections1 = new boolean[inputKeys1.length];
	boolean[] inputSortDirections2 = new boolean[inputKeys2.length];

	Arrays.fill(inputSortDirections1, true);
	Arrays.fill(inputSortDirections2, true);

	final TypeSerializer<IN1> inputSerializer1 = inputType1.createSerializer(executionConfig);
	final TypeSerializer<IN2> inputSerializer2 = inputType2.createSerializer(executionConfig);

	final TypeComparator<IN1> inputComparator1 = getTypeComparator(executionConfig, inputType1, inputKeys1, inputSortDirections1);
	final TypeComparator<IN2> inputComparator2 = getTypeComparator(executionConfig, inputType2, inputKeys2, inputSortDirections2);

	SimpleListIterable<IN1> iterator1 = new SimpleListIterable<IN1>(input1, inputComparator1, inputSerializer1);
	SimpleListIterable<IN2> iterator2 = new SimpleListIterable<IN2>(input2, inputComparator2, inputSerializer2);

	// --------------------------------------------------------------------
	// Run UDF
	// --------------------------------------------------------------------
	CoGroupFunction<IN1, IN2, OUT> function = userFunction.getUserCodeObject();

	FunctionUtils.setFunctionRuntimeContext(function, ctx);
	FunctionUtils.openFunction(function, parameters);

	List<OUT> result = new ArrayList<OUT>();
	Collector<OUT> resultCollector = new CopyingListCollector<OUT>(result, getOperatorInfo().getOutputType().createSerializer(executionConfig));

	function.coGroup(iterator1, iterator2, resultCollector);

	FunctionUtils.closeFunction(function);

	return result;
}
 