Java Code Examples for org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator

The following examples show how to use org.apache.flink.runtime.operators.testutils.TestData.TupleGenerator. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Flink-CEPplus   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the behavior with overflow buckets: when the input is reopened again,
 * the overflow buckets must be initialized correctly.
 */
@Test
public void testOverflow() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 200, 1024, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 2
Source Project: Flink-CEPplus   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that everything operates properly when the build side is spilled to disk.
 */
@Test
public void testDoubleProbeSpilling() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 3
Source Project: Flink-CEPplus   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the hybrid hash join copes with multiple probe phases when the
 * build side fits completely into memory.
 */
@Test
public void testDoubleProbeInMemory() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 4
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		final Integer key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 5
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		// unboxes the key field, exactly as the map lookups below box it again
		final int key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 6
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the behavior with overflow buckets: when the input is reopened again,
 * the overflow buckets must be initialized correctly.
 */
@Test
public void testOverflow() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 200, 1024, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 7
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that everything operates properly when the build side is spilled to disk.
 */
@Test
public void testDoubleProbeSpilling() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 8
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the hybrid hash join copes with multiple probe phases when the
 * build side fits completely into memory.
 */
@Test
public void testDoubleProbeInMemory() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 9
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		final Integer key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 10
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		// unboxes the key field, exactly as the map lookups below box it again
		final int key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 11
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the behavior with overflow buckets: when the input is reopened again,
 * the overflow buckets must be initialized correctly.
 */
@Test
public void testOverflow() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 200, 1024, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 12
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that everything operates properly when the build side is spilled to disk.
 */
@Test
public void testDoubleProbeSpilling() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 1024, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 13
Source Project: flink   Source File: ReOpenableHashTableTestBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the hybrid hash join copes with multiple probe phases when the
 * build side fits completely into memory.
 */
@Test
public void testDoubleProbeInMemory() {
	final int numBuildTuples = 1000;
	final int numProbeTuples = 1000;

	try {
		final TupleGenerator buildGenerator =
				new TupleGenerator(SEED1, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);
		final TupleGenerator probeGenerator =
				new TupleGenerator(SEED2, 0, 28, KeyMode.SORTED, ValueMode.FIX_LENGTH);

		// the iterators bound how many tuples each generator hands to the test
		doTest(
				new TupleGeneratorIterator(buildGenerator, numBuildTuples),
				new TupleGeneratorIterator(probeGenerator, numProbeTuples),
				buildGenerator,
				probeGenerator);
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 14
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		final Integer key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 15
/**
 * Reads {@code num} tuples from the generator and groups their string values by key.
 *
 * @param iter generator the tuples are pulled from
 * @param num number of tuples to pull
 * @return map from each key seen to the values read for it, in the order they were read
 * @throws Exception whatever {@code iter.next(...)} throws
 */
private Map<Integer, Collection<String>> collectData(TupleGenerator iter, int num)
throws Exception
{
	final Map<Integer, Collection<String>> valuesByKey = new HashMap<>();
	// a single tuple instance is handed to the generator on every call and read back afterwards
	final Tuple2<Integer, String> reuse = new Tuple2<>();

	for (int remaining = num; remaining > 0; remaining--) {
		iter.next(reuse);
		// unboxes the key field, exactly as the map lookups below box it again
		final int key = reuse.f0;

		// create the bucket for a key the first time that key shows up
		Collection<String> bucket = valuesByKey.get(key);
		if (bucket == null) {
			bucket = new ArrayList<String>();
			valuesByKey.put(key, bucket);
		}
		bucket.add(reuse.f1);
	}
	return valuesByKey;
}
 
Example 16
Source Project: flink   Source File: RandomSortMergeInnerJoinTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins two generated inputs through the operator under test and checks the result
 * against the matches computed directly from the same data.
 *
 * @throws Exception if generating, joining or matching the data fails
 */
@Test
public void test() throws Exception {
	final TupleGenerator firstGenerator =
			new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
	final TupleGenerator secondGenerator =
			new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

	final TestData.TupleGeneratorIterator firstInput =
			new TestData.TupleGeneratorIterator(firstGenerator, INPUT_FIRST_SIZE);
	final TestData.TupleGeneratorIterator secondInput =
			new TestData.TupleGeneratorIterator(secondGenerator, INPUT_SECOND_SIZE);

	// first pass over both inputs: compute the matches the join is expected to produce
	final Map<Integer, Collection<Match>> expected =
			matchValues(collectData(firstInput), collectData(secondInput));

	// rewind generators and iterators before the inputs are consumed a second time
	firstGenerator.reset();
	secondGenerator.reset();
	firstInput.reset();
	secondInput.reset();

	final StreamOperator operator = getOperator();

	// second pass: run the join and check its output against the expected matches
	match(expected, transformToBinary(join(operator, firstInput, secondInput)));

	// every per-key collection of expected matches has to be empty at this point
	for (Map.Entry<Integer, Collection<Match>> expectedForKey : expected.entrySet()) {
		final String message = "Collection for key " + expectedForKey.getKey() + " is not empty";
		final boolean allSeen = expectedForKey.getValue().isEmpty();
		Assert.assertTrue(message, allSeen);
	}
}
 
Example 17
Source Project: flink   Source File: RandomSortMergeInnerJoinTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Joins two generated inputs through the operator under test and checks the result
 * against the matches computed directly from the same data.
 *
 * @throws Exception if generating, joining or matching the data fails
 */
@Test
public void test() throws Exception {
	final TupleGenerator firstGenerator =
			new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
	final TupleGenerator secondGenerator =
			new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

	final TestData.TupleGeneratorIterator firstInput =
			new TestData.TupleGeneratorIterator(firstGenerator, INPUT_FIRST_SIZE);
	final TestData.TupleGeneratorIterator secondInput =
			new TestData.TupleGeneratorIterator(secondGenerator, INPUT_SECOND_SIZE);

	// first pass over both inputs: compute the matches the join is expected to produce
	final Map<Integer, Collection<Match>> expected =
			matchValues(collectData(firstInput), collectData(secondInput));

	// rewind generators and iterators before the inputs are consumed a second time
	firstGenerator.reset();
	secondGenerator.reset();
	firstInput.reset();
	secondInput.reset();

	final StreamOperator operator = getOperator();

	// second pass: run the join and check its output against the expected matches
	match(expected, transformToBinary(join(operator, firstInput, secondInput)));

	// every per-key collection of expected matches has to be empty at this point
	for (Map.Entry<Integer, Collection<Match>> expectedForKey : expected.entrySet()) {
		final String message = "Collection for key " + expectedForKey.getKey() + " is not empty";
		final boolean allSeen = expectedForKey.getValue().isEmpty();
		Assert.assertTrue(message, allSeen);
	}
}
 
Example 18
/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two generated inputs and
 * checks that every match computed up front via {@code joinTuples} is accounted for.
 */
@Test
public void testBuildFirst() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				joinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, false, true);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 19
/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over inputs that, besides the
 * generated tuples, contain many tuples on both sides sharing one constant key.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// number of generated tuples on the left and right side
	final int leftSize = 200;
	final int rightSize = 100;

	// number of additional constant tuples per side, all carrying the shared key
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;

	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		// each side is the union of its generated tuples and its constant-key tuples
		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftParts = new ArrayList<>();
		leftParts.add(leftGenerated);
		leftParts.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightParts = new ArrayList<>();
		rightParts.add(rightGenerated);
		rightParts.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> leftInput = new UnionIterator<>(leftParts);
		MutableObjectIterator<Tuple2<Integer, String>> rightInput = new UnionIterator<>(rightParts);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				joinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		// rewind everything and rebuild the unions for the actual join run
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		leftParts.clear();
		leftParts.add(leftGenerated);
		leftParts.add(leftConstant);

		rightParts.clear();
		rightParts.add(rightGenerated);
		rightParts.add(rightConstant);

		leftInput = new UnionIterator<>(leftParts);
		rightInput = new UnionIterator<>(rightParts);

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, false, true);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 20
/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two generated inputs and
 * checks that every match computed up front via {@code joinTuples} is accounted for.
 */
@Test
public void testBuildSecond() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				joinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildSecondHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, false, true);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 21
/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over inputs that, besides the
 * generated tuples, contain many tuples on both sides sharing one constant key.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// number of generated tuples on the left and right side
	final int leftSize = 200;
	final int rightSize = 100;

	// number of additional constant tuples per side, all carrying the shared key
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;

	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant =
				new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		// each side is the union of its generated tuples and its constant-key tuples
		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftParts = new ArrayList<>();
		leftParts.add(leftGenerated);
		leftParts.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightParts = new ArrayList<>();
		rightParts.add(rightGenerated);
		rightParts.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> leftInput = new UnionIterator<>(leftParts);
		MutableObjectIterator<Tuple2<Integer, String>> rightInput = new UnionIterator<>(rightParts);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				joinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		// rewind everything and rebuild the unions for the actual join run
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		leftParts.clear();
		leftParts.add(leftGenerated);
		leftParts.add(leftConstant);

		rightParts.clear();
		rightParts.add(rightGenerated);
		rightParts.add(rightConstant);

		leftInput = new UnionIterator<>(leftParts);
		rightInput = new UnionIterator<>(rightParts);

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildSecondHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, false, true);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 22
/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two generated inputs and
 * checks the result against the matches computed by {@code rightOuterJoinTuples}.
 */
@Test
public void testBuildFirstAndProbeSideOuterJoin() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 1000, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				rightOuterJoinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, true, false, false);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 23
/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two generated inputs and
 * checks the result against the matches computed by {@code leftOuterJoinTuples}.
 */
@Test
public void testBuildFirstAndBuildSideOuterJoin() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 1000, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				leftOuterJoinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, true, false);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 24
/**
 * Runs a {@code NonReusingBuildFirstHashJoinIterator} over two generated inputs and
 * checks the result against the matches computed by {@code fullOuterJoinTuples}.
 */
@Test
public void testBuildFirstAndFullOuterJoin() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 1000, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				fullOuterJoinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, true, true, false);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 25
/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two generated inputs and
 * checks the result against the matches computed by {@code leftOuterJoinTuples}.
 */
@Test
public void testBuildSecondAndProbeSideOuterJoin() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 1000, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				leftOuterJoinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildSecondHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, true, false, false);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 26
/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two generated inputs and
 * checks the result against the matches computed by {@code rightOuterJoinTuples}.
 */
@Test
public void testBuildSecondAndBuildSideOuterJoin() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 1000, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// first pass over both inputs: compute the matches the join is expected to produce
		final Map<Integer, Collection<TupleMatch>> expected =
				rightOuterJoinTuples(collectTupleData(leftInput), collectTupleData(rightInput));

		final TupleMatchRemovingJoin joinFunction = new TupleMatchRemovingJoin(expected);
		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the inputs are consumed a second time
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// second pass: feed the same data through the hash join iterator
		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildSecondHashJoinIterator<>(
						leftInput, rightInput,
						this.recordSerializer, this.record1Comparator,
						this.recordSerializer, this.record2Comparator,
						this.recordPairComparator,
						this.memoryManager, ioManager, this.parentTask,
						1.0, false, true, false);

		joinIterator.open();

		// keep calling until the iterator reports that no further key is available
		boolean hasMore;
		do {
			hasMore = joinIterator.callWithNextKey(joinFunction, sink);
		} while (hasMore);

		joinIterator.close();

		// every per-key collection of expected matches has to be empty at this point
		for (Entry<Integer, Collection<TupleMatch>> expectedForKey : expected.entrySet()) {
			final Collection<TupleMatch> unmatched = expectedForKey.getValue();
			if (unmatched.isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + expectedForKey.getKey() + " is not empty");
		}
	} catch (Exception e) {
		// report any checked failure as a test failure, keeping the stack trace on stderr
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 27
/**
 * Runs a {@code NonReusingBuildSecondHashJoinIterator} over two generated inputs with
 * random keys and compares its output against the matches that
 * {@code fullOuterJoinTuples} computes up front from the same inputs.
 *
 * <p>After the iterator is exhausted, each collection in the expected-matches map must be
 * empty; otherwise, or when any {@link Exception} occurs, the test fails.
 */
@Test
public void testBuildSecondAndFullOuterJoin() {
	try {
		TupleGenerator genA = new TupleGenerator(SEED1, 1000, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator genB = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator inputA = new TestData.TupleGeneratorIterator(genA, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator inputB = new TestData.TupleGeneratorIterator(genB, INPUT_2_SIZE);

		// build the reference result from one pass over both inputs
		final Map<Integer, Collection<TupleMatch>> outstanding =
			fullOuterJoinTuples(collectTupleData(inputA), collectTupleData(inputB));

		final TupleMatchRemovingJoin joiner = new TupleMatchRemovingJoin(outstanding);
		final Collector<Tuple2<Integer, String>> discarding = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the join iterator reads them
		genA.reset();
		genB.reset();
		inputA.reset();
		inputB.reset();

		// NOTE(review): the trailing booleans (true, true, false) presumably enable both the
		// probe-side and build-side outer join - confirm against the iterator's constructor
		final NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> fullJoin =
			new NonReusingBuildSecondHashJoinIterator<>(
				inputA, inputB, this.recordSerializer, this.record1Comparator,
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, true, true, false);

		fullJoin.open();

		// process key after key until the iterator signals the end
		boolean keepGoing;
		do {
			keepGoing = fullJoin.callWithNextKey(joiner, discarding);
		} while (keepGoing);

		fullJoin.close();

		// verify that no expected match is left unconsumed
		for (Entry<Integer, Collection<TupleMatch>> leftover : outstanding.entrySet()) {
			Assert.assertTrue("Collection for key " + leftover.getKey() + " is not empty", leftover.getValue().isEmpty());
		}
	}
	catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail("An exception occurred during the test: " + ex.getMessage());
	}
}
 
Example 28
/**
 * Joins two generated inputs with sorted keys through a
 * {@code NonReusingMergeInnerJoinIterator} and checks the outcome against the matches
 * that {@code matchValues} computes up front from the same inputs.
 *
 * <p>The test fails if a collection in the expected-matches map is non-empty after the
 * iterator has been exhausted, or if any {@link Exception} is thrown.
 */
@Test
public void testMerge() {
	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftInput = new TestData.TupleGeneratorIterator(leftGenerator, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator rightInput = new TestData.TupleGeneratorIterator(rightGenerator, INPUT_2_SIZE);

		// reference result, obtained by reading both inputs once
		final Map<Integer, Collection<Match>> unmatched =
				matchValues(collectData(leftInput), collectData(rightInput));

		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joiner =
				new MatchRemovingJoiner(unmatched);

		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the merge iterator reads them
		leftGenerator.reset();
		rightGenerator.reset();
		leftInput.reset();
		rightInput.reset();

		// iterator under test
		final NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> mergeJoin =
			new NonReusingMergeInnerJoinIterator<>(
				leftInput, rightInput, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		mergeJoin.open();

		// drive the join until it reports that no further key is available
		boolean moreKeys;
		do {
			moreKeys = mergeJoin.callWithNextKey(joiner, sink);
		} while (moreKeys);

		mergeJoin.close();

		// every expected match must have been consumed
		for (Entry<Integer, Collection<Match>> rest : unmatched.entrySet()) {
			if (!rest.getValue().isEmpty()) {
				Assert.fail("Collection for key " + rest.getKey() + " is not empty");
			}
		}
	}
	catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail("An exception occurred during the test: " + ex.getMessage());
	}
}
 
Example 29
/**
 * Joins two inputs that each merge a generated, sorted-key stream with a run of
 * constant-key tuples (key 13: 10 on the left side, 4000 on the right side) through a
 * {@code NonReusingMergeInnerJoinIterator}.
 *
 * <p>The expected matches are computed first via {@code matchValues}; afterwards all
 * sources are reset, the merged inputs are rebuilt, and the join is run. The test fails
 * if a collection in the expected-matches map is non-empty at the end, or if any
 * {@link Exception} is thrown.
 */
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// number of generated tuples on the left and right side
	final int leftSize = 200;
	final int rightSize = 100;

	// number of additional tuples per side that all carry the same key
	final int leftDuplicates = 10;
	final int rightDuplicates = 4000;
	final int duplicateKey = 13;

	try {
		final TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> leftMerged = new MergeIterator<Tuple2<Integer, String>>(leftSources, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> rightMerged = new MergeIterator<Tuple2<Integer, String>>(rightSources, comparator2.duplicate());

		// reference result, obtained by reading both merged inputs once
		final Map<Integer, Collection<Match>> unmatched =
			matchValues(collectData(leftMerged), collectData(rightMerged));

		// the inputs have been consumed: reset every source and rebuild the merged inputs
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		leftMerged = new MergeIterator<Tuple2<Integer, String>>(leftSources, comparator1.duplicate());
		rightMerged = new MergeIterator<Tuple2<Integer, String>>(rightSources, comparator2.duplicate());

		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joiner = new MatchRemovingJoiner(unmatched);

		final Collector<Tuple2<Integer, String>> sink = new DiscardingOutputCollector<>();

		// the iterator gets only little memory for its block-nested-loops fall-back, so that
		// it has to spill when it processes the duplicate keys
		final NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> mergeJoin =
			new NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				leftMerged, rightMerged, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		mergeJoin.open();

		while (mergeJoin.callWithNextKey(joiner, sink)) {
			// all work happens inside callWithNextKey; loop until it returns false
		}

		mergeJoin.close();

		// every expected match must have been consumed
		for (Entry<Integer, Collection<Match>> rest : unmatched.entrySet()) {
			Assert.assertTrue("Collection for key " + rest.getKey() + " is not empty", rest.getValue().isEmpty());
		}
	}
	catch (Exception ex) {
		ex.printStackTrace();
		Assert.fail("An exception occurred during the test: " + ex.getMessage());
	}
}
 
Example 30
/**
 * Joins two generated inputs with sorted keys through a
 * {@code ReusingMergeInnerJoinIterator} and checks the outcome against the matches that
 * {@code matchValues} computes up front from the same inputs.
 *
 * <p>The test fails if a collection in the expected-matches map is non-empty after the
 * iterator has been exhausted, or if any {@link Exception} is thrown.
 */
@Test
public void testMerge() {
	try {
		final TupleGenerator genLeft = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator genRight = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator left = new TestData.TupleGeneratorIterator(genLeft, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator right = new TestData.TupleGeneratorIterator(genRight, INPUT_2_SIZE);

		// compute the matches the join is expected to produce from one pass over the inputs
		final Map<Integer, Collection<Match>> expectedLeftovers =
			matchValues(collectData(left), collectData(right));

		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> removingJoiner =
				new MatchRemovingJoiner(expectedLeftovers);

		final Collector<Tuple2<Integer, String>> discarding = new DiscardingOutputCollector<>();

		// rewind generators and iterators before the merge iterator reads them
		genLeft.reset();
		genRight.reset();
		left.reset();
		right.reset();

		// iterator under test
		final ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> reusingJoin =
			new ReusingMergeInnerJoinIterator<>(
				left, right, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		reusingJoin.open();

		while (reusingJoin.callWithNextKey(removingJoiner, discarding)) {
			// all work happens inside callWithNextKey; loop until it returns false
		}

		reusingJoin.close();

		// no expected match may remain
		for (Entry<Integer, Collection<Match>> leftover : expectedLeftovers.entrySet()) {
			if (!leftover.getValue().isEmpty()) {
				Assert.fail("Collection for key " + leftover.getKey() + " is not empty");
			}
		}
	}
	catch (Exception failure) {
		failure.printStackTrace();
		Assert.fail("An exception occurred during the test: " + failure.getMessage());
	}
}