Java Code Examples for org.apache.flink.runtime.operators.testutils.TestData#TupleConstantValueIterator

The following examples show how to use org.apache.flink.runtime.operators.testutils.TestData#TupleConstantValueIterator . These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 2
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 3
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 4
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 5
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;
	
	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);

		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinFunction = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(joinFunction, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 6
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;

	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;

	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> matcher = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 7
@Test
public void testMergeWithHighNumberOfCommonKeys() {
	// the size of the left and right inputs
	final int input1Size = 200;
	final int input2Size = 100;

	final int input1Duplicates = 10;
	final int input2Duplicates = 4000;
	final int duplicateKey = 13;

	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, input1Size);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, input2Size);

		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());

		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
				collectData(input1),
				collectData(input2));

		// re-create the whole thing for actual processing

		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();

		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<>(inList2, comparator2.duplicate());

		StreamOperator operator = getOperator();

		match(expectedMatchesMap, transformToBinary(join(operator, input1, input2)));

		// assert that each expected match was seen
		for (Map.Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 8
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 9
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 10
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 11
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 12
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;
	
	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);

		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinFunction = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(joinFunction, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 13
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;

	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;

	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> matcher = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 14
@Test
public void testMergeWithHighNumberOfCommonKeys() {
	// the size of the left and right inputs
	final int input1Size = 200;
	final int input2Size = 100;

	final int input1Duplicates = 10;
	final int input2Duplicates = 4000;
	final int duplicateKey = 13;

	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, input1Size);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, input2Size);

		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(duplicateKey, "LEFT String for Duplicate Keys", input1Duplicates);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(duplicateKey, "RIGHT String for Duplicate Keys", input2Duplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<>(inList2, comparator2.duplicate());

		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
				collectData(input1),
				collectData(input2));

		// re-create the whole thing for actual processing

		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();

		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<>(inList2, comparator2.duplicate());

		StreamOperator operator = getOperator();

		match(expectedMatchesMap, transformToBinary(join(operator, input1, input2)));

		// assert that each expected match was seen
		for (Map.Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 15
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 16
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final FlatJoinFunction matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 17
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 18
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 19
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;
	
	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);

		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinFunction = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(joinFunction, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example 20
@Test
public void testMergeWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;

	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 4000;
	final int DUPLICATE_KEY = 13;

	try {
		final TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		final TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.SORTED, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<MutableObjectIterator<Tuple2<Integer, String>>>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		// collect expected data
		final Map<Integer, Collection<Match>> expectedMatchesMap = matchValues(
			collectData(input1),
			collectData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new MergeIterator<Tuple2<Integer, String>>(inList1, comparator1.duplicate());
		input2 = new MergeIterator<Tuple2<Integer, String>>(inList2, comparator2.duplicate());
		
		final FlatJoinFunction<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> matcher = new MatchRemovingJoiner(expectedMatchesMap);
		
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<Tuple2<Integer, String>>();

		
		// we create this sort-merge iterator with little memory for the block-nested-loops fall-back to make sure it
		// needs to spill for the duplicate keys
		ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingMergeInnerJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>>(
				input1, input2, this.serializer1, this.comparator1, this.serializer2, this.comparator2,
				this.pairComparator, this.memoryManager, this.ioManager, PAGES_FOR_BNLJN, this.parentTask);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector));
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<Match>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}