org.apache.flink.runtime.operators.testutils.UnionIterator Java Examples

The following examples show how to use org.apache.flink.runtime.operators.testutils.UnionIterator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ReOpenableHashTableITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the probe-side input as a union of one uniformly generated tuple stream
 * and two constant-key tuple streams (one per repeated value).
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(final int numKeys,
																	  final int probeValsPerKey, final int repeatedValue1, final int repeatedValue2) {
	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> sources = new ArrayList<>();

	// uniformly generated pairs over numKeys keys
	sources.add(new UniformIntTupleGenerator(numKeys, probeValsPerKey, true));

	// constant pairs for the two repeated keys; arguments are presumably (key, value, count) -- TODO confirm
	sources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5));
	sources.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5));

	return new UnionIterator<>(sources);
}
 
Example #2
Source File: ReOpenableHashTableITCase.java    From Flink-CEPplus with Apache License 2.0 5 votes vote down vote up
/**
 * Assembles the probe input: a uniform tuple generator followed by two
 * constant-key iterators, exposed as a single union iterator.
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(final int numKeys,
																	  final int probeValsPerKey, final int repeatedValue1, final int repeatedValue2) {
	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> parts = new ArrayList<>();

	// generated pairs covering numKeys keys
	parts.add(new UniformIntTupleGenerator(numKeys, probeValsPerKey, true));

	// extra pairs for each repeated key; presumably (key, value, count) -- verify against TestData
	parts.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5));
	parts.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5));

	return new UnionIterator<>(parts);
}
 
Example #3
Source File: ReOpenableHashTableITCase.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the probe input by concatenating a uniform tuple generator with two
 * constant-key iterators for the given repeated values.
 */
private MutableObjectIterator<Tuple2<Integer, Integer>> getProbeInput(final int numKeys,
																	  final int probeValsPerKey, final int repeatedValue1, final int repeatedValue2) {
	final List<MutableObjectIterator<Tuple2<Integer, Integer>>> inputs = new ArrayList<>();

	// the bulk of the probe side
	inputs.add(new UniformIntTupleGenerator(numKeys, probeValsPerKey, true));

	// additional tuples for the repeated keys; argument meaning presumably (key, value, count) -- TODO confirm
	inputs.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue1, 17, 5));
	inputs.add(new TestData.ConstantIntIntTuplesIterator(repeatedValue2, 23, 5));

	return new UnionIterator<>(inputs);
}
 
Example #4
Source File: CombinerOversizedRecordsTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Feeds the sorted group-combine driver an input that interleaves three records
 * carrying a very long string with two runs of generated tuples, then checks the
 * reported oversized-record count and the number of emitted records.
 */
@Test
public void testOversizedRecordCombineTask() {
	try {
		final int keyCnt = 100;
		final int valCnt = 20;

		// build a heavy payload of ten million random lowercase letters
		StringBuilder payload = new StringBuilder(10 * 1024 * 1024);
		Random random = new Random();
		int appended = 0;
		while (appended < 10000000) {
			payload.append((char) (random.nextInt(26) + 'a'));
			appended++;
		}
		String longString = payload.toString();
		// drop the builder reference so its buffer is no longer reachable
		payload = null;

		// the input is the union of:
		//   long string, random values, long string, random values, long string

		// first run of random values
		MutableObjectIterator<Tuple2<Integer, Integer>> firstRun =
				new UniformIntTupleGenerator(keyCnt, valCnt, false);

		// second run of random values
		MutableObjectIterator<Tuple2<Integer, Integer>> secondRun =
				new UniformIntTupleGenerator(keyCnt, valCnt, false);

		@SuppressWarnings("unchecked")
		MutableObjectIterator<Tuple3<Integer, Integer, String>> input =
				new UnionIterator<Tuple3<Integer, Integer, String>>(
						new SingleValueIterator<Tuple3<Integer, Integer, String>>(new Tuple3<Integer, Integer, String>(-1, -1, longString)),
						new StringIteratorDecorator(firstRun),
						new SingleValueIterator<Tuple3<Integer, Integer, String>>(new Tuple3<Integer, Integer, String>(-1, -1, longString)),
						new StringIteratorDecorator(secondRun),
						new SingleValueIterator<Tuple3<Integer, Integer, String>>(new Tuple3<Integer, Integer, String>(-1, -1, longString)));

		// wire input, comparators and output into the test harness
		setInput(input, serializer);
		addDriverComparator(this.comparator);
		addDriverComparator(this.comparator);
		setOutput(this.outList, this.outSerializer);

		// configure the driver under test
		getTaskConfig().setDriverStrategy(DriverStrategy.SORTED_GROUP_COMBINE);
		getTaskConfig().setRelativeMemoryDriver(combine_frac);
		getTaskConfig().setFilehandlesDriver(2);

		GroupReduceCombineDriver<Tuple3<Integer, Integer, String>, Tuple3<Integer, Double, String>> driver =
				new GroupReduceCombineDriver<>();

		testDriver(driver, TestCombiner.class);

		// each of the three long-string records must have been counted as oversized
		assertEquals(3, driver.getOversizedRecordCount());

		// either of the two output sizes is accepted
		final int emitted = outList.size();
		assertTrue(emitted == keyCnt + 3 || emitted == 2 * keyCnt + 3);
	}
	catch (Exception e) {
		e.printStackTrace();
		fail(e.getMessage());
	}
}
 
Example #5
Source File: NonReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins two inputs that both contain one heavily repeated key with the
 * build-first hash join iterator and checks that every expected match is seen.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// sizes of the generated part of the left and right inputs
	final int leftSize = 200;
	final int rightSize = 100;

	// sizes of the constant-key part of each input, and the key they share
	final int leftDuplicates = 10;
	final int rightDuplicates = 2000;
	final int sharedKey = 13;

	try {
		TupleGenerator leftGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator rightGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator leftGenerated = new TestData.TupleGeneratorIterator(leftGenerator, leftSize);
		final TestData.TupleGeneratorIterator rightGenerated = new TestData.TupleGeneratorIterator(rightGenerator, rightSize);

		final TestData.TupleConstantValueIterator leftConstant = new TestData.TupleConstantValueIterator(sharedKey, "LEFT String for Duplicate Keys", leftDuplicates);
		final TestData.TupleConstantValueIterator rightConstant = new TestData.TupleConstantValueIterator(sharedKey, "RIGHT String for Duplicate Keys", rightDuplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> leftSources = new ArrayList<>();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> rightSources = new ArrayList<>();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(leftSources);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(rightSources);

		// first pass over the data: compute the expected matches
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));

		// second pass: rewind all sources and rebuild the unions for the actual join
		leftGenerator.reset();
		rightGenerator.reset();
		leftConstant.reset();
		rightConstant.reset();
		leftGenerated.reset();
		rightGenerated.reset();

		leftSources.clear();
		leftSources.add(leftGenerated);
		leftSources.add(leftConstant);

		rightSources.clear();
		rightSources.add(rightGenerated);
		rightSources.add(rightConstant);

		input1 = new UnionIterator<>(leftSources);
		input2 = new UnionIterator<>(rightSources);

		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator,
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		joinIterator.open();

		while (joinIterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no more keys
		}

		joinIterator.close();

		// every expected match must have been consumed by the matcher
		for (Entry<Integer, Collection<TupleMatch>> remaining : expectedMatchesMap.entrySet()) {
			if (remaining.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + remaining.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example #6
Source File: NonReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins two inputs that both contain one heavily repeated key with the
 * build-second hash join iterator and checks that every expected match is seen.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// sizes of the generated part of the first and second inputs
	final int firstSize = 200;
	final int secondSize = 100;

	// sizes of the constant-key part of each input, and the key they have in common
	final int firstDuplicates = 10;
	final int secondDuplicates = 2000;
	final int commonKey = 13;

	try {
		TupleGenerator firstGenerator = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator secondGenerator = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);

		final TestData.TupleGeneratorIterator firstRandom = new TestData.TupleGeneratorIterator(firstGenerator, firstSize);
		final TestData.TupleGeneratorIterator secondRandom = new TestData.TupleGeneratorIterator(secondGenerator, secondSize);

		final TestData.TupleConstantValueIterator firstFixed = new TestData.TupleConstantValueIterator(commonKey, "LEFT String for Duplicate Keys", firstDuplicates);
		final TestData.TupleConstantValueIterator secondFixed = new TestData.TupleConstantValueIterator(commonKey, "RIGHT String for Duplicate Keys", secondDuplicates);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> firstParts = new ArrayList<>();
		firstParts.add(firstRandom);
		firstParts.add(firstFixed);

		final List<MutableObjectIterator<Tuple2<Integer, String>>> secondParts = new ArrayList<>();
		secondParts.add(secondRandom);
		secondParts.add(secondFixed);

		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(firstParts);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(secondParts);

		// consume both inputs once to derive the expected join result
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));

		// rewind generators and iterators, then rebuild the unions for the real run
		firstGenerator.reset();
		secondGenerator.reset();
		firstFixed.reset();
		secondFixed.reset();
		firstRandom.reset();
		secondRandom.reset();

		firstParts.clear();
		firstParts.add(firstRandom);
		firstParts.add(firstFixed);

		secondParts.clear();
		secondParts.add(secondRandom);
		secondParts.add(secondFixed);

		input1 = new UnionIterator<>(firstParts);
		input2 = new UnionIterator<>(secondParts);

		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> joinIterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator,
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		joinIterator.open();

		while (joinIterator.callWithNextKey(matcher, collector)) {
			// nothing to do here: the call itself processes one key per iteration
		}

		joinIterator.close();

		// any match still left in the map was never produced by the join
		for (Entry<Integer, Collection<TupleMatch>> leftover : expectedMatchesMap.entrySet()) {
			if (leftover.getValue().isEmpty()) {
				continue;
			}
			Assert.fail("Collection for key " + leftover.getKey() + " is not empty");
		}
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail("An exception occurred during the test: " + e.getMessage());
	}
}
 
Example #7
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform build and probe input, each extended by two heavily repeated
 * keys, and verifies the number of distinct keys and the per-key cross-product size.
 *
 * @throws IOException declared by the test; propagated from the hash table operations
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// The named constant is used (instead of a literal) because the expected
	// cross-product size below is computed from the same constant.
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase: insert every build-side row
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows for which tryProbe returns true are joined right away
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// drain the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		// repeated keys join (uniform + repeated) probe rows with (uniform + repeated) build rows
		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #8
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform build and probe input, each extended by two heavily repeated
 * keys, and verifies the number of distinct keys and the per-key cross-product size.
 *
 * @throws IOException declared by the test; propagated from the hash table operations
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// Using the named constant keeps the input in sync with the expected
	// cross-product size asserted at the end of the test.
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// build phase: insert every build-side row
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows for which tryProbe returns true are joined right away
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// drain the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		// repeated keys join (uniform + repeated) probe rows with (uniform + repeated) build rows
		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #9
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a join whose inputs contain two keys repeated three million times each and
 * expects the join to fail with an exception.
 *
 * @throws IOException declared by the test; may be propagated from closing the table
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x repeatedValueCount) with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRowData> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x repeatedValueCount) with the two colliding keys
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRowData> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);
	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	try {
		join(table, buildInput, probeInput);
		// fail() throws an AssertionError, which the catch clause below does not intercept
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception ex) {
		// expected
	} finally {
		// release the table on every path, including the one where the
		// expected failure did not happen and fail() aborts the test
		table.close();
		table.free();
	}
}
 
Example #10
Source File: BinaryHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform build and probe input, each extended by two heavily repeated
 * keys, and verifies the number of distinct keys and the per-key cross-product size.
 *
 * @throws IOException declared by the test; propagated from the hash table operations
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// The named constant is used (instead of a literal) because the expected
	// cross-product size below is computed from the same constant.
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	// ----------------------------------------------------------------------------------------

	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// build phase: insert every build-side row
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows for which tryProbe returns true are joined right away
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)){
			testJoin(table, map);
		}
	}

	// drain the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()){
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		// repeated keys join (uniform + repeated) probe rows with (uniform + repeated) build rows
		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #11
Source File: BinaryHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform build and probe input, each extended by two heavily repeated
 * keys, and verifies the number of distinct keys and the per-key cross-product size.
 *
 * @throws IOException declared by the test; propagated from the hash table operations
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe extra pairs for each of the two colliding keys.
	// Using the named constant keeps the input in sync with the expected
	// cross-product size asserted at the end of the test.
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	// ----------------------------------------------------------------------------------------
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// build phase: insert every build-side row
	BinaryRowData buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe phase: rows for which tryProbe returns true are joined right away
	BinaryRowData probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)){
			testJoin(table, map);
		}
	}

	// drain the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()){
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		// repeated keys join (uniform + repeated) probe rows with (uniform + repeated) build rows
		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #12
Source File: BinaryHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a join whose inputs contain two keys repeated three million times each and
 * expects the join to fail with an exception.
 *
 * @throws IOException declared by the test; may be propagated from closing the table
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 6 million pairs (2 x repeatedValueCount) with two colliding keys
	MutableObjectIterator<BinaryRowData> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRowData> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRowData> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRowData>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRowData> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus 6 million pairs (2 x repeatedValueCount) with the two colliding keys
	MutableObjectIterator<BinaryRowData> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRowData> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRowData> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRowData>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRowData> probeInput = new UnionIterator<>(probes);
	// ----------------------------------------------------------------------------------------
	MemoryManager memManager = MemoryManagerBuilder.newBuilder().setMemorySize(896 * PAGE_SIZE).build();
	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	try {
		join(table, buildInput, probeInput);
		// fail() throws an AssertionError, which the catch clause below does not intercept
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception ex) {
		// expected
	} finally {
		// release the table on every path, including the one where the
		// expected failure did not happen and fail() aborts the test
		table.close();
		table.free();
	}
}
 
Example #13
Source File: HashTableITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the hash join gives up with an exception when its input is so skewed that
 * recursive re-partitioning cannot help.
 *
 * <p>Two keys that are known to collide on the first recursion level are added to both the
 * build and the probe side through constant-key iterators, to make one partition grow
 * over-proportionally large. The test fails if the join runs to completion.
 *
 * @throws IOException if opening the hash table fails
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus two constant-key iterators configured with REPEATED_VALUE_COUNT for the colliding keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same two constant-key iterators as on the build side
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side records for this probe key
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not intercept;
		// cleaning up here makes sure the table is closed and its memory returned in that case too
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #14
Source File: ReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the reusing build-first hash join over two inputs that share one heavily duplicated key
 * and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// declared with its concrete type instead of the raw FlatJoinFunction to avoid an unchecked call
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #15
Source File: ReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the reusing build-second hash join over two inputs that share one heavily duplicated key
 * and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// declared with its concrete type instead of the raw FlatJoinFunction to avoid an unchecked call
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #16
Source File: HashTableITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the hash join gives up with an exception when its input is so skewed that
 * recursive re-partitioning cannot help.
 *
 * <p>Two keys that are known to collide on the first recursion level are added to both the
 * build and the probe side through constant-key iterators, to make one partition grow
 * over-proportionally large. The test fails if the join runs to completion.
 *
 * @throws IOException if opening the hash table fails
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus two constant-key iterators configured with REPEATED_VALUE_COUNT for the colliding keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same two constant-key iterators as on the build side
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side records for this probe key
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not intercept;
		// cleaning up here makes sure the table is closed and its memory returned in that case too
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #17
Source File: HashTableITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * {@code IntPair} variant of the "too many recursions" test: checks that the hash join gives up
 * with an exception when its input is so skewed that recursive re-partitioning cannot help.
 *
 * <p>Two keys that are known to collide on the first recursion level are added to both the
 * build and the probe side through constant-key iterators, to make one partition grow
 * over-proportionally large. The test fails if the join runs to completion.
 *
 * @throws IOException if opening the hash table fails
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus two constant-key iterators configured with REPEATED_VALUE_COUNT for the colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same two constant-key iterators as on the build side
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, 
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side records for this probe key
			}
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	finally {
		// fail(...) throws an AssertionError, which the catch above does not intercept;
		// cleaning up here makes sure the table is closed and its memory returned in that case too
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #18
Source File: NonReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the non-reusing build-first hash join over two inputs that share one heavily duplicated
 * key and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #19
Source File: NonReusingHashJoinIteratorITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the non-reusing build-second hash join over two inputs that share one heavily duplicated
 * key and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #20
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the hash join gives up with an exception when its input is so skewed that
 * recursive re-partitioning cannot help.
 *
 * <p>Two keys that are known to collide on the first recursion level are added to both the
 * build and the probe side through constant-key iterators, to make one partition grow
 * over-proportionally large. The test fails if the join runs to completion.
 *
 * @throws IOException declared by the original signature; presumably raised by the table's
 *     close/free calls (not visible from here)
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCount = 3000000;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus two constant-key iterators configured with repeatedValueCount for the colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same two constant-key iterators as on the build side
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCount);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCount);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);
	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	try {
		join(table, buildInput, probeInput);
		fail("Hash Join must have failed due to too many recursions.");
	} catch (Exception ex) {
		// expected
	} finally {
		// fail(...) throws an AssertionError, which the catch above does not intercept;
		// cleaning up here makes sure the table is closed and freed in that case too
		table.close();
		table.free();
	}
}
 
Example #21
Source File: ReusingHashJoinIteratorITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the reusing build-first hash join over two inputs that share one heavily duplicated key
 * and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// declared with its concrete type instead of the raw FlatJoinFunction to avoid an unchecked call
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new ReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator, 
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #22
Source File: ReusingHashJoinIteratorITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Runs the reusing build-second hash join over two inputs that share one heavily duplicated key
 * and checks that every expected match is produced.
 *
 * <p>The expected matches are computed up front with {@code joinTuples}; afterwards the
 * generators and iterators are reset and the same data is fed through the join iterator. The
 * test fails if any entry of the expected-matches map is still non-empty at the end.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TestData.TupleGenerator generator1 = new TestData.TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TestData.TupleGenerator generator2 = new TestData.TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		// declared with its concrete type instead of the raw FlatJoinFunction to avoid an unchecked call
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		ReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new ReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator, 
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		
		while (iterator.callWithNextKey(matcher, collector)) {
			// all work happens inside callWithNextKey; loop until it reports no further key
		}
		
		iterator.close();

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// same failure type and message as Assert.fail, but the original exception is kept as the cause
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #23
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that the hash join gives up with an exception when one partition stays too large
 * even after repeated recursive partitioning.
 *
 * <p>Build and probe side each combine a uniform key range with two keys that are repeated
 * {@code REPEATED_VALUE_COUNT} times and whose hash codes collide, so the partition holding
 * them grows over-proportionally. The test passes only if consuming the join result throws an
 * exception. The hash table is closed and its memory is handed back to the memory manager
 * regardless of the outcome.
 *
 * @throws IOException if a hash table or memory manager call outside the expected-failure
 *         section fails with an I/O error
 */
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 2 x REPEATED_VALUE_COUNT (6 million) pairs for the two colliding keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same repeated pairs for the two colliding keys as on the build side
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side matches for this probe record
			}
		}
		
		// reaching this point means the join finished without the expected failure;
		// fail() throws an AssertionError, which the catch clause below does not intercept
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception expected) {
		// expected
	}
	finally {
		// clean up even when an assertion above failed, so the table is always closed
		// and its memory is always returned to the memory manager
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #24
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Checks, for {@code IntPair} inputs, that the hash join gives up with an exception when one
 * partition stays too large even after repeated recursive partitioning.
 *
 * <p>Build and probe side each combine a uniform key range with two keys that are repeated
 * {@code REPEATED_VALUE_COUNT} times and whose hash codes collide, so the partition holding
 * them grows over-proportionally. The test passes only if consuming the join result throws an
 * exception. The hash table is closed and its memory is handed back to the memory manager
 * regardless of the outcome.
 *
 * @throws IOException if a hash table or memory manager call outside the expected-failure
 *         section fails with an I/O error
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 2 x REPEATED_VALUE_COUNT (6 million) pairs for the two colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same repeated pairs for the two colliding keys as on the build side
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor,
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side matches for this probe record
			}
		}
		
		// reaching this point means the join finished without the expected failure;
		// fail() throws an AssertionError, which the catch clause below does not intercept
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception expected) {
		// expected
	}
	finally {
		// clean up even when an assertion above failed, so the table is always closed
		// and its memory is always returned to the memory manager
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #25
Source File: NonReusingHashJoinIteratorITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a join through {@code NonReusingBuildFirstHashJoinIterator} over two inputs that share
 * one heavily duplicated key and verifies that every expected match is reported.
 *
 * <p>The expected matches are computed up front from the same inputs; all generators and
 * iterators are then reset and the inputs rebuilt for the actual join. The test fails if any
 * key still has outstanding matches afterwards (the matcher presumably removes each match it
 * sees from the expected map), or if any exception is thrown along the way.
 */
@Test
public void testBuildFirstWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	// presumably the number of extra tuples per input that all carry DUPLICATE_KEY
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildFirstHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
				new NonReusingBuildFirstHashJoinIterator<>(
					input1, input2, this.recordSerializer, this.record1Comparator,
					this.recordSerializer, this.record2Comparator, this.recordPairComparator,
					this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);

		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// all work happens inside callWithNextKey
			}
		}
		finally {
			// close the iterator even if the join loop throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// keep the original exception as the cause so its stack trace appears in the test report
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #26
Source File: NonReusingHashJoinIteratorITCase.java    From Flink-CEPplus with Apache License 2.0 4 votes vote down vote up
/**
 * Runs a join through {@code NonReusingBuildSecondHashJoinIterator} over two inputs that share
 * one heavily duplicated key and verifies that every expected match is reported.
 *
 * <p>The expected matches are computed up front from the same inputs; all generators and
 * iterators are then reset and the inputs rebuilt for the actual join. The test fails if any
 * key still has outstanding matches afterwards (the matcher presumably removes each match it
 * sees from the expected map), or if any exception is thrown along the way.
 */
@Test
public void testBuildSecondWithHighNumberOfCommonKeys()
{
	// the size of the left and right inputs
	final int INPUT_1_SIZE = 200;
	final int INPUT_2_SIZE = 100;
	
	// presumably the number of extra tuples per input that all carry DUPLICATE_KEY
	final int INPUT_1_DUPLICATES = 10;
	final int INPUT_2_DUPLICATES = 2000;
	final int DUPLICATE_KEY = 13;
	
	try {
		TupleGenerator generator1 = new TupleGenerator(SEED1, 500, 4096, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		TupleGenerator generator2 = new TupleGenerator(SEED2, 500, 2048, KeyMode.RANDOM, ValueMode.RANDOM_LENGTH);
		
		final TestData.TupleGeneratorIterator gen1Iter = new TestData.TupleGeneratorIterator(generator1, INPUT_1_SIZE);
		final TestData.TupleGeneratorIterator gen2Iter = new TestData.TupleGeneratorIterator(generator2, INPUT_2_SIZE);
		
		final TestData.TupleConstantValueIterator const1Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "LEFT String for Duplicate Keys", INPUT_1_DUPLICATES);
		final TestData.TupleConstantValueIterator const2Iter = new TestData.TupleConstantValueIterator(DUPLICATE_KEY, "RIGHT String for Duplicate Keys", INPUT_2_DUPLICATES);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList1 = new ArrayList<>();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		final List<MutableObjectIterator<Tuple2<Integer, String>>> inList2 = new ArrayList<>();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);
		
		MutableObjectIterator<Tuple2<Integer, String>> input1 = new UnionIterator<>(inList1);
		MutableObjectIterator<Tuple2<Integer, String>> input2 = new UnionIterator<>(inList2);
		
		// collect expected data
		final Map<Integer, Collection<TupleMatch>> expectedMatchesMap = joinTuples(
				collectTupleData(input1),
				collectTupleData(input2));
		
		// re-create the whole thing for actual processing
		
		// reset the generators and iterators
		generator1.reset();
		generator2.reset();
		const1Iter.reset();
		const2Iter.reset();
		gen1Iter.reset();
		gen2Iter.reset();
		
		inList1.clear();
		inList1.add(gen1Iter);
		inList1.add(const1Iter);
		
		inList2.clear();
		inList2.add(gen2Iter);
		inList2.add(const2Iter);

		input1 = new UnionIterator<>(inList1);
		input2 = new UnionIterator<>(inList2);
		
		final TupleMatchRemovingJoin matcher = new TupleMatchRemovingJoin(expectedMatchesMap);
		final Collector<Tuple2<Integer, String>> collector = new DiscardingOutputCollector<>();

		NonReusingBuildSecondHashJoinIterator<Tuple2<Integer, String>, Tuple2<Integer, String>, Tuple2<Integer, String>> iterator =
			new NonReusingBuildSecondHashJoinIterator<>(
				input1, input2, this.recordSerializer, this.record1Comparator,
				this.recordSerializer, this.record2Comparator, this.recordPairComparator,
				this.memoryManager, ioManager, this.parentTask, 1.0, false, false, true);
		
		iterator.open();
		try {
			while (iterator.callWithNextKey(matcher, collector)) {
				// all work happens inside callWithNextKey
			}
		}
		finally {
			// close the iterator even if the join loop throws
			iterator.close();
		}

		// assert that each expected match was seen
		for (Entry<Integer, Collection<TupleMatch>> entry : expectedMatchesMap.entrySet()) {
			if (!entry.getValue().isEmpty()) {
				Assert.fail("Collection for key " + entry.getKey() + " is not empty");
			}
		}
	}
	catch (Exception e) {
		// keep the original exception as the cause so its stack trace appears in the test report
		throw new AssertionError("An exception occurred during the test: " + e.getMessage(), e);
	}
}
 
Example #27
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform key range that additionally contains two massively repeated, hash-colliding
 * keys and checks the size of the per-key cross product.
 *
 * <p>For the two repeated keys the expected count is
 * {@code (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild)};
 * for every other key it is {@code probeValsPerKey * buildValsPerKey}. The per-key counts are
 * read from {@code map}, which is handed to {@code testJoin} for every match (presumably
 * filled there).
 *
 * @throws IOException declared by the original test; presumably raised by the hash table
 *         or input iterators
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe pairs for each of the two colliding keys.
	// the count must be the same constant the assertion below uses.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// feed the complete build side into the table
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe row by row; matches are recorded whenever tryProbe reports success
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// record the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #28
Source File: LongHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform key range that additionally contains two heavily repeated keys, which collide
 * on the first recursion level, and checks the size of the per-key cross product.
 *
 * <p>For the two repeated keys the expected count is
 * {@code (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild)};
 * for every other key it is {@code probeValsPerKey * buildValsPerKey}. The per-key counts are
 * read from {@code map}, which is handed to {@code testJoin} for every match (presumably
 * filled there).
 *
 * @throws IOException declared by the original test; presumably raised by the hash table
 *         or input iterators
 */
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe pairs for each of the two colliding keys.
	// the count must be the same constant the assertion below uses.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	// feed the complete build side into the table
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe row by row; matches are recorded whenever tryProbe reports success
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// record the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example #29
Source File: HashTableITCase.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Checks, for {@code IntPair} inputs, that the hash join gives up with an exception when one
 * partition stays too large even after repeated recursive partitioning.
 *
 * <p>Build and probe side each combine a uniform key range with two keys that are repeated
 * {@code REPEATED_VALUE_COUNT} times and whose hash codes collide, so the partition holding
 * them grows over-proportionally. The test passes only if consuming the join result throws an
 * exception. The hash table is closed and its memory is handed back to the memory manager
 * regardless of the outcome.
 *
 * @throws IOException if a hash table or memory manager call outside the expected-failure
 *         section fails with an I/O error
 */
@Test
public void testFailingHashJoinTooManyRecursionsIntPair() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000;
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key,
	// plus 2 x REPEATED_VALUE_COUNT (6 million) pairs for the two colliding keys
	MutableObjectIterator<IntPair> build1 = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<IntPair> build2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> build3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> builds = new ArrayList<MutableObjectIterator<IntPair>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<IntPair> buildInput = new UnionIterator<IntPair>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus the same repeated pairs for the two colliding keys as on the build side
	MutableObjectIterator<IntPair> probe1 = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<IntPair> probe2 = new ConstantsIntPairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<IntPair> probe3 = new ConstantsIntPairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<IntPair>> probes = new ArrayList<MutableObjectIterator<IntPair>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<IntPair> probeInput = new UnionIterator<IntPair>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor,
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();

	try {
		while (join.nextRecord()) {
			MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null) {
				// drain the remaining build side matches for this probe record
			}
		}
		
		// reaching this point means the join finished without the expected failure;
		// fail() throws an AssertionError, which the catch clause below does not intercept
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception expected) {
		// expected
	}
	finally {
		// clean up even when an assertion above failed, so the table is always closed
		// and its memory is always returned to the memory manager
		join.close();
		this.memManager.release(join.getFreedMemory());
	}
}
 
Example #30
Source File: BinaryHashTableTest.java    From flink with Apache License 2.0 4 votes vote down vote up
/**
 * Joins a uniform key range that additionally contains two massively repeated, hash-colliding
 * keys through a {@code BinaryHashTable} and checks the size of the per-key cross product.
 *
 * <p>For the two repeated keys the expected count is
 * {@code (probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild)};
 * for every other key it is {@code probeValsPerKey * buildValsPerKey}. The per-key counts are
 * read from {@code map}, which is handed to {@code testJoin} for every match (presumably
 * filled there).
 *
 * @throws IOException declared by the original test; presumably raised by the hash table
 *         or input iterators
 */
@Test
public void testSpillingHashJoinWithMassiveCollisions() throws IOException {
	// the following two values are known to have a hash-code collision on the initial level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key,
	// plus repeatedValueCountProbe pairs for each of the two colliding keys.
	// the count must be the same constant the assertion below uses.
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountProbe);
	MutableObjectIterator<BinaryRow> probe3 = new ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountProbe);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);
	MemoryManager memManager = new MemoryManager(896 * PAGE_SIZE, 1);
	// ----------------------------------------------------------------------------------------

	final BinaryHashTable table = newBinaryHashTable(
			this.buildSideSerializer, this.probeSideSerializer,
			new MyProjection(), new MyProjection(), memManager,
			896 * PAGE_SIZE, ioManager);

	// feed the complete build side into the table
	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	// probe row by row; matches are recorded whenever tryProbe reports success
	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	// record the matches the table still reports after the probe input is exhausted
	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}