Java Code Examples for org.apache.flink.util.MutableObjectIterator#next()

The following examples show how to use org.apache.flink.util.MutableObjectIterator#next(). They are taken from open-source projects; the source file and license are noted above each example, and you can consult the original project for full context.
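Before diving into the examples, here is a minimal, self-contained sketch of the next() contract (it is not taken from any of the projects below; the ListBackedIterator helper and its names are purely illustrative). MutableObjectIterator declares two variants: next(reuse), which may fill and return the passed object or hand back a different instance, and next(), which always produces a fresh object; both return null once the input is exhausted. This is why every example on this page re-assigns the result of next() and loops until null. The sketch assumes only that flink-core is on the classpath.

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;

import org.apache.flink.util.MutableObjectIterator;

public class MutableObjectIteratorSketch {

	// hypothetical helper: wraps a plain Iterator<String>; since String is immutable,
	// next(reuse) simply ignores the reuse argument and returns the next element
	static final class ListBackedIterator implements MutableObjectIterator<String> {
		private final Iterator<String> delegate;

		ListBackedIterator(Iterable<String> data) {
			this.delegate = data.iterator();
		}

		@Override
		public String next(String reuse) throws IOException {
			// contract: return null once the input is exhausted
			return delegate.hasNext() ? delegate.next() : null;
		}

		@Override
		public String next() throws IOException {
			return delegate.hasNext() ? delegate.next() : null;
		}
	}

	public static void main(String[] args) throws IOException {
		MutableObjectIterator<String> iter = new ListBackedIterator(Arrays.asList("a", "b", "c"));

		// the consumption pattern used throughout the examples below:
		// re-assign the result of next(reuse) and stop at null
		String record = "";
		while ((record = iter.next(record)) != null) {
			System.out.println(record);
		}
	}
}

Because the iterators may reuse objects, callers must extract or copy any data they still need before the next call; Example 17, for instance, copies the group key with key.copy() before reading further rows.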
Example 1
Source File: BinaryMergeIteratorTest.java    From flink with Apache License 2.0
@Test
public void testOneStream() throws Exception {
	List<MutableObjectIterator<BinaryRow>> iterators = new ArrayList<>();
	iterators.add(newIterator(
			new int[]{1, 2, 4, 5, 10}, new String[]{"1", "2", "4", "5", "10"}));

	final int[] expected = new int[]{1, 2, 4, 5, 10};

	MutableObjectIterator<BinaryRow> iterator =
			new BinaryMergeIterator<>(
					iterators,
					Collections.singletonList(serializer.createInstance()),
					(o1, o2) -> this.comparator.compare(o1, o2));

	BinaryRow row = serializer.createInstance();

	int pos = 0;
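	// next(row) may return the reused instance or a new one, so always continue with the returned reference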
	while ((row = iterator.next(row)) != null) {
		Assert.assertEquals(expected[pos++], row.getInt(0));
	}
}
 
Example 2
Source File: AbstractSortMergeOuterJoinIteratorITCase.java    From flink with Apache License 2.0
private Map<Integer, Collection<String>> collectData(MutableObjectIterator<Tuple2<Integer, String>> iter)
		throws Exception {
	final Map<Integer, Collection<String>> map = new HashMap<>();
	Tuple2<Integer, String> pair = new Tuple2<>();

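	// object-reuse loop: the same Tuple2 is passed back in each round, so extract the fields before the next call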
	while ((pair = iter.next(pair)) != null) {
		final Integer key = pair.getField(0);

		if (!map.containsKey(key)) {
			map.put(key, new ArrayList<String>());
		}

		Collection<String> values = map.get(key);
		final String value = pair.getField(1);
		values.add(value);
	}

	return map;
}
 
Example 3
Source File: BinaryHashTableTest.java    From flink with Apache License 2.0
private int join(
		BinaryHashTable table,
		MutableObjectIterator<BinaryRow> buildInput,
		MutableObjectIterator<BinaryRow> probeInput,
		boolean buildOuterJoin) throws IOException {
	int count = 0;

	BinaryRow reuseBuildSizeRow = buildSideSerializer.createInstance();
	BinaryRow buildRow;
	while ((buildRow = buildInput.next(reuseBuildSizeRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			count += joinWithNextKey(table, buildOuterJoin);
		}
	}

	while (table.nextMatching()) {
		count += joinWithNextKey(table, buildOuterJoin);
	}
	return count;
}
 
Example 4
Source File: DataSourceTaskTest.java    From Flink-CEPplus with Apache License 2.0
public static void prepareInputFile(MutableObjectIterator<Record> inIt, File inputFile, boolean insertInvalidData)
throws IOException {

	try (BufferedWriter bw = new BufferedWriter(new FileWriter(inputFile))) {
		if (insertInvalidData) {
			bw.write("####_I_AM_INVALID_########\n");
		}

		Record rec = new Record();
		while ((rec = inIt.next(rec)) != null) {
			IntValue key = rec.getField(0, IntValue.class);
			IntValue value = rec.getField(1, IntValue.class);

			bw.write(key.getValue() + "_" + value.getValue() + "\n");
		}
		if (insertInvalidData) {
			bw.write("####_I_AM_INVALID_########\n");
		}

		bw.flush();
	}
}
 
Example 5
Source File: IterationHeadTask.java    From Flink-CEPplus with Apache License 2.0
private void streamOutFinalOutputBulk(MutableObjectIterator<X> results) throws IOException {
	final Collector<X> out = this.finalOutputCollector;
	X record = this.solutionTypeSerializer.getSerializer().createInstance();

	while ((record = results.next(record)) != null) {
		out.collect(record);
	}
}
 
Example 6
Source File: IterationHeadTask.java    From Flink-CEPplus with Apache License 2.0
private void readInitialSolutionSet(JoinHashMap<X> solutionSet, MutableObjectIterator<X> solutionSetInput) throws IOException {
	TypeSerializer<X> serializer = solutionTypeSerializer.getSerializer();

	X next;
	while ((next = solutionSetInput.next(serializer.createInstance())) != null) {
		solutionSet.insertOrReplace(next);
	}
}
 
Example 7
Source File: MutableHashTableTestBase.java    From flink with Apache License 2.0
@Test
public void testEntryIterator() throws Exception {
	final int NUM_MEM_PAGES = SIZE * NUM_LISTS / PAGE_SIZE;
	AbstractMutableHashTable<IntList> table = getHashTable(serializerV, comparatorV, getMemory(NUM_MEM_PAGES));

	final Random rnd = new Random(RANDOM_SEED);
	final IntList[] lists = getRandomizedIntLists(NUM_LISTS, rnd);

	table.open();
	int result = 0;
	for (int i = 0; i < NUM_LISTS; i++) {
		table.insert(lists[i]);
		result += lists[i].getKey();
	}

	MutableObjectIterator<IntList> iter = table.getEntryIterator();
	IntList target = new IntList();

	int sum = 0;
	while ((target = iter.next(target)) != null) {
		sum += target.getKey();
	}
	table.close();

	assertTrue(sum == result);
	assertEquals("Memory lost", NUM_MEM_PAGES, table.getFreeMemory().size());
}
 
Example 8
Source File: ExternalSortITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testInMemorySort() {
	try {
		// comparator
		final TypeComparator<Integer> keyComparator = new IntComparator(true);
		
		final TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.CONSTANT, VAL);
		final MutableObjectIterator<Tuple2<Integer, String>> source = new TestData.TupleGeneratorIterator(generator, NUM_PAIRS);

		// merge iterator
		LOG.debug("Initializing sortmerger...");
		
		Sorter<Tuple2<Integer, String>> merger = new UnilateralSortMerger<>(this.memoryManager, this.ioManager,
			source, this.parentTask, this.pactRecordSerializer, this.pactRecordComparator,
				(double)64/78, 2, 0.9f, true /*use large record handler*/, true);

		// emit data
		LOG.debug("Reading and sorting data...");

		// check order
		MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();
		
		LOG.debug("Checking results...");
		int pairsEmitted = 1;

		Tuple2<Integer, String> rec1 = new Tuple2<>();
		Tuple2<Integer, String> rec2 = new Tuple2<>();
		
		Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
		while ((rec2 = iterator.next(rec2)) != null) {
			pairsEmitted++;
			
			Assert.assertTrue(keyComparator.compare(rec1.f0, rec2.f0) <= 0);

			Tuple2<Integer, String> tmp = rec1;
			rec1 = rec2;
			rec2 = tmp;
		}
		Assert.assertTrue(NUM_PAIRS == pairsEmitted);
		
		merger.close();
		testSuccess = true;
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 9
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformanceIntPair() throws IOException
{
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key
	MutableObjectIterator<IntPair> buildInput = new UniformIntPairGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

	// create a probe input that gives 10 million pairs with 10 values sharing a key
	MutableObjectIterator<IntPair> probeInput = new UniformIntPairGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<IntPair, IntPair> join = new MutableHashTable<IntPair, IntPair>(
			this.pairBuildSideAccesssor, this.pairProbeSideAccesssor, 
			this.pairBuildSideComparator, this.pairProbeSideComparator, this.pairComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final IntPair recordReuse = new IntPair();
	int numRecordsInJoinResult = 0;
	
	while (join.nextRecord()) {
		MutableObjectIterator<IntPair> buildSide = join.getBuildSideIterator();
		while (buildSide.next(recordReuse) != null) {
			numRecordsInJoinResult++;
		}
	}
	Assert.assertEquals("Wrong number of records in join result.", NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);
	
	join.close();
	
	// ----------------------------------------------------------------------------------------
	
	this.memManager.release(join.getFreedMemory());
}
 
Example 10
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSparseProbeSpillingWithOuterJoin() throws IOException, MemoryAllocationException
{
	final int NUM_BUILD_KEYS = 1000000;
	final int NUM_BUILD_VALS = 1;
	final int NUM_PROBE_KEYS = 20;
	final int NUM_PROBE_VALS = 1;

	MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(
			NUM_BUILD_KEYS, NUM_BUILD_VALS, false);

	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 96);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}

	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor,
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, new UniformRecordGenerator(NUM_PROBE_KEYS, NUM_PROBE_VALS, true), true);

	int expectedNumResults = (Math.max(NUM_PROBE_KEYS, NUM_BUILD_KEYS) * NUM_BUILD_VALS)
			* NUM_PROBE_VALS;

	final Record recordReuse = new Record();
	int numRecordsInJoinResult = 0;

	while (join.nextRecord()) {
		MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
		while (buildSide.next(recordReuse) != null) {
			numRecordsInJoinResult++;
		}
	}
	Assert.assertEquals("Wrong number of records in join result.", expectedNumResults, numRecordsInJoinResult);

	join.close();

	this.memManager.release(join.getFreedMemory());
}
 
Example 11
Source File: SumHashAggTestOperator.java    From flink with Apache License 2.0
public void endInput() throws Exception {

		StreamRecord<BaseRow> outElement = new StreamRecord<>(null);
		JoinedRow hashAggOutput = new JoinedRow();
		GenericRow aggValueOutput = new GenericRow(1);

		if (sorter == null) {
			// no spilling, output by iterating aggregate map.
			MutableObjectIterator<BytesHashMap.Entry> iter = aggregateMap.getEntryIterator();

			BinaryRow reuseAggMapKey = new BinaryRow(1);
			BinaryRow reuseAggBuffer = new BinaryRow(1);
			BytesHashMap.Entry reuseAggMapEntry = new BytesHashMap.Entry(reuseAggMapKey, reuseAggBuffer);

			while (iter.next(reuseAggMapEntry) != null) {
				// set result and output
				aggValueOutput.setField(0, reuseAggBuffer.isNullAt(0) ? null : reuseAggBuffer.getLong(0));
				hashAggOutput.replace(reuseAggMapKey, aggValueOutput);
				getOutput().collect(outElement.replace(hashAggOutput));
			}
		} else {
			// spill the last part of the input's aggregation output buffer
			sorter.sortAndSpill(
					aggregateMap.getRecordAreaMemorySegments(),
					aggregateMap.getNumElements(),
					new BytesHashMapSpillMemorySegmentPool(aggregateMap.getBucketAreaMemorySegments()));

			// only release non-data memory in advance.
			aggregateMap.free(true);

			// fall back to sort based aggregation
			BinaryRow lastKey = null;
			JoinedRow fallbackInput = new JoinedRow();
			boolean aggSumIsNull = false;
			long aggSum = -1;

			// free hash map memory, but not release back to memory manager
			MutableObjectIterator<Tuple2<BinaryRow, BinaryRow>> iterator = sorter.getKVIterator();
			Tuple2<BinaryRow, BinaryRow> kv;
			while ((kv = iterator.next()) != null) {
				BinaryRow key = kv.f0;
				BinaryRow value = kv.f1;
				// prepare input
				fallbackInput.replace(key, value);
				if (lastKey == null) {
					// found first key group
					lastKey = key.copy();
					aggSumIsNull = true;
					aggSum = -1L;
				} else if (key.getSizeInBytes() != lastKey.getSizeInBytes() ||
						!(BinaryRowUtil.byteArrayEquals(
								key.getSegments()[0].getArray(),
								lastKey.getSegments()[0].getArray(),
								key.getSizeInBytes()))) {

					// output current group aggregate result
					aggValueOutput.setField(0, aggSumIsNull ? null : aggSum);
					hashAggOutput.replace(lastKey, aggValueOutput);
					getOutput().collect(outElement.replace(hashAggOutput));

					// found new group
					lastKey = key.copy();
					aggSumIsNull = true;
					aggSum = -1L;
				}

				if (!fallbackInput.isNullAt(1)) {
					long sumInput = fallbackInput.getLong(1);
					if (aggSumIsNull) {
						aggSum = sumInput;
					} else {
						aggSum = aggSum + sumInput;
					}
					aggSumIsNull = false;
				}
			}

			// output last key group aggregate result
			aggValueOutput.setField(0, aggSumIsNull ? null : aggSum);
			hashAggOutput.replace(lastKey, aggValueOutput);
			getOutput().collect(outElement.replace(hashAggOutput));
		}
	}
 
Example 12
Source File: NormalizedKeySorterTest.java    From flink with Apache License 2.0
@Test
public void testSortShortStringKeys() throws Exception {
	final int numSegments = MEMORY_SIZE / MEMORY_PAGE_SIZE;
	final List<MemorySegment> memory = this.memoryManager.allocatePages(new DummyInvokable(), numSegments);
	
	@SuppressWarnings("unchecked")
	TypeComparator<Tuple2<Integer, String>> accessors = TestData.getIntStringTupleTypeInfo().createComparator(new int[]{1}, new boolean[]{true}, 0, null);
	NormalizedKeySorter<Tuple2<Integer, String>> sorter = new NormalizedKeySorter<>(TestData.getIntStringTupleSerializer(), accessors, memory);
	
	TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, 5, KeyMode.RANDOM,
		ValueMode.FIX_LENGTH);
	
	// write the records
	Tuple2<Integer, String> record = new Tuple2<>();
	do {
		generator.next(record);
	}
	while (sorter.write(record));
	
	QuickSort qs = new QuickSort();
	qs.sort(sorter);
	
	MutableObjectIterator<Tuple2<Integer, String>> iter = sorter.getIterator();
	Tuple2<Integer, String> readTarget = new Tuple2<>();

	readTarget = iter.next(readTarget);
	String last = readTarget.f1;
	
	while ((readTarget = iter.next(readTarget)) != null) {
		String current = readTarget.f1;
		
		final int cmp = last.compareTo(current);
		if (cmp > 0) {
			Assert.fail("Next value is not larger or equal to previous value.");
		}
		
		last = current;
	}
	
	// release the memory occupied by the buffers
	sorter.dispose();
	this.memoryManager.release(memory);
}
 
Example 13
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformance() throws IOException
{
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key
	MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

	// create a probe input that gives 10 million pairs with 10 values sharing a key
	MutableObjectIterator<Record> probeInput = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();
	int numRecordsInJoinResult = 0;
	
	while (join.nextRecord()) {
		MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
		while (buildSide.next(recordReuse) != null) {
			numRecordsInJoinResult++;
		}
	}
	Assert.assertEquals("Wrong number of records in join result.", NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);
	
	join.close();
	
	// ----------------------------------------------------------------------------------------
	
	this.memManager.release(join.getFreedMemory());
}
 
Example 14
Source File: ExternalSortITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testSpillingSortWithIntermediateMergeIntPair() {
	try {
		// amount of pairs
		final int PAIRS = 50000000;

		// comparator
		final RandomIntPairGenerator generator = new RandomIntPairGenerator(12345678, PAIRS);
		
		final TypeSerializerFactory<IntPair> serializerFactory = new IntPairSerializer.IntPairSerializerFactory();
		final TypeComparator<IntPair> comparator = new TestData.IntPairComparator();
		
		// merge iterator
		LOG.debug("Initializing sortmerger...");
		
		Sorter<IntPair> merger = new UnilateralSortMerger<IntPair>(this.memoryManager, this.ioManager, 
				generator, this.parentTask, serializerFactory, comparator, (double)64/78, 4, 0.7f,
				true /*use large record handler*/, true);

		// emit data
		LOG.debug("Emitting data...");
		
		// check order
		MutableObjectIterator<IntPair> iterator = merger.getIterator();
		
		LOG.debug("Checking results...");
		int pairsRead = 1;
		int nextStep = PAIRS / 20;

		IntPair rec1 = new IntPair();
		IntPair rec2 = new IntPair();
		
		Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
		
		while ((rec2 = iterator.next(rec2)) != null) {
			final int k1 = rec1.getKey();
			final int k2 = rec2.getKey();
			pairsRead++;
			
			Assert.assertTrue(k1 - k2 <= 0); 
			
			IntPair tmp = rec1;
			rec1 = rec2;
			rec2 = tmp;
			
			// log
			if (pairsRead == nextStep) {
				nextStep += PAIRS / 20;
			}
		}
		Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
		merger.close();
		testSuccess = true;
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 15
Source File: HashTableTest.java    From flink with Apache License 2.0
/**
 * Tests that the MutableHashTable spills its partitions when creating the initial table
 * without overflow segments in the partitions. This means that the records are large.
 */
@Test
public void testSpillingWhenBuildingTableWithoutOverflow() throws Exception {
	try (final IOManager ioMan = new IOManagerAsync()) {
		final TypeSerializer<byte[]> serializer = BytePrimitiveArraySerializer.INSTANCE;
		final TypeComparator<byte[]> buildComparator = new BytePrimitiveArrayComparator(true);
		final TypeComparator<byte[]> probeComparator = new BytePrimitiveArrayComparator(true);

		@SuppressWarnings("unchecked") final TypePairComparator<byte[], byte[]> pairComparator =
			new GenericPairComparator<>(
				new BytePrimitiveArrayComparator(true), new BytePrimitiveArrayComparator(true));

		final int pageSize = 128;
		final int numSegments = 33;

		List<MemorySegment> memory = getMemory(numSegments, pageSize);

		MutableHashTable<byte[], byte[]> table = new MutableHashTable<byte[], byte[]>(
			serializer,
			serializer,
			buildComparator,
			probeComparator,
			pairComparator,
			memory,
			ioMan,
			1,
			false);

		int numElements = 9;

		table.open(
			new CombiningIterator<byte[]>(
				new ByteArrayIterator(numElements, 128, (byte) 0),
				new ByteArrayIterator(numElements, 128, (byte) 1)),
			new CombiningIterator<byte[]>(
				new ByteArrayIterator(1, 128, (byte) 0),
				new ByteArrayIterator(1, 128, (byte) 1)));

		while (table.nextRecord()) {
			MutableObjectIterator<byte[]> iterator = table.getBuildSideIterator();

			int counter = 0;

			while (iterator.next() != null) {
				counter++;
			}

			// check that we retrieve all our elements
			Assert.assertEquals(numElements, counter);
		}

		table.close();
	}
}
 
Example 16
Source File: HashTableITCase.java    From Flink-CEPplus with Apache License 2.0
@Test
public void testFailingHashJoinTooManyRecursions() throws IOException
{
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int REPEATED_VALUE_1 = 40559;
	final int REPEATED_VALUE_2 = 92882;
	final int REPEATED_VALUE_COUNT = 3000000; 
	
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 6 million pairs across two heavily repeated keys
	MutableObjectIterator<Record> build1 = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);
	MutableObjectIterator<Record> build2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> build3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> builds = new ArrayList<MutableObjectIterator<Record>>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<Record> buildInput = new UnionIterator<Record>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key
	MutableObjectIterator<Record> probe1 = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);
	MutableObjectIterator<Record> probe2 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_1, 17, REPEATED_VALUE_COUNT);
	MutableObjectIterator<Record> probe3 = new ConstantsKeyValuePairsIterator(REPEATED_VALUE_2, 23, REPEATED_VALUE_COUNT);
	List<MutableObjectIterator<Record>> probes = new ArrayList<MutableObjectIterator<Record>>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<Record> probeInput = new UnionIterator<Record>(probes);
	
	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();

	try {
		while (join.nextRecord()) {	
			MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
			if (buildSide.next(recordReuse) == null) {
				fail("No build side values found for a probe key.");
			}
			while (buildSide.next(recordReuse) != null);
		}
		
		fail("Hash Join must have failed due to too many recursions.");
	}
	catch (Exception ex) {
		// expected
	}
	
	join.close();
	
	// ----------------------------------------------------------------------------------------
	
	this.memManager.release(join.getFreedMemory());
}
 
Example 17
Source File: LongHashTableTest.java    From flink with Apache License 2.0
@Test
public void testSpillingHashJoinWithTwoRecursions() throws IOException {
	// the following two values are known to have a hash-code collision on the first recursion level.
	// we use them to make sure one partition grows over-proportionally large
	final int repeatedValue1 = 40559;
	final int repeatedValue2 = 92882;
	final int repeatedValueCountBuild = 200000;
	final int repeatedValueCountProbe = 5;

	final int numKeys = 1000000;
	final int buildValsPerKey = 3;
	final int probeValsPerKey = 10;

	// create a build input that gives 3 million pairs with 3 values sharing the same key, plus 400k pairs with two colliding keys
	MutableObjectIterator<BinaryRow> build1 = new UniformBinaryRowGenerator(numKeys, buildValsPerKey, false);
	MutableObjectIterator<BinaryRow> build2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, repeatedValueCountBuild);
	MutableObjectIterator<BinaryRow> build3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, repeatedValueCountBuild);
	List<MutableObjectIterator<BinaryRow>> builds = new ArrayList<>();
	builds.add(build1);
	builds.add(build2);
	builds.add(build3);
	MutableObjectIterator<BinaryRow> buildInput = new UnionIterator<>(builds);

	// create a probe input that gives 10 million pairs with 10 values sharing a key
	MutableObjectIterator<BinaryRow> probe1 = new UniformBinaryRowGenerator(numKeys, probeValsPerKey, true);
	MutableObjectIterator<BinaryRow> probe2 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue1, 17, 5);
	MutableObjectIterator<BinaryRow> probe3 = new BinaryHashTableTest.ConstantsKeyValuePairsIterator(repeatedValue2, 23, 5);
	List<MutableObjectIterator<BinaryRow>> probes = new ArrayList<>();
	probes.add(probe1);
	probes.add(probe2);
	probes.add(probe3);
	MutableObjectIterator<BinaryRow> probeInput = new UnionIterator<>(probes);

	// create the map for validating the results
	HashMap<Integer, Long> map = new HashMap<>(numKeys);

	final MyHashTable table = new MyHashTable(896 * PAGE_SIZE);

	BinaryRow buildRow = buildSideSerializer.createInstance();
	while ((buildRow = buildInput.next(buildRow)) != null) {
		table.putBuildRow(buildRow);
	}
	table.endBuild();

	BinaryRow probeRow = probeSideSerializer.createInstance();
	while ((probeRow = probeInput.next(probeRow)) != null) {
		if (table.tryProbe(probeRow)) {
			testJoin(table, map);
		}
	}

	while (table.nextMatching()) {
		testJoin(table, map);
	}

	table.close();

	Assert.assertEquals("Wrong number of keys", numKeys, map.size());
	for (Map.Entry<Integer, Long> entry : map.entrySet()) {
		long val = entry.getValue();
		int key = entry.getKey();

		Assert.assertEquals("Wrong number of values in per-key cross product for key " + key,
				(key == repeatedValue1 || key == repeatedValue2) ?
						(probeValsPerKey + repeatedValueCountProbe) * (buildValsPerKey + repeatedValueCountBuild) :
						probeValsPerKey * buildValsPerKey, val);
	}

	// ----------------------------------------------------------------------------------------

	table.free();
}
 
Example 18
Source File: ExternalSortITCase.java    From flink with Apache License 2.0
@Test
public void testSpillingSortWithIntermediateMerge() {
	try {
		// amount of pairs
		final int PAIRS = 10000000;

		// comparator
		final TypeComparator<Integer> keyComparator = new IntComparator(true);

		final TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
		final MutableObjectIterator<Tuple2<Integer, String>> source = new TestData.TupleGeneratorIterator(generator, PAIRS);
		
		// merge iterator
		LOG.debug("Initializing sortmerger...");
		
		Sorter<Tuple2<Integer, String>> merger = new UnilateralSortMerger<>(this.memoryManager, this.ioManager,
				source, this.parentTask, this.pactRecordSerializer, this.pactRecordComparator,
				(double)64/78, 16, 0.7f, true /*use large record handler*/, false);
		
		// emit data
		LOG.debug("Emitting data...");

		// check order
		MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();
		
		LOG.debug("Checking results...");
		int pairsRead = 1;
		int nextStep = PAIRS / 20;

		Tuple2<Integer, String> rec1 = new Tuple2<>();
		Tuple2<Integer, String> rec2 = new Tuple2<>();
		
		Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
		while ((rec2 = iterator.next(rec2)) != null) {
			pairsRead++;
			
			Assert.assertTrue(keyComparator.compare(rec1.f0, rec2.f0) <= 0);

			Tuple2<Integer, String> tmp = rec1;
			rec1 = rec2;
			rec2 = tmp;
			
			// log
			if (pairsRead == nextStep) {
				nextStep += PAIRS / 20;
			}
			
		}
		Assert.assertEquals("Not all pairs were read back in.", PAIRS, pairsRead);
		merger.close();
		testSuccess = true;
	}
	catch (Exception e) {
		e.printStackTrace();
		Assert.fail(e.getMessage());
	}
}
 
Example 19
Source File: HashTableITCase.java    From flink with Apache License 2.0
@Test
public void testSpillingHashJoinOneRecursionPerformance() throws IOException
{
	final int NUM_KEYS = 1000000;
	final int BUILD_VALS_PER_KEY = 3;
	final int PROBE_VALS_PER_KEY = 10;
	
	// create a build input that gives 3 million pairs with 3 values sharing the same key
	MutableObjectIterator<Record> buildInput = new UniformRecordGenerator(NUM_KEYS, BUILD_VALS_PER_KEY, false);

	// create a probe input that gives 10 million pairs with 10 values sharing a key
	MutableObjectIterator<Record> probeInput = new UniformRecordGenerator(NUM_KEYS, PROBE_VALS_PER_KEY, true);

	// allocate the memory for the HashTable
	List<MemorySegment> memSegments;
	try {
		memSegments = this.memManager.allocatePages(MEM_OWNER, 896);
	}
	catch (MemoryAllocationException maex) {
		fail("Memory for the Join could not be provided.");
		return;
	}
	
	// ----------------------------------------------------------------------------------------
	
	final MutableHashTable<Record, Record> join = new MutableHashTable<Record, Record>(
			this.recordBuildSideAccesssor, this.recordProbeSideAccesssor, 
			this.recordBuildSideComparator, this.recordProbeSideComparator, this.pactRecordComparator,
			memSegments, ioManager);
	join.open(buildInput, probeInput);
	
	final Record recordReuse = new Record();
	int numRecordsInJoinResult = 0;
	
	while (join.nextRecord()) {
		MutableObjectIterator<Record> buildSide = join.getBuildSideIterator();
		while (buildSide.next(recordReuse) != null) {
			numRecordsInJoinResult++;
		}
	}
	Assert.assertEquals("Wrong number of records in join result.", NUM_KEYS * BUILD_VALS_PER_KEY * PROBE_VALS_PER_KEY, numRecordsInJoinResult);
	
	join.close();
	
	// ----------------------------------------------------------------------------------------
	
	this.memManager.release(join.getFreedMemory());
}
 
Example 20
Source File: CombiningUnilateralSortMergerITCase.java    From flink with Apache License 2.0
@Test
public void testSortAndValidate() throws Exception
{
	final Hashtable<Integer, Integer> countTable = new Hashtable<>(KEY_MAX);
	for (int i = 1; i <= KEY_MAX; i++) {
		countTable.put(i, 0);
	}

	// comparator
	final TypeComparator<Integer> keyComparator = new IntComparator(true);

	// reader
	TestData.MockTuple2Reader<Tuple2<Integer, String>> reader = TestData.getIntStringTupleReader();

	// merge iterator
	LOG.debug("initializing sortmerger");
	
	TestCountCombiner2 comb = new TestCountCombiner2();
	
	Sorter<Tuple2<Integer, String>> merger = new CombiningUnilateralSortMerger<>(comb,
			this.memoryManager, this.ioManager, reader, this.parentTask, this.serializerFactory1, this.comparator1,
			0.25, 2, 0.7f, true /* use large record handler */, false);

	// emit data
	LOG.debug("emitting data");
	TestData.TupleGenerator generator = new TestData.TupleGenerator(SEED, KEY_MAX, VALUE_LENGTH, KeyMode.RANDOM, ValueMode.FIX_LENGTH);
	Tuple2<Integer, String> rec = new Tuple2<>();
	
	for (int i = 0; i < NUM_PAIRS; i++) {
		Assert.assertTrue((rec = generator.next(rec)) != null);
		final Integer key = rec.f0;
		rec.setField("1", 1);
		reader.emit(rec);
		
		countTable.put(key, countTable.get(key) + 1);
	}
	reader.close();

	// check order
	MutableObjectIterator<Tuple2<Integer, String>> iterator = merger.getIterator();
	
	LOG.debug("checking results");
	
	Tuple2<Integer, String> rec1 = new Tuple2<>();
	Tuple2<Integer, String> rec2 = new Tuple2<>();
	
	Assert.assertTrue((rec1 = iterator.next(rec1)) != null);
	countTable.put(rec1.f0, countTable.get(rec1.f0) - (Integer.parseInt(rec1.f1)));

	while ((rec2 = iterator.next(rec2)) != null) {
		int k1 = rec1.f0;
		int k2 = rec2.f0;
		
		Assert.assertTrue(keyComparator.compare(k1, k2) <= 0); 
		countTable.put(k2, countTable.get(k2) - (Integer.parseInt(rec2.f1)));
		
		rec1 = rec2;
	}

	for (Integer cnt : countTable.values()) {
		Assert.assertTrue(cnt == 0);
	}
	
	merger.close();
	
	// if the combiner was opened, it must have been closed
	Assert.assertTrue(comb.opened == comb.closed);
}