Java Code Examples for org.apache.flink.core.memory.MemorySegment#getInt()

The following examples show how to use org.apache.flink.core.memory.MemorySegment#getInt(). Each example is taken from an open source project; the source file and license are noted above it.
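Before the project examples, here is a minimal, self-contained sketch of the basic pattern they all build on: ints are written to and read from absolute byte offsets within a segment, so consecutive ints sit 4 bytes apart. This snippet is not taken from any project and the class name is made up.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class GetIntSketch {

	public static void main(String[] args) {
		// allocate a small unpooled, heap-backed segment of 16 bytes
		MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(16);

		// write four ints at 4-byte strides
		for (int i = 0; i < 4; i++) {
			segment.putInt(i * 4, i * 100);
		}

		// read them back from the same absolute offsets
		for (int i = 0; i < 4; i++) {
			System.out.println(segment.getInt(i * 4));
		}
	}
}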
Example 1
Source File: SerializedUpdateBuffer.java    From flink with Apache License 2.0
private ReadEnd(MemorySegment firstMemSegment, LinkedBlockingQueue<MemorySegment> emptyBufferTarget,
								Deque<MemorySegment> fullBufferSource, BlockChannelReader<MemorySegment> spilledBufferSource,
								List<MemorySegment> emptyBuffers, int numBuffersSpilled)
	throws IOException {
	super(firstMemSegment, firstMemSegment.getInt(0), HEADER_LENGTH);

	this.emptyBufferTarget = emptyBufferTarget;
	this.fullBufferSource = fullBufferSource;

	this.spilledBufferSource = spilledBufferSource;

	requestsRemaining = numBuffersSpilled;
	this.spilledBuffersRemaining = numBuffersSpilled;

	// send the first requests
	while (requestsRemaining > 0 && emptyBuffers.size() > 0) {
		this.spilledBufferSource.readBlock(emptyBuffers.remove(emptyBuffers.size() - 1));
		requestsRemaining--;
	}
}
 
Example 2
Source File: MutableHashTable.java    From Flink-CEPplus with Apache License 2.0
/**
 * Use all the bucket memory except the bucket header as the bit set of the bloom filter, and use the hash codes
 * of the build-side records to populate it.
 */
final void buildBloomFilterForBucket(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
	final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
	if (count <= 0) {
		return;
	}

	int[] hashCodes = new int[count];
	// The hash codes and the bloom filter bit set occupy the same bytes, so read all hash codes out first and then write the bloom filter over them.
	for (int i = 0; i < count; i++) {
		hashCodes[i] = bucket.getInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
	}
	this.bloomFilter.setBitsLocation(bucket, bucketInSegmentPos + BUCKET_HEADER_LENGTH);
	for (int hashCode : hashCodes) {
		this.bloomFilter.addHash(hashCode);
	}
	buildBloomFilterForExtraOverflowSegments(bucketInSegmentPos, bucket, p);
}
 
Example 3
Source File: MutableHashTable.java    From flink with Apache License 2.0
/**
 * Use all the bucket memory except the bucket header as the bit set of the bloom filter, and use the hash codes
 * of the build-side records to populate it.
 */
final void buildBloomFilterForBucket(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
	final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
	if (count <= 0) {
		return;
	}

	int[] hashCodes = new int[count];
	// The hash codes and the bloom filter bit set occupy the same bytes, so read all hash codes out first and then write the bloom filter over them.
	for (int i = 0; i < count; i++) {
		hashCodes[i] = bucket.getInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
	}
	this.bloomFilter.setBitsLocation(bucket, bucketInSegmentPos + BUCKET_HEADER_LENGTH);
	for (int hashCode : hashCodes) {
		this.bloomFilter.addHash(hashCode);
	}
	buildBloomFilterForExtraOverflowSegments(bucketInSegmentPos, bucket, p);
}
 
Example 4
Source File: SkipListKeySerializer.java    From flink with Apache License 2.0
/**
 * Gets the serialized key and namespace from the memory segment.
 *
 * @param memorySegment the memory segment which stores the skip list key.
 * @param offset the start position of the skip list key in the memory segment.
 * @return tuple of serialized key and namespace.
 */
Tuple2<byte[], byte[]> getSerializedKeyAndNamespace(MemorySegment memorySegment, int offset) {
	// read namespace
	int namespaceLen = memorySegment.getInt(offset);
	MemorySegment namespaceSegment = MemorySegmentFactory.allocateUnpooledSegment(namespaceLen);
	memorySegment.copyTo(offset + Integer.BYTES, namespaceSegment, 0, namespaceLen);

	// read key
	int keyOffset = offset + Integer.BYTES + namespaceLen;
	int keyLen = memorySegment.getInt(keyOffset);
	MemorySegment keySegment = MemorySegmentFactory.allocateUnpooledSegment(keyLen);
	memorySegment.copyTo(keyOffset + Integer.BYTES, keySegment, 0, keyLen);

	return Tuple2.of(keySegment.getArray(), namespaceSegment.getArray());
}
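
The method above assumes the skip list key was written in the layout [namespace length (int) | namespace bytes | key length (int) | key bytes]. As a hedged sketch, the corresponding write path could look like the helper below; it is not part of SkipListKeySerializer and the method name is made up.

// Hypothetical write-side counterpart of getSerializedKeyAndNamespace(); not from the Flink sources.
static void putSerializedKeyAndNamespace(MemorySegment segment, int offset, byte[] namespace, byte[] key) {
	// namespace length followed by the namespace bytes
	segment.putInt(offset, namespace.length);
	segment.put(offset + Integer.BYTES, namespace, 0, namespace.length);

	// key length followed by the key bytes
	int keyOffset = offset + Integer.BYTES + namespace.length;
	segment.putInt(keyOffset, key.length);
	segment.put(keyOffset + Integer.BYTES, key, 0, key.length);
}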
 
Example 5
Source File: IntNormalizedKeyComputer.java    From flink with Apache License 2.0
@Override
public void swapKey(MemorySegment segI, int offsetI, MemorySegment segJ, int offsetJ) {

	int temp0 = segI.getInt(offsetI);
	segI.putInt(offsetI, segJ.getInt(offsetJ));
	segJ.putInt(offsetJ, temp0);

	byte temp1 = segI.get(offsetI + 4);
	segI.put(offsetI + 4, segJ.get(offsetJ + 4));
	segJ.put(offsetJ + 4, temp1);

}
 
Example 6
Source File: BufferFileWriterReaderTest.java    From flink with Apache License 2.0
static int verifyBufferFilledWithAscendingNumbers(Buffer buffer, int currentNumber) {
	MemorySegment segment = buffer.getMemorySegment();

	int size = buffer.getSize();

	for (int i = 0; i < size; i += 4) {
		if (segment.getInt(i) != currentNumber++) {
			throw new IllegalStateException("Read unexpected number from buffer.");
		}
	}

	return currentNumber;
}
 
Example 7
Source File: MurmurHashUtil.java    From flink with Apache License 2.0
private static int hashBytesByInt(MemorySegment segment, int offset, int lengthInBytes, int seed) {
	assert (lengthInBytes % 4 == 0);
	int h1 = seed;
	for (int i = 0; i < lengthInBytes; i += 4) {
		int halfWord = segment.getInt(offset + i);
		int k1 = mixK1(halfWord);
		h1 = mixH1(h1, k1);
	}
	return h1;
}
 
Example 8
Source File: IntNormalizedKeyComputer.java    From flink with Apache License 2.0
@Override
public int compareKey(MemorySegment segI, int offsetI, MemorySegment segJ, int offsetJ) {
	// compare the leading 4 normalized-key bytes; the XOR with the sign bits turns the
	// signed comparison into an unsigned one
	int int1 = segI.getInt(offsetI);
	int int2 = segJ.getInt(offsetJ);
	if (int1 != int2) {
		return (int1 < int2) ^ (int1 < 0) ^ (int2 < 0) ? -1 : 1;
	}

	// compare the trailing normalized-key byte, also as unsigned
	byte byte1 = segI.get(offsetI + 4);
	byte byte2 = segJ.get(offsetJ + 4);
	if (byte1 != byte2) {
		return (byte1 < byte2) ^ (byte1 < 0) ^ (byte2 < 0) ? -1 : 1;
	}
	return 0;
}
 
Example 9
Source File: IOManagerAsyncTest.java    From Flink-CEPplus with Apache License 2.0
@Test
public void channelReadWriteOneSegment() {
	final int NUM_IOS = 1111;
	
	try {
		final FileIOChannel.ID channelID = this.ioManager.createChannel();
		final BlockChannelWriter<MemorySegment> writer = this.ioManager.createBlockChannelWriter(channelID);
		
		MemorySegment memSeg = MemorySegmentFactory.allocateUnpooledSegment(32 * 1024);
		
		for (int i = 0; i < NUM_IOS; i++) {
			for (int pos = 0; pos < memSeg.size(); pos += 4) {
				memSeg.putInt(pos, i);
			}
			
			writer.writeBlock(memSeg);
			memSeg = writer.getNextReturnedBlock();
		}
		
		writer.close();
		
		final BlockChannelReader<MemorySegment> reader = this.ioManager.createBlockChannelReader(channelID);
		for (int i = 0; i < NUM_IOS; i++) {
			reader.readBlock(memSeg);
			memSeg = reader.getNextReturnedBlock();
			
			for (int pos = 0; pos < memSeg.size(); pos += 4) {
				if (memSeg.getInt(pos) != i) {
					fail("Read memory segment contains invalid data.");
				}
			}
		}
		
		reader.closeAndDelete();
	}
	catch (Exception ex) {
		ex.printStackTrace();
		fail("Test encountered an exception: " + ex.getMessage());
	}
}
 
Example 10
Source File: SerializedUpdateBuffer.java    From Flink-CEPplus with Apache License 2.0
@Override
protected int getLimitForSegment(MemorySegment segment) {
	return segment.getInt(0);
}
 
Example 11
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private void buildBloomFilterAndFree(
		MemorySegment[] buckets,
		int numBuckets,
		MemorySegment[] overflowSegments) {
	if (table.useBloomFilters) {
		long numRecords = (long) Math.max(partition.getBuildSideRecordCount() * 1.5, estimatedRowCount);

		// The bloom filter size is the minimum of:
		// 1. the remaining buffers,
		// 2. the bloom filter size needed for numRecords at a false positive probability of 0.05,
		// 3. the maximum number of initial bucket area buffers.
		int segSize = Math.min(
				Math.min(table.remainBuffers(),
				HashTableBloomFilter.optimalSegmentNumber(numRecords, table.pageSize(), 0.05)),
				table.maxInitBufferOfBucketArea(table.partitionsBeingBuilt.size()));

		if (segSize > 0) {
			HashTableBloomFilter filter = new HashTableBloomFilter(
					table.getNextBuffers(MathUtils.roundDownToPowerOf2(segSize)), numRecords);

			// Add all records to bloom filter.
			int scanCount = -1;
			while (true) {
				scanCount++;
				if (scanCount >= numBuckets) {
					break;
				}
				// move to the next bucket and update the current bucket state with the new bucket's information.
				final int bucketArrayPos = scanCount >> table.bucketsPerSegmentBits;
				int bucketInSegOffset = (scanCount & table.bucketsPerSegmentMask) << BUCKET_SIZE_BITS;
				MemorySegment bucketSeg = buckets[bucketArrayPos];

				int countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
				int numInBucket = 0;
				while (countInBucket != 0) {
					int hashCodeOffset = bucketInSegOffset + BUCKET_HEADER_LENGTH;
					while (numInBucket < countInBucket) {
						filter.addHash(bucketSeg.getInt(hashCodeOffset));
						numInBucket++;
						hashCodeOffset += HASH_CODE_LEN;
					}

					// this segment is done. check if there is another chained bucket
					int forwardPointer = bucketSeg.getInt(bucketInSegOffset + HEADER_FORWARD_OFFSET);
					if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
						break;
					}

					final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
					bucketSeg = overflowSegments[overflowSegIndex];
					bucketInSegOffset = forwardPointer & table.segmentSizeMask;
					countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
					numInBucket = 0;
				}
			}

			partition.bloomFilter = filter;
		}
	}

	freeMemory(buckets, overflowSegments);
}
 
Example 12
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
/**
 * For distinct build.
 */
private boolean findFirstSameBuildRow(
		MemorySegment bucket,
		int searchHashCode,
		int bucketInSegmentOffset,
		BinaryRow buildRowToInsert) {
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
	int countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInBucket = 0;
	RandomAccessInputView view = partition.getBuildStateInputView();
	while (countInBucket != 0) {
		while (numInBucket < countInBucket) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			if (thisCode == searchHashCode) {
				final int pointer = bucket.getInt(bucketInSegmentOffset +
						BUCKET_POINTER_START_OFFSET + (numInBucket * POINTER_LEN));
				numInBucket++;
				try {
					view.setReadPosition(pointer);
					BinaryRow row = table.binaryBuildSideSerializer.mapFromPages(table.reuseBuildRow, view);
					if (buildRowToInsert.equals(row)) {
						return true;
					}
				} catch (IOException e) {
					throw new RuntimeException("Error deserializing key or value from the hashtable: " +
							e.getMessage(), e);
				}
			} else {
				numInBucket++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final int forwardPointer = bucket.getInt(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return false;
		}

		final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
		bucket = overflowSegments[overflowSegIndex];
		bucketInSegmentOffset = forwardPointer & table.segmentSizeMask;
		countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInBucket = 0;
	}
	return false;
}
 
Example 13
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private void reHash(
		MemorySegment[] oldBuckets,
		int oldNumBuckets,
		MemorySegment[] oldOverflowSegments) throws IOException {
	long reHashStartTime = System.currentTimeMillis();
	inReHash = true;
	int scanCount = -1;
	while (true) {
		scanCount++;
		if (scanCount >= oldNumBuckets) {
			break;
		}
		// move to the next bucket and update the current bucket state with the new bucket's information.
		final int bucketArrayPos = scanCount >> table.bucketsPerSegmentBits;
		int bucketInSegOffset = (scanCount & table.bucketsPerSegmentMask) << BUCKET_SIZE_BITS;
		MemorySegment bucketSeg = oldBuckets[bucketArrayPos];

		int countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
		int numInBucket = 0;
		while (countInBucket != 0) {
			int hashCodeOffset = bucketInSegOffset + BUCKET_HEADER_LENGTH;
			int pointerOffset = bucketInSegOffset + BUCKET_POINTER_START_OFFSET;
			while (numInBucket < countInBucket) {
				int hashCode = bucketSeg.getInt(hashCodeOffset);
				int pointer = bucketSeg.getInt(pointerOffset);
				if (!insertToBucket(hashCode, pointer, false)) {
					buildBloomFilterAndFree(oldBuckets, oldNumBuckets, oldOverflowSegments);
					return;
				}
				numInBucket++;
				hashCodeOffset += HASH_CODE_LEN;
				pointerOffset += POINTER_LEN;
			}

			// this segment is done. check if there is another chained bucket
			int forwardPointer = bucketSeg.getInt(bucketInSegOffset + HEADER_FORWARD_OFFSET);
			if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
				break;
			}

			final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
			bucketSeg = oldOverflowSegments[overflowSegIndex];
			bucketInSegOffset = forwardPointer & table.segmentSizeMask;
			countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
			numInBucket = 0;
		}
	}

	freeMemory(oldBuckets, oldOverflowSegments);
	inReHash = false;
	LOG.info("The rehash take {} ms for {} segments", (System.currentTimeMillis() - reHashStartTime), numBuckets);
}
 
Example 14
Source File: BytesHashMap.java    From flink with Apache License 2.0
/**
 * @throws EOFException if the map can't allocate much more memory.
 */
private void growAndRehash() throws EOFException {
	// allocate the new data structures
	int required = 2 * bucketSegments.size();
	if ((long) required * numBucketsPerSegment > Integer.MAX_VALUE) {
		LOG.warn("We can't handle more than Integer.MAX_VALUE buckets (e.g. because hash functions return int)");
		throw new EOFException();
	}
	List<MemorySegment> newBucketSegments = new ArrayList<>(required);

	try {
		int freeNumSegments = freeMemorySegments.size();
		int numAllocatedSegments = required - freeNumSegments;
		if (numAllocatedSegments > 0) {
			throw new MemoryAllocationException();
		}
		int needNumFromFreeSegments = required - newBucketSegments.size();
		for (int end = needNumFromFreeSegments; end > 0; end--) {
			newBucketSegments.add(freeMemorySegments.remove(freeMemorySegments.size() - 1));
		}

		int numBuckets = newBucketSegments.size() * numBucketsPerSegment;
		this.log2NumBuckets = MathUtils.log2strict(numBuckets);
		this.numBucketsMask = (1 << MathUtils.log2strict(numBuckets)) - 1;
		this.numBucketsMask2 = (1 << MathUtils.log2strict(numBuckets >> 1)) - 1;
		this.growthThreshold = (int) (numBuckets * LOAD_FACTOR);
	} catch (MemoryAllocationException e) {
		LOG.warn("BytesHashMap can't allocate {} pages, and now used {} pages",
				required, reservedNumBuffers, e);
		throw new EOFException();
	}
	long reHashStartTime = System.currentTimeMillis();
	resetBucketSegments(newBucketSegments);
	// Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
	for (MemorySegment memorySegment : bucketSegments) {
		for (int j = 0; j < numBucketsPerSegment; j++) {
			final long recordPointer = memorySegment.getLong(j * BUCKET_SIZE);
			if (recordPointer != END_OF_LIST) {
				final int hashCode1 = memorySegment.getInt(j * BUCKET_SIZE + ELEMENT_POINT_LENGTH);
				int newPos = hashCode1 & numBucketsMask;
				int bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
				int bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
				int step = STEP_INCREMENT;
				long hashCode2 = 0;
				while (newBucketSegments.get(bucketSegmentIndex).getLong(bucketOffset) != END_OF_LIST) {
					if (step == 1) {
						hashCode2 = calcSecondHashCode(hashCode1);
					}
					newPos = (int) ((hashCode1 + step * hashCode2) & numBucketsMask);
					bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
					bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
					step += STEP_INCREMENT;
				}
				newBucketSegments.get(bucketSegmentIndex).putLong(bucketOffset, recordPointer);
				newBucketSegments.get(bucketSegmentIndex).putInt(bucketOffset + ELEMENT_POINT_LENGTH, hashCode1);
			}
		}
	}
	LOG.info("The rehash take {} ms for {} segments", (System.currentTimeMillis() - reHashStartTime), required);
	this.freeMemorySegments.addAll(this.bucketSegments);
	this.bucketSegments = newBucketSegments;
}
 
Example 15
Source File: CompactingHashTable.java    From Flink-CEPplus with Apache License 2.0
public T getMatchFor(PT probeSideRecord) {
	if (closed) {
		return null;
	}
	final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));

	final int posHashCode = searchHashCode % numBuckets;

	// get the bucket for the given hash code
	MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
	int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> p = partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = p.overflowSegments;

	this.pairComparator.setReference(probeSideRecord);

	int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {

		while (numInSegment < countInSegment) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			// check if the hash code matches
			if (thisCode == searchHashCode) {
				// get the pointer to the pair
				final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
				final long pointer = bucket.getLong(pointerOffset);
				numInSegment++;

				// deserialize the key to check whether it is really equal, or whether we had only a hash collision
				try {
					T result = p.readRecordAt(pointer);

					if (this.pairComparator.equalToReference(result)) {
						this.partition = p;
						this.bucket = bucket;
						this.pointerOffsetInBucket = pointerOffset;
						return result;
					}
				}
				catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
				}
			}
			else {
				numInSegment++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return null;
		}

		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketInSegmentOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInSegment = 0;
	}
}
 
Example 16
Source File: CompactingHashTable.java    From Flink-CEPplus with Apache License 2.0
public T getMatchFor(PT probeSideRecord, T reuse) {
	if (closed) {
		return null;
	}
	final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
	
	final int posHashCode = searchHashCode % numBuckets;
	
	// get the bucket for the given hash code
	MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
	int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
	
	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> p = partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = p.overflowSegments;
	
	this.pairComparator.setReference(probeSideRecord);
	
	int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {
		
		while (numInSegment < countInSegment) {
			
			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;
				
			// check if the hash code matches
			if (thisCode == searchHashCode) {
				// get the pointer to the pair
				final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
				final long pointer = bucket.getLong(pointerOffset);
				numInSegment++;
				
				// deserialize the key to check whether it is really equal, or whether we had only a hash collision
				try {
					reuse = p.readRecordAt(pointer, reuse);
					
					if (this.pairComparator.equalToReference(reuse)) {
						this.partition = p;
						this.bucket = bucket;
						this.pointerOffsetInBucket = pointerOffset;
						return reuse;
					}
				}
				catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
				}
			}
			else {
				numInSegment++;
			}
		}
		
		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return null;
		}
		
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketInSegmentOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInSegment = 0;
	}
}
 
Example 17
Source File: CompactingHashTable.java    From flink with Apache License 2.0
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 * 
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
	if(currentBucketIndex >= table.numBuckets) {
		return false;
	}
	MemorySegment bucket = table.buckets[currentSegmentIndex];
	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = partition.overflowSegments;
	
	int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
	int bucketOffset = currentBucketOffset;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {
		while (numInSegment < countInSegment) {
			long pointer = bucket.getLong(posInSegment);
			posInSegment += POINTER_LEN;
			numInSegment++;
			T target = table.buildSideSerializer.createInstance();
			try {
				target = partition.readRecordAt(pointer, target);
				cache.add(target);
			} catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
			}
		}
		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			break;
		}
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
		numInSegment = 0;
	}
	currentBucketIndex++;
	if(currentBucketIndex % bucketsPerSegment == 0) {
		currentSegmentIndex++;
		currentBucketOffset = 0;
	} else {
		currentBucketOffset += HASH_BUCKET_SIZE;
	}
	return true;
}
 
Example 18
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
/**
 * For distinct build.
 */
private boolean findFirstSameBuildRow(
		MemorySegment bucket,
		int searchHashCode,
		int bucketInSegmentOffset,
		BinaryRowData buildRowToInsert) {
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
	int countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInBucket = 0;
	RandomAccessInputView view = partition.getBuildStateInputView();
	while (countInBucket != 0) {
		while (numInBucket < countInBucket) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			if (thisCode == searchHashCode) {
				final int pointer = bucket.getInt(bucketInSegmentOffset +
						BUCKET_POINTER_START_OFFSET + (numInBucket * POINTER_LEN));
				numInBucket++;
				try {
					view.setReadPosition(pointer);
					BinaryRowData row = table.binaryBuildSideSerializer.mapFromPages(table.reuseBuildRow, view);
					if (buildRowToInsert.equals(row)) {
						return true;
					}
				} catch (IOException e) {
					throw new RuntimeException("Error deserializing key or value from the hashtable: " +
							e.getMessage(), e);
				}
			} else {
				numInBucket++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final int forwardPointer = bucket.getInt(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return false;
		}

		final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
		bucket = overflowSegments[overflowSegIndex];
		bucketInSegmentOffset = forwardPointer & table.segmentSizeMask;
		countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInBucket = 0;
	}
	return false;
}
 
Example 19
Source File: SkipListUtils.java    From flink with Apache License 2.0
/**
 * Returns the length of the key.
 *
 * @param memorySegment memory segment for key space.
 * @param offset offset of key space in the memory segment.
 */
public static int getKeyLen(MemorySegment memorySegment, int offset) {
	return memorySegment.getInt(offset + KEY_LEN_OFFSET);
}