Java Code Examples for org.apache.flink.core.memory.MemorySegment#getShort()

The following examples show how to use org.apache.flink.core.memory.MemorySegment#getShort(). They are taken from open source projects; you can go to the original project or source file by following the link above each example.
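
Before diving into the examples, here is a minimal standalone sketch of the call itself. It assumes only MemorySegmentFactory from flink-core for allocation; getShort(int) reads a two-byte value at an absolute byte offset within the segment, and putShort(int, short) is its write counterpart.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class GetShortDemo {
	public static void main(String[] args) {
		// Allocate a small heap-backed segment (contents initialized to zero).
		MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(16);
		// Write a 16-bit value at byte offset 4, then read it back.
		segment.putShort(4, (short) 123);
		short value = segment.getShort(4);
		System.out.println(value); // 123
	}
}
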
Example 1
Source File: MutableHashTable.java    From flink with Apache License 2.0
/**
 * Uses all the bucket memory except the bucket header as the bit set of the bloom filter,
 * and uses the hash codes of the build records to populate it.
 */
final void buildBloomFilterForBucket(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
	final int count = bucket.getShort(bucketInSegmentPos + HEADER_COUNT_OFFSET);
	if (count <= 0) {
		return;
	}

	int[] hashCodes = new int[count];
	// Since the hash codes and the bloom filter occupy the same bytes, read all hash codes out first and then write the bloom filter back.
	for (int i = 0; i < count; i++) {
		hashCodes[i] = bucket.getInt(bucketInSegmentPos + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
	}
	this.bloomFilter.setBitsLocation(bucket, bucketInSegmentPos + BUCKET_HEADER_LENGTH);
	for (int hashCode : hashCodes) {
		this.bloomFilter.addHash(hashCode);
	}
	buildBloomFilterForExtraOverflowSegments(bucketInSegmentPos, bucket, p);
}
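
The BloomFilter used above lives in org.apache.flink.runtime.operators.util. A hedged, standalone sketch of the same setBitsLocation/addHash/testHash cycle on a plain segment follows; it assumes the BloomFilter(expectedEntries, byteSize) constructor as used by MutableHashTable, and the byte size and offset here are arbitrary rather than the hash table's real layout.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;
import org.apache.flink.runtime.operators.util.BloomFilter;

public class BloomFilterSketch {
	public static void main(String[] args) {
		// 512 bytes of (zeroed) bit-set storage for roughly 100 expected entries.
		MemorySegment bits = MemorySegmentFactory.allocateUnpooledSegment(512);
		BloomFilter filter = new BloomFilter(100, 512);
		filter.setBitsLocation(bits, 0);

		filter.addHash("some-record".hashCode());
		System.out.println(filter.testHash("some-record".hashCode()));   // true
		System.out.println(filter.testHash("absent-record".hashCode())); // false, with high probability
	}
}
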
 
Example 2
Source File: BuildSideIterator.java    From flink with Apache License 2.0
private void setBucket(
		MemorySegment bucket, MemorySegment[] overflowSegments,
		int bucketInSegmentOffset) {
	this.bucketSegment = bucket;
	this.overflowSegments = overflowSegments;
	this.bucketInSegmentOffset = bucketInSegmentOffset;
	this.pointerOffset = bucketInSegmentOffset + BinaryHashBucketArea.BUCKET_POINTER_START_OFFSET;
	this.countInBucket = bucket.getShort(bucketInSegmentOffset + BinaryHashBucketArea.HEADER_COUNT_OFFSET);
	this.numInBucket = 0;
	// reset probedSet with probedFlags offset in this bucket.
	this.probedSet.setMemorySegment(bucketSegment, this.bucketInSegmentOffset + BinaryHashBucketArea.PROBED_FLAG_OFFSET);
}
 
Example 3
Source File: MutableHashTable.java    From flink with Apache License 2.0
void set(MemorySegment bucket, MemorySegment[] overflowSegments, HashPartition<BT, PT> partition,
		int searchHashCode, int bucketInSegmentOffset)
{
	this.bucket = bucket;
	this.originalBucket = bucket;
	this.overflowSegments = overflowSegments;
	this.partition = partition;
	this.searchHashCode = searchHashCode;
	this.bucketInSegmentOffset = bucketInSegmentOffset;
	this.originalBucketInSegmentOffset = bucketInSegmentOffset;
	this.posInSegment = this.bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
	this.countInSegment = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	this.numInSegment = 0;
}
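
The pattern shared by this and the neighboring examples is that a bucket stores its record count as a 16-bit header field, read back with getShort and widened to an int, with fixed-width hash-code slots after the header. A toy sketch under assumed offsets (HEADER_COUNT_OFFSET, BUCKET_HEADER_LENGTH, and HASH_CODE_LEN below are illustrative, not Flink's actual constants):

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class BucketHeaderSketch {
	// Hypothetical layout: a 2-byte count at offset 8, 4-byte hash codes after a 16-byte header.
	static final int HEADER_COUNT_OFFSET = 8;
	static final int BUCKET_HEADER_LENGTH = 16;
	static final int HASH_CODE_LEN = 4;

	public static void main(String[] args) {
		MemorySegment bucket = MemorySegmentFactory.allocateUnpooledSegment(64);
		// Write two hash codes and record the count in the header.
		bucket.putInt(BUCKET_HEADER_LENGTH, 0xCAFE);
		bucket.putInt(BUCKET_HEADER_LENGTH + HASH_CODE_LEN, 0xBEEF);
		bucket.putShort(HEADER_COUNT_OFFSET, (short) 2);

		// Read it back the way the iterators above do.
		int count = bucket.getShort(HEADER_COUNT_OFFSET);
		for (int i = 0; i < count; i++) {
			System.out.println(bucket.getInt(BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN));
		}
	}
}
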
 
Example 4
Source File: LookupBucketIterator.java    From flink with Apache License 2.0
public void set(
		MemorySegment bucket, MemorySegment[] overflowSegments,
		BinaryHashPartition partition,
		int searchHashCode, int bucketInSegmentOffset) {
	this.bucket = bucket;
	this.overflowSegments = overflowSegments;
	this.partition = partition;
	this.searchHashCode = searchHashCode;
	this.bucketInSegmentOffset = bucketInSegmentOffset;
	this.pointerOffset = bucketInSegmentOffset + BinaryHashBucketArea.BUCKET_POINTER_START_OFFSET;
	this.hashCodeOffset = this.bucketInSegmentOffset + BinaryHashBucketArea.BUCKET_HEADER_LENGTH;
	this.countInBucket = bucket.getShort(bucketInSegmentOffset + BinaryHashBucketArea.HEADER_COUNT_OFFSET);
	this.numInBucket = 0;
}
 
Example 5
Source File: ChannelReaderInputView.java    From flink with Apache License 2.0
/**
 * Gets the next segment from the asynchronous block reader. If more requests are to be issued, the method
 * first sends a new request with the current memory segment. If no more requests are pending, the method
 * adds the segment to the reader's return queue, which thereby effectively collects all memory segments.
 * Secondly, the method fetches the next non-consumed segment
 * returned by the reader. If no further segments are available, this method throws an {@link EOFException}.
 *
 * @param current The memory segment used for the next request.
 * @return The memory segment to read from next.
 *
 * @throws EOFException Thrown, if no further segments are available.
 * @throws IOException Thrown, if an I/O error occurred while reading
 * @see AbstractPagedInputView#nextSegment(org.apache.flink.core.memory.MemorySegment)
 */
@Override
protected MemorySegment nextSegment(MemorySegment current) throws IOException {
	// check if we are at our end
	if (this.inLastBlock) {
		throw new EOFException();
	}

	// send a request first. if we have only a single segment, this same segment will be the one obtained in
	// the next lines
	if (current != null) {
		sendReadRequest(current);
	}

	// get the next segment
	final MemorySegment seg = this.reader.getNextReturnedBlock();

	// check the header
	if (seg.getShort(0) != ChannelWriterOutputView.HEADER_MAGIC_NUMBER) {
		throw new IOException("The current block does not belong to a ChannelWriterOutputView / " +
				"ChannelReaderInputView: Wrong magic number.");
	}
	if ( (seg.getShort(ChannelWriterOutputView.HEADER_FLAGS_OFFSET) & ChannelWriterOutputView.FLAG_LAST_BLOCK) != 0) {
		// last block
		this.numRequestsRemaining = 0;
		this.inLastBlock = true;
	}

	return seg;
}
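
The write side that this reader validates against is ChannelWriterOutputView, which stamps each block with a short magic number at offset 0 and a flags short after it. A self-contained sketch of the same check with made-up constants (HEADER_MAGIC_NUMBER and the offsets below are illustrative, not the real Flink values):

import java.io.IOException;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class BlockHeaderSketch {
	// Hypothetical header constants for illustration only.
	static final short HEADER_MAGIC_NUMBER = (short) 0xC0FE;
	static final int HEADER_FLAGS_OFFSET = 2;
	static final short FLAG_LAST_BLOCK = 0x1;

	public static void main(String[] args) throws IOException {
		MemorySegment block = MemorySegmentFactory.allocateUnpooledSegment(32);
		// Writer side: stamp the header.
		block.putShort(0, HEADER_MAGIC_NUMBER);
		block.putShort(HEADER_FLAGS_OFFSET, FLAG_LAST_BLOCK);

		// Reader side: validate the magic number, then inspect the flags.
		if (block.getShort(0) != HEADER_MAGIC_NUMBER) {
			throw new IOException("Wrong magic number.");
		}
		boolean lastBlock = (block.getShort(HEADER_FLAGS_OFFSET) & FLAG_LAST_BLOCK) != 0;
		System.out.println("last block: " + lastBlock);
	}
}
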
 
Example 6
Source File: MutableHashTable.java    From flink with Apache License 2.0
private void buildBloomFilterForExtraOverflowSegments(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
	int totalCount = 0;
	boolean skip = false;
	long forwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
	while (forwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		if (overflowSegNum < 0 || overflowSegNum >= p.numOverflowSegments) {
			skip = true;
			break;
		}
		MemorySegment overflowSegment = p.overflowSegments[overflowSegNum];
		int bucketInOverflowSegmentOffset = (int) forwardPointer;
		
		final int count = overflowSegment.getShort(bucketInOverflowSegmentOffset + HEADER_COUNT_OFFSET);
		totalCount += count;
		// The bloom filter has 112 * 8 bits per bucket; once the expected number of input entries exceeds 2048,
		// the false positive probability rises above 0.9, which makes the bloom filter an overhead instead of an optimization.
		if (totalCount > 2048) {
			skip = true;
			break;
		}
		
		for (int i = 0; i < count; i++) {
			int hashCode = overflowSegment.getInt(bucketInOverflowSegmentOffset + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
			this.bloomFilter.addHash(hashCode);
		}
		
		forwardPointer = overflowSegment.getLong(bucketInOverflowSegmentOffset + HEADER_FORWARD_OFFSET);
		
	}
	
	if (!skip) {
		bucket.put(bucketInSegmentPos + HEADER_STATUS_OFFSET, BUCKET_STATUS_IN_FILTER);
	}
}
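
The overflow-bucket chain above packs two values into one long forward pointer: the overflow segment number in the high 32 bits and the byte offset within that segment in the low 32 bits. A minimal sketch of the packing and unpacking (pure arithmetic, no Flink types needed):

public class ForwardPointerSketch {
	public static void main(String[] args) {
		int overflowSegNum = 3;
		int offsetInSegment = 4096;

		// Pack: segment number in the high 32 bits, offset in the low 32 bits.
		long forwardPointer = ((long) overflowSegNum << 32) | (offsetInSegment & 0xFFFFFFFFL);

		// Unpack the way buildBloomFilterForExtraOverflowSegments does.
		int segNum = (int) (forwardPointer >>> 32);
		int offset = (int) forwardPointer;
		System.out.println(segNum + " / " + offset); // 3 / 4096
	}
}
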
 
Example 7
Source File: MutableHashTable.java    From flink with Apache License 2.0
private void setBucket(MemorySegment bucket, MemorySegment[] overflowSegments, HashPartition<BT, PT> partition,
	int bucketInSegmentOffset) {
	this.bucketSegment = bucket;
	this.overflowSegments = overflowSegments;
	this.partition = partition;
	this.bucketInSegmentOffset = bucketInSegmentOffset;
	this.countInSegment = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	this.numInSegment = 0;
	// reset probedSet with probedFlags offset in this bucket.
	this.probedSet.setMemorySegment(bucketSegment, this.bucketInSegmentOffset + HEADER_PROBED_FLAGS_OFFSET);
}
 
Example 8
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private void buildBloomFilterAndFree(
		MemorySegment[] buckets,
		int numBuckets,
		MemorySegment[] overflowSegments) {
	if (table.useBloomFilters) {
		long numRecords = (long) Math.max(partition.getBuildSideRecordCount() * 1.5, estimatedRowCount);

		// The bloom filter size is the minimum of:
		// 1. the remaining buffers,
		// 2. the bloom filter size for numRecords at an fpp of 0.05,
		// 3. the maximum initial number of bucket area buffers.
		int segSize = Math.min(
				Math.min(table.remainBuffers(),
				HashTableBloomFilter.optimalSegmentNumber(numRecords, table.pageSize(), 0.05)),
				table.maxInitBufferOfBucketArea(table.partitionsBeingBuilt.size()));

		if (segSize > 0) {
			HashTableBloomFilter filter = new HashTableBloomFilter(
					table.getNextBuffers(MathUtils.roundDownToPowerOf2(segSize)), numRecords);

			// Add all records to bloom filter.
			int scanCount = -1;
			while (true) {
				scanCount++;
				if (scanCount >= numBuckets) {
					break;
				}
				// move to next bucket, update all the current bucket status with new bucket information.
				final int bucketArrayPos = scanCount >> table.bucketsPerSegmentBits;
				int bucketInSegOffset = (scanCount & table.bucketsPerSegmentMask) << BUCKET_SIZE_BITS;
				MemorySegment bucketSeg = buckets[bucketArrayPos];

				int countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
				int numInBucket = 0;
				while (countInBucket != 0) {
					int hashCodeOffset = bucketInSegOffset + BUCKET_HEADER_LENGTH;
					while (numInBucket < countInBucket) {
						filter.addHash(bucketSeg.getInt(hashCodeOffset));
						numInBucket++;
						hashCodeOffset += HASH_CODE_LEN;
					}

					// this segment is done. check if there is another chained bucket
					int forwardPointer = bucketSeg.getInt(bucketInSegOffset + HEADER_FORWARD_OFFSET);
					if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
						break;
					}

					final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
					bucketSeg = overflowSegments[overflowSegIndex];
					bucketInSegOffset = forwardPointer & table.segmentSizeMask;
					countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
					numInBucket = 0;
				}
			}

			partition.bloomFilter = filter;
		}
	}

	freeMemory(buckets, overflowSegments);
}
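
The scan loop above locates bucket number scanCount with pure bit arithmetic: the segment index is scanCount >> bucketsPerSegmentBits, and the byte offset is the remainder shifted left by the bucket-size bits. A sketch with assumed sizes (128-byte buckets and 256 buckets per segment; the real values come from the table's configuration):

public class BucketAddressSketch {
	public static void main(String[] args) {
		// Assumed: 128-byte buckets (2^7), 256 buckets per segment (2^8).
		int bucketSizeBits = 7;
		int bucketsPerSegmentBits = 8;
		int bucketsPerSegmentMask = (1 << bucketsPerSegmentBits) - 1;

		int scanCount = 777; // the 778th bucket overall
		int bucketArrayPos = scanCount >> bucketsPerSegmentBits;                       // which segment
		int bucketInSegOffset = (scanCount & bucketsPerSegmentMask) << bucketSizeBits; // byte offset within it
		System.out.println(bucketArrayPos + " / " + bucketInSegOffset); // 3 / 1152
	}
}
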
 
Example 9
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
/**
 * For distinct build.
 */
private boolean findFirstSameBuildRow(
		MemorySegment bucket,
		int searchHashCode,
		int bucketInSegmentOffset,
		BinaryRow buildRowToInsert) {
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
	int countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInBucket = 0;
	RandomAccessInputView view = partition.getBuildStateInputView();
	while (countInBucket != 0) {
		while (numInBucket < countInBucket) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			if (thisCode == searchHashCode) {
				final int pointer = bucket.getInt(bucketInSegmentOffset +
						BUCKET_POINTER_START_OFFSET + (numInBucket * POINTER_LEN));
				numInBucket++;
				try {
					view.setReadPosition(pointer);
					BinaryRow row = table.binaryBuildSideSerializer.mapFromPages(table.reuseBuildRow, view);
					if (buildRowToInsert.equals(row)) {
						return true;
					}
				} catch (IOException e) {
					throw new RuntimeException("Error deserializing key or value from the hashtable: " +
							e.getMessage(), e);
				}
			} else {
				numInBucket++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final int forwardPointer = bucket.getInt(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return false;
		}

		final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
		bucket = overflowSegments[overflowSegIndex];
		bucketInSegmentOffset = forwardPointer & table.segmentSizeMask;
		countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInBucket = 0;
	}
	return false;
}
 
Example 10
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private void reHash(
		MemorySegment[] oldBuckets,
		int oldNumBuckets,
		MemorySegment[] oldOverflowSegments) throws IOException {
	long reHashStartTime = System.currentTimeMillis();
	inReHash = true;
	int scanCount = -1;
	while (true) {
		scanCount++;
		if (scanCount >= oldNumBuckets) {
			break;
		}
		// move to next bucket, update all the current bucket status with new bucket information.
		final int bucketArrayPos = scanCount >> table.bucketsPerSegmentBits;
		int bucketInSegOffset = (scanCount & table.bucketsPerSegmentMask) << BUCKET_SIZE_BITS;
		MemorySegment bucketSeg = oldBuckets[bucketArrayPos];

		int countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
		int numInBucket = 0;
		while (countInBucket != 0) {
			int hashCodeOffset = bucketInSegOffset + BUCKET_HEADER_LENGTH;
			int pointerOffset = bucketInSegOffset + BUCKET_POINTER_START_OFFSET;
			while (numInBucket < countInBucket) {
				int hashCode = bucketSeg.getInt(hashCodeOffset);
				int pointer = bucketSeg.getInt(pointerOffset);
				if (!insertToBucket(hashCode, pointer, true, false)) {
					buildBloomFilterAndFree(oldBuckets, oldNumBuckets, oldOverflowSegments);
					return;
				}
				numInBucket++;
				hashCodeOffset += HASH_CODE_LEN;
				pointerOffset += POINTER_LEN;
			}

			// this segment is done. check if there is another chained bucket
			int forwardPointer = bucketSeg.getInt(bucketInSegOffset + HEADER_FORWARD_OFFSET);
			if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
				break;
			}

			final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
			bucketSeg = oldOverflowSegments[overflowSegIndex];
			bucketInSegOffset = forwardPointer & table.segmentSizeMask;
			countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
			numInBucket = 0;
		}
	}

	freeMemory(oldBuckets, oldOverflowSegments);
	inReHash = false;
	LOG.info("The rehash take {} ms for {} segments", (System.currentTimeMillis() - reHashStartTime), numBuckets);
}
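
Unlike the long forward pointers in MutableHashTable, the binary hash table encodes its overflow pointer in a single int: the overflow segment index sits above the segment-size bits and the offset in the low bits. A sketch assuming 64 KB segments (the real width comes from table.segmentSizeBits):

public class IntForwardPointerSketch {
	public static void main(String[] args) {
		// Assumed 64 KB segments: offsets need 16 bits, the index uses the rest.
		int segmentSizeBits = 16;
		int segmentSizeMask = (1 << segmentSizeBits) - 1;

		int overflowSegIndex = 5;
		int offset = 1024;
		int forwardPointer = (overflowSegIndex << segmentSizeBits) | offset;

		// Decode the way reHash does.
		System.out.println(forwardPointer >>> segmentSizeBits); // 5
		System.out.println(forwardPointer & segmentSizeMask);   // 1024
	}
}
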
 
Example 11
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private void reHash(
		MemorySegment[] oldBuckets,
		int oldNumBuckets,
		MemorySegment[] oldOverflowSegments) throws IOException {
	long reHashStartTime = System.currentTimeMillis();
	inReHash = true;
	int scanCount = -1;
	while (true) {
		scanCount++;
		if (scanCount >= oldNumBuckets) {
			break;
		}
		// move to next bucket, update all the current bucket status with new bucket information.
		final int bucketArrayPos = scanCount >> table.bucketsPerSegmentBits;
		int bucketInSegOffset = (scanCount & table.bucketsPerSegmentMask) << BUCKET_SIZE_BITS;
		MemorySegment bucketSeg = oldBuckets[bucketArrayPos];

		int countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
		int numInBucket = 0;
		while (countInBucket != 0) {
			int hashCodeOffset = bucketInSegOffset + BUCKET_HEADER_LENGTH;
			int pointerOffset = bucketInSegOffset + BUCKET_POINTER_START_OFFSET;
			while (numInBucket < countInBucket) {
				int hashCode = bucketSeg.getInt(hashCodeOffset);
				int pointer = bucketSeg.getInt(pointerOffset);
				if (!insertToBucket(hashCode, pointer, false)) {
					buildBloomFilterAndFree(oldBuckets, oldNumBuckets, oldOverflowSegments);
					return;
				}
				numInBucket++;
				hashCodeOffset += HASH_CODE_LEN;
				pointerOffset += POINTER_LEN;
			}

			// this segment is done. check if there is another chained bucket
			int forwardPointer = bucketSeg.getInt(bucketInSegOffset + HEADER_FORWARD_OFFSET);
			if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
				break;
			}

			final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
			bucketSeg = oldOverflowSegments[overflowSegIndex];
			bucketInSegOffset = forwardPointer & table.segmentSizeMask;
			countInBucket = bucketSeg.getShort(bucketInSegOffset + HEADER_COUNT_OFFSET);
			numInBucket = 0;
		}
	}

	freeMemory(oldBuckets, oldOverflowSegments);
	inReHash = false;
	LOG.info("The rehash take {} ms for {} segments", (System.currentTimeMillis() - reHashStartTime), numBuckets);
}
 
Example 12
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
/**
 * For distinct build.
 */
private boolean findFirstSameBuildRow(
		MemorySegment bucket,
		int searchHashCode,
		int bucketInSegmentOffset,
		BinaryRowData buildRowToInsert) {
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
	int countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInBucket = 0;
	RandomAccessInputView view = partition.getBuildStateInputView();
	while (countInBucket != 0) {
		while (numInBucket < countInBucket) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			if (thisCode == searchHashCode) {
				final int pointer = bucket.getInt(bucketInSegmentOffset +
						BUCKET_POINTER_START_OFFSET + (numInBucket * POINTER_LEN));
				numInBucket++;
				try {
					view.setReadPosition(pointer);
					BinaryRowData row = table.binaryBuildSideSerializer.mapFromPages(table.reuseBuildRow, view);
					if (buildRowToInsert.equals(row)) {
						return true;
					}
				} catch (IOException e) {
					throw new RuntimeException("Error deserializing key or value from the hashtable: " +
							e.getMessage(), e);
				}
			} else {
				numInBucket++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final int forwardPointer = bucket.getInt(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return false;
		}

		final int overflowSegIndex = forwardPointer >>> table.segmentSizeBits;
		bucket = overflowSegments[overflowSegIndex];
		bucketInSegmentOffset = forwardPointer & table.segmentSizeMask;
		countInBucket = bucket.getShort(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInBucket = 0;
	}
	return false;
}
 