Java Code Examples for org.apache.flink.core.memory.MemorySegment#getLong()

The following examples show how to use org.apache.flink.core.memory.MemorySegment#getLong(). The originating project and source file are noted above each example.
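
Before the examples, here is a minimal, self-contained sketch of the putLong()/getLong() pair on a MemorySegment. It assumes a Flink version in which MemorySegmentFactory.allocateUnpooledSegment(int) is available; the class name and offsets are illustrative only.

import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;

public class GetLongSketch {

	public static void main(String[] args) {
		// Allocate a small heap-backed segment: 32 bytes, i.e. four 8-byte slots.
		MemorySegment segment = MemorySegmentFactory.allocateUnpooledSegment(32);

		// Write a long at byte offset 8 and read it back with getLong().
		segment.putLong(8, 42L);
		long value = segment.getLong(8);

		System.out.println(value); // prints 42
	}
}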
Example 1
Source File: InPlaceMutableHashTable.java    From flink with Apache License 2.0
/**
 * Inserts the given record into the hash table.
 * Note: this method doesn't care about whether a record with the same key is already present.
 * @param record The record to insert.
 * @throws IOException (EOFException specifically, if memory ran out)
 */
@Override
public void insert(T record) throws IOException {
	if (closed) {
		return;
	}

	final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
	final int bucket = hashCode & numBucketsMask;
	final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
	final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
	final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
	final long firstPointer = bucketSegment.getLong(bucketOffset);

	try {
		final long newFirstPointer = recordArea.appendPointerAndRecord(firstPointer, record);
		bucketSegment.putLong(bucketOffset, newFirstPointer);
	} catch (EOFException ex) {
		compactOrThrow();
		insert(record);
		return;
	}

	numElements++;
	resizeTableIfNecessary();
}
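
Example 1 resolves the bucket index into a segment index and a byte offset before reading the list-head pointer with getLong(). The sketch below reproduces that address arithmetic on its own; the constants are hypothetical stand-ins for the table's configuration, not values taken from Flink.

public class BucketAddressingSketch {

	public static void main(String[] args) {
		final int numBucketsPerSegmentBits = 12;                      // hypothetical: 4096 buckets per segment
		final int numBucketsPerSegmentMask = (1 << numBucketsPerSegmentBits) - 1;
		final int bucketSizeBits = 3;                                 // hypothetical: 8-byte buckets (one long pointer)

		int bucket = 123_456;                                         // some bucket index
		int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
		int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // byte offset within that segment

		// bucketSegments[bucketSegmentIndex].getLong(bucketOffset) would then read the list-head pointer.
		System.out.println(bucketSegmentIndex + " / " + bucketOffset); // prints 30 / 4608
	}
}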
 
Example 2
Source File: InPlaceMutableHashTable.java    From flink with Apache License 2.0
/**
 * Searches the hash table for the record with the given key.
 * (If there are multiple matches, only one of them is returned.)
 * @param record The record whose key we are searching for
 * @param targetForMatch If a match is found, it will be written here
 * @return targetForMatch if a match is found, otherwise null.
 */
@Override
public T getMatchFor(PT record, T targetForMatch) {
	if (closed) {
		return null;
	}

	final int hashCode = MathUtils.jenkinsHash(probeTypeComparator.hash(record));
	final int bucket = hashCode & numBucketsMask;
	bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
	final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
	bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment

	curElemPtr = bucketSegment.getLong(bucketOffset);

	pairComparator.setReference(record);

	T currentRecordInList = targetForMatch;

	prevElemPtr = INVALID_PREV_POINTER;
	try {
		while (curElemPtr != END_OF_LIST && !closed) {
			recordArea.setReadPosition(curElemPtr);
			nextPtr = recordArea.readPointer();

			currentRecordInList = recordArea.readRecord(currentRecordInList);
			recordEnd = recordArea.getReadPosition();
			if (pairComparator.equalToReference(currentRecordInList)) {
				// we found an element with a matching key, and not just a hash collision
				return currentRecordInList;
			}

			prevElemPtr = curElemPtr;
			curElemPtr = nextPtr;
		}
	} catch (IOException ex) {
		throw new RuntimeException("Error deserializing record from the hashtable: " + ex.getMessage(), ex);
	}
	return null;
}
 
Example 3
Source File: LongHashPartition.java    From flink with Apache License 2.0
void iteratorToDenseBucket(MemorySegment[] denseBuckets, long addressOffset,
		long globalMinKey) {
	int bucketOffset = 0;
	MemorySegment segment = buckets[bucketOffset];
	int segOffset = 0;
	for (int i = 0; i < numBuckets; i++) {
		long address = segment.getLong(segOffset + 8);
		if (address != INVALID_ADDRESS) {
			long key = segment.getLong(segOffset);
			long denseBucket = key - globalMinKey;
			long denseBucketOffset = denseBucket << 3;
			int denseSegIndex = (int) (denseBucketOffset >>> segmentSizeBits);
			int denseSegOffset = (int) (denseBucketOffset & segmentSizeMask);
			denseBuckets[denseSegIndex].putLong(denseSegOffset, address + addressOffset);
		}

		// not last bucket, move to next.
		if (i != numBuckets - 1) {
			if (segOffset + 16 < segmentSize) {
				segOffset += 16;
			} else {
				segment = buckets[++bucketOffset];
				segOffset = 0;
			}
		}
	}
}
 
Example 4
Source File: StringNormalizedKeyComputer.java    From flink with Apache License 2.0
@Override
public void swapKey(MemorySegment segI, int offsetI,
		MemorySegment segJ, int offsetJ) {
	long temp0 = segI.getLong(offsetI);
	segI.putLong(offsetI, segJ.getLong(offsetJ));
	segJ.putLong(offsetJ, temp0);
}
 
Example 5
Source File: MutableHashTable.java    From Flink-CEPplus with Apache License 2.0
private void buildBloomFilterForExtraOverflowSegments(int bucketInSegmentPos, MemorySegment bucket, HashPartition<BT, PT> p) {
	int totalCount = 0;
	boolean skip = false;
	long forwardPointer = bucket.getLong(bucketInSegmentPos + HEADER_FORWARD_OFFSET);
	while (forwardPointer != BUCKET_FORWARD_POINTER_NOT_SET) {
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		if (overflowSegNum < 0 || overflowSegNum >= p.numOverflowSegments) {
			skip = true;
			break;
		}
		MemorySegment overflowSegment = p.overflowSegments[overflowSegNum];
		int bucketInOverflowSegmentOffset = (int) forwardPointer;
		
		final int count = overflowSegment.getShort(bucketInOverflowSegmentOffset + HEADER_COUNT_OFFSET);
		totalCount += count;
		// The bloom filter has 112 * 8 bits per bucket. When the expected number of input entries exceeds 2048,
		// the false positive probability rises above 0.9, which makes the bloom filter an overhead instead of an optimization.
		if (totalCount > 2048) {
			skip = true;
			break;
		}
		
		for (int i = 0; i < count; i++) {
			int hashCode = overflowSegment.getInt(bucketInOverflowSegmentOffset + BUCKET_HEADER_LENGTH + i * HASH_CODE_LEN);
			this.bloomFilter.addHash(hashCode);
		}
		
		forwardPointer = overflowSegment.getLong(bucketInOverflowSegmentOffset + HEADER_FORWARD_OFFSET);
		
	}
	
	if (!skip) {
		bucket.put(bucketInSegmentPos + HEADER_STATUS_OFFSET, BUCKET_STATUS_IN_FILTER);
	}
}
 
Example 6
Source File: BinaryIndexedSortable.java    From flink with Apache License 2.0
@Override
public void swap(int segmentNumberI, int segmentOffsetI, int segmentNumberJ, int segmentOffsetJ) {
	final MemorySegment segI = this.sortIndex.get(segmentNumberI);
	final MemorySegment segJ = this.sortIndex.get(segmentNumberJ);

	// swap offset
	long index = segI.getLong(segmentOffsetI);
	segI.putLong(segmentOffsetI, segJ.getLong(segmentOffsetJ));
	segJ.putLong(segmentOffsetJ, index);

	// swap key
	normalizedKeyComputer.swapKey(segI, segmentOffsetI + OFFSET_LEN, segJ, segmentOffsetJ + OFFSET_LEN);
}
 
Example 7
Source File: LongHashPartition.java    From flink with Apache License 2.0
/**
 * Returns an iterator over all the values for the given key, or null if no value is found.
 */
public MatchIterator get(long key, int hashCode) {
	int bucket = hashCode & numBucketsMask;

	int bucketOffset = bucket << 4;
	MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
	int segOffset = bucketOffset & segmentSizeMask;

	while (true) {
		long address = segment.getLong(segOffset + 8);
		if (address != INVALID_ADDRESS) {
			if (segment.getLong(segOffset) == key) {
				return valueIter(address);
			} else {
				bucket = (bucket + 1) & numBucketsMask;
				if (segOffset + 16 < segmentSize) {
					segOffset += 16;
				} else {
					bucketOffset = bucket << 4;
					segOffset = bucketOffset & segmentSizeMask;
					segment = buckets[bucketOffset >>> segmentSizeBits];
				}
			}
		} else {
			return valueIter(INVALID_ADDRESS);
		}
	}
}
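
In the example above, each bucket occupies 16 bytes, with the key stored as a long at offset 0 and the record address as a long at offset 8; on a key mismatch the lookup probes the next bucket linearly. The following sketch reproduces that lookup over a plain long[] instead of MemorySegments; the sentinel value and helper are illustrative, not Flink's.

public class LinearProbeSketch {

	static final long INVALID_ADDRESS = -1L; // hypothetical sentinel marking an empty slot

	// Slot i stores the key at index 2 * i and the record address at index 2 * i + 1.
	static long lookup(long[] table, int numBucketsMask, long key, int hashCode) {
		int bucket = hashCode & numBucketsMask;
		while (true) {
			long address = table[2 * bucket + 1];
			if (address == INVALID_ADDRESS) {
				return INVALID_ADDRESS;              // empty slot: the key is not present
			}
			if (table[2 * bucket] == key) {
				return address;                      // matching key found
			}
			bucket = (bucket + 1) & numBucketsMask;  // probe the next bucket
		}
	}

	public static void main(String[] args) {
		// Four buckets: bucket 1 holds key 42 with address 1000, the others are empty.
		long[] table = {
				0L, INVALID_ADDRESS,
				42L, 1000L,
				0L, INVALID_ADDRESS,
				0L, INVALID_ADDRESS};
		System.out.println(lookup(table, 3, 42L, 1)); // prints 1000
	}
}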
 
Example 8
Source File: InPlaceMutableHashTable.java    From flink with Apache License 2.0
/** Same as above, but the number of bucket segments of the new table can be specified. */
private void rebuild(long newNumBucketSegments) throws IOException {
	// Get new bucket segments
	releaseBucketSegments();
	allocateBucketSegments((int)newNumBucketSegments);

	T record = buildSideSerializer.createInstance();
	try {
		EntryIterator iter = getEntryIterator();
		recordArea.resetAppendPosition();
		recordArea.setWritePosition(0);
		while ((record = iter.next(record)) != null && !closed) {
			final int hashCode = MathUtils.jenkinsHash(buildSideComparator.hash(record));
			final int bucket = hashCode & numBucketsMask;
			final int bucketSegmentIndex = bucket >>> numBucketsPerSegmentBits; // which segment contains the bucket
			final MemorySegment bucketSegment = bucketSegments[bucketSegmentIndex];
			final int bucketOffset = (bucket & numBucketsPerSegmentMask) << bucketSizeBits; // offset of the bucket in the segment
			final long firstPointer = bucketSegment.getLong(bucketOffset);

			long ptrToAppended = recordArea.noSeekAppendPointerAndRecord(firstPointer, record);
			bucketSegment.putLong(bucketOffset, ptrToAppended);
		}
		recordArea.freeSegmentsAfterAppendPosition();
		holes = 0;

	} catch (EOFException ex) {
		throw new RuntimeException("Bug in InPlaceMutableHashTable: we shouldn't get out of memory during a rebuild, " +
			"because we aren't allocating any new memory.");
	}
}
 
Example 9
Source File: NormalizedKeySorter.java    From flink with Apache License 2.0
@Override
public void writeToOutput(ChannelWriterOutputView output, LargeRecordHandler<T> largeRecordsOutput)
		throws IOException
{
	if (LOG.isDebugEnabled()) {
		if (largeRecordsOutput == null) {
			LOG.debug("Spilling sort buffer without large record handling.");
		} else {
			LOG.debug("Spilling sort buffer with large record handling.");
		}
	}
	
	final int numRecords = this.numRecords;
	int currentMemSeg = 0;
	int currentRecord = 0;
	
	while (currentRecord < numRecords) {
		final MemorySegment currentIndexSegment = this.sortIndex.get(currentMemSeg++);

		// go through all records in the memory segment
		for (int offset = 0; currentRecord < numRecords && offset <= this.lastIndexEntryOffset; currentRecord++, offset += this.indexEntrySize) {
			final long pointer = currentIndexSegment.getLong(offset);
			
			// small records go into the regular spill file, large records into the special code path
			if (pointer >= 0 || largeRecordsOutput == null) {
				this.recordBuffer.setReadPosition(pointer);
				this.serializer.copy(this.recordBuffer, output);
			}
			else {
				
				if (LOG.isDebugEnabled()) {
					LOG.debug("Spilling large record to large record fetch file.");
				}
				
				this.recordBuffer.setReadPosition(pointer & POINTER_MASK);
				T record = this.serializer.deserialize(this.recordBuffer);
				largeRecordsOutput.addRecord(record);
			}
		}
	}
}
 
Example 10
Source File: LongHashPartition.java    From flink with Apache License 2.0
/**
 * Update the address in array for given key.
 */
private void updateIndex(
		long key,
		int hashCode,
		long address,
		int size,
		MemorySegment dataSegment,
		int currentPositionInSegment) throws IOException {
	assert (numKeys <= numBuckets / 2);
	int bucketId = hashCode & numBucketsMask;

	// each bucket occupies 16 bytes (long key + long pointer to data address)
	int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
	MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
	int segOffset = bucketOffset & segmentSizeMask;
	long currAddress;

	while (true) {
		currAddress = segment.getLong(segOffset + 8);
		if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
			// hash conflicts, the bucket is occupied by another key

			// TODO test Conflict resolution:
			// now:    +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
			// other1: +1 +2 +3... less conflict, factor can be 0.75
			// other2: Secondary hashCode... less and less conflict, but need compute hash again
			bucketId = (bucketId + 1) & numBucketsMask;
			if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
				// if the new bucket is still in the current segment, we only need to update the offset
				// within this segment
				segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
			} else {
				// otherwise, we should re-calculate segment and offset
				bucketOffset = bucketId * 16;
				segment = buckets[bucketOffset >>> segmentSizeBits];
				segOffset = bucketOffset & segmentSizeMask;
			}
		} else {
			break;
		}
	}
	if (currAddress == INVALID_ADDRESS) {
		// this is the first value for this key, put the address in array.
		segment.putLong(segOffset, key);
		segment.putLong(segOffset + 8, address);
		numKeys += 1;
		// dataSegment may be null if we only have to rehash bucket area
		if (dataSegment != null) {
			dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
		}
		if (numKeys * 2 > numBuckets) {
			resize();
		}
	} else {
		// there are some values for this key, put the address in the front of them.
		dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
		segment.putLong(segOffset + 8, address);
	}
}
 
Example 11
Source File: CompactingHashTable.java    From flink with Apache License 2.0
/**
 * utility function that inserts all entries from a bucket and its overflow buckets into the cache
 * 
 * @return true if last bucket was not reached yet
 * @throws IOException
 */
private boolean fillCache() throws IOException {
	if(currentBucketIndex >= table.numBuckets) {
		return false;
	}
	MemorySegment bucket = table.buckets[currentSegmentIndex];
	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(currentBucketOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> partition = table.partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = partition.overflowSegments;
	
	int countInSegment = bucket.getInt(currentBucketOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = currentBucketOffset + BUCKET_POINTER_START_OFFSET;
	int bucketOffset = currentBucketOffset;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {
		while (numInSegment < countInSegment) {
			long pointer = bucket.getLong(posInSegment);
			posInSegment += POINTER_LEN;
			numInSegment++;
			T target = table.buildSideSerializer.createInstance();
			try {
				target = partition.readRecordAt(pointer, target);
				cache.add(target);
			} catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the Hash Table: " + e.getMessage(), e);
			}
		}
		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			break;
		}
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketOffset + BUCKET_POINTER_START_OFFSET;
		numInSegment = 0;
	}
	currentBucketIndex++;
	if(currentBucketIndex % bucketsPerSegment == 0) {
		currentSegmentIndex++;
		currentBucketOffset = 0;
	} else {
		currentBucketOffset += HASH_BUCKET_SIZE;
	}
	return true;
}
 
Example 12
Source File: CompactingHashTable.java    From flink with Apache License 2.0
public T getMatchFor(PT probeSideRecord, T reuse) {
	if (closed) {
		return null;
	}
	final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));
	
	final int posHashCode = searchHashCode % numBuckets;
	
	// get the bucket for the given hash code
	MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
	int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
	
	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> p = partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = p.overflowSegments;
	
	this.pairComparator.setReference(probeSideRecord);
	
	int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {
		
		while (numInSegment < countInSegment) {
			
			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;
				
			// check if the hash code matches
			if (thisCode == searchHashCode) {
				// get the pointer to the pair
				final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
				final long pointer = bucket.getLong(pointerOffset);
				numInSegment++;
				
				// deserialize the key to check whether it is really equal, or whether we had only a hash collision
				try {
					reuse = p.readRecordAt(pointer, reuse);
					
					if (this.pairComparator.equalToReference(reuse)) {
						this.partition = p;
						this.bucket = bucket;
						this.pointerOffsetInBucket = pointerOffset;
						return reuse;
					}
				}
				catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
				}
			}
			else {
				numInSegment++;
			}
		}
		
		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return null;
		}
		
		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketInSegmentOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInSegment = 0;
	}
}
 
Example 13
Source File: CompactingHashTable.java    From Flink-CEPplus with Apache License 2.0
/**
 * Compacts (garbage collects) a partition with the copy-compact strategy, using the compaction partition.
 * 
 * @param partitionNumber partition to compact
 * @throws IOException 
 */
private void compactPartition(final int partitionNumber) throws IOException {
	// do nothing if table was closed, parameter is invalid or no garbage exists
	if (this.closed || partitionNumber >= this.partitions.size() || this.partitions.get(partitionNumber).isCompacted()) {
		return;
	}
	// release all segments owned by compaction partition
	this.compactionMemory.clearAllMemory(availableMemory);
	this.compactionMemory.allocateSegments(1);
	this.compactionMemory.pushDownPages();
	T tempHolder = this.buildSideSerializer.createInstance();
	final int numPartitions = this.partitions.size();
	InMemoryPartition<T> partition = this.partitions.remove(partitionNumber);
	MemorySegment[] overflowSegments = partition.overflowSegments;
	long pointer;
	int pointerOffset;
	int bucketOffset;
	final int bucketsPerSegment = this.bucketsPerSegmentMask + 1;
	for (int i = 0, bucket = partitionNumber; i < this.buckets.length && bucket < this.numBuckets; i++) {
		MemorySegment segment = this.buckets[i];
		// go over all buckets in the segment belonging to the partition
		for (int k = bucket % bucketsPerSegment; k < bucketsPerSegment && bucket < this.numBuckets; k += numPartitions, bucket += numPartitions) {
			bucketOffset = k * HASH_BUCKET_SIZE;
			if((int)segment.get(bucketOffset + HEADER_PARTITION_OFFSET) != partitionNumber) {
				throw new IOException("Accessed wrong bucket! wanted: " + partitionNumber + " got: " + segment.get(bucketOffset + HEADER_PARTITION_OFFSET));
			}
			// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
			int countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
			int numInSegment = 0;
			pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
			while (true) {
				while (numInSegment < countInSegment) {
					pointer = segment.getLong(pointerOffset);
					tempHolder = partition.readRecordAt(pointer, tempHolder);
					pointer = this.compactionMemory.appendRecord(tempHolder);
					segment.putLong(pointerOffset, pointer);
					pointerOffset += POINTER_LEN;
					numInSegment++;
				}
				// this segment is done. check if there is another chained bucket
				final long forwardPointer = segment.getLong(bucketOffset + HEADER_FORWARD_OFFSET);
				if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
					break;
				}
				final int overflowSegNum = (int) (forwardPointer >>> 32);
				segment = overflowSegments[overflowSegNum];
				bucketOffset = (int) forwardPointer;
				countInSegment = segment.getInt(bucketOffset + HEADER_COUNT_OFFSET);
				pointerOffset = bucketOffset + BUCKET_POINTER_START_OFFSET;
				numInSegment = 0;
			}
			segment = this.buckets[i];
		}
	}
	// swap partition with compaction partition
	this.compactionMemory.setPartitionNumber(partitionNumber);
	this.partitions.add(partitionNumber, compactionMemory);
	this.partitions.get(partitionNumber).overflowSegments = partition.overflowSegments;
	this.partitions.get(partitionNumber).numOverflowSegments = partition.numOverflowSegments;
	this.partitions.get(partitionNumber).nextOverflowBucket = partition.nextOverflowBucket;
	this.partitions.get(partitionNumber).setIsCompacted(true);
	//this.partitions.get(partitionNumber).pushDownPages();
	this.compactionMemory = partition;
	this.compactionMemory.resetRecordCounter();
	this.compactionMemory.setPartitionNumber(-1);
	this.compactionMemory.overflowSegments = null;
	this.compactionMemory.numOverflowSegments = 0;
	this.compactionMemory.nextOverflowBucket = 0;
	// try to allocate maximum segment count
	this.compactionMemory.clearAllMemory(this.availableMemory);
	int maxSegmentNumber = this.getMaxPartition();
	this.compactionMemory.allocateSegments(maxSegmentNumber);
	this.compactionMemory.resetRWViews();
	this.compactionMemory.pushDownPages();
}
 
Example 14
Source File: LongHashPartition.java    From flink with Apache License 2.0
/**
 * Update the address in array for given key.
 */
private void updateIndex(
		long key,
		int hashCode,
		long address,
		int size,
		MemorySegment dataSegment,
		int currentPositionInSegment) throws IOException {
	assert (numKeys <= numBuckets / 2);
	int bucketId = findBucket(hashCode);

	// each bucket occupies 16 bytes (long key + long pointer to data address)
	int bucketOffset = bucketId * SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
	MemorySegment segment = buckets[bucketOffset >>> segmentSizeBits];
	int segOffset = bucketOffset & segmentSizeMask;
	long currAddress;

	while (true) {
		currAddress = segment.getLong(segOffset + 8);
		if (segment.getLong(segOffset) != key && currAddress != INVALID_ADDRESS) {
			// hash conflicts, the bucket is occupied by another key

			// TODO test Conflict resolution:
			// now:    +1 +1 +1... cache friendly but more conflict, so we set factor to 0.5
			// other1: +1 +2 +3... less conflict, factor can be 0.75
			// other2: Secondary hashCode... less and less conflict, but need compute hash again
			bucketId = (bucketId + 1) & numBucketsMask;
			if (segOffset + SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES < segmentSize) {
				// if the new bucket is still in the current segment, we only need to update the offset
				// within this segment
				segOffset += SPARSE_BUCKET_ELEMENT_SIZE_IN_BYTES;
			} else {
				// otherwise, we should re-calculate segment and offset
				bucketOffset = bucketId * 16;
				segment = buckets[bucketOffset >>> segmentSizeBits];
				segOffset = bucketOffset & segmentSizeMask;
			}
		} else {
			break;
		}
	}
	if (currAddress == INVALID_ADDRESS) {
		// this is the first value for this key, put the address in array.
		segment.putLong(segOffset, key);
		segment.putLong(segOffset + 8, address);
		numKeys += 1;
		// dataSegment may be null if we only have to rehash bucket area
		if (dataSegment != null) {
			dataSegment.putLong(currentPositionInSegment, toAddrAndLen(INVALID_ADDRESS, size));
		}
		if (numKeys * 2 > numBuckets) {
			resize();
		}
	} else {
		// there are some values for this key, put the address in the front of them.
		dataSegment.putLong(currentPositionInSegment, toAddrAndLen(currAddress, size));
		segment.putLong(segOffset + 8, address);
	}
}
 
Example 15
Source File: CompactingHashTable.java    From flink with Apache License 2.0
public T getMatchFor(PT probeSideRecord) {
	if (closed) {
		return null;
	}
	final int searchHashCode = MathUtils.jenkinsHash(this.probeTypeComparator.hash(probeSideRecord));

	final int posHashCode = searchHashCode % numBuckets;

	// get the bucket for the given hash code
	MemorySegment bucket = buckets[posHashCode >> bucketsPerSegmentBits];
	int bucketInSegmentOffset = (posHashCode & bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;

	// get the basic characteristics of the bucket
	final int partitionNumber = bucket.get(bucketInSegmentOffset + HEADER_PARTITION_OFFSET);
	final InMemoryPartition<T> p = partitions.get(partitionNumber);
	final MemorySegment[] overflowSegments = p.overflowSegments;

	this.pairComparator.setReference(probeSideRecord);

	int countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
	int numInSegment = 0;
	int posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;

	// loop over all segments that are involved in the bucket (original bucket plus overflow buckets)
	while (true) {

		while (numInSegment < countInSegment) {

			final int thisCode = bucket.getInt(posInSegment);
			posInSegment += HASH_CODE_LEN;

			// check if the hash code matches
			if (thisCode == searchHashCode) {
				// get the pointer to the pair
				final int pointerOffset = bucketInSegmentOffset + BUCKET_POINTER_START_OFFSET + (numInSegment * POINTER_LEN);
				final long pointer = bucket.getLong(pointerOffset);
				numInSegment++;

				// deserialize the key to check whether it is really equal, or whether we had only a hash collision
				try {
					T result = p.readRecordAt(pointer);

					if (this.pairComparator.equalToReference(result)) {
						this.partition = p;
						this.bucket = bucket;
						this.pointerOffsetInBucket = pointerOffset;
						return result;
					}
				}
				catch (IOException e) {
					throw new RuntimeException("Error deserializing record from the hashtable: " + e.getMessage(), e);
				}
			}
			else {
				numInSegment++;
			}
		}

		// this segment is done. check if there is another chained bucket
		final long forwardPointer = bucket.getLong(bucketInSegmentOffset + HEADER_FORWARD_OFFSET);
		if (forwardPointer == BUCKET_FORWARD_POINTER_NOT_SET) {
			return null;
		}

		final int overflowSegNum = (int) (forwardPointer >>> 32);
		bucket = overflowSegments[overflowSegNum];
		bucketInSegmentOffset = (int) forwardPointer;
		countInSegment = bucket.getInt(bucketInSegmentOffset + HEADER_COUNT_OFFSET);
		posInSegment = bucketInSegmentOffset + BUCKET_HEADER_LENGTH;
		numInSegment = 0;
	}
}
 
Example 16
Source File: BytesHashMap.java    From flink with Apache License 2.0
/**
 * @throws EOFException if the map can't allocate much more memory.
 */
private void growAndRehash() throws EOFException {
	// allocate the new data structures
	int required = 2 * bucketSegments.size();
	if (required * (long) numBucketsPerSegment > Integer.MAX_VALUE) {
		LOG.warn("We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)");
		throw new EOFException();
	}
	List<MemorySegment> newBucketSegments = new ArrayList<>(required);

	try {
		int numAllocatedSegments = required - memoryPool.freePages();
		if (numAllocatedSegments > 0) {
			throw new MemoryAllocationException();
		}
		int needNumFromFreeSegments = required - newBucketSegments.size();
		for (int end = needNumFromFreeSegments; end > 0; end--) {
			newBucketSegments.add(memoryPool.nextSegment());
		}

		setBucketVariables(newBucketSegments);
	} catch (MemoryAllocationException e) {
		LOG.warn("BytesHashMap can't allocate {} pages, and now used {} pages",
				required, reservedNumBuffers);
		throw new EOFException();
	}
	long reHashStartTime = System.currentTimeMillis();
	resetBucketSegments(newBucketSegments);
	// Re-mask (we don't recompute the hashcode because we stored all 32 bits of it)
	for (MemorySegment memorySegment : bucketSegments) {
		for (int j = 0; j < numBucketsPerSegment; j++) {
			final long recordPointer = memorySegment.getLong(j * BUCKET_SIZE);
			if (recordPointer != END_OF_LIST) {
				final int hashCode1 = memorySegment.getInt(j * BUCKET_SIZE + ELEMENT_POINT_LENGTH);
				int newPos = hashCode1 & numBucketsMask;
				int bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
				int bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
				int step = STEP_INCREMENT;
				long hashCode2 = 0;
				while (newBucketSegments.get(bucketSegmentIndex).getLong(bucketOffset) != END_OF_LIST) {
					if (step == 1) {
						hashCode2 = calcSecondHashCode(hashCode1);
					}
					newPos = (int) ((hashCode1 + step * hashCode2) & numBucketsMask);
					// which segment contains the bucket
					bucketSegmentIndex = newPos >>> numBucketsPerSegmentBits;
					// offset of the bucket in the segment
					bucketOffset = (newPos & numBucketsPerSegmentMask) << BUCKET_SIZE_BITS;
					step += STEP_INCREMENT;
				}
				newBucketSegments.get(bucketSegmentIndex).putLong(bucketOffset, recordPointer);
				newBucketSegments.get(bucketSegmentIndex).putInt(bucketOffset + ELEMENT_POINT_LENGTH, hashCode1);
			}
		}
	}
	LOG.info("The rehash take {} ms for {} segments", (System.currentTimeMillis() - reHashStartTime), required);
	this.memoryPool.returnAll(this.bucketSegments);
	this.bucketSegments = newBucketSegments;
}
 
Example 17
Source File: SkipListUtils.java    From flink with Apache License 2.0
/**
 * Returns the value pointer.
 *
 * @param memorySegment memory segment for key space.
 * @param offset offset of key space in the memory segment.
 */
public static long getValuePointer(MemorySegment memorySegment, int offset) {
	return memorySegment.getLong(offset + VALUE_POINTER_OFFSET);
}
 
Example 18
Source File: SkipListUtils.java    From flink with Apache License 2.0
/**
 * Returns the pointer to the key space.
 *
 * @param memorySegment memory segment for value space.
 * @param offset offset of value space in memory segment.
 */
public static long getKeyPointer(MemorySegment memorySegment, int offset) {
	return memorySegment.getLong(offset + KEY_POINTER_OFFSET);
}