Java Code Examples for org.apache.flink.util.MathUtils#roundDownToPowerOf2()

The following examples show how to use org.apache.flink.util.MathUtils#roundDownToPowerOf2(). Each example is taken from an open source project; the source file and originating project are noted above it.
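MathUtils.roundDownToPowerOf2(value) returns the largest power of two that is less than or equal to the given value, so 1000 becomes 512 and values that are already powers of two are returned unchanged. All the examples below use it for the same reason: with a power-of-two bucket count, the expensive hash % numBuckets can be replaced by the cheap hash & (numBuckets - 1). A minimal, self-contained sketch (the segment count is made up for illustration):

import org.apache.flink.util.MathUtils;

public class RoundDownToPowerOf2Demo {
	public static void main(String[] args) {
		int availableSegments = 1000;                                       // illustrative value
		int numBuckets = MathUtils.roundDownToPowerOf2(availableSegments);  // 512
		int bucketMask = numBuckets - 1;                                    // 0x1FF
		// With a power-of-two bucket count, "hash % numBuckets" becomes "hash & bucketMask".
		System.out.println(numBuckets + " / mask = 0x" + Integer.toHexString(bucketMask));
	}
}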
Example 1
Source File: LongHashPartition.java    From flink with Apache License 2.0
/**
 * Entrance 2: build table from spilled partition when the partition fits entirely into main
 * memory.
 */
LongHashPartition(
		LongHybridHashTable longTable,
		int partitionNum,
		BinaryRowSerializer buildSideSerializer,
		int bucketNumSegs,
		int recursionLevel,
		List<MemorySegment> buffers,
		int lastSegmentLimit) {
	this(longTable, buildSideSerializer, listToArray(buffers));
	this.partitionNum = partitionNum;
	this.recursionLevel = recursionLevel;

	int numBuckets = MathUtils.roundDownToPowerOf2(bucketNumSegs * segmentSize / 16);
	MemorySegment[] buckets = new MemorySegment[bucketNumSegs];
	for (int i = 0; i < bucketNumSegs; i++) {
		buckets[i] = longTable.nextSegment();
	}
	setNewBuckets(buckets, numBuckets);
	this.finalBufferLimit = lastSegmentLimit;
}
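The bucket count above is derived from the memory that the bucket segments provide: bucketNumSegs * segmentSize bytes divided by 16 bytes per bucket slot (presumably an 8 byte long key plus an 8 byte pointer), rounded down to a power of two so the partition can index buckets with a mask rather than a modulo. A worked fragment of that arithmetic, with both numbers assumed for illustration:

	int segmentSize = 32 * 1024;                                  // assumed page size
	int bucketNumSegs = 100;                                      // assumed segment count
	int rawBuckets = bucketNumSegs * segmentSize / 16;            // 204800 slots of 16 bytes
	int numBuckets = MathUtils.roundDownToPowerOf2(rawBuckets);   // 131072 = 2^17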
 
Example 2
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private BinaryHashBucketArea(BinaryHashTable table, double estimatedRowCount, int maxSegs, double loadFactor) {
	this.table = table;
	this.estimatedRowCount = estimatedRowCount;
	this.loadFactor = loadFactor;
	this.size = 0;

	int minNumBuckets = (int) Math.ceil((estimatedRowCount / loadFactor / NUM_ENTRIES_PER_BUCKET));
	int bucketNumSegs = MathUtils.roundDownToPowerOf2(Math.max(1, Math.min(maxSegs, (minNumBuckets >>> table.bucketsPerSegmentBits) +
			((minNumBuckets & table.bucketsPerSegmentMask) == 0 ? 0 : 1))));
	int numBuckets = bucketNumSegs << table.bucketsPerSegmentBits;

	int threshold = (int) (numBuckets * NUM_ENTRIES_PER_BUCKET * loadFactor);

	MemorySegment[] buckets = new MemorySegment[bucketNumSegs];
	table.ensureNumBuffersReturned(bucketNumSegs);

	// go over all segments that are part of the table
	for (int i = 0; i < bucketNumSegs; i++) {
		final MemorySegment seg = table.getNextBuffer();
		initMemorySegment(seg);
		buckets[i] = seg;
	}

	setNewBuckets(buckets, numBuckets, threshold);
}
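The shift/mask expression above is a ceiling division: it converts the minimum number of buckets into the number of segments needed to hold them, clamps that to maxSegs, and rounds the result down to a power of two. The same ceiling division with made-up numbers (bucketsPerSegmentBits is an assumed value here):

	int minNumBuckets = 1000;                                     // assumed
	int bucketsPerSegmentBits = 7;                                // 128 buckets per segment, assumed
	int bucketsPerSegmentMask = (1 << bucketsPerSegmentBits) - 1;
	int neededSegs = (minNumBuckets >>> bucketsPerSegmentBits)
			+ ((minNumBuckets & bucketsPerSegmentMask) == 0 ? 0 : 1);                         // ceil(1000 / 128) = 8
	int bucketNumSegs = MathUtils.roundDownToPowerOf2(Math.max(1, Math.min(64, neededSegs))); // 8, with maxSegs = 64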
 
Example 3
Source File: LongHashPartition.java    From flink with Apache License 2.0
/**
 * Entrance 2: build table from spilled partition when the partition fits entirely into main
 * memory.
 */
LongHashPartition(
		LongHybridHashTable longTable,
		int partitionNum,
		BinaryRowDataSerializer buildSideSerializer,
		int bucketNumSegs,
		int recursionLevel,
		List<MemorySegment> buffers,
		int lastSegmentLimit) {
	this(longTable, buildSideSerializer, listToArray(buffers));
	this.partitionNum = partitionNum;
	this.recursionLevel = recursionLevel;

	int numBuckets = MathUtils.roundDownToPowerOf2(bucketNumSegs * segmentSize / 16);
	MemorySegment[] buckets = new MemorySegment[bucketNumSegs];
	for (int i = 0; i < bucketNumSegs; i++) {
		buckets[i] = longTable.nextSegment();
	}
	setNewBuckets(buckets, numBuckets);
	this.finalBufferLimit = lastSegmentLimit;
}
 
Example 4
Source File: InPlaceMutableHashTable.java    From Flink-CEPplus with Apache License 2.0
private int calcInitialNumBucketSegments() {
	int recordLength = buildSideSerializer.getLength();
	double fraction; // fraction of memory to use for the buckets
	if (recordLength == -1) {
		// We don't know the record length, so we start with a small number of buckets, and do resizes if
		// necessary.
		// It seems that resizing is quite efficient, so we can err here on the too few bucket segments side.
		// Even with small records, we lose only ~15% speed.
		fraction = 0.1;
	} else {
		// We know the record length, so we can find a good value for the number of buckets right away, and
		// won't need any resizes later. (enableResize is false in this case, so no resizing will happen.)
		// Reasoning behind the formula:
		// We are aiming for one bucket per record, and one bucket contains one 8 byte pointer. The total
		// memory overhead of an element will be approximately 8+8 bytes, as the record in the record area
		// is preceded by a pointer (for the linked list).
		fraction = 8.0 / (16 + recordLength);
	}

	// We make the number of buckets a power of 2 so that taking modulo is efficient.
	int ret = Math.max(1, MathUtils.roundDownToPowerOf2((int)(numAllMemorySegments * fraction)));

	// We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)
	if ((long)ret * numBucketsPerSegment > Integer.MAX_VALUE) {
		ret = MathUtils.roundDownToPowerOf2(Integer.MAX_VALUE / numBucketsPerSegment);
	}
	return ret;
}
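For a known record length, the fraction follows directly from the accounting in the comment: each element costs roughly 8 bytes of bucket space (one pointer) out of 16 + recordLength bytes in total (bucket pointer + linked-list pointer + record). A worked fragment with assumed numbers:

	int recordLength = 64;                                        // assumed fixed record length
	double fraction = 8.0 / (16 + recordLength);                  // 0.1
	int numAllMemorySegments = 2048;                              // assumed
	int ret = Math.max(1, MathUtils.roundDownToPowerOf2((int) (numAllMemorySegments * fraction))); // 128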
 
Example 5
Source File: BytesHashMap.java    From flink with Apache License 2.0
private int calcNumBucketSegments(LogicalType[] keyTypes, LogicalType[] valueTypes) {
	int calcRecordLength = reusedValue.getFixedLengthPartSize() + getVariableLength(valueTypes) +
			reusedKey.getFixedLengthPartSize() + getVariableLength(keyTypes);
	// We aim for a 200% utilization of the bucket table.
	double averageBucketSize = BUCKET_SIZE / LOAD_FACTOR;
	double fraction = averageBucketSize / (averageBucketSize + calcRecordLength + RECORD_EXTRA_LENGTH);
	// We make the number of buckets a power of 2 so that taking modulo is efficient.
	// The count is rounded down (roundDownToPowerOf2) so that the bucket area stays within its share of the reserved buffers.
	int ret = Math.max(1, MathUtils.roundDownToPowerOf2((int) (reservedNumBuffers * fraction)));
	// We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)
	if ((long) ret * numBucketsPerSegment > Integer.MAX_VALUE) {
		ret = MathUtils.roundDownToPowerOf2(Integer.MAX_VALUE / numBucketsPerSegment);
	}
	return ret;
}
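Here the fraction is the share of each stored entry's footprint taken up by its bucket, where a bucket is assumed to be only partially filled (BUCKET_SIZE / LOAD_FACTOR). A sketch with assumed constants (the actual BUCKET_SIZE, LOAD_FACTOR and RECORD_EXTRA_LENGTH in BytesHashMap may differ):

	double bucketSize = 16, loadFactor = 0.75, recordExtraLength = 8;    // assumed constants
	int calcRecordLength = 120;                                          // assumed key + value length
	double averageBucketSize = bucketSize / loadFactor;                  // ~21.3
	double fraction = averageBucketSize / (averageBucketSize + calcRecordLength + recordExtraLength); // ~0.14
	int reservedNumBuffers = 512;                                        // assumed
	int ret = Math.max(1, MathUtils.roundDownToPowerOf2((int) (reservedNumBuffers * fraction)));      // 64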
 
Example 6
Source File: LongHashPartition.java    From flink with Apache License 2.0
private void resize() throws IOException {
	MemorySegment[] oldBuckets = this.buckets;
	int oldNumBuckets = numBuckets;
	int newNumSegs = oldBuckets.length * 2;
	int newNumBuckets = MathUtils.roundDownToPowerOf2(newNumSegs * segmentSize / 16);

	// request new buckets.
	MemorySegment[] newBuckets = new MemorySegment[newNumSegs];
	for (int i = 0; i < newNumSegs; i++) {
		MemorySegment seg = longTable.getNextBuffer();
		if (seg == null) {
			final int spilledPart = longTable.spillPartition();
			if (spilledPart == partitionNum) {
				// this bucket is no longer in-memory
				// free new segments.
				longTable.returnAll(Arrays.asList(newBuckets));
				return;
			}
			seg = longTable.getNextBuffer();
			if (seg == null) {
				throw new RuntimeException(
						"Bug in HybridHashJoin: No memory became available after spilling a partition.");
			}
		}
		newBuckets[i] = seg;
	}

	setNewBuckets(newBuckets, newNumBuckets);
	reHash(oldBuckets, oldNumBuckets);
}
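Since the old segment count is doubled and the old bucket count was already a power of two, the new bucket count is simply the next power of two, i.e. one more hash bit is used for the bucket index after reHash. A small fragment showing that relationship (segment size assumed):

	int segmentSize = 32 * 1024;                                             // assumed page size
	int oldNumBuckets = MathUtils.roundDownToPowerOf2(4 * segmentSize / 16); // 8192 = 2^13
	int newNumBuckets = MathUtils.roundDownToPowerOf2(8 * segmentSize / 16); // 16384 = 2^14
	int oldMask = oldNumBuckets - 1;                                         // 0x1FFF
	int newMask = newNumBuckets - 1;                                         // 0x3FFF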
 
Example 7
Source File: InPlaceMutableHashTable.java    From flink with Apache License 2.0
private int calcInitialNumBucketSegments() {
	int recordLength = buildSideSerializer.getLength();
	double fraction; // fraction of memory to use for the buckets
	if (recordLength == -1) {
		// We don't know the record length, so we start with a small number of buckets, and do resizes if
		// necessary.
		// It seems that resizing is quite efficient, so we can err here on the too few bucket segments side.
		// Even with small records, we lose only ~15% speed.
		fraction = 0.1;
	} else {
		// We know the record length, so we can find a good value for the number of buckets right away, and
		// won't need any resizes later. (enableResize is false in this case, so no resizing will happen.)
		// Reasoning behind the formula:
		// We are aiming for one bucket per record, and one bucket contains one 8 byte pointer. The total
		// memory overhead of an element will be approximately 8+8 bytes, as the record in the record area
		// is preceded by a pointer (for the linked list).
		fraction = 8.0 / (16 + recordLength);
	}

	// We make the number of buckets a power of 2 so that taking modulo is efficient.
	int ret = Math.max(1, MathUtils.roundDownToPowerOf2((int)(numAllMemorySegments * fraction)));

	// We can't handle more than Integer.MAX_VALUE buckets (eg. because hash functions return int)
	if ((long)ret * numBucketsPerSegment > Integer.MAX_VALUE) {
		ret = MathUtils.roundDownToPowerOf2(Integer.MAX_VALUE / numBucketsPerSegment);
	}
	return ret;
}
 
Example 8
Source File: BinaryHashBucketArea.java    From flink with Apache License 2.0
private BinaryHashBucketArea(
		BinaryHashTable table,
		double estimatedRowCount,
		int maxSegs,
		double loadFactor,
		boolean spillingAllowed) {
	this.table = table;
	this.estimatedRowCount = estimatedRowCount;
	this.loadFactor = loadFactor;
	this.spillingAllowed = spillingAllowed;
	this.size = 0;

	int minNumBuckets = (int) Math.ceil((estimatedRowCount / loadFactor / NUM_ENTRIES_PER_BUCKET));
	int bucketNumSegs = MathUtils.roundDownToPowerOf2(Math.max(1, Math.min(maxSegs, (minNumBuckets >>> table.bucketsPerSegmentBits) +
			((minNumBuckets & table.bucketsPerSegmentMask) == 0 ? 0 : 1))));
	int numBuckets = bucketNumSegs << table.bucketsPerSegmentBits;

	int threshold = (int) (numBuckets * NUM_ENTRIES_PER_BUCKET * loadFactor);

	MemorySegment[] buckets = new MemorySegment[bucketNumSegs];
	table.ensureNumBuffersReturned(bucketNumSegs);

	// go over all segments that are part of the table
	for (int i = 0; i < bucketNumSegs; i++) {
		final MemorySegment seg = table.getNextBuffer();
		initMemorySegment(seg);
		buckets[i] = seg;
	}

	setNewBuckets(buckets, numBuckets, threshold);
}
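The threshold handed to setNewBuckets is the element count at which this area is considered full, i.e. the total bucket slots scaled by the load factor. A quick arithmetic sketch with assumed constants (the value of NUM_ENTRIES_PER_BUCKET is illustrative here):

	int numBuckets = 1024;                                        // bucketNumSegs << bucketsPerSegmentBits, assumed
	int numEntriesPerBucket = 15;                                 // assumed
	double loadFactor = 0.75;
	int threshold = (int) (numBuckets * numEntriesPerBucket * loadFactor);   // 11520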
 
Example 9
Source File: BytesHashMap.java    From flink with Apache License 2.0
public BytesHashMap(
		final Object owner,
		MemoryManager memoryManager,
		long memorySize,
		LogicalType[] keyTypes,
		LogicalType[] valueTypes,
		boolean inferBucketMemory) {
	this.segmentSize = memoryManager.getPageSize();
	this.reservedNumBuffers = (int) (memorySize / segmentSize);
	this.memoryManager = memoryManager;
	try {
		this.freeMemorySegments = memoryManager.allocatePages(owner, reservedNumBuffers);
	} catch (MemoryAllocationException e) {
		throw new IllegalArgumentException("BytesHashMap can't allocate " + reservedNumBuffers + " pages", e);
	}
	this.numBucketsPerSegment = segmentSize / BUCKET_SIZE;
	this.numBucketsPerSegmentBits = MathUtils.log2strict(this.numBucketsPerSegment);
	this.numBucketsPerSegmentMask = (1 << this.numBucketsPerSegmentBits) - 1;
	this.lastBucketPosition = (numBucketsPerSegment - 1) * BUCKET_SIZE;

	checkArgument(keyTypes.length > 0);
	this.keySerializer = new BinaryRowSerializer(keyTypes.length);
	this.reusedKey = this.keySerializer.createInstance();

	if (valueTypes.length == 0) {
		this.valueSerializer = new BinaryRowSerializer(0);
		this.hashSetMode = true;
		this.reusedValue = new BinaryRow(0);
		this.reusedValue.pointTo(MemorySegmentFactory.wrap(new byte[8]), 0, 8);
		LOG.info("BytesHashMap with hashSetMode = true.");
	} else {
		this.valueSerializer = new BinaryRowSerializer(valueTypes.length);
		this.hashSetMode = false;
		this.reusedValue = this.valueSerializer.createInstance();
	}

	this.reuseLookInfo = new LookupInfo();

	this.recordArea = new RecordArea();

	int initBucketSegmentNum;
	if (inferBucketMemory) {
		initBucketSegmentNum = calcNumBucketSegments(keyTypes, valueTypes);
	} else {
		checkArgument(memorySize > INIT_BUCKET_MEMORY_IN_BYTES, "The minBucketMemorySize is not valid!");
		initBucketSegmentNum = MathUtils.roundDownToPowerOf2((int) (INIT_BUCKET_MEMORY_IN_BYTES / segmentSize));
	}

	// allocate and initialize MemorySegments for bucket area
	initBucketSegments(initBucketSegmentNum);

	LOG.info("BytesHashMap with initial memory segments {}, {} in bytes, init allocating {} for bucket area.",
			reservedNumBuffers, reservedNumBuffers * segmentSize, initBucketSegmentNum);
}
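The constructor pre-computes the per-segment bucket geometry so that later lookups can locate a bucket with shifts and masks instead of divisions. With an assumed 32 KB page and 16 byte buckets the derived values look like this (both constants are illustrative; the real BUCKET_SIZE may differ):

	int segmentSize = 32 * 1024;                                               // assumed page size
	int bucketSize = 16;                                                       // assumed BUCKET_SIZE
	int numBucketsPerSegment = segmentSize / bucketSize;                       // 2048
	int numBucketsPerSegmentBits = MathUtils.log2strict(numBucketsPerSegment); // 11
	int numBucketsPerSegmentMask = (1 << numBucketsPerSegmentBits) - 1;        // 0x7FF
	int lastBucketPosition = (numBucketsPerSegment - 1) * bucketSize;          // 32752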
 
Example 10
Source File: LongHashPartition.java    From flink with Apache License 2.0
private static int getBucketBuffersByRowCount(long rowCount, int maxSegs, int segmentSize) {
	int minNumBuckets = (int) Math.ceil((rowCount / 0.5));
	Preconditions.checkArgument(segmentSize % 16 == 0);
	return MathUtils.roundDownToPowerOf2((int) Math.max(1,
			Math.min(maxSegs, Math.ceil(((double) minNumBuckets) * 16 / segmentSize))));
}
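This helper plans for roughly two bucket slots per row (rowCount / 0.5), 16 bytes each, converts that byte budget into segments, clamps it to maxSegs and rounds down to a power of two. A worked fragment with made-up inputs:

	long rowCount = 1_000_000L;                                   // assumed build-side row count
	int segmentSize = 32 * 1024;                                  // assumed, must be a multiple of 16
	int maxSegs = 256;                                            // assumed
	int minNumBuckets = (int) Math.ceil(rowCount / 0.5);                                        // 2000000
	int neededSegs = (int) Math.ceil(((double) minNumBuckets) * 16 / segmentSize);              // 977
	int bucketSegs = MathUtils.roundDownToPowerOf2(Math.max(1, Math.min(maxSegs, neededSegs))); // 256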
 
Example 11
Source File: BytesHashMap.java    From flink with Apache License 2.0
public BytesHashMap(
		final Object owner,
		MemoryManager memoryManager,
		long memorySize,
		LogicalType[] keyTypes,
		LogicalType[] valueTypes,
		boolean inferBucketMemory) {
	this.segmentSize = memoryManager.getPageSize();
	this.reservedNumBuffers = (int) (memorySize / segmentSize);
	this.memoryPool = new LazyMemorySegmentPool(owner, memoryManager, reservedNumBuffers);
	this.numBucketsPerSegment = segmentSize / BUCKET_SIZE;
	this.numBucketsPerSegmentBits = MathUtils.log2strict(this.numBucketsPerSegment);
	this.numBucketsPerSegmentMask = (1 << this.numBucketsPerSegmentBits) - 1;
	this.lastBucketPosition = (numBucketsPerSegment - 1) * BUCKET_SIZE;

	checkArgument(keyTypes.length > 0);
	this.keySerializer = new BinaryRowDataSerializer(keyTypes.length);
	this.reusedKey = this.keySerializer.createInstance();

	if (valueTypes.length == 0) {
		this.valueSerializer = new BinaryRowDataSerializer(0);
		this.hashSetMode = true;
		this.reusedValue = new BinaryRowData(0);
		this.reusedValue.pointTo(MemorySegmentFactory.wrap(new byte[8]), 0, 8);
		LOG.info("BytesHashMap with hashSetMode = true.");
	} else {
		this.valueSerializer = new BinaryRowDataSerializer(valueTypes.length);
		this.hashSetMode = false;
		this.reusedValue = this.valueSerializer.createInstance();
	}

	this.reuseLookInfo = new LookupInfo();

	this.recordArea = new RecordArea();

	int initBucketSegmentNum;
	if (inferBucketMemory) {
		initBucketSegmentNum = calcNumBucketSegments(keyTypes, valueTypes);
	} else {
		checkArgument(memorySize > INIT_BUCKET_MEMORY_IN_BYTES, "The minBucketMemorySize is not valid!");
		initBucketSegmentNum = MathUtils.roundDownToPowerOf2((int) (INIT_BUCKET_MEMORY_IN_BYTES / segmentSize));
	}

	// allocate and initialize MemorySegments for bucket area
	initBucketSegments(initBucketSegmentNum);

	LOG.info("BytesHashMap with initial memory segments {}, {} in bytes, init allocating {} for bucket area.",
			reservedNumBuffers, reservedNumBuffers * segmentSize, initBucketSegmentNum);
}
 