com.indeed.util.mmap.MMapBuffer Java Examples

The following examples show how to use com.indeed.util.mmap.MMapBuffer. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: MMapGOV4Function.java    From mph-table with Apache License 2.0 6 votes vote down vote up
/**
 * Loads a serialized {@code GOV4Function} and backs its signature data with a memory-mapped file.
 *
 * <p>The function object is deserialized from {@code GOV4Function.bin} (resolved against
 * {@code path} via {@code Files.buildPath}). Its private {@code width} field is read reflectively,
 * {@code signatures.bin} is mapped read-only as little-endian, and the function's private
 * {@code data} field is replaced with an {@code MMapLongBigList} over that mapping.
 *
 * <p>The mapped buffer is handed to the returned {@code MMapGOV4Function}. If anything fails
 * after the file has been mapped, the buffer is closed before the exception propagates so that
 * the mapping is not leaked.
 *
 * @param path base path under which {@code GOV4Function.bin} and {@code signatures.bin} are resolved
 * @param <T> key type of the function
 * @return the deserialized function wrapped together with the buffer backing its signatures
 * @throws IOException if either file cannot be read or mapped
 * @throws NoSuchFieldException if the function class has no {@code width} or {@code data} field
 * @throws IllegalAccessException if reflective access to those fields is denied
 * @throws ClassNotFoundException if the serialized class cannot be resolved
 */
public static <T> MMapGOV4Function<T> readFrom(final String path) throws IOException, NoSuchFieldException,
        IllegalAccessException, ClassNotFoundException {
    final GOV4Function<T> gov4Function;
    try (final InputStream file = new FileInputStream(Files.buildPath(path, "GOV4Function.bin"));
         final ObjectInput input = new ObjectInputStream(file)) {
        gov4Function = (GOV4Function<T>)(input.readObject());
    }
    final Field widthField = gov4Function.getClass().getDeclaredField("width");
    widthField.setAccessible(true);
    final int width = (Integer) widthField.get(gov4Function);
    final MMapBuffer buffer = new MMapBuffer(new File(Files.buildPath(path, "signatures.bin")),
                                             FileChannel.MapMode.READ_ONLY, ByteOrder.LITTLE_ENDIAN);
    try {
        final MMapLongBigList signaturesData =
                new MMapLongBigList(buffer.memory().longArray(0, buffer.memory().length() / 8), width);
        final Field dataField = gov4Function.getClass().getDeclaredField("data");
        dataField.setAccessible(true);
        dataField.set(gov4Function, signaturesData);
        return new MMapGOV4Function<>(gov4Function, buffer, width);
    } catch (final Throwable t) {
        // Ownership of the buffer was never transferred, so release the mapping here.
        try {
            buffer.close();
        } catch (final IOException closeFailure) {
            t.addSuppressed(closeFailure);
        }
        throw t;
    }
}
 
Example #2
Source File: TableReader.java    From mph-table with Apache License 2.0 6 votes vote down vote up
public TableReader(@Nonnull final TableMeta<K, V> meta,
                   @Nonnull final MMapBuffer data,
                   @Nullable final MMapBuffer offsets) {
    this.meta = meta;
    this.data = data;
    this.dataMemory = data.memory();
    this.offsets = offsets;
    if (offsets != null) {
        if (TableConfig.OffsetStorage.SELECTED.equals(meta.getConfig().getOffsetStorage())) {
            select = new HintedBsearchSelect(new Rank9(new MMapBitVector(offsets)));
            memory = null;
        } else {
            select = null;
            memory = offsets.memory();
        }
    } else {
        select = null;
        memory = null;
    }
    minKey = meta.getMinKey();
    maxKey = meta.getMaxKey();
}
 
Example #3
Source File: TableReader.java    From mph-table with Apache License 2.0 6 votes vote down vote up
/**
 * General interface to opening a TableReader.  Only the metaPath
 * is required, variants below are conveniences with default
 * parameters.
 *
 * <p>The data file is always memory-mapped first. If no offsets file is needed and the data is
 * smaller than the heap limit (and small enough to fit in a single {@code byte[]}), it is copied
 * to the heap and the mapping is closed. Any mapping opened here is closed again if opening
 * fails part-way, so a failed call does not leak mappings.
 *
 * @param metaPath          path to the table's meta-data, either the exact file or the containing directory
 * @param offsetsPath       path to the raw offsets if different from the meta-data setting
 * @param dataPath          path to the raw serialized data if different from the meta-data setting
 * @param maxDataHeapUsage  if specified and positive, raw data less than this size will be
 *                            stored directly in the heap instead of mmaped
 * @param <K> key type
 * @param <V> value type
 * @return a new TableReader on the data
 * @throws IOException if unable to open any of the files, or deserialize the metadata
 */
public static <K, V> TableReader<K, V> open(@Nonnull final File metaPath,
                                            @Nullable final File offsetsPath,
                                            @Nullable final File dataPath,
                                            @Nullable final Long maxDataHeapUsage) throws IOException {
    final TableMeta<K, V> meta = TableMeta.load(metaPath, offsetsPath, dataPath);
    final MMapBuffer data =
        new MMapBuffer(meta.getDataPath(), FileChannel.MapMode.READ_ONLY, ByteOrder.nativeOrder());
    MMapBuffer offsets = null;
    final byte[] rawData;
    try {
        final boolean needsOffsetsFile =
            TableConfig.OffsetStorage.INDEXED.equals(meta.getConfig().getOffsetStorage()) ||
            (TableConfig.OffsetStorage.SELECTED.equals(meta.getConfig().getOffsetStorage()) && meta.getSelectOffsets() == null);
        if (needsOffsetsFile) {
            offsets = new MMapBuffer(meta.getOffsetsPath(), FileChannel.MapMode.READ_ONLY, ByteOrder.nativeOrder());
        }
        final long maxDataHeap =
            maxDataHeapUsage != null ? maxDataHeapUsage : meta.getConfig().getMaxDataHeapUsage();
        final long dataLength = data.memory().length();
        // A byte[] cannot hold more than Integer.MAX_VALUE bytes; larger data stays mmaped
        // rather than being truncated by the int cast below.
        if (offsets != null || dataLength >= maxDataHeap || dataLength > Integer.MAX_VALUE) {
            return new TableReader<>(meta, data, offsets);
        }
        rawData = new byte[(int) dataLength];
        data.memory().getBytes(0, rawData);
    } catch (final Throwable t) {
        // Nothing took ownership of the mappings, so release whichever ones were opened.
        for (final MMapBuffer opened : new MMapBuffer[] {data, offsets}) {
            if (opened != null) {
                try {
                    opened.close();
                } catch (final IOException closeFailure) {
                    t.addSuppressed(closeFailure);
                }
            }
        }
        throw t;
    }
    // The heap copy is complete; the mapping is no longer needed.
    data.close();
    return new TableReader<>(meta, rawData);
}
 
Example #4
Source File: TableWriter.java    From mph-table with Apache License 2.0 6 votes vote down vote up
/**
 * Splits the entries into shards and rewrites them into the data file under {@code outputDir},
 * using two temporary memory-mapped files as scratch space.
 *
 * <p>The hashes temp file is always deleted before returning. The sizes temp file is returned
 * to the caller on success and deleted if this method fails, so a failed call leaves no temp
 * files behind.
 *
 * @param outputDir directory that receives the shards and the final data file
 * @param meta      table metadata; supplies the entry count used to size the temp files
 * @param entries   key/value pairs to write
 * @param dataSize  passed through to {@code splitToShards}
 * @param <K> key type
 * @param <V> value type
 * @return the temp file holding 4 bytes per entry of size information
 * @throws IOException if a temp file cannot be created or mapped, or writing fails
 */
private static <K, V> File writeToHashOffsets(
        final File outputDir,
        final TableMeta<K, V> meta,
        final Iterable<Pair<K, V>> entries,
        final long dataSize) throws IOException {
    // integer serialized size of each entry by hash
    final File tempSizes = File.createTempFile("tmpsizes", ".bin");
    boolean completed = false;
    try {
        // integer hash (offset from start of shard) of each entry by output order
        final File tempHashes = File.createTempFile("tmphashes", ".bin");
        try (final MMapBuffer sizes = new MMapBuffer(tempSizes, 0L, 4L * meta.numEntries(), FileChannel.MapMode.READ_WRITE, ByteOrder.nativeOrder());
             final MMapBuffer hashes = new MMapBuffer(tempHashes, 0L, 4L * meta.numEntries(), FileChannel.MapMode.READ_WRITE, ByteOrder.nativeOrder())) {
            final List<File> shards = splitToShards(outputDir, meta, entries, dataSize, sizes, hashes);
            rewriteShardsInOrder(new File(outputDir, meta.DEFAULT_DATA_PATH), meta, shards, sizes, hashes);
        } finally {
            tempHashes.delete();
        }
        completed = true;
        return tempSizes;
    } finally {
        // On failure nobody receives tempSizes, so it must not be left behind.
        if (!completed) {
            tempSizes.delete();
        }
    }
}
 
Example #5
Source File: TableWriter.java    From mph-table with Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates all shards into {@code outputPath}, one shard at a time, then marks the output
 * read-only and logs the elapsed time.
 *
 * <p>Entries are divided evenly across the shards (rounding up, at least one per shard); each
 * shard file is deleted once it has been processed, whether or not processing succeeded.
 *
 * @param outputPath destination data file
 * @param meta       table metadata; supplies the total entry count
 * @param shards     shard files, in shard order
 * @param sizes      mapped per-entry sizes, passed through to {@code rewriteShardInOrder}
 * @param hashes     mapped per-entry hashes, passed through to {@code rewriteShardInOrder}
 * @throws IOException if reading a shard or writing the output fails
 */
private static <K, V> void rewriteShardsInOrder(
        final File outputPath,
        final TableMeta<K, V> meta,
        final List<File> shards,
        final MMapBuffer sizes,
        final MMapBuffer hashes) throws IOException {
    final long began = System.currentTimeMillis();
    try (final DataOutputStream out = new DataOutputStream(new BufferedFileDataOutputStream(outputPath))) {
        final int shardCount = shards.size();
        // Ceiling division of entries over shards, never less than one entry per shard.
        final long entriesPerShard = Math.max(1L, (meta.numEntries() + shardCount - 1) / shardCount);
        int index = 0;
        while (index < shardCount) {
            final long first = index * entriesPerShard;
            final long last = Math.min(first + entriesPerShard, meta.numEntries());
            final File shard = shards.get(index);
            try {
                rewriteShardInOrder(out, meta, shard, entriesPerShard, sizes, hashes, first, last);
            } finally {
                shard.delete();
            }
            ++index;
        }
        out.flush();
    }
    outputPath.setReadOnly();
    final long elapsed = System.currentTimeMillis() - began;
    LOGGER.info("rewrote shards in " + elapsed + " ms");
}
 
Example #6
Source File: MMapFastBitSet.java    From imhotep with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a bit set whose words live in a memory-mapped, little-endian region of {@code file}.
 *
 * @param file    file to map
 * @param size    number of bits
 * @param mapMode mapping mode passed to the underlying {@code MMapBuffer}
 * @throws IOException if the file cannot be mapped
 */
public MMapFastBitSet(File file, int size, FileChannel.MapMode mapMode) throws IOException {
    this.size = size;
    // (size + 64) >> 6 yields size / 64 + 1 words for non-negative sizes.
    this.arraySize = (size + 64) >> 6;
    // Eight bytes per 64-bit word.
    this.bufferLength = this.arraySize * 8;
    this.buffer = new MMapBuffer(file, 0, this.bufferLength, mapMode, ByteOrder.LITTLE_ENDIAN);
    this.bits = this.buffer.memory().longArray(0, this.arraySize);
}
 
Example #7
Source File: BlockCompressedRecordFile.java    From lsmtree with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the metadata block stored near the end of {@code file}.
 *
 * <p>The whole file is mapped read-only as big-endian. A 4-byte length is read at
 * {@code length - 12}; the value {@code Integer.MAX_VALUE} means no metadata is present.
 * Otherwise that many bytes immediately preceding the length field are copied into a heap
 * array. The mapping is always closed before this method returns.
 *
 * @param file file to read
 * @return a copy of the metadata bytes, or {@code null} if the file carries none
 * @throws IOException if the file cannot be mapped or the mapping cannot be released
 */
public static @Nullable byte[] getMetadata(File file) throws IOException {
    final long length = file.length();
    // try-with-resources: the result is a heap copy, so the mapping must not outlive this call.
    try (final MMapBuffer buffer = new MMapBuffer(file, 0, length, FileChannel.MapMode.READ_ONLY, ByteOrder.BIG_ENDIAN)) {
        final DirectMemory memory = buffer.memory();
        final int metadataLength = memory.getInt(length - 12);
        if (metadataLength == Integer.MAX_VALUE) {
            return null;
        }
        final byte[] metadata = new byte[metadataLength];
        memory.getBytes(length - 12 - metadataLength, metadata);
        return metadata;
    }
}
 
Example #8
Source File: ImmutableBTreeIndex.java    From lsmtree with Apache License 2.0 5 votes vote down vote up
/**
 * Opens {@code index.bin} under {@code path} as a read-only, little-endian memory mapping and
 * initializes the reader from the header stored in the last {@code Header.length()} bytes.
 *
 * <p>The file is rejected if it is shorter than the header or if the file length recorded in
 * the header differs from the actual size. On any failure the mapping is closed quietly before
 * the error is rethrown.
 *
 * @param path            directory containing {@code index.bin}
 * @param comparator      key comparator
 * @param keySerializer   serializer for keys
 * @param valueSerializer serializer for values
 * @param mlockFiles      if {@code true}, the whole mapping is mlocked after loading
 * @throws IOException if the file cannot be mapped or is inconsistent
 */
public Reader(Path path, Comparator<K> comparator, Serializer<K> keySerializer, Serializer<V> valueSerializer, final boolean mlockFiles) throws IOException {
    this.comparator = comparator;
    indexFile = path.resolve("index.bin");
    sizeInBytes = Files.size(indexFile);
    buffer = new MMapBuffer(indexFile, FileChannel.MapMode.READ_ONLY, ByteOrder.LITTLE_ENDIAN);
    try {
        stuffToClose = SharedReference.create((Closeable)buffer);
        final MemoryDataInput mappedInput = new MemoryDataInput(buffer.memory());
        if (Header.length() > sizeInBytes) {
            throw new IOException("file is less than header length bytes");
        }
        // The header sits at the very end of the file.
        final byte[] rawHeader = new byte[Header.length()];
        mappedInput.seek(sizeInBytes - Header.length());
        mappedInput.readFully(rawHeader);
        final LittleEndianDataInputStream headerInput =
                new LittleEndianDataInputStream(new ByteArrayInputStream(rawHeader));
        final Header header = new HeaderSerializer().read(headerInput);
        hasDeletions = header.hasDeletions;
        size = header.size;
        if (header.fileLength != sizeInBytes) {
            log.error(header.fileLength);
            throw new IOException("file length written to last 8 bytes of file does not match file length, file is inconsistent");
        }
        rootLevel = Level.build(buffer.memory(), keySerializer, valueSerializer, comparator, header.hasDeletions, header.indexLevels);
        rootLevelStartAddress = header.rootLevelStartAddress;
        if (mlockFiles) {
            buffer.mlock(0, buffer.memory().length());
        }
    } catch (Throwable failure) {
        // Release the mapping, then rethrow: IOExceptions as-is, anything else via Throwables.
        Closeables2.closeQuietly(buffer, log);
        Throwables.propagateIfInstanceOf(failure, IOException.class);
        throw Throwables.propagate(failure);
    }
}
 
Example #9
Source File: TableWriter.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a select structure from the per-entry sizes stored in {@code tempSizes}.
 *
 * <p>The file is read as consecutive 4-byte integers in native byte order. A running offset is
 * accumulated over them, and for each entry the bit at
 * {@code config.compressOffset(offset, index)} is set in a bit vector whose length is
 * {@code config.compressOffset(dataSize, numEntries)}.
 *
 * @param config    table configuration supplying the offset compression
 * @param tempSizes file of 4-byte entry sizes
 * @param dataSize  total data size, used to size the bit vector
 * @return a {@code HintedBsearchSelect} over the populated bit vector
 * @throws IOException if the sizes file cannot be mapped
 */
private static <K, V> Select sizesToSelect(final TableConfig<K, V> config,
                                           final File tempSizes,
                                           final long dataSize) throws IOException {
    final long numEntries = tempSizes.length() / 4;
    try (final MMapBuffer sizes = new MMapBuffer(tempSizes, 0L, numEntries * 4, FileChannel.MapMode.READ_ONLY, ByteOrder.nativeOrder())) {
        final DirectMemory sizesMemory = sizes.memory();
        final long bitCount = config.compressOffset(dataSize, numEntries);
        final BitVector bits = LongArrayBitVector.ofLength(bitCount);
        long runningOffset = 0;
        for (long entry = 0; entry < numEntries; ++entry) {
            // Mark where this entry starts, then advance past it.
            bits.set(config.compressOffset(runningOffset, entry));
            runningOffset += sizesMemory.getInt(entry * 4);
        }
        return new HintedBsearchSelect(new Rank9(bits));
    }
}
 
Example #10
Source File: TableWriter.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Writes every entry to {@code outputData} and records, per key hash, the byte offset at which
 * that entry starts in a memory-mapped {@code outputOffsets} file.
 *
 * <p>Each offset slot is 2, 4 or 8 bytes wide, as chosen by
 * {@code meta.getConfig().bytesPerOffset(numEntries, dataSize)}; the slot index is the key's
 * hash. Both output files are marked read-only once writing completes.
 *
 * <p>The output stream is the first resource of the try block, so it is closed even when
 * mapping the offsets file fails.
 *
 * @param inputData     not read by this method
 * @param outputData    destination for the serialized entries
 * @param outputOffsets destination for the offset table
 * @param meta          table metadata; supplies the hash function, entry count and config
 * @param entries       key/value pairs to write
 * @param dataSize      total data size, used to choose the offset width and for logging
 * @param <K> key type
 * @param <V> value type
 * @throws IOException if a key hashes to a negative value, or on any I/O failure
 */
private static <K, V> void writeToIndexedOffsets(
        final File inputData,
        final File outputData,
        final File outputOffsets,
        final TableMeta<K, V> meta,
        final Iterable<Pair<K, V>> entries,
        final long dataSize) throws IOException {
    final long numEntries = meta.numEntries();
    final int offsetSize = meta.getConfig().bytesPerOffset(numEntries, dataSize);
    final long totalOffsetSize = numEntries * offsetSize;
    final BufferedFileDataOutputStream fileOut = new BufferedFileDataOutputStream(outputData);
    final CountingOutputStream countOut = new CountingOutputStream(fileOut);
    final long startMillis = System.currentTimeMillis();
    // "out" wraps (and therefore closes) countOut and fileOut; declaring it before the mapping
    // guarantees the file stream is released if the MMapBuffer constructor throws.
    try (final LittleEndianDataOutputStream out = new LittleEndianDataOutputStream(countOut);
         final MMapBuffer offsets = new MMapBuffer(outputOffsets, 0L, totalOffsetSize, FileChannel.MapMode.READ_WRITE, ByteOrder.nativeOrder())) {
        for (final Pair<K, V> e : entries) {
            final long hash = meta.getHash(e.getFirst());
            if (hash < 0) {
                throw new IOException("inconsistent mph, known key hashed to -1: " + e.getFirst());
            }
            // Bytes written so far == starting offset of this entry.
            final long offset = countOut.getCount();
            if (offsetSize == 2) {
                offsets.memory().putShort(hash * 2L, (short) offset);
            } else if (offsetSize == 4) {
                offsets.memory().putInt(hash * 4L, (int) offset);
            } else {
                offsets.memory().putLong(hash * 8L, offset);
            }
            meta.getConfig().write(e.getFirst(), e.getSecond(), out);
        }
        offsets.sync(0L, totalOffsetSize);
        out.flush();
    }
    outputData.setReadOnly();
    outputOffsets.setReadOnly();
    LOGGER.info("wrote " + numEntries + " offsets for " + dataSize + " bytes of data in " +
                (System.currentTimeMillis() - startMillis) + " ms");
}
 
Example #11
Source File: MMapBitVector.java    From mph-table with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a bit vector that keeps a reference to the given mapped buffer.
 *
 * @param buffer the buffer stored by this instance
 */
MMapBitVector(final MMapBuffer buffer) {
    this.buffer = buffer;
}
 
Example #12
Source File: MMapGOV4Function.java    From mph-table with Apache License 2.0 4 votes vote down vote up
/**
 * Bundles a {@code GOV4Function} with a mapped buffer and a signature width.
 *
 * @param gov4Function   the function to store
 * @param mmapBuffer     the mapped buffer to store
 * @param signatureWidth the signature width to store
 */
public MMapGOV4Function(final GOV4Function<T> gov4Function, final MMapBuffer mmapBuffer, final int signatureWidth) {
    this.gov4Function = gov4Function;
    this.mmapBuffer = mmapBuffer;
    this.signatureWidth = signatureWidth;
}
 
Example #13
Source File: MMapShortArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains a short array from its memory via
 * {@code shortArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code shortArray}
 */
public MMapShortArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.shortArray = this.buffer.memory().shortArray(0, length);
}
 
Example #14
Source File: MMapByteArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains a byte array from its memory via
 * {@code byteArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code byteArray}
 */
public MMapByteArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.byteArray = this.buffer.memory().byteArray(0, length);
}
 
Example #15
Source File: MMapIntArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains an int array from its memory via
 * {@code intArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code intArray}
 */
public MMapIntArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.intArray = this.buffer.memory().intArray(0, length);
}
 
Example #16
Source File: MMapLongArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains a long array from its memory via
 * {@code longArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code longArray}
 */
public MMapLongArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.longArray = this.buffer.memory().longArray(0, length);
}
 
Example #17
Source File: MMapSignedByteArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains a byte array from its memory via
 * {@code byteArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code byteArray}
 */
public MMapSignedByteArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.byteArray = this.buffer.memory().byteArray(0, length);
}
 
Example #18
Source File: MMapCharArrayIntValueLookup.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Keeps a reference to {@code buffer} and obtains a char array from its memory via
 * {@code charArray(0, length)}.
 *
 * @param buffer mapped buffer holding the values
 * @param length second argument passed to {@code charArray}
 */
public MMapCharArrayIntValueLookup(MMapBuffer buffer, int length) {
    this.buffer = buffer;
    this.charArray = this.buffer.memory().charArray(0, length);
}
 
Example #19
Source File: MapCache.java    From imhotep with Apache License 2.0 4 votes vote down vote up
/**
 * Quietly closes every shared buffer reference currently held in the mapping cache.
 * Failures to close are reported to {@code log} by {@code Closeables2.closeQuietly}
 * rather than thrown.
 */
@Override
public synchronized void close() throws IOException {
    // Only the cached references matter here; the keys are not needed.
    for (final SharedReference<MMapBuffer> cached : mappingCache.values()) {
        Closeables2.closeQuietly(cached, log);
    }
}
 
Example #20
Source File: MMapGOV4Function.java    From mph-table with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the mapped buffer held by this instance.
 *
 * @return the value of the {@code mmapBuffer} field
 */
public MMapBuffer getMMapBuffer() {
    return mmapBuffer;
}
 
Example #21
Source File: BasicRecordFile.java    From lsmtree with Apache License 2.0 4 votes vote down vote up
/**
 * Opens {@code file} as a read-only, big-endian memory mapping and keeps both the mapping and
 * its memory view for later reads.
 *
 * @param file       record file to map
 * @param serializer serializer stored for this record file's entries
 * @throws IOException if the file cannot be mapped
 */
public BasicRecordFile(File file, Serializer<E> serializer) throws IOException {
    this.file = file;
    this.serializer = serializer;
    this.buffer = new MMapBuffer(file, FileChannel.MapMode.READ_ONLY, ByteOrder.BIG_ENDIAN);
    this.memory = this.buffer.memory();
}