com.indeed.util.io.BufferedFileDataOutputStream Java Examples

The following examples show how to use com.indeed.util.io.BufferedFileDataOutputStream. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TableWriter.java    From mph-table with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites every shard file, in list order, into a single output file.
 *
 * Each shard is assigned a contiguous range of entry indices of width
 * {@code ceil(numEntries / numShards)} (at least 1); the final range is capped
 * at {@code meta.numEntries()}. A shard file is deleted after its rewrite is
 * attempted, whether or not the rewrite succeeded. Once everything is written
 * the output file is marked read-only and the elapsed time is logged.
 *
 * @param outputPath file that receives the rewritten data
 * @param meta       table metadata, consulted for the total entry count
 * @param shards     shard files to rewrite; each is deleted after processing
 * @param sizes      buffer handed through to {@code rewriteShardInOrder}
 * @param hashes     buffer handed through to {@code rewriteShardInOrder}
 * @throws IOException if opening, writing or flushing the output fails
 */
private static <K, V> void rewriteShardsInOrder(
        final File outputPath,
        final TableMeta<K, V> meta,
        final List<File> shards,
        final MMapBuffer sizes,
        final MMapBuffer hashes) throws IOException {
    final long startMillis = System.currentTimeMillis();
    try (final DataOutputStream output =
                 new DataOutputStream(new BufferedFileDataOutputStream(outputPath))) {
        final int shardCount = shards.size();
        // Ceiling division so that shardCount ranges cover all entries.
        final long entriesPerShard =
                Math.max(1L, (meta.numEntries() + shardCount - 1) / shardCount);
        long rangeStart = 0L;
        int shardIndex = 0;
        while (shardIndex < shardCount) {
            final long rangeEnd = Math.min(rangeStart + entriesPerShard, meta.numEntries());
            final File shard = shards.get(shardIndex);
            try {
                rewriteShardInOrder(
                        output, meta, shard, entriesPerShard, sizes, hashes, rangeStart, rangeEnd);
            } finally {
                // The shard is temporary data; remove it even when the rewrite failed.
                shard.delete();
            }
            rangeStart += entriesPerShard;
            shardIndex++;
        }
        output.flush();
    }
    outputPath.setReadOnly();
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    LOGGER.info("rewrote shards in " + elapsedMillis + " ms");
}
 
Example #2
Source File: FileBasedCheckpointer.java    From util with Apache License 2.0 6 votes vote down vote up
/**
 * Stores a new checkpoint on disk and then updates the in-memory value.
 *
 * The stringified checkpoint is first written (UTF-8) and synced to a sibling
 * file named after the checkpoint file with a {@code .next} suffix. That file
 * is then moved over the checkpoint file, replacing any existing one, and the
 * parent directory's channel is forced. The in-memory value is only updated
 * after all of those steps succeed.
 *
 * @param checkpoint the checkpoint to persist
 * @throws IOException if writing the staging file fails, or if the move or the
 *                     directory force fails (reported as a rename failure)
 */
@Override
public synchronized void setCheckpoint(T checkpoint) throws IOException {
    final Path parentDir = checkpointFilePath.getParent();
    final Path stagingPath = parentDir.resolve(checkpointFilePath.getFileName() + ".next");

    try (BufferedFileDataOutputStream staging = new BufferedFileDataOutputStream(stagingPath)) {
        final String serialized = stringifier.toString(checkpoint);
        staging.write(serialized.getBytes(Charsets.UTF_8));
        staging.sync();
    }

    try {
        Files.move(stagingPath, checkpointFilePath, StandardCopyOption.REPLACE_EXISTING);
        // Force the directory itself after the move; presumably so the rename is durable.
        try (FileChannel parentChannel = FileChannel.open(parentDir, StandardOpenOption.READ)) {
            parentChannel.force(true);
        }
    } catch (final IOException cause) {
        throw new IOException(
                "failed to rename " + stagingPath + " to " + checkpointFilePath, cause);
    }

    value = checkpoint;
}
 
Example #3
Source File: TableWriter.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a hash table from a one-time iterator of entries.  The entries are
 * first serialized to a local temp file, making this suitable for use e.g.
 * when reading from a slow source such as hdfs, and the table is then
 * written from that temp file.  The temp file is always deleted before
 * this method returns, including when serializing an entry fails.
 *
 * @param <K> key type
 * @param <V> value type
 * @param outputDir directory to write the hash table files to
 * @param config    a {@link TableConfig} specifying at least a key serializer
 * @param entries   an iterator of key-value Pairs representing entries in the table
 * @param tempDir   directory to write temporary files to
 * @throws IOException if the config is invalid, or if unable to write the files or serialize the data
 * @throws IllegalArgumentException if an entry has a null key, or a null value while a value serializer is configured
 */
public static <K, V> void writeWithTempStorage(
        final File outputDir,
        final TableConfig<K, V> config,
        final Iterator<Pair<K, V>> entries,
        final File tempDir) throws IOException {
    if (!config.isValid()) {
        throw new IOException("invalid table config: " + config);
    }
    ensureOutputDirectory(tempDir);
    final File tempDataFile = File.createTempFile("tmp_entries", ".bin", tempDir);
    // One try/finally around both phases so the temp file cannot be left behind
    // when spooling the entries throws part-way through.
    try {
        long dataSize = 0;
        try (final LittleEndianDataOutputStream out =
                     new LittleEndianDataOutputStream(new BufferedFileDataOutputStream(tempDataFile))) {
            while (entries.hasNext()) {
                final Pair<K, V> e = entries.next();
                if (e.getFirst() == null || (e.getSecond() == null && config.getValueSerializer() != null)) {
                    throw new IllegalArgumentException("can't store nulls: " + e);
                }
                dataSize += config.sizeOf(e.getFirst(), e.getSecond());
                config.getKeySerializer().write(e.getFirst(), out); // write here even if implicit
                if (config.getValueSerializer() != null) {
                    config.getValueSerializer().write(e.getSecond(), out);
                }
            }
        }
        final Iterable<Pair<K, V>> tempEntries =
            new SerializedKeyValueIterable(tempDataFile, config.getKeySerializer(), config.getValueSerializer());
        write(outputDir, config, tempEntries, dataSize);
    } finally {
        tempDataFile.delete();
    }
}
 
Example #4
Source File: TableWriter.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Writes every entry to the data file and records, for each entry, the byte
 * position at which it starts in a separate offsets file indexed by the
 * entry's hash.
 *
 * The width of each stored offset (2, 4 or 8 bytes) comes from the table
 * config. Both output files are marked read-only afterwards and a summary
 * line is logged.
 *
 * @param inputData     not used by this method
 * @param outputData    file receiving the serialized entries
 * @param outputOffsets file receiving the per-hash offsets
 * @param meta          table metadata providing the hash function, config and entry count
 * @param entries       the entries to write
 * @param dataSize      total data size, used to choose the offset width and for logging
 * @throws IOException if a key hashes to a negative value or an I/O error occurs
 */
private static <K, V> void writeToIndexedOffsets(
        final File inputData,
        final File outputData,
        final File outputOffsets,
        final TableMeta<K, V> meta,
        final Iterable<Pair<K, V>> entries,
        final long dataSize) throws IOException {
    final long entryCount = meta.numEntries();
    final int bytesPerOffset = meta.getConfig().bytesPerOffset(entryCount, dataSize);
    final long offsetsLength = entryCount * bytesPerOffset;
    final BufferedFileDataOutputStream dataFileOut = new BufferedFileDataOutputStream(outputData);
    final CountingOutputStream countingOut = new CountingOutputStream(dataFileOut);
    final long startMillis = System.currentTimeMillis();
    try (final MMapBuffer offsetBuffer = new MMapBuffer(
                 outputOffsets, 0L, offsetsLength, FileChannel.MapMode.READ_WRITE, ByteOrder.nativeOrder());
         final LittleEndianDataOutputStream dataOut = new LittleEndianDataOutputStream(countingOut)) {
        for (final Pair<K, V> entry : entries) {
            final long hash = meta.getHash(entry.getFirst());
            if (hash < 0) {
                throw new IOException("inconsistent mph, known key hashed to -1: " + entry.getFirst());
            }
            // Bytes written so far == position at which this entry begins.
            final long position = countingOut.getCount();
            switch (bytesPerOffset) {
                case 2:
                    offsetBuffer.memory().putShort(hash * 2L, (short) position);
                    break;
                case 4:
                    offsetBuffer.memory().putInt(hash * 4L, (int) position);
                    break;
                default:
                    offsetBuffer.memory().putLong(hash * 8L, position);
                    break;
            }
            meta.getConfig().write(entry.getFirst(), entry.getSecond(), dataOut);
        }
        offsetBuffer.sync(0L, offsetsLength);
        dataOut.flush();
    }
    outputData.setReadOnly();
    outputOffsets.setReadOnly();
    final long elapsedMillis = System.currentTimeMillis() - startMillis;
    LOGGER.info("wrote " + entryCount + " offsets for " + dataSize + " bytes of data in "
            + elapsedMillis + " ms");
}
 
Example #5
Source File: TableWriter.java    From mph-table with Apache License 2.0 5 votes vote down vote up
/**
 * Writes the given longs, in array order, to {@code outputFile} through a
 * {@code LittleEndianDataOutputStream}, then marks the file read-only.
 *
 * @param outputFile file to write
 * @param values     values to write
 * @throws IOException if the file cannot be opened or written
 */
private static void writeLongs(final File outputFile, final long[] values) throws IOException {
    try (final LittleEndianDataOutputStream stream =
                 new LittleEndianDataOutputStream(new BufferedFileDataOutputStream(outputFile))) {
        for (int index = 0; index < values.length; index++) {
            stream.writeLong(values[index]);
        }
        stream.flush();
    }
    outputFile.setReadOnly();
}
 
Example #6
Source File: ImmutableBTreeIndex.java    From lsmtree with Apache License 2.0 5 votes vote down vote up
/**
 * Writes an index to {@code index.bin} inside {@code path}: first the value
 * level, then the index built from a temp file, then a header. The output is
 * synced before it is closed, and it is closed even when writing fails.
 *
 * @param path root lsm tree index directory (created if missing)
 * @param iterator the iterator
 * @param keySerializer the key serializer
 * @param valueSerializer the value serializer
 * @param blocksize block size, at most 65536
 * @param keepDeletions true to keep deletion
 * @param <K> the key type
 * @param <V> the value type
 * @throws IOException  if an I/O error occurs
 * @throws IllegalArgumentException if {@code blocksize} is greater than 65536
 */
public static <K, V> void write(
        Path path,
        Iterator<Generation.Entry<K,V>> iterator,
        Serializer<K> keySerializer,
        Serializer<V> valueSerializer,
        final int blocksize,
        boolean keepDeletions
) throws IOException {
    if (blocksize > 65536) {
        // The check accepts 65536 itself, so the message says "at most".
        throw new IllegalArgumentException("block size must be at most 65536");
    }
    Files.createDirectories(path);
    final BufferedFileDataOutputStream fileOut = new BufferedFileDataOutputStream(path.resolve("index.bin"));
    // try-with-resources so the underlying file is released if any write step throws;
    // closing the counting wrapper closes fileOut as the explicit out.close() did before.
    try (final CountingOutputStream out = new CountingOutputStream(fileOut)) {
        //tempFile is deleted in writeIndex
        final Path tempPath = Files.createTempFile("tmp", ".bin");
        final WriteLevelResult result = writeLevel(out, tempPath, iterator, keySerializer, valueSerializer, blocksize, keepDeletions);
        final int tmpCount = result.tmpCount;
        final long size = result.size;

        // Bytes written so far make up the value level; capture before the index is appended.
        final long valueLevelLength = out.getCount();
        final Header header = writeIndex(out, tempPath, tmpCount, keySerializer, blocksize);
        header.valueLevelLength = valueLevelLength;
        header.size = size;
        header.hasDeletions = keepDeletions;
        new HeaderSerializer().write(header, new LittleEndianDataOutputStream(out));
        fileOut.sync();
    }
}
 
Example #7
Source File: GenericRecordLogAppender.java    From lsmtree with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a string to a file, overwriting it if it exists.
 *
 * The string is written (UTF-8) and synced to a sibling file named
 * {@code <name>.next}, which is then renamed onto {@code f}.
 *
 * @param f             file to write
 * @param str           value to store
 * @throws IOException  if an I/O error occurs, or if the staged file cannot
 *                      be renamed onto {@code f}
 */
public static void writeStringToFile(File f, String str) throws IOException {
    final File nextPath = new File(f.getParentFile(), f.getName() + ".next");
    // try-with-resources so the stream is closed even when write or sync throws.
    try (BufferedFileDataOutputStream out = new BufferedFileDataOutputStream(nextPath)) {
        out.write(str.getBytes(Charsets.UTF_8));
        out.sync();
    }
    // File.renameTo reports failure only through its return value; surface it
    // rather than silently leaving the old contents in place.
    if (!nextPath.renameTo(f)) {
        throw new IOException("failed to rename " + nextPath + " to " + f);
    }
}
 
Example #8
Source File: BlockCompressedRecordFile.java    From lsmtree with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code Writer} that writes to {@code file} through a big-endian
 * {@code BufferedFileDataOutputStream} constructed with a size argument of 16384.
 *
 * @param file            file to write to
 * @param serializer      passed through to the writer
 * @param codec           passed through to the writer
 * @param blockSize       passed through to the writer
 * @param recordIndexBits passed through to the writer
 * @param padBits         passed through to the writer
 * @param <E>             record type
 * @return the new writer
 * @throws FileNotFoundException if the output stream cannot be opened on {@code file}
 */
public static <E> Writer<E> open(
        File file,
        Serializer<E> serializer,
        CompressionCodec codec,
        int blockSize,
        int recordIndexBits,
        int padBits
) throws FileNotFoundException {
    final SyncableDataOutput sink =
            new BufferedFileDataOutputStream(file, ByteOrder.BIG_ENDIAN, 16384);
    return new Writer<>(sink, serializer, codec, blockSize, recordIndexBits, padBits);
}
 
Example #9
Source File: BasicRecordFile.java    From lsmtree with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a writer over {@code file}, using a big-endian
 * {@code BufferedFileDataOutputStream} constructed with a size argument of 65536.
 *
 * @param file       file to write to
 * @param serializer serializer stored for later use by this writer
 * @throws FileNotFoundException if the output stream cannot be opened on {@code file}
 */
public Writer(File file, Serializer<E> serializer) throws FileNotFoundException {
    // Open the stream first; if that fails the half-built instance is discarded anyway.
    out = new BufferedFileDataOutputStream(file, ByteOrder.BIG_ENDIAN, 65536);
    this.serializer = serializer;
}