Java Code Examples for org.apache.parquet.bytes.BytesInput#from()

The following examples show how to use org.apache.parquet.bytes.BytesInput#from(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to go to the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: RunLengthBitPackingHybridEncoder.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Writes out whatever is still buffered and exposes the encoded content.
 *
 * <p>Calling this a second time without an intervening {@code reset()} is
 * rejected by the precondition below.
 *
 * @return the content of {@code baos} wrapped as a {@link BytesInput}
 * @throws IOException if writing the pending run fails
 */
public BytesInput toBytes() throws IOException {
  final boolean firstCall = !toBytesCalled;
  Preconditions.checkArgument(firstCall,
      "You cannot call toBytes() more than once without calling reset()");

  if (repeatCount >= 8) {
    // enough repeats are queued up: emit them as an rle-run
    writeRleRun();
  } else {
    if (numBufferedValues > 0) {
      // zero-fill the unused tail of the 8-value group before packing it
      int slot = numBufferedValues;
      while (slot < 8) {
        bufferedValues[slot] = 0;
        slot++;
      }
      writeOrAppendBitPackedRun();
    }
    // both non-rle paths finish by closing the current bit-packed run
    endPreviousBitPackedRun();
  }

  toBytesCalled = true;
  return BytesInput.from(baos);
}
 
Example 2
Source File: CodecFactory.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Compresses the given bytes with the configured codec.
 *
 * <p>When no codec is configured the input is returned unchanged. Otherwise the
 * shared output buffer is reset, the input is written through a compression
 * stream, and the buffer content is returned.
 *
 * @param bytes the bytes to compress
 * @return {@code bytes} itself when {@code codec} is null, otherwise a
 *         {@link BytesInput} backed by {@code compressedOutBuffer}
 * @throws IOException if writing to or finishing the compression stream fails
 */
@Override
public BytesInput compress(BytesInput bytes) throws IOException {
  if (codec == null) {
    return bytes;
  }
  compressedOutBuffer.reset();
  if (compressor != null) {
    // null compressor for non-native gzip
    compressor.reset();
  }
  // try-with-resources closes the stream even when writeAllTo/finish throws,
  // which the previous explicit close() did not guarantee
  try (CompressionOutputStream cos = codec.createOutputStream(compressedOutBuffer, compressor)) {
    bytes.writeAllTo(cos);
    cos.finish();
  }
  return BytesInput.from(compressedOutBuffer);
}
 
Example 3
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads a still-compressed dictionary page from the given stream.
 *
 * @param pageHeader header supplying the dictionary header and both page sizes
 * @param fin stream from which exactly the compressed page size is read
 * @return a dictionary page wrapping the bytes as read from the stream
 * @throws IOException if the stream cannot be read fully
 */
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  final DictionaryPageHeader header = pageHeader.getDictionary_page_header();
  final int rawSize = pageHeader.getUncompressed_page_size();

  // the buffer is sized by the compressed length and filled completely
  final byte[] payload = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(payload);

  final BytesInput wrapped = BytesInput.from(payload);
  return new DictionaryPage(
      wrapped,
      rawSize,
      header.getNum_values(),
      converter.getEncoding(header.getEncoding()));
}
 
Example 4
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads {@code size} bytes, topping up from {@code f} when the chunk stream
 * holds fewer bytes than requested.
 *
 * @param size the number of bytes requested
 * @return the requested bytes
 * @throws IOException if reading the missing bytes fails
 */
public BytesInput readAsBytesInput(int size) throws IOException {
  final int remaining = stream.available();
  if (size <= remaining) {
    // the common case: the stream already holds everything that was asked for
    return super.readAsBytesInput(size);
  }

  // this is to workaround a bug where the compressedLength
  // of the chunk is missing the size of the header of the dictionary
  // to allow reading older files (using dictionary) we need this.
  // usually 13 to 19 bytes are missing
  final int shortfall = size - remaining;
  LOG.info("completed the column chunk with {} bytes", shortfall);

  // everything the stream still has, followed by the bytes fetched from f
  final List<ByteBuffer> parts = new ArrayList<>(stream.sliceBuffers(remaining));
  final ByteBuffer tail = ByteBuffer.allocate(shortfall);
  f.readFully(tail);
  parts.add(tail);

  return BytesInput.from(parts);
}
 
Example 5
Source File: ColumnChunkIncReadStore.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a dictionary page from the uncompressed content of the given page.
 *
 * @param pageHeader header describing the dictionary page
 * @return a dictionary page over the first {@code uncompressed_page_size}
 *         bytes of the uncompressed buffer
 * @throws IOException if uncompressing the page fails
 */
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  final ByteBuffer pageData = uncompressPage(pageHeader, false);
  final BytesInput dictionaryBytes =
      BytesInput.from(pageData, 0, pageHeader.uncompressed_page_size);
  return new DictionaryPage(
      dictionaryBytes,
      pageHeader.getDictionary_page_header().getNum_values(),
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}
 
Example 6
Source File: FileEncodingsIT.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a dictionary page backed by a byte-array copy of the given page's
 * content.
 *
 * @param dict the page to copy, may be null
 * @return the copy, or null when {@code dict} is null
 * @throws ParquetDecodingException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict) {
  if (dict != null) {
    try {
      final byte[] snapshot = dict.getBytes().toByteArray();
      return new DictionaryPage(
          BytesInput.from(snapshot), dict.getDictionarySize(), dict.getEncoding());
    } catch (IOException readFailure) {
      throw new ParquetDecodingException("Cannot read dictionary", readFailure);
    }
  }
  return null;
}
 
Example 7
Source File: TestZstandardCodec.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps the decompressing stream for {@code bytes} as a {@link BytesInput} of
 * the expected uncompressed size.
 *
 * <p>The stream is closed before this method returns, also when creating the
 * {@link BytesInput} throws.
 * NOTE(review): if {@code BytesInput.from(InputStream, int)} reads lazily, the
 * returned value would be consumed after the stream is closed — confirm.
 *
 * @param codec the codec used to create the decompressing stream
 * @param bytes the compressed bytes
 * @param uncompressedSize the number of bytes expected after decompression
 * @return the decompressed bytes
 * @throws IOException if the stream cannot be created, read or closed
 */
private BytesInput decompress(ZstandardCodec codec, BytesInput bytes, int uncompressedSize) throws IOException {
  // try-with-resources closes the stream after the return value is computed,
  // and also on the failure path that the explicit close() used to skip
  try (InputStream is = codec.createInputStream(bytes.toInputStream(), null)) {
    return BytesInput.from(is, uncompressedSize);
  }
}
 
Example 8
Source File: TestZstandardCodec.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Compresses {@code bytes} with the given codec into a fresh in-memory buffer.
 *
 * @param codec the codec used to create the compressing stream
 * @param bytes the bytes to compress
 * @return the buffer content wrapped as a {@link BytesInput}
 * @throws IOException if writing to or closing the compression stream fails
 */
private BytesInput compress(ZstandardCodec codec, BytesInput bytes) throws IOException {
  // the buffer's initial capacity is the size of the uncompressed input
  ByteArrayOutputStream compressedOutBuffer = new ByteArrayOutputStream((int)bytes.size());
  // try-with-resources closes the stream even when writeAllTo throws
  try (CompressionOutputStream cos = codec.createOutputStream(compressedOutBuffer, null)) {
    bytes.writeAllTo(cos);
  }
  return BytesInput.from(compressedOutBuffer);
}
 
Example 9
Source File: CodecFactory.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decompresses the given bytes with the configured codec.
 *
 * @param bytes the bytes to decompress
 * @param uncompressedSize the size passed on to {@code BytesInput.from}
 * @return {@code bytes} itself when {@code codec} is null, otherwise a
 *         {@link BytesInput} created from the codec's input stream
 * @throws IOException if the decompressing stream cannot be created
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  if (codec == null) {
    // nothing to undo: hand the input straight back
    return bytes;
  }
  if (decompressor != null) {
    decompressor.reset();
  }
  // the stream is intentionally not closed here; it is handed to BytesInput
  final InputStream decodedStream = codec.createInputStream(bytes.toInputStream(), decompressor);
  return BytesInput.from(decodedStream, uncompressedSize);
}
 
Example 10
Source File: CompressionConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads {@code length} bytes from the reader, using the shared
 * {@code pageBuffer} whenever the request fits into it.
 *
 * <p>When the shared buffer is used, the returned value is backed by that
 * same array.
 *
 * @param length the number of bytes to read
 * @param reader the reader to pull the bytes from
 * @return the first {@code length} bytes of the target array
 * @throws IOException if the block read fails
 */
public BytesInput readBlock(int length, TransParquetFileReader reader) throws IOException {
  // only requests larger than pageBufferSize get a dedicated array
  final byte[] target = (length > pageBufferSize) ? new byte[length] : pageBuffer;
  reader.blockRead(target, 0, length);
  return BytesInput.from(target, 0, length);
}
 
Example 11
Source File: DirectCodecFactory.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Decompresses the given bytes into a newly allocated array of
 * {@code uncompressedSize} bytes.
 *
 * @param bytes the compressed bytes
 * @param uncompressedSize the length of the output array
 * @return the output array wrapped as a {@link BytesInput}
 * @throws IOException if reading the input or decompressing fails
 */
@Override
public BytesInput decompress(BytesInput bytes, int uncompressedSize) throws IOException {
  decompressor.reset();

  final byte[] compressed = bytes.toByteArray();
  decompressor.setInput(compressed, 0, compressed.length);

  // any value returned by decompress(...) is not inspected here
  final byte[] inflated = new byte[uncompressedSize];
  decompressor.decompress(inflated, 0, uncompressedSize);

  return BytesInput.from(inflated);
}
 
Example 12
Source File: FixedLenByteArrayPlainValuesWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Flushes the writer and returns the content of the backing buffer.
 *
 * @return {@code arrayOut} wrapped as a {@link BytesInput}
 * @throws ParquetEncodingException if flushing fails
 */
@Override
public BytesInput getBytes() {
  try {
    // push anything still pending in the wrapping stream
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  LOG.debug("writing a buffer of size {}", arrayOut.size());

  return BytesInput.from(arrayOut);
}
 
Example 13
Source File: PlainValuesWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Flushes the writer and returns the content of the backing buffer.
 *
 * @return {@code arrayOut} wrapped as a {@link BytesInput}
 * @throws ParquetEncodingException if flushing fails
 */
@Override
public BytesInput getBytes() {
  try {
    // push anything still pending in the wrapping stream
    out.flush();
  } catch (IOException flushFailure) {
    throw new ParquetEncodingException("could not write page", flushFailure);
  }

  // the size is only computed when debug logging is on
  if (LOG.isDebugEnabled()) {
    LOG.debug("writing a buffer of size {}", arrayOut.size());
  }

  return BytesInput.from(arrayOut);
}
 
Example 14
Source File: BitPackingValuesWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * {@inheritDoc}
 *
 * <p>Finishes the bit-packing writer, then wraps {@code out}. An
 * {@link IOException} from either step is rethrown as a
 * {@code ParquetEncodingException}.
 *
 * @see org.apache.parquet.column.values.ValuesWriter#getBytes()
 */
@Override
public BytesInput getBytes() {
  try {
    bitPackingWriter.finish();
    final BytesInput packed = BytesInput.from(out);
    return packed;
  } catch (IOException cause) {
    throw new ParquetEncodingException(cause);
  }
}
 
Example 15
Source File: ByteStreamSplitValuesWriter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the content of all byte streams, concatenated in index order.
 *
 * @return one {@link BytesInput} made of {@code numStreams} parts
 */
@Override
public BytesInput getBytes() {
  final BytesInput[] perStream = new BytesInput[numStreams];
  int index = 0;
  while (index < perStream.length) {
    perStream[index] = BytesInput.from(byteStreams[index]);
    index++;
  }
  return BytesInput.concat(perStream);
}
 
Example 16
Source File: DictionaryPageReader.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a dictionary page backed by a byte-array copy of the given page's
 * content.
 *
 * @param dict the page to copy
 * @return a new page with the same dictionary size and encoding
 * @throws IOException if the page bytes cannot be read
 */
private static DictionaryPage reusableCopy(DictionaryPage dict)
    throws IOException {
  final byte[] snapshot = dict.getBytes().toByteArray();
  return new DictionaryPage(
      BytesInput.from(snapshot),
      dict.getDictionarySize(),
      dict.getEncoding());
}
 
Example 17
Source File: CompressionConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads {@code length} bytes from the reader into a newly allocated array.
 *
 * @param length the number of bytes to read
 * @param reader the reader to pull the bytes from
 * @return the new array wrapped as a {@link BytesInput}
 * @throws IOException if the block read fails
 */
public BytesInput readBlockAllocate(int length, TransParquetFileReader reader) throws IOException {
  // always a fresh array, so the result does not share storage with other reads
  final byte[] fresh = new byte[length];
  reader.blockRead(fresh, 0, length);
  return BytesInput.from(fresh, 0, length);
}
 
Example 18
Source File: PageReader.java    From dremio-oss with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps a region of the given buffer as a {@link BytesInput}.
 *
 * @param buf the source buffer
 * @param offset start of the region, passed to {@code nioBuffer}
 * @param length length of the region
 * @return a {@link BytesInput} over {@code length} bytes of the NIO view
 * @throws IOException declared by the signature
 */
public static BytesInput asBytesInput(ArrowBuf buf, int offset, int length) throws IOException {
  // the NIO view already starts at offset, so the wrapped range begins at 0
  return BytesInput.from(
      buf.nioBuffer(offset, length),
      0,
      length);
}
 
Example 19
Source File: PageReader.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Wraps a region of the given buffer as a {@link BytesInput}.
 *
 * @param buf the source buffer
 * @param offset start of the region, passed to {@code nioBuffer}
 * @param length length of the region, passed to {@code nioBuffer}
 * @return a {@link BytesInput} over the NIO view of that region
 * @throws IOException declared by the signature
 */
public static BytesInput asBytesInput(DrillBuf buf, int offset, int length) throws IOException {
  return BytesInput.from(
      buf.nioBuffer(offset, length));
}
 
Example 20
Source File: ParquetFileReader.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Slices the next {@code size} bytes off the stream and wraps them.
 *
 * @param size the size of the page
 * @return the page
 * @throws IOException if there is an error while reading from the file stream
 */
public BytesInput readAsBytesInput(int size) throws IOException {
  return BytesInput.from(
      stream.sliceBuffers(size));
}