org.apache.parquet.hadoop.CodecFactory.BytesCompressor Java Examples

The following examples show how to use org.apache.parquet.hadoop.CodecFactory.BytesCompressor, drawn from several open-source projects. BytesCompressor is the deprecated abstract page compressor in parquet-mr: it compresses a page's BytesInput with a given codec, and has been superseded by CompressionCodecFactory.BytesInputCompressor. The source file and license are noted above each example.
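For orientation, the abstract contract looks roughly like this (a paraphrase based on the three methods overridden in Example #1, not a verbatim copy of the parquet-mr source):

public abstract static class BytesCompressor implements CompressionCodecFactory.BytesInputCompressor {
  public abstract BytesInput compress(BytesInput bytes) throws IOException;
  public abstract CompressionCodecName getCodecName();
  public abstract void release();
}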
Example #1
Source File: ParquetRecordWriter.java    From dremio-oss with Apache License 2.0
@SuppressWarnings("deprecation")
private static BytesCompressor toDeprecatedBytesCompressor(final BytesInputCompressor compressor) {
  return new BytesCompressor() {
    @Override
    public BytesInput compress(BytesInput bytes) throws IOException {
      return compressor.compress(bytes);
    }

    @Override
    public CompressionCodecName getCodecName() {
      return compressor.getCodecName();
    }

    @Override
    public void release() {
      compressor.release();
    }
  };
}
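A minimal sketch of how this adapter might be used, assuming a CodecFactory built from a Hadoop Configuration (the codec choice and page size below are illustrative, not taken from the Dremio source):

Configuration conf = new Configuration();
CodecFactory codecFactory = new CodecFactory(conf, ParquetProperties.DEFAULT_PAGE_SIZE);
// getCompressor returns the deprecated BytesCompressor, which itself implements
// BytesInputCompressor, so it can flow through the adapter above
BytesInputCompressor inputCompressor = codecFactory.getCompressor(CompressionCodecName.SNAPPY);
BytesCompressor legacyCompressor = toDeprecatedBytesCompressor(inputCompressor);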
 
Example #2
Source File: InternalParquetRecordWriter.java    From parquet-mr with Apache License 2.0
/**
 * @param parquetFileWriter the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra metadata to write in the footer of the file
 * @param rowGroupSize the size of a block in the file (this will be approximate)
 * @param compressor the codec used to compress
 * @param validating whether schema validation should be turned on
 * @param props the encoding properties for the writer
 */
public InternalParquetRecordWriter(
    ParquetFileWriter parquetFileWriter,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long rowGroupSize,
    BytesCompressor compressor,
    boolean validating,
    ParquetProperties props) {
  this.parquetFileWriter = parquetFileWriter;
  this.writeSupport = Objects.requireNonNull(writeSupport, "writeSupport cannot be null");
  this.schema = schema;
  this.extraMetaData = extraMetaData;
  this.rowGroupSize = rowGroupSize;
  this.rowGroupSizeThreshold = rowGroupSize;
  this.nextRowGroupSize = rowGroupSizeThreshold;
  this.compressor = compressor;
  this.validating = validating;
  this.props = props;
  initStore();
}
 
Example #3
Source File: ParquetRecordWriter.java    From parquet-mr with Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra metadata to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 * @param memoryManager memory manager for the write
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    long blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion,
    MemoryManager memoryManager) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = Objects.requireNonNull(memoryManager, "memoryManager cannot be null");
  memoryManager.addWriter(internalWriter, blockSize);
  this.codecFactory = null;
}
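A hedged sketch of producing the compressor argument for this deprecated constructor, assuming the CodecFactory from the same package (the configuration and codec are illustrative):

Configuration conf = new Configuration();
CodecFactory codecFactory = new CodecFactory(conf, ParquetProperties.DEFAULT_PAGE_SIZE);
BytesCompressor compressor = codecFactory.getCompressor(CompressionCodecName.GZIP);
// pass the compressor to the constructor above, then release codec resources
// once the writer is closed
codecFactory.release();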
 
Example #4
Source File: ParquetColumnChunkPageWriteStore.java    From Bats with Apache License 2.0
public ParquetColumnChunkPageWriteStore(BytesCompressor compressor,
                                        MessageType schema,
                                        int initialSlabSize,
                                        int maxCapacityHint,
                                        ByteBufferAllocator allocator) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, initialSlabSize, maxCapacityHint, allocator));
  }
}
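The Bats store sizes its page buffers explicitly. A sketch of instantiating it, assuming a compressor and schema are already in scope and using purely illustrative slab sizes:

// hypothetical sizes: start each column's buffer at 64 KiB, hint a 1 MiB ceiling
ParquetColumnChunkPageWriteStore store = new ParquetColumnChunkPageWriteStore(
    compressor, schema, 64 * 1024, 1024 * 1024, new HeapByteBufferAllocator());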
 
Example #5
Source File: ParquetColumnChunkPageWriteStore.java    From Bats with Apache License 2.0
private ColumnChunkPageWriter(ColumnDescriptor path,
                              BytesCompressor compressor,
                              int initialSlabSize,
                              int maxCapacityHint,
                              ByteBufferAllocator allocator) {
  this.path = path;
  this.compressor = compressor;
  this.buf = new CapacityByteArrayOutputStream(initialSlabSize, maxCapacityHint, allocator);
  this.totalStatistics = Statistics.createStats(this.path.getPrimitiveType());
}
 
Example #6
Source File: ColumnChunkPageWriteStoreExposer.java    From dremio-oss with Apache License 2.0
public static ColumnChunkPageWriteStore newColumnChunkPageWriteStore(
    BytesCompressor compressor,
    MessageType schema,
    ParquetProperties parquetProperties
    ) {
  return new ColumnChunkPageWriteStore(compressor, schema, parquetProperties);
}
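Callers would use this exposer to reach the otherwise package-private constructor; a one-line sketch, assuming compressor, schema, and parquetProperties are in scope:

ColumnChunkPageWriteStore store =
    ColumnChunkPageWriteStoreExposer.newColumnChunkPageWriteStore(compressor, schema, parquetProperties);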
 
Example #7
Source File: ParquetRecordWriter.java    From parquet-mr with Apache License 2.0
/**
 *
 * @param w the file to write to
 * @param writeSupport the class to convert incoming records
 * @param schema the schema of the records
 * @param extraMetaData extra metadata to write in the footer of the file
 * @param blockSize the size of a block in the file (this will be approximate)
 * @param pageSize the size of a page in the file (this will be approximate)
 * @param compressor the compressor used to compress the pages
 * @param dictionaryPageSize the threshold for dictionary size
 * @param enableDictionary to enable the dictionary
 * @param validating if schema validation should be turned on
 * @param writerVersion writer compatibility version
 */
@Deprecated
public ParquetRecordWriter(
    ParquetFileWriter w,
    WriteSupport<T> writeSupport,
    MessageType schema,
    Map<String, String> extraMetaData,
    int blockSize, int pageSize,
    BytesCompressor compressor,
    int dictionaryPageSize,
    boolean enableDictionary,
    boolean validating,
    WriterVersion writerVersion) {
  ParquetProperties props = ParquetProperties.builder()
      .withPageSize(pageSize)
      .withDictionaryPageSize(dictionaryPageSize)
      .withDictionaryEncoding(enableDictionary)
      .withWriterVersion(writerVersion)
      .build();
  internalWriter = new InternalParquetRecordWriter<T>(w, writeSupport, schema,
      extraMetaData, blockSize, compressor, validating, props);
  this.memoryManager = null;
  this.codecFactory = null;
}
 
Example #8
Source File: ColumnChunkPageWriteStore.java    From parquet-mr with Apache License 2.0
private ColumnChunkPageWriter(ColumnDescriptor path,
                              BytesCompressor compressor,
                              ByteBufferAllocator allocator,
                              int columnIndexTruncateLength,
                              boolean pageWriteChecksumEnabled) {
  this.path = path;
  this.compressor = compressor;
  this.allocator = allocator;
  this.buf = new ConcatenatingByteArrayCollector();
  this.columnIndexBuilder = ColumnIndexBuilder.getBuilder(path.getPrimitiveType(), columnIndexTruncateLength);
  this.offsetIndexBuilder = OffsetIndexBuilder.getBuilder();
  this.pageWriteChecksumEnabled = pageWriteChecksumEnabled;
  this.crc = pageWriteChecksumEnabled ? new CRC32() : null;
}
 
Example #9
Source File: ColumnChunkPageWriteStore.java    From parquet-mr with Apache License 2.0
public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, ByteBufferAllocator allocator,
    int columnIndexTruncateLength, boolean pageWriteChecksumEnabled) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, allocator, columnIndexTruncateLength, pageWriteChecksumEnabled));
  }
}
 
Example #10
Source File: ColumnChunkPageWriteStore.java    From parquet-mr with Apache License 2.0
public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, ByteBufferAllocator allocator,
                                 int columnIndexTruncateLength) {
  this(compressor, schema, allocator, columnIndexTruncateLength,
    ParquetProperties.DEFAULT_PAGE_WRITE_CHECKSUM_ENABLED);
}