org.apache.parquet.format.Encoding Java Examples

The following examples show how to use org.apache.parquet.format.Encoding. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VarLengthValuesColumn.java — from the Bats project (Apache License 2.0)
VarLengthValuesColumn(ParquetRecordReader parentReader, ColumnDescriptor descriptor,
                      ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                      SchemaElement schemaElement) throws ExecutionSetupException {

  super(parentReader, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  variableWidthVector = (VariableWidthVector) valueVec;

  // The chunk is dictionary-encoded iff PLAIN_DICTIONARY appears among its encodings.
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);

  if (usingDictionary) {
    // The fixed-length optimization was never implemented for dictionary-encoded
    // chunks (no data points for that use case), so force variable precision.
    // Bulk processing is enabled up front because early data profiling (which
    // normally picks the processing strategy) is skipped once the column
    // precision is pre-set.
    bulkReaderState.columnPrecInfo.columnPrecisionType = ColumnPrecisionType.DT_PRECISION_IS_VARIABLE;
    bulkReaderState.columnPrecInfo.bulkProcess = true;
  }
}
 
Example #2
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Converts a list of Thrift-format encodings into an unmodifiable set of
 * column encodings, de-duplicating the result through an internal cache so
 * that identical encoding sets share a single instance.
 */
Set<org.apache.parquet.column.Encoding> fromFormatEncodings(List<Encoding> encodings) {
  Set<org.apache.parquet.column.Encoding> collected = new HashSet<org.apache.parquet.column.Encoding>();
  for (Encoding formatEncoding : encodings) {
    collected.add(getEncoding(formatEncoding));
  }

  // Freeze the set before publishing it; the mutable copy is no longer referenced.
  Set<org.apache.parquet.column.Encoding> frozen = Collections.unmodifiableSet(collected);

  // putIfAbsent is atomic: a non-null return means another thread already
  // cached an equal set, which we reuse; null means ours is now the cached one.
  Set<org.apache.parquet.column.Encoding> previous = cachedEncodingSets.putIfAbsent(frozen, frozen);
  return (previous == null) ? frozen : previous;
}
 
Example #3
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v1 data page header (without CRC) to the given stream.
 *
 * @deprecated retained for binary compatibility; newer call sites use the
 *             CRC-carrying variant.
 * @throws IOException if writing to {@code to} fails
 */
@Deprecated
public void writeDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  // Build the header first, then delegate the actual serialization.
  PageHeader header = newDataPageHeader(
      uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding);
  writePageHeader(header, to);
}
 
Example #4
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v1 data page header, including its CRC checksum, to the
 * given stream.
 *
 * @throws IOException if writing to {@code to} fails
 */
public void writeDataPageV1Header(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    int crc,
    OutputStream to) throws IOException {
  // Construct the CRC-carrying header, then hand off serialization.
  PageHeader header = newDataPageHeader(
      uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding, crc);
  writePageHeader(header, to);
}
 
Example #5
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Builds a Thrift {@code PageHeader} for a v1 data page, carrying the given
 * CRC and the format-level translations of the three column encodings.
 */
private PageHeader newDataPageHeader(
    int uncompressedSize, int compressedSize,
    int valueCount,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    int crc) {
  // Thrift's DataPageHeader constructor takes encodings in (values, dl, rl) order.
  DataPageHeader dataHeader = new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding));
  PageHeader header = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  header.setData_page_header(dataHeader);
  header.setCrc(crc);
  return header;
}
 
Example #6
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a dictionary page header, including its CRC checksum, to the
 * given stream.
 *
 * @throws IOException if writing to {@code to} fails
 */
public void writeDictionaryPageHeader(
    int uncompressedSize, int compressedSize, int valueCount,
    org.apache.parquet.column.Encoding valuesEncoding, int crc, OutputStream to) throws IOException {
  PageHeader header = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  header.setDictionary_page_header(new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding)));
  header.setCrc(crc);
  writePageHeader(header, to);
}
 
Example #7
Source File: VarLengthColumn.java — from the Bats project (Apache License 2.0)
VarLengthColumn(ParquetRecordReader parentReader, ColumnDescriptor descriptor,
                ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  // The chunk is dictionary-encoded iff PLAIN_DICTIONARY appears among its
  // encodings. Direct assignment replaces the original's redundant
  // if/else-true/false pattern and its misleading extra indentation.
  usingDictionary = columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY);
}
 
Example #8
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a dictionary page header (without CRC) to the given stream.
 *
 * @throws IOException if writing to {@code to} fails
 */
public void writeDictionaryPageHeader(
  int uncompressedSize, int compressedSize, int valueCount,
  org.apache.parquet.column.Encoding valuesEncoding, OutputStream to) throws IOException {
  DictionaryPageHeader dictHeader = new DictionaryPageHeader(valueCount, getEncoding(valuesEncoding));
  PageHeader header = new PageHeader(PageType.DICTIONARY_PAGE, uncompressedSize, compressedSize);
  header.setDictionary_page_header(dictHeader);
  writePageHeader(header, to);
}
 
Example #9
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Builds a Thrift {@code PageHeader} for a v2 data page. The rep/def level
 * byte lengths are stored in the header because v2 pages keep levels
 * uncompressed ahead of the data section.
 */
private PageHeader newDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength) {
  PageHeader header = new PageHeader(PageType.DATA_PAGE_V2, uncompressedSize, compressedSize);
  // TODO: pageHeader.crc = ...;
  // Thrift's DataPageHeaderV2 constructor takes (dl, rl) byte lengths in that order.
  header.setData_page_header_v2(new DataPageHeaderV2(
      valueCount, nullCount, rowCount,
      getEncoding(dataEncoding),
      dlByteLength, rlByteLength));
  return header;
}
 
Example #10
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v2 data page header to the given stream.
 *
 * @throws IOException if writing to {@code to} fails
 */
public void writeDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength,
    OutputStream to) throws IOException {
  // Build the header, then delegate the actual serialization.
  PageHeader header = newDataPageV2Header(
      uncompressedSize, compressedSize,
      valueCount, nullCount, rowCount,
      dataEncoding,
      rlByteLength, dlByteLength);
  writePageHeader(header, to);
}
 
Example #11
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v1 data page header (without CRC) to the given stream.
 *
 * @throws IOException if writing to {@code to} fails
 */
public void writeDataPageV1Header(
  int uncompressedSize,
  int compressedSize,
  int valueCount,
  org.apache.parquet.column.Encoding rlEncoding,
  org.apache.parquet.column.Encoding dlEncoding,
  org.apache.parquet.column.Encoding valuesEncoding,
  OutputStream to) throws IOException {
  // Construct the header, then hand off serialization.
  PageHeader header = newDataPageHeader(
      uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding);
  writePageHeader(header, to);
}
 
Example #12
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v2 data page header to the given stream.
 *
 * @deprecated the {@code statistics} argument is not written into the header
 *             by this method; use the overload without it.
 * @throws IOException if writing to {@code to} fails
 */
@Deprecated
public void writeDataPageV2Header(
    int uncompressedSize, int compressedSize,
    int valueCount, int nullCount, int rowCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding dataEncoding,
    int rlByteLength, int dlByteLength,
    OutputStream to) throws IOException {
  // Note: statistics are intentionally ignored here.
  PageHeader header = newDataPageV2Header(
      uncompressedSize, compressedSize,
      valueCount, nullCount, rowCount,
      dataEncoding,
      rlByteLength, dlByteLength);
  writePageHeader(header, to);
}
 
Example #13
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Builds a Thrift {@code PageHeader} for a v1 data page (no CRC), translating
 * the three column encodings into their format-level equivalents.
 */
private PageHeader newDataPageHeader(
  int uncompressedSize, int compressedSize,
  int valueCount,
  org.apache.parquet.column.Encoding rlEncoding,
  org.apache.parquet.column.Encoding dlEncoding,
  org.apache.parquet.column.Encoding valuesEncoding) {
  // Thrift's DataPageHeader constructor takes encodings in (values, dl, rl) order.
  DataPageHeader dataHeader = new DataPageHeader(
      valueCount,
      getEncoding(valuesEncoding),
      getEncoding(dlEncoding),
      getEncoding(rlEncoding));
  PageHeader header = new PageHeader(PageType.DATA_PAGE, uncompressedSize, compressedSize);
  header.setData_page_header(dataHeader);
  return header;
}
 
Example #14
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Serializes a v1 data page header to the given stream.
 *
 * @deprecated the {@code statistics} argument is not written into the header
 *             by this method; use the overload without it.
 * @throws IOException if writing to {@code to} fails
 */
@Deprecated
public void writeDataPageHeader(
    int uncompressedSize,
    int compressedSize,
    int valueCount,
    org.apache.parquet.column.statistics.Statistics statistics,
    org.apache.parquet.column.Encoding rlEncoding,
    org.apache.parquet.column.Encoding dlEncoding,
    org.apache.parquet.column.Encoding valuesEncoding,
    OutputStream to) throws IOException {
  // Note: statistics are intentionally ignored here.
  PageHeader header = newDataPageHeader(
      uncompressedSize, compressedSize, valueCount,
      rlEncoding, dlEncoding, valuesEncoding);
  writePageHeader(header, to);
}
 
Example #15
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Translates a set of column-level encodings into the equivalent list of
 * Thrift-format encodings, preserving the set's iteration order.
 */
private List<Encoding> toFormatEncodings(Set<org.apache.parquet.column.Encoding> encodings) {
  // Pre-size the list: one format encoding per input element.
  List<Encoding> formatEncodings = new ArrayList<Encoding>(encodings.size());
  for (org.apache.parquet.column.Encoding columnEncoding : encodings) {
    formatEncodings.add(getEncoding(columnEncoding));
  }
  return formatEncodings;
}
 
Example #16
Source File: MetadataReader.java — from the Presto project (Apache License 2.0)
/**
 * Maps Thrift-format encodings to their column-level counterparts by shared
 * enum constant name, returning an unmodifiable set.
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
    Set<org.apache.parquet.column.Encoding> converted = new HashSet<>();
    // The two Encoding enums share constant names, so valueOf(name) maps between them.
    encodings.forEach(encoding -> converted.add(org.apache.parquet.column.Encoding.valueOf(encoding.name())));
    return Collections.unmodifiableSet(converted);
}
 
Example #17
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Translates a column-level encoding to its Thrift-format counterpart.
 * The two enums share constant names, so a name-based lookup suffices.
 */
public Encoding getEncoding(org.apache.parquet.column.Encoding encoding) {
  String constantName = encoding.name();
  return Encoding.valueOf(constantName);
}
 
Example #18
Source File: ParquetMetadataConverter.java — from the parquet-mr project (Apache License 2.0)
/**
 * Translates a Thrift-format encoding to its column-level counterpart.
 * The two enums share constant names, so a name-based lookup suffices.
 */
public org.apache.parquet.column.Encoding getEncoding(Encoding encoding) {
  String constantName = encoding.name();
  return org.apache.parquet.column.Encoding.valueOf(constantName);
}