org.apache.parquet.column.values.RequiresPreviousReader Java Examples

The following examples show how to use org.apache.parquet.column.values.RequiresPreviousReader. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: VectorizedPageIterator.java    From iceberg with Apache License 2.0 (5 votes)
/**
 * Sets up value decoding for a newly loaded data page.
 *
 * <p>For a dictionary encoding, a {@code VectorizedDictionaryEncodedParquetValuesReader} is created and
 * initialized from the page, and the decode mode becomes {@code EAGER} or {@code LAZY}. Otherwise a fresh
 * {@code ValuesAsBytesReader} is initialized from the page and the decode mode becomes {@code NONE}.
 *
 * @param dataEncoding encoding of the values in the page
 * @param in input handed to the reader's {@code initFromPage}
 * @param valueCount value count handed to the reader's {@code initFromPage}
 * @throws ParquetDecodingException if the encoding uses a dictionary but no dictionary is set, or if
 *     initializing the dictionary-encoded reader raises an {@code IOException}
 */
@Override
protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  // Snapshot the plain reader left by the previous page before the field can be replaced below.
  final ValuesReader readerBeforeInit = plainValuesReader;

  if (!dataEncoding.usesDictionary()) {
    plainValuesReader = new ValuesAsBytesReader();
    plainValuesReader.initFromPage(valueCount, in);
    dictionaryDecodeMode = DictionaryDecodeMode.NONE;
  } else {
    if (dictionary == null) {
      throw new ParquetDecodingException(
          "could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
    }
    try {
      dictionaryEncodedValuesReader =
          new VectorizedDictionaryEncodedParquetValuesReader(desc.getMaxDefinitionLevel(), setArrowValidityVector);
      dictionaryEncodedValuesReader.initFromPage(valueCount, in);
      // Decode eagerly for int-typed columns, or when not every page is dictionary encoded.
      final boolean decodeEagerly = ParquetUtil.isIntType(desc.getPrimitiveType()) || !allPagesDictEncoded;
      dictionaryDecodeMode = decodeEagerly ? DictionaryDecodeMode.EAGER : DictionaryDecodeMode.LAZY;
    } catch (IOException e) {
      throw new ParquetDecodingException("could not read page in col " + desc, e);
    }
  }

  final boolean sequentialReadsRequired =
      CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding);
  // instanceof is false for null, so no separate null test is needed.
  // NOTE(review): the type test is on the earlier reader while the cast is on plainValuesReader, and in the
  // dictionary branch plainValuesReader is not reassigned (it is still the earlier reader). Presumably this
  // path is only reachable for encodings whose plain reader implements RequiresPreviousReader -- confirm.
  if (sequentialReadsRequired && readerBeforeInit instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) plainValuesReader).setPreviousReader(readerBeforeInit);
  }
}
 
Example #2
Source File: PageIterator.java    From iceberg with Apache License 2.0 (5 votes)
/**
 * Creates and initializes the values reader for a newly loaded data page.
 *
 * <p>Records the page encoding, obtains a dictionary-based or plain reader from that encoding, and
 * initializes it from the page input. When {@code CorruptDeltaByteArrays.requiresSequentialReads} holds and
 * the reader from the previous page is a {@code RequiresPreviousReader}, that earlier reader is handed to
 * the new one.
 *
 * @param dataEncoding encoding of the values in the page
 * @param in input handed to the reader's {@code initFromPage}
 * @param valueCount value count handed to the reader's {@code initFromPage}
 * @throws ParquetDecodingException if the encoding uses a dictionary but no dictionary is set, or if
 *     {@code initFromPage} raises an {@code IOException}
 */
@Override
protected void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  // Snapshot the reader left by the previous page before the field is overwritten.
  final ValuesReader priorReader = values;

  this.valueEncoding = dataEncoding;

  // TODO: May want to change this so that this class is not dictionary-aware.
  // For dictionary columns, this class could rely on wrappers to correctly handle dictionaries
  // This isn't currently possible because RLE must be read by getDictionaryBasedValuesReader
  final boolean dictionaryEncoded = dataEncoding.usesDictionary();
  if (dictionaryEncoded && dictionary == null) {
    throw new ParquetDecodingException(
        "could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
  }
  this.values = dictionaryEncoded
      ? dataEncoding.getDictionaryBasedValuesReader(desc, ValuesType.VALUES, dictionary)
      : dataEncoding.getValuesReader(desc, ValuesType.VALUES);

  try {
    values.initFromPage(valueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page in col " + desc, e);
  }

  final boolean sequentialReadsRequired =
      CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding);
  // NOTE(review): the type test is on the earlier reader while the cast is on the new one; presumably
  // requiresSequentialReads only holds for encodings whose reader implements RequiresPreviousReader -- confirm.
  if (sequentialReadsRequired && priorReader instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) values).setPreviousReader(priorReader);
  }
}
 
Example #3
Source File: PageIterator.java    From iceberg with Apache License 2.0 (5 votes)
/**
 * Creates and initializes the values reader for a newly loaded data page.
 *
 * <p>Records the page encoding, obtains a dictionary-based or plain reader from that encoding, and
 * initializes it from the page input. When {@code CorruptDeltaByteArrays.requiresSequentialReads} holds and
 * the reader from the previous page is a {@code RequiresPreviousReader}, that earlier reader is handed to
 * the new one.
 *
 * @param dataEncoding encoding of the values in the page
 * @param in input handed to the reader's {@code initFromPage}
 * @param valueCount value count handed to the reader's {@code initFromPage}
 * @throws ParquetDecodingException if the encoding uses a dictionary but no dictionary is set, or if
 *     {@code initFromPage} raises an {@code IOException}
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  // Snapshot the reader left by the previous page before the field is overwritten.
  final ValuesReader priorReader = values;

  this.valueEncoding = dataEncoding;

  // TODO: May want to change this so that this class is not dictionary-aware.
  // For dictionary columns, this class could rely on wrappers to correctly handle dictionaries
  // This isn't currently possible because RLE must be read by getDictionaryBasedValuesReader
  final boolean dictionaryEncoded = dataEncoding.usesDictionary();
  if (dictionaryEncoded && dict == null) {
    throw new ParquetDecodingException(
        "could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
  }
  this.values = dictionaryEncoded
      ? dataEncoding.getDictionaryBasedValuesReader(desc, VALUES, dict)
      : dataEncoding.getValuesReader(desc, VALUES);

  try {
    values.initFromPage(valueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page in col " + desc, e);
  }

  final boolean sequentialReadsRequired =
      CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding);
  // instanceof is false for null, so no separate null test is needed.
  // NOTE(review): the type test is on the earlier reader while the cast is on the new one; presumably
  // requiresSequentialReads only holds for encodings whose reader implements RequiresPreviousReader -- confirm.
  if (sequentialReadsRequired && priorReader instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) values).setPreviousReader(priorReader);
  }
}
 
Example #4
Source File: ColumnReaderBase.java    From parquet-mr with Apache License 2.0 (5 votes)
/**
 * Creates, binds and initializes the values reader for a newly loaded data page.
 *
 * <p>Records the page encoding and value count, advances {@code endOfPageValueCount}, obtains a
 * dictionary-based or plain reader from the encoding, binds either to the dictionary or to the column type,
 * and initializes the reader from the page input. When
 * {@code CorruptDeltaByteArrays.requiresSequentialReads} holds and the reader from the previous page is a
 * {@code RequiresPreviousReader}, that earlier reader is handed to the new one.
 *
 * @param dataEncoding encoding of the values in the page
 * @param in input handed to the reader's {@code initFromPage}
 * @param valueCount number of values in the page; stored as {@code pageValueCount}
 * @throws ParquetDecodingException if the encoding uses a dictionary but no dictionary is set, or if
 *     {@code initFromPage} raises an {@code IOException}
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  // Snapshot the reader left by the previous page before the field is overwritten.
  final ValuesReader priorReader = this.dataColumn;

  this.currentEncoding = dataEncoding;
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = readValues + valueCount;

  if (!dataEncoding.usesDictionary()) {
    this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
  } else {
    if (dictionary == null) {
      throw new ParquetDecodingException(
          "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
    }
    this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
  }

  // Bind to the dictionary only when the page is dictionary encoded and the converter supports it.
  final boolean bindDictionary = dataEncoding.usesDictionary() && converter.hasDictionarySupport();
  if (bindDictionary) {
    bindToDictionary(dictionary);
  } else {
    bind(path.getType());
  }

  try {
    dataColumn.initFromPage(valueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page in col " + path, e);
  }

  final boolean sequentialReadsRequired =
      CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding);
  // instanceof is false for null, so no separate null test is needed.
  // NOTE(review): the type test is on the earlier reader while the cast is on the new one; presumably
  // requiresSequentialReads only holds for encodings whose reader implements RequiresPreviousReader -- confirm.
  if (sequentialReadsRequired && priorReader instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) dataColumn).setPreviousReader(priorReader);
  }
}