org.apache.parquet.io.api.PrimitiveConverter Java Examples

The following examples show how to use org.apache.parquet.io.api.PrimitiveConverter. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: ColumnReaderBase.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a triplet reader for a single column and, when the page reader supplies a
 * dictionary page, decodes that dictionary up front.
 *
 * @param path descriptor of the column being read; must not be null
 * @param pageReader page source for the column; must not be null
 * @param converter converter that materializes this column's values in the current record; must not be null
 * @param writerVersion parsed version of the writer of the Parquet file being read; stored as given
 * @throws ParquetDecodingException if the dictionary page cannot be decoded, or if the page
 *         reader reports a total value count that is not strictly positive
 */
ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.pageReader = Objects.requireNonNull(pageReader, "pageReader cannot be null");
  this.converter = Objects.requireNonNull(converter, "converter cannot be null");
  this.writerVersion = writerVersion;
  this.maxDefinitionLevel = path.getMaxDefinitionLevel();

  DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // no dictionary page for this column
    this.dictionary = null;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(path, dictPage);
      // hand the dictionary to the converter only if it declares support for one
      if (converter.hasDictionarySupport()) {
        converter.setDictionary(this.dictionary);
      }
    } catch (IOException e) {
      throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount <= 0) {
    throw new ParquetDecodingException("totalValueCount '" + this.totalValueCount + "' <= 0");
  }
}
 
Example #2
Source File: CheckParquet251Command.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Replays one data page through a validating converter and checks that the number of
 * nulls counted while reading equals the null count recorded in the page statistics.
 *
 * @param page the data page to validate
 * @param dict the dictionary page handed to the page reader together with {@code page}
 * @param desc descriptor of the column the page belongs to
 * @throws BadStatsException if the counted nulls differ from {@code stats.getNumNulls()}
 */
private void validateStatsForPage(DataPage page, DictionaryPage dict,
                                  ColumnDescriptor desc) {
  SingletonPageReader reader = new SingletonPageReader(dict, page);
  PrimitiveConverter converter = getValidatingConverter(page, desc.getType());
  // wildcard rather than the raw type, so the compiler keeps generic checking on
  Statistics<?> stats = getStatisticsFromPageHeader(page);

  long numNulls = 0;

  ColumnReader column = COL_READER_CTOR.newInstance(desc, reader, converter, null);
  for (int i = 0; i < reader.getTotalValueCount(); i += 1) {
    // a definition level below the column maximum is counted as a null
    if (column.getCurrentDefinitionLevel() >= desc.getMaxDefinitionLevel()) {
      column.writeCurrentValueToConverter();
    } else {
      numNulls += 1;
    }
    column.consume();
  }

  if (numNulls != stats.getNumNulls()) {
    throw new BadStatsException("Number of nulls doesn't match.");
  }

  console.debug(String.format(
      "Validated stats min=%s max=%s nulls=%d for page=%s col=%s",
      stats.minAsString(),
      stats.maxAsString(), stats.getNumNulls(), page,
      Arrays.toString(desc.getPath())));
}
 
Example #3
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Wraps a primitive converter for one Thrift field and prepares the protocol object
 * that reports the field header (name, wire type, id) for that field.
 *
 * @param delegate the converter this handler wraps
 * @param field the Thrift field whose name, type and id are reported by {@code readFieldBegin}
 * @param events the protocol event list kept by this handler
 */
public PrimitiveFieldHandler(PrimitiveConverter delegate, final ThriftField field, List<TProtocol> events) {
  this.delegate = delegate;
  this.events = events;

  final byte wireType;
  if (field.getType().getType() == ThriftTypeID.ENUM) {
    // enums are serialized as I32
    wireType = ThriftTypeID.I32.getThriftType();
  } else {
    wireType = field.getType().getType().getThriftType();
  }

  this.readFieldBegin = new ParquetProtocol("readFieldBegin()") {
    @Override
    public TField readFieldBegin() throws TException {
      return new TField(field.getName(), wireType, field.getFieldId());
    }
  };
}
 
Example #4
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Asserts that the statistics of one data page use the column's comparator and, when the
 * statistics are not empty, that their null count matches the nulls seen while reading.
 *
 * @param page the data page under test
 * @param dict the dictionary page handed to the page reader together with {@code page}
 * @param desc descriptor of the column the page belongs to
 */
private void validateStatsForPage(DataPage page, DictionaryPage dict, ColumnDescriptor desc) {
  SingletonPageReader pageReader = new SingletonPageReader(dict, page);
  PrimitiveConverter validatingConverter = getValidatingConverter(page, desc.getType());
  Statistics<?> pageStats = getStatisticsFromPageHeader(page);

  assertEquals("Statistics does not use the proper comparator",
      desc.getPrimitiveType().comparator().getClass(),
      pageStats.comparator().getClass());

  // Empty stats mean num nulls = 0 and no non-null values were recorded; this happens
  // when stats are not written (e.g., when stats are too big), so nothing can be checked.
  if (pageStats.isEmpty()) {
    return;
  }

  ColumnReaderImpl column = new ColumnReaderImpl(desc, pageReader, validatingConverter, null);
  long nullCount = 0;
  for (int valueIndex = 0; valueIndex < pageReader.getTotalValueCount(); valueIndex++) {
    if (column.getCurrentDefinitionLevel() < desc.getMaxDefinitionLevel()) {
      nullCount++;
    } else {
      column.writeCurrentValueToConverter();
    }
    column.consume();
  }

  Assert.assertEquals(nullCount, pageStats.getNumNulls());
}
 
Example #5
Source File: FileEncodingsIT.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads every value of one data page and feeds it to a converter built from the
 * expected values for that page.
 *
 * @param rowGroupID row group identifier passed to {@code getConverter}
 * @param pageID page identifier passed to {@code getConverter}
 * @param dictPage the dictionary page handed to the page reader together with {@code page}
 * @param page the data page to read
 * @param columnDesc descriptor of the column the page belongs to
 * @param expectedValues the expected values passed to {@code getConverter}
 */
public static void validateValuesForPage(int rowGroupID, int pageID, DictionaryPage dictPage, DataPage page, ColumnDescriptor columnDesc, List<?> expectedValues) {
  TestStatistics.SingletonPageReader pageReader = new TestStatistics.SingletonPageReader(dictPage, page);
  PrimitiveConverter checkingConverter = getConverter(rowGroupID, pageID, columnDesc.getType(), expectedValues);
  ColumnReaderImpl column = new ColumnReaderImpl(columnDesc, pageReader, checkingConverter, null);

  int consumed = 0;
  while (consumed < pageReader.getTotalValueCount()) {
    column.writeCurrentValueToConverter();
    column.consume();
    consumed++;
  }
}
 
Example #6
Source File: RecordReaderImplementation.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Captures everything a state needs for one primitive column: the supplied collaborators
 * plus the levels, type name and last path elements read from the column IO.
 *
 * @param id identifier of this state
 * @param primitiveColumnIO column IO from which levels, type and paths are read
 * @param column reader for the column
 * @param nextLevel next-level table stored as given
 * @param groupConverterPath group converters stored as given
 * @param primitiveConverter converter for the column's values
 */
private State(int id, PrimitiveColumnIO primitiveColumnIO, ColumnReader column, int[] nextLevel, GroupConverter[] groupConverterPath, PrimitiveConverter primitiveConverter) {
  // values stored exactly as passed in
  this.id = id;
  this.column = column;
  this.nextLevel = nextLevel;
  this.groupConverterPath = groupConverterPath;
  this.primitiveConverter = primitiveConverter;
  this.primitiveColumnIO = primitiveColumnIO;

  // values derived from the column IO
  this.maxDefinitionLevel = primitiveColumnIO.getDefinitionLevel();
  this.maxRepetitionLevel = primitiveColumnIO.getRepetitionLevel();
  this.primitive = primitiveColumnIO.getType().asPrimitiveType().getPrimitiveTypeName();

  // the primitive field is the last element of each path
  this.fieldPath = primitiveColumnIO.getFieldPath();
  this.primitiveField = this.fieldPath[this.fieldPath.length - 1];
  this.indexFieldPath = primitiveColumnIO.getIndexFieldPath();
  this.primitiveFieldIndex = this.indexFieldPath[this.indexFieldPath.length - 1];
}
 
Example #7
Source File: SynchronizingColumnReader.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates a column reader that, in addition to the arguments forwarded to the superclass
 * constructor, keeps an iterator of row indexes.
 *
 * @param path the column descriptor, forwarded to the superclass
 * @param pageReader the page reader, forwarded to the superclass
 * @param converter the primitive converter, forwarded to the superclass
 * @param writerVersion the parsed writer version, forwarded to the superclass
 * @param rowIndexes iterator of row indexes stored by this reader
 */
SynchronizingColumnReader(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter,
    ParsedVersion writerVersion, PrimitiveIterator.OfLong rowIndexes) {
  super(path, pageReader, converter, writerVersion);
  this.rowIndexes = rowIndexes;
  // NOTE(review): Long.MIN_VALUE looks like a "no target row yet" sentinel — confirm where targetRow is read
  targetRow = Long.MIN_VALUE;
  // called once during construction, after the fields above have been set
  consume();
}
 
Example #8
Source File: ColumnReadStoreImpl.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Walks the schema and the record converter in lock-step along the column's path and
 * returns the converter found at the end of it, viewed as a primitive converter.
 *
 * @param path descriptor whose path names the fields to descend through
 * @return the primitive converter reached after the last path element
 */
private PrimitiveConverter getPrimitiveConverter(ColumnDescriptor path) {
  Type type = schema;
  Converter converter = recordConverter;
  for (String name : path.getPath()) {
    GroupType group = type.asGroupType();
    int index = group.getFieldIndex(name);
    type = group.getType(name);
    // descend into the child converter at the same position as the field
    converter = converter.asGroupConverter().getConverter(index);
  }
  return converter.asPrimitiveConverter();
}
 
Example #9
Source File: ColumnReadStoreImpl.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the reader for one column: a {@code SynchronizingColumnReader} when the page
 * store provides row indexes, otherwise a plain {@code ColumnReaderImpl}.
 *
 * @param path descriptor of the column to read
 * @return a new column reader for {@code path}
 */
@Override
public ColumnReader getColumnReader(ColumnDescriptor path) {
  PrimitiveConverter converter = getPrimitiveConverter(path);
  PageReader pageReader = pageReadStore.getPageReader(path);
  return pageReadStore.getRowIndexes()
      .<ColumnReader>map(indexes ->
          new SynchronizingColumnReader(path, pageReader, converter, writerVersion, indexes))
      .orElseGet(() -> new ColumnReaderImpl(path, pageReader, converter, writerVersion));
}
 
Example #10
Source File: FilteringPrimitiveConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a converter that keeps a delegate converter and a set of value inspectors.
 *
 * @param delegate the wrapped converter; must not be null
 * @param valueInspectors the inspectors kept alongside the delegate; must not be null
 * @throws NullPointerException if either argument is null
 */
public FilteringPrimitiveConverter(PrimitiveConverter delegate, ValueInspector[] valueInspectors) {
  // validate in declaration order so a null delegate is reported first
  Objects.requireNonNull(delegate, "delegate cannot be null");
  Objects.requireNonNull(valueInspectors, "valueInspectors cannot be null");
  this.delegate = delegate;
  this.valueInspectors = valueInspectors;
}
 
Example #11
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getLong()} and hands it to the
 * converter through {@code addLong}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addLong(columnReader.getLong());
}
 
Example #12
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getInteger()} and hands it to the
 * converter through {@code addInt}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addInt(columnReader.getInteger());
}
 
Example #13
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getBoolean()} and hands it to the
 * converter through {@code addBoolean}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addBoolean(columnReader.getBoolean());
}
 
Example #14
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getBinary()} and hands it to the
 * converter through {@code addBinary}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addBinary(columnReader.getBinary());
}
 
Example #15
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getFloat()} and hands it to the
 * converter through {@code addFloat}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addFloat(columnReader.getFloat());
}
 
Example #16
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getDouble()} and hands it to the
 * converter through {@code addDouble}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addDouble(columnReader.getDouble());
}
 
Example #17
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getBinary()} and hands it to the
 * converter through {@code addBinary}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addBinary(columnReader.getBinary());
}
 
Example #18
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads the column reader's current value with {@code getBinary()} and hands it to the
 * converter through {@code addBinary}.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
@Override
public void addValueToPrimitiveConverter(
    PrimitiveConverter primitiveConverter, ColumnReader columnReader) {
  primitiveConverter.addBinary(columnReader.getBinary());
}
 
Example #19
Source File: PrimitiveType.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Transfers a value from a column reader to a primitive converter.
 * NOTE(review): implementations presumably pick the typed getter/adder pair matching
 * their primitive type — confirm against the overriding methods.
 *
 * @param primitiveConverter the converter that receives the value
 * @param columnReader the reader the value is taken from
 */
abstract public void addValueToPrimitiveConverter(
PrimitiveConverter primitiveConverter, ColumnReader columnReader);
 
Example #20
Source File: ColumnReadStoreImpl.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@code ColumnReaderImpl} for the given column and page reader, using the
 * primitive converter looked up for that column and this store's writer version.
 *
 * @param path descriptor of the column to read
 * @param pageReader page source for the column
 * @return a new reader for {@code path}
 */
private ColumnReaderImpl newMemColumnReader(ColumnDescriptor path, PageReader pageReader) {
  return new ColumnReaderImpl(path, pageReader, getPrimitiveConverter(path), writerVersion);
}
 
Example #21
Source File: ThriftRecordConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a counter around the given primitive converter.
 *
 * @param delegate the converter stored by this counter; not null-checked here
 */
public PrimitiveCounter(PrimitiveConverter delegate) {
  this.delegate = delegate;
}
 
Example #22
Source File: ColumnReaderImpl.java    From parquet-mr with Apache License 2.0 2 votes vote down vote up
/**
 * Creates a reader for triplets. All arguments are forwarded to the superclass
 * constructor, after which {@code consume()} is called once.
 *
 * @param path
 *          the descriptor for the corresponding column
 * @param pageReader
 *          the underlying store to read from
 * @param converter
 *          a converter that materializes the values in this column in the current record
 * @param writerVersion
 *          parsed writer version of the Parquet file being read
 */
public ColumnReaderImpl(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter,
    ParsedVersion writerVersion) {
  super(path, pageReader, converter, writerVersion);
  // NOTE(review): presumably positions the reader on the first value — confirm in consume()
  consume();
}