Java Code Examples for org.apache.parquet.column.page.PageReader#readDictionaryPage()

The following examples show how to use org.apache.parquet.column.page.PageReader#readDictionaryPage(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: ColumnReaderBase.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a reader for the triplets of a single column.
 *
 * <p>The dictionary page, if the page reader supplies one, is decoded eagerly; when the converter
 * reports dictionary support the decoded dictionary is also handed to the converter.
 *
 * @param path the descriptor for the corresponding column
 * @param pageReader the underlying store to read from
 * @param converter a converter that materializes the values in this column in the current record
 * @param writerVersion writer version string from the Parquet file being read
 * @throws NullPointerException if {@code path}, {@code pageReader} or {@code converter} is null
 * @throws ParquetDecodingException if the dictionary page cannot be decoded, or if the total
 *         value count reported by the page reader is not strictly positive
 */
ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.pageReader = Objects.requireNonNull(pageReader, "pageReader cannot be null");
  this.converter = Objects.requireNonNull(converter, "converter cannot be null");
  this.writerVersion = writerVersion;
  this.maxDefinitionLevel = path.getMaxDefinitionLevel();

  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary page was supplied for this column.
    this.dictionary = null;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(path, dictPage);
      if (converter.hasDictionarySupport()) {
        converter.setDictionary(this.dictionary);
      }
    } catch (IOException e) {
      throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (this.totalValueCount <= 0) {
    throw new ParquetDecodingException("totalValueCount '" + this.totalValueCount + "' <= 0");
  }
}
 
Example 2
Source File: ParquetUtil.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads and decodes the dictionary page of a column, if there is one.
 *
 * @param desc the descriptor of the column the dictionary belongs to
 * @param pageSource the page reader to pull the dictionary page from
 * @return the decoded dictionary, or {@code null} when the reader has no dictionary page
 * @throws ParquetDecodingException if decoding the dictionary page fails with an I/O error
 */
public static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  final DictionaryPage page = pageSource.readDictionaryPage();
  if (page == null) {
    return null;
  }

  try {
    return page.getEncoding().initDictionary(desc, page);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
 
Example 3
Source File: ColumnIterator.java    From iceberg with Apache License 2.0 5 votes vote down vote up
/**
 * Reads and decodes the dictionary page of a column, if there is one.
 *
 * @param desc the descriptor of the column the dictionary belongs to
 * @param pageSource the page reader to pull the dictionary page from
 * @return the decoded dictionary, or {@code null} when the reader has no dictionary page
 * @throws ParquetDecodingException if decoding the dictionary page fails with an I/O error
 */
private static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  final DictionaryPage page = pageSource.readDictionaryPage();
  if (page == null) {
    return null;
  }

  try {
    return page.getEncoding().initDictionary(desc, page);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
 
Example 4
Source File: AbstractColumnReader.java    From flink with Apache License 2.0 5 votes vote down vote up
/**
 * Sets up a column reader and eagerly decodes the column's dictionary page, if any.
 *
 * @param descriptor the descriptor of the column being read
 * @param pageReader the page reader that supplies the dictionary page and the total value count
 * @throws IOException if the dictionary page cannot be decoded, or if the page reader reports
 *         a total value count of zero
 */
public AbstractColumnReader(
		ColumnDescriptor descriptor,
		PageReader pageReader) throws IOException {
	this.descriptor = descriptor;
	this.pageReader = pageReader;
	this.maxDefLevel = descriptor.getMaxDefinitionLevel();

	final DictionaryPage dictPage = pageReader.readDictionaryPage();
	// The flag simply mirrors whether a dictionary page was supplied.
	this.isCurrentPageDictionaryEncoded = (dictPage != null);
	if (dictPage == null) {
		this.dictionary = null;
	} else {
		try {
			this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
		} catch (IOException e) {
			throw new IOException("could not decode the dictionary for " + descriptor, e);
		}
	}

	// Total number of values in this column (in this row group).
	final long valueCount = pageReader.getTotalValueCount();
	if (valueCount == 0) {
		throw new IOException("totalValueCount == 0");
	}
}
 
Example 5
Source File: TestStatistics.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the statistics of every data page of every column in the schema.
 *
 * <p>For each column the dictionary page is read first (it may be {@code null}), then data pages
 * are read one at a time until the reader returns {@code null}; each page is passed to
 * {@code validateStatsForPage} together with the dictionary page and the column descriptor.
 *
 * @param schema the schema whose columns are checked
 * @param store the page store that provides a page reader per column
 */
public void validate(MessageType schema, PageReadStore store) {
  for (ColumnDescriptor column : schema.getColumns()) {
    final PageReader pages = store.getPageReader(column);
    final DictionaryPage dictionaryPage = pages.readDictionaryPage();
    for (DataPage current = pages.readPage(); current != null; current = pages.readPage()) {
      validateStatsForPage(current, dictionaryPage, column);
    }
  }
}
 
Example 6
Source File: FileEncodingsIT.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches the dictionary page of a column from a page store.
 *
 * @param pageReadStore the store to obtain the column's page reader from
 * @param columnDescriptor the column whose dictionary page is requested
 * @return whatever {@code readDictionaryPage()} returns for that column's page reader
 */
private static DictionaryPage getDictionaryPageForColumn(PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
  return pageReadStore.getPageReader(columnDescriptor).readDictionaryPage();
}