Java Code Examples for org.apache.parquet.column.page.PageReadStore#getPageReader()
The following examples show how to use
org.apache.parquet.column.page.PageReadStore#getPageReader().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetRecordReaderTest.java From dremio-oss with Apache License 2.0 | 5 votes |
/**
 * Reads the next page of the column at {@code path} as a {@code DataPageV1} and asserts
 * that it carries the expected value count and the expected bytes.
 *
 * @param schema schema used to resolve {@code path} to a column description
 * @param pages  page store the column's page reader is obtained from
 * @param path   path of the column to check
 * @param values expected value count of the page
 * @param bytes  expected content of the page
 * @throws IOException propagated from the calls made while reading and comparing the page
 */
private void validateContains(
    MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
    throws IOException {
  PageReader columnReader = pages.getPageReader(schema.getColumnDescription(path));
  // The cast happens up front, so a non-V1 page fails before any assertion runs.
  DataPageV1 v1Page = (DataPageV1) columnReader.readPage();

  assertEquals(values, v1Page.getValueCount());

  byte[] expectedBytes = bytes.toByteArray();
  byte[] actualBytes = v1Page.getBytes().toByteArray();
  assertArrayEquals(expectedBytes, actualBytes);
}
Example 2
Source File: TestParquetFileWriter.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Reads the next page of the column at {@code path} as a {@code DataPageV2} and asserts
 * its counters, uncompressed size, repetition/definition levels and data.
 *
 * @param schema           schema used to resolve {@code path} to a column description
 * @param pages            page store the column's page reader is obtained from
 * @param path             path of the column to check
 * @param values           expected value count
 * @param rows             expected row count
 * @param nullCount        expected null count
 * @param repetition       expected repetition level bytes
 * @param definition       expected definition level bytes
 * @param data             expected data bytes
 * @param uncompressedSize expected uncompressed size
 * @throws IOException propagated from the calls made while reading and comparing the page
 */
private void validateV2Page(
    MessageType schema,
    PageReadStore pages,
    String[] path,
    int values,
    int rows,
    int nullCount,
    byte[] repetition,
    byte[] definition,
    byte[] data,
    int uncompressedSize)
    throws IOException {
  PageReader columnReader = pages.getPageReader(schema.getColumnDescription(path));
  DataPageV2 v2Page = (DataPageV2) columnReader.readPage();

  // Scalar page attributes.
  assertEquals(values, v2Page.getValueCount());
  assertEquals(rows, v2Page.getRowCount());
  assertEquals(nullCount, v2Page.getNullCount());
  assertEquals(uncompressedSize, v2Page.getUncompressedSize());

  // Byte sections, compared in the same order as they are listed in the signature.
  byte[] actualRepetition = v2Page.getRepetitionLevels().toByteArray();
  assertArrayEquals(repetition, actualRepetition);

  byte[] actualDefinition = v2Page.getDefinitionLevels().toByteArray();
  assertArrayEquals(definition, actualDefinition);

  byte[] actualData = v2Page.getData().toByteArray();
  assertArrayEquals(data, actualData);
}
Example 3
Source File: FileEncodingsIT.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * Drains the page reader of the given column, collecting a {@code reusableCopy} of every
 * data page until {@code readPage()} returns {@code null}.
 *
 * @param pageReadStore    store the column's page reader is obtained from
 * @param columnDescriptor column whose pages are collected
 * @return the copied pages, in the order they were read
 */
private static List<DataPage> getPageGroupForColumn(
    PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
  PageReader columnReader = pageReadStore.getPageReader(columnDescriptor);
  List<DataPage> copies = new ArrayList<DataPage>();
  for (DataPage current = columnReader.readPage();
      current != null;
      current = columnReader.readPage()) {
    copies.add(reusableCopy(current));
  }
  return copies;
}
Example 4
Source File: TestStatistics.java From parquet-mr with Apache License 2.0 | 5 votes |
/**
 * For every column of {@code schema}, reads the column's dictionary page and then passes
 * each data page, together with that dictionary page and the column descriptor, to
 * {@code validateStatsForPage}.
 *
 * @param schema schema whose columns are iterated
 * @param store  page store the per-column page readers are obtained from
 */
public void validate(MessageType schema, PageReadStore store) {
  for (ColumnDescriptor column : schema.getColumns()) {
    PageReader columnReader = store.getPageReader(column);
    // The dictionary page is read once per column, before any data page.
    DictionaryPage dictionary = columnReader.readDictionaryPage();
    for (DataPage current = columnReader.readPage();
        current != null;
        current = columnReader.readPage()) {
      validateStatsForPage(current, dictionary, column);
    }
  }
}
Example 5
Source File: TestColumnChunkPageWriteStore.java From parquet-mr with Apache License 2.0 | 4 votes |
@Test public void test() throws Exception { Path file = new Path("target/test/TestColumnChunkPageWriteStore/test.parquet"); Path root = file.getParent(); FileSystem fs = file.getFileSystem(conf); if (fs.exists(root)) { fs.delete(root, true); } fs.mkdirs(root); MessageType schema = MessageTypeParser.parseMessageType("message test { repeated binary bar; }"); ColumnDescriptor col = schema.getColumns().get(0); Encoding dataEncoding = PLAIN; int valueCount = 10; int d = 1; int r = 2; int v = 3; BytesInput definitionLevels = BytesInput.fromInt(d); BytesInput repetitionLevels = BytesInput.fromInt(r); Statistics<?> statistics = Statistics.getBuilderForReading(Types.required(PrimitiveTypeName.BINARY).named("test_binary")) .build(); BytesInput data = BytesInput.fromInt(v); int rowCount = 5; int nullCount = 1; statistics.incrementNumNulls(nullCount); statistics.setMinMaxFromBytes(new byte[] {0, 1, 2}, new byte[] {0, 1, 2, 3}); long pageOffset; long pageSize; { OutputFileForTesting outputFile = new OutputFileForTesting(file, conf); ParquetFileWriter writer = new ParquetFileWriter(outputFile, schema, Mode.CREATE, ParquetWriter.DEFAULT_BLOCK_SIZE, ParquetWriter.MAX_PADDING_SIZE_DEFAULT); writer.start(); writer.startBlock(rowCount); pageOffset = outputFile.out().getPos(); { ColumnChunkPageWriteStore store = new ColumnChunkPageWriteStore(compressor(GZIP), schema, new HeapByteBufferAllocator(), Integer.MAX_VALUE); PageWriter pageWriter = store.getPageWriter(col); pageWriter.writePageV2( rowCount, nullCount, valueCount, repetitionLevels, definitionLevels, dataEncoding, data, statistics); store.flushToFileWriter(writer); pageSize = outputFile.out().getPos() - pageOffset; } writer.endBlock(); writer.end(new HashMap<String, String>()); } { ParquetMetadata footer = ParquetFileReader.readFooter(conf, file, NO_FILTER); ParquetFileReader reader = new ParquetFileReader( conf, footer.getFileMetaData(), file, footer.getBlocks(), schema.getColumns()); PageReadStore rowGroup = 
reader.readNextRowGroup(); PageReader pageReader = rowGroup.getPageReader(col); DataPageV2 page = (DataPageV2)pageReader.readPage(); assertEquals(rowCount, page.getRowCount()); assertEquals(nullCount, page.getNullCount()); assertEquals(valueCount, page.getValueCount()); assertEquals(d, intValue(page.getDefinitionLevels())); assertEquals(r, intValue(page.getRepetitionLevels())); assertEquals(dataEncoding, page.getDataEncoding()); assertEquals(v, intValue(page.getData())); // Checking column/offset indexes for the one page ColumnChunkMetaData column = footer.getBlocks().get(0).getColumns().get(0); ColumnIndex columnIndex = reader.readColumnIndex(column); assertArrayEquals(statistics.getMinBytes(), columnIndex.getMinValues().get(0).array()); assertArrayEquals(statistics.getMaxBytes(), columnIndex.getMaxValues().get(0).array()); assertEquals(statistics.getNumNulls(), columnIndex.getNullCounts().get(0).longValue()); assertFalse(columnIndex.getNullPages().get(0)); OffsetIndex offsetIndex = reader.readOffsetIndex(column); assertEquals(1, offsetIndex.getPageCount()); assertEquals(pageSize, offsetIndex.getCompressedPageSize(0)); assertEquals(0, offsetIndex.getFirstRowIndex(0)); assertEquals(pageOffset, offsetIndex.getOffset(0)); reader.close(); } }
Example 6
Source File: TestParquetFileWriter.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Reads the next page of the column at {@code path}, asserts its value count, then treats
 * it as a {@code DataPageV1} and asserts that its bytes match {@code bytes}.
 *
 * @param schema schema used to resolve {@code path} to a column description
 * @param pages  page store the column's page reader is obtained from
 * @param path   path of the column to check
 * @param values expected value count of the page
 * @param bytes  expected content of the page
 * @throws IOException propagated from the calls made while reading and comparing the page
 */
private void validateContains(
    MessageType schema, PageReadStore pages, String[] path, int values, BytesInput bytes)
    throws IOException {
  PageReader columnReader = pages.getPageReader(schema.getColumnDescription(path));
  DataPage nextPage = columnReader.readPage();

  assertEquals(values, nextPage.getValueCount());

  byte[] expectedBytes = bytes.toByteArray();
  // The V1 cast is only applied after the value count has been checked.
  DataPageV1 v1Page = (DataPageV1) nextPage;
  assertArrayEquals(expectedBytes, v1Page.getBytes().toByteArray());
}
Example 7
Source File: FileEncodingsIT.java From parquet-mr with Apache License 2.0 | 4 votes |
/**
 * Returns the result of {@code readDictionaryPage()} on the page reader of the given column.
 *
 * @param pageReadStore    store the column's page reader is obtained from
 * @param columnDescriptor column whose dictionary page is requested
 * @return whatever the column's page reader returns for its dictionary page
 */
private static DictionaryPage getDictionaryPageForColumn(
    PageReadStore pageReadStore, ColumnDescriptor columnDescriptor) {
  return pageReadStore.getPageReader(columnDescriptor).readDictionaryPage();
}