Java Code Examples for org.apache.parquet.column.page.DictionaryPage

The following examples show how to use org.apache.parquet.column.page.DictionaryPage. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: Bats   Source File: PageReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Loads the dictionary page announced by {@code pageHeader} and stores the dictionary
 * built from it in {@code this.dictionary}.
 *
 * @param pageHeader   header supplying the compressed and uncompressed page sizes, the
 *                     number of dictionary values and the dictionary encoding
 * @param parentStatus column reader whose {@code columnDescriptor} is handed to the
 *                     dictionary initialisation
 * @throws IOException if the page read or the dictionary initialisation reports one
 */
private void readDictionaryPage(final PageHeader pageHeader,
                                final ColumnReader<?> parentStatus) throws IOException {
  final int onDiskLength = pageHeader.getCompressed_page_size();
  final int rawLength = pageHeader.getUncompressed_page_size();

  final DrillBuf pageBuffer = readPage(pageHeader, onDiskLength, rawLength);
  // Register the buffer with the other dictionary buffers (presumably so it can be
  // released later -- confirm against the owner of allocatedDictionaryBuffers).
  allocatedDictionaryBuffers.add(pageBuffer);

  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(pageBuffer, 0, rawLength),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary =
      dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}
 
Example 2
Source Project: Bats   Source File: AsyncPageReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the dictionary for the page carried by {@code readStatus}, stores it in
 * {@code this.dictionary} and adds the elapsed decode time to
 * {@code stats.timeDictPageDecode}.
 *
 * <p>Any exception raised along the way is forwarded to
 * {@code handleAndThrowException} together with a fixed message.
 *
 * @param readStatus   source of the page header and of the decompressed page data
 * @param parentStatus column reader whose {@code columnDescriptor} is handed to the
 *                     dictionary initialisation
 * @throws UserException declared by this method; presumably raised by
 *                       {@code handleAndThrowException} -- confirm
 */
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
    throws UserException {
  try {
    pageHeader = readStatus.getPageHeader();
    final int rawLength = pageHeader.getUncompressed_page_size();
    final DrillBuf pageBuffer = getDecompressedPageData(readStatus);

    // The stopwatch starts only after the decompressed data has been obtained.
    final Stopwatch decodeTimer = Stopwatch.createStarted();
    allocatedDictionaryBuffers.add(pageBuffer);
    final DictionaryPage dictPage = new DictionaryPage(
        asBytesInput(pageBuffer, 0, rawLength),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        valueOf(pageHeader.dictionary_page_header.encoding.name()));
    this.dictionary =
        dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);

    final long decodeNanos = decodeTimer.elapsed(TimeUnit.NANOSECONDS);
    stats.timeDictPageDecode.addAndGet(decodeNanos);
  } catch (Exception e) {
    handleAndThrowException(e, "Error decoding dictionary page.");
  }
}
 
Example 3
Source Project: parquet-mr   Source File: ColumnWriterBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Finalizes the column chunk; called right after writePage.
 *
 * <p>Writes the dictionary page obtained from {@code dataColumn} (when there is one) and
 * resets the column's dictionary, then writes the bloom filter when both the bloom filter
 * writer and the bloom filter are set.
 *
 * @throws ParquetEncodingException if writing the dictionary page fails with an
 *                                  {@link IOException}
 */
void finalizeColumnChunk() {
  final DictionaryPage pendingDictionary = dataColumn.toDictPageAndClose();
  if (pendingDictionary != null) {
    if (DEBUG) {
      LOG.debug("write dictionary");
    }
    try {
      pageWriter.writeDictionaryPage(pendingDictionary);
    } catch (IOException e) {
      throw new ParquetEncodingException("could not write dictionary page for " + path, e);
    }
    dataColumn.resetDictionary();
  }

  // Nothing more to do unless both the writer and the filter are available.
  if (bloomFilterWriter == null || bloomFilter == null) {
    return;
  }
  bloomFilterWriter.writeBloomFilter(bloomFilter);
}
 
Example 4
Source Project: parquet-mr   Source File: ColumnReaderBase.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a reader for triplets.
 *
 * <p>The dictionary page, if the page reader supplies one, is decoded immediately and
 * offered to the converter when the converter reports dictionary support.
 *
 * @param path the descriptor for the corresponding column; must not be null
 * @param pageReader the underlying store to read from; must not be null
 * @param converter a converter that materializes the values in this column in the current
 *                  record; must not be null
 * @param writerVersion writer version string from the Parquet file being read
 * @throws ParquetDecodingException if the dictionary cannot be decoded or the total value
 *                                  count reported by the page reader is not positive
 */
ColumnReaderBase(ColumnDescriptor path, PageReader pageReader, PrimitiveConverter converter, ParsedVersion writerVersion) {
  this.path = Objects.requireNonNull(path, "path cannot be null");
  this.pageReader = Objects.requireNonNull(pageReader, "pageReader cannot be null");
  this.converter = Objects.requireNonNull(converter, "converter cannot be null");
  this.writerVersion = writerVersion;
  this.maxDefinitionLevel = path.getMaxDefinitionLevel();

  final DictionaryPage dictPage = pageReader.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary page for this column.
    this.dictionary = null;
  } else {
    try {
      this.dictionary = dictPage.getEncoding().initDictionary(path, dictPage);
      if (converter.hasDictionarySupport()) {
        converter.setDictionary(dictionary);
      }
    } catch (IOException e) {
      throw new ParquetDecodingException("could not decode the dictionary for " + path, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (totalValueCount <= 0) {
    throw new ParquetDecodingException("totalValueCount '" + totalValueCount + "' <= 0");
  }
}
 
Example 5
Source Project: parquet-mr   Source File: ColumnChunkPageReadStore.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Returns a decompressed copy of the stored compressed dictionary page.
 *
 * @return the decompressed dictionary page (with the CRC copied over when the compressed
 *         page has one), or {@code null} when there is no compressed dictionary page
 * @throws ParquetDecodingException if decompression fails with an {@link IOException}
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (compressedDictionaryPage == null) {
    return null;
  }
  try {
    final DictionaryPage page = new DictionaryPage(
        decompressor.decompress(
            compressedDictionaryPage.getBytes(),
            compressedDictionaryPage.getUncompressedSize()),
        compressedDictionaryPage.getDictionarySize(),
        compressedDictionaryPage.getEncoding());
    // Carry the checksum over only when the compressed page actually has one.
    if (compressedDictionaryPage.getCrc().isPresent()) {
      page.setCrc(compressedDictionaryPage.getCrc().getAsInt());
    }
    return page;
  } catch (IOException e) {
    throw new ParquetDecodingException("Could not decompress dictionary page", e);
  }
}
 
Example 6
Source Project: parquet-mr   Source File: ParquetFileReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads and decompresses a dictionary page for the given column chunk.
 *
 * <p>Returns null if the column chunk lists neither PLAIN_DICTIONARY nor RLE_DICTIONARY
 * among its encodings, or if the first page header of the chunk carries no dictionary
 * page header.
 *
 * @param meta a column's ColumnChunkMetaData to read the dictionary from
 * @return an uncompressed DictionaryPage or null
 * @throws IOException if there is an error while reading the dictionary
 */
DictionaryPage readDictionary(ColumnChunkMetaData meta) throws IOException {
  final boolean dictionaryEncoded =
      meta.getEncodings().contains(Encoding.PLAIN_DICTIONARY)
          || meta.getEncodings().contains(Encoding.RLE_DICTIONARY);
  if (!dictionaryEncoded) {
    return null;
  }

  // TODO: this should use getDictionaryPageOffset() but it isn't reliable.
  final long chunkStart = meta.getStartingPos();
  if (f.getPos() != chunkStart) {
    f.seek(chunkStart);
  }

  final PageHeader firstHeader = Util.readPageHeader(f);
  if (!firstHeader.isSetDictionary_page_header()) {
    return null; // TODO: should this complain?
  }

  final DictionaryPage compressed = readCompressedDictionary(firstHeader, f);
  final BytesInputDecompressor codec =
      options.getCodecFactory().getDecompressor(meta.getCodec());

  return new DictionaryPage(
      codec.decompress(compressed.getBytes(), compressed.getUncompressedSize()),
      compressed.getDictionarySize(),
      compressed.getEncoding());
}
 
Example 7
Source Project: parquet-mr   Source File: ParquetFileReader.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Reads the still-compressed bytes of a dictionary page from {@code fin} and wraps them
 * in a {@link DictionaryPage}.
 *
 * @param pageHeader header providing the page sizes and the dictionary page header
 * @param fin        stream from which {@code compressed_page_size} bytes are read
 * @return a dictionary page holding the bytes as read, together with the uncompressed
 *         size, value count and encoding taken from the header
 * @throws IOException if the bytes cannot be read in full
 */
private DictionaryPage readCompressedDictionary(
    PageHeader pageHeader, SeekableInputStream fin) throws IOException {
  final DictionaryPageHeader header = pageHeader.getDictionary_page_header();
  final int rawSize = pageHeader.getUncompressed_page_size();

  // The buffer is sized to the compressed length announced by the header.
  final byte[] payload = new byte[pageHeader.getCompressed_page_size()];
  fin.readFully(payload);

  return new DictionaryPage(
      BytesInput.from(payload),
      rawSize,
      header.getNum_values(),
      converter.getEncoding(header.getEncoding()));
}
 
Example 8
Source Project: parquet-mr   Source File: FileEncodingsIT.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates every column of every row group in {@code file} against the matching slice
 * of {@code expectedValues}, once first-to-last and once last-to-first.
 *
 * @param file           file whose blocks and schema are read
 * @param expectedValues expected values; each row group is compared against the sub-list
 *                       starting at the number of rows consumed by earlier row groups
 * @throws IOException declared by this method; presumably raised while reading or
 *                     validating the file -- confirm against the helpers
 */
public static void validatePages(Path file, List<?> expectedValues) throws IOException {
  final List<PageReadStore> rowGroups = readBlocksFromFile(file);
  final MessageType schema = readSchemaFromFile(file);

  int rowGroupID = 0;
  int rowsRead = 0;
  for (PageReadStore rowGroup : rowGroups) {
    for (ColumnDescriptor column : schema.getColumns()) {
      final List<DataPage> dataPages = getPageGroupForColumn(rowGroup, column);
      final DictionaryPage dictionary =
          reusableCopy(getDictionaryPageForColumn(rowGroup, column));

      // Expected values for this row group: [rowsRead, rowsRead + rowCount).
      final int sliceEnd = (int) (rowsRead + rowGroup.getRowCount());
      final List<?> expectedSlice = expectedValues.subList(rowsRead, sliceEnd);

      validateFirstToLast(rowGroupID, dictionary, dataPages, column, expectedSlice);
      validateLastToFirst(rowGroupID, dictionary, dataPages, column, expectedSlice);
    }

    rowsRead += rowGroup.getRowCount();
    rowGroupID++;
  }
}
 
Example 9
Source Project: parquet-mr   Source File: ShowPagesCommand.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Formats a one-line summary of a dictionary page.
 *
 * <p>The line contains the row group number (plus the page number when {@code pageNum}
 * is not 0), the literal {@code "dict"}, the short codec name, the encoding, the number
 * of dictionary entries, the size per entry and the total size.
 *
 * <p>The per-entry size is computed with float division, so a dictionary with 0 entries
 * produces a non-finite value rather than an exception.
 *
 * @param dict the dictionary page to describe
 * @return the formatted summary line
 */
private String printDictionaryPage(DictionaryPage dict) {
  // TODO: the compressed size of a dictionary page is lost in Parquet
  // (A bare dict.getUncompressedSize() call whose result was discarded used to sit here.)
  long totalSize = dict.getCompressedSize();
  int count = dict.getDictionarySize();
  float perValue = ((float) totalSize) / count;
  String enc = encodingAsString(dict.getEncoding(), true);
  if (pageNum == 0) {
    return String.format("%3d-D    %-5s %s %-2s %-7d %-10s %-10s",
        rowGroupNum, "dict", shortCodec, enc, count, humanReadable(perValue),
        humanReadable(totalSize));
  } else {
    return String.format("%3d-%-3d  %-5s %s %-2s %-7d %-10s %-10s",
        rowGroupNum, pageNum, "dict", shortCodec, enc, count, humanReadable(perValue),
        humanReadable(totalSize));
  }
}
 
Example 10
Source Project: Bats   Source File: ParquetColumnChunkPageWriteStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Compresses the given dictionary page and keeps a copy of the result as this writer's
 * dictionary page.
 *
 * @param dictionaryPage the uncompressed dictionary page to store
 * @throws ParquetEncodingException if a dictionary page has already been stored
 * @throws IOException declared by this method; presumably raised by compression or by
 *                     copying the bytes -- confirm
 */
@Override
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException {
  if (this.dictionaryPage != null) {
    throw new ParquetEncodingException("Only one dictionary page is allowed");
  }

  final BytesInput rawBytes = dictionaryPage.getBytes();
  final int rawLength = (int) rawBytes.size();
  // Copy the compressor output before storing it (presumably because the compressor may
  // reuse its buffer -- confirm).
  final BytesInput packed = BytesInput.copy(compressor.compress(rawBytes));

  this.dictionaryPage = new DictionaryPage(
      packed,
      rawLength,
      dictionaryPage.getDictionarySize(),
      dictionaryPage.getEncoding());
}
 
Example 11
Source Project: presto   Source File: PrimitiveColumnWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects the output streams of this column: the dictionary page header and data (when
 * the value writer yields a dictionary page) followed by the buffered pages.
 *
 * <p>Rows still pending in the current page are flushed first. The total compressed and
 * uncompressed sizes are increased by the dictionary header and page sizes, and
 * {@code getDataStreamsCalled} is set.
 *
 * @return an immutable list with the dictionary streams followed by {@code pageBuffer}
 * @throws IOException declared by this method; presumably raised while flushing,
 *                     compressing or writing the header -- confirm
 */
private List<ParquetDataOutput> getDataStreams()
        throws IOException
{
    if (currentPageRows > 0) {
        flushCurrentPageToBuffer();
    }

    List<ParquetDataOutput> dictionaryStreams = new ArrayList<>();

    // write dict page if possible
    DictionaryPage dictionary = primitiveValueWriter.toDictPageAndClose();
    if (dictionary != null) {
        BytesInput rawBytes = copy(dictionary.getBytes());
        long rawSize = dictionary.getUncompressedSize();

        // Start from the plain bytes; swap in the compressed form when a compressor is set.
        ParquetDataOutput body = createDataOutput(rawBytes);
        if (compressor != null) {
            body = compressor.compress(rawBytes);
        }
        long packedSize = body.size();

        ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
        parquetMetadataConverter.writeDictionaryPageHeader(
                toIntExact(rawSize),
                toIntExact(packedSize),
                dictionary.getDictionarySize(),
                dictionary.getEncoding(),
                headerBytes);
        ParquetDataOutput header = createDataOutput(Slices.wrappedBuffer(headerBytes.toByteArray()));

        dictionaryStreams.add(header);
        dictionaryStreams.add(body);
        totalCompressedSize += header.size() + packedSize;
        totalUnCompressedSize += header.size() + rawSize;

        primitiveValueWriter.resetDictionary();
    }
    getDataStreamsCalled = true;

    return ImmutableList.<ParquetDataOutput>builder()
            .addAll(dictionaryStreams)
            .addAll(pageBuffer)
            .build();
}
 
Example 12
Source Project: iceberg   Source File: ParquetUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the dictionary page from {@code pageSource} and decodes it for {@code desc}.
 *
 * @param desc       descriptor of the column the dictionary belongs to
 * @param pageSource page reader asked for the dictionary page
 * @return the decoded dictionary, or {@code null} when there is no dictionary page
 * @throws ParquetDecodingException if decoding fails with an {@link IOException}
 */
public static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  final DictionaryPage page = pageSource.readDictionaryPage();
  if (page == null) {
    return null;
  }
  try {
    return page.getEncoding().initDictionary(desc, page);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
 
Example 13
Source Project: iceberg   Source File: ColumnIterator.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes the dictionary page offered by {@code pageSource}, if any.
 *
 * @param desc       descriptor of the column the dictionary belongs to
 * @param pageSource page reader asked for the dictionary page
 * @return the decoded dictionary, or {@code null} when the page reader has no dictionary
 *         page
 * @throws ParquetDecodingException if decoding fails with an {@link IOException}
 */
private static Dictionary readDictionary(ColumnDescriptor desc, PageReader pageSource) {
  final DictionaryPage dictPage = pageSource.readDictionaryPage();
  if (dictPage == null) {
    // No dictionary page: nothing to decode.
    return null;
  }
  try {
    return dictPage.getEncoding().initDictionary(desc, dictPage);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not decode the dictionary for " + desc, e);
  }
}
 
Example 14
Source Project: dremio-oss   Source File: ColumnChunkIncReadStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Seeks {@code in} to {@code lastPosition}, delegates to the superclass to read the
 * dictionary page, and then records the stream position reached as the new
 * {@code lastPosition}.
 *
 * @return whatever the superclass returns for the dictionary page
 * @throws RuntimeException wrapping any {@link IOException} raised while seeking, reading
 *                          or querying the position
 */
@Override
public DictionaryPage readDictionaryPage() {
  try {
    in.seek(lastPosition);
    final DictionaryPage page = super.readDictionaryPage();
    // Updated only after a successful read.
    lastPosition = in.getPos();
    return page;
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
 
Example 15
Source Project: dremio-oss   Source File: ColumnChunkIncReadStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the dictionary page of this column chunk, reading it on first use.
 *
 * <p>When the next page header carries no dictionary page header, the stream is moved
 * back to where it was before the header was read and {@code null} is returned. A page
 * that was read is kept in {@code dictionaryPage} and returned directly on later calls.
 *
 * @return the dictionary page, or {@code null} if the chunk has none
 * @throws RuntimeException wrapping any exception raised while reading, with the file
 *                          path, metadata and position details in the message
 */
@Override
public DictionaryPage readDictionaryPage() {
  if (dictionaryPage != null) {
    return dictionaryPage;
  }

  // Declared outside the try block so the error message can report them.
  PageHeader header = new PageHeader();
  long startPos = 0;
  try {
    startPos = in.getPos();
    header = Util.readPageHeader(in.asSeekableInputStream());
    if (header.getDictionary_page_header() == null) {
      // Not a dictionary page: rewind so the header can be read again.
      in.seek(startPos);
      return null;
    }
    dictionaryPage = readDictionaryPageHelper(header);
  } catch (Exception e) {
    throw new RuntimeException("Error reading dictionary page." +
      "\nFile path: " + path.toURI().getPath() +
      "\nRow count: " + rowCount +
      "\nColumn Chunk Metadata: " + metaData +
      "\nPage Header: " + header +
      "\nFile offset: " + fileOffset +
      "\nSize: " + size +
      "\nValue read so far: " + valueReadSoFar +
      "\nPosition: " + startPos, e);
  }
  return dictionaryPage;
}
 
Example 16
Source Project: dremio-oss   Source File: ColumnChunkIncReadStore.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@link DictionaryPage} from the uncompressed bytes of the page described by
 * {@code pageHeader}.
 *
 * @param pageHeader header providing the uncompressed size, the number of dictionary
 *                   values and the encoding
 * @return the dictionary page over the first {@code uncompressed_page_size} bytes of the
 *         uncompressed data
 * @throws IOException declared by this method; presumably raised while uncompressing the
 *                     page -- confirm
 */
private DictionaryPage readDictionaryPageHelper(PageHeader pageHeader) throws IOException {
  final ByteBuffer uncompressed = uncompressPage(pageHeader, false);
  final BytesInput payload = BytesInput.from(uncompressed, 0, pageHeader.uncompressed_page_size);
  final int entryCount = pageHeader.getDictionary_page_header().getNum_values();
  return new DictionaryPage(
      payload,
      entryCount,
      parquetMetadataConverter.getEncoding(pageHeader.dictionary_page_header.encoding));
}
 
Example 17
Source Project: dremio-oss   Source File: LocalDictionariesReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the dictionary page located at {@code pageHeader.getOffset()}, decompresses it
 * and decodes it into a {@link Dictionary} for {@code column}.
 *
 * @param in           stream positioned by this method at the page offset before reading
 * @param column       descriptor of the column the dictionary belongs to
 * @param pageHeader   page header together with the offset of the page data
 * @param decompressor decompressor applied to the bytes read
 * @return the decoded dictionary
 * @throws IOException if the stream ends before {@code compressed_page_size} bytes have
 *                     been read, or if reading, decompressing or decoding fails
 */
public static Dictionary readDictionary(FSInputStream in, ColumnDescriptor column, PageHeaderWithOffset pageHeader, BytesInputDecompressor decompressor) throws IOException {
  in.setPosition(pageHeader.getOffset());
  final byte[] data = new byte[pageHeader.getPageHeader().getCompressed_page_size()];

  // A single read() may legitimately deliver fewer bytes than requested, so keep reading
  // until the buffer is full or the stream stops delivering data.
  int read = 0;
  while (read < data.length) {
    final int chunk = in.read(data, read, data.length - read);
    if (chunk <= 0) {
      break;
    }
    read += chunk;
  }
  if (read != data.length) {
    throw new IOException(format("Failed to read dictionary page, read %d bytes, expected %d", read, data.length));
  }

  final DictionaryPage dictionaryPage = new DictionaryPage(
    decompressor.decompress(BytesInput.from(data), pageHeader.getPageHeader().getUncompressed_page_size()),
    pageHeader.getPageHeader().getDictionary_page_header().getNum_values(),
    CONVERTER.getEncoding(pageHeader.getPageHeader().getDictionary_page_header().getEncoding()));
  return dictionaryPage.getEncoding().initDictionary(column, dictionaryPage);
}
 
Example 18
Source Project: dremio-oss   Source File: PageReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Reads the dictionary page described by {@code pageHeader} into a freshly allocated
 * dictionary buffer and stores the dictionary built from it in {@code this.dictionary}.
 *
 * @param pageHeader   header supplying the compressed and uncompressed page sizes, the
 *                     number of dictionary values and the dictionary encoding
 * @param parentStatus column reader whose {@code columnDescriptor} is handed to the
 *                     dictionary initialisation
 * @throws IOException if the page read or the dictionary initialisation reports one
 */
private void readDictionaryPage(final PageHeader pageHeader,
                                final ColumnReader<?> parentStatus) throws IOException {
  final int onDiskLength = pageHeader.getCompressed_page_size();
  final int rawLength = pageHeader.getUncompressed_page_size();

  // The destination buffer is sized to the uncompressed page.
  final ArrowBuf target = allocateDictionaryBuffer(rawLength);
  readPage(pageHeader, onDiskLength, rawLength, target);

  final DictionaryPage dictPage = new DictionaryPage(
      asBytesInput(target, 0, rawLength),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary =
      dictPage.getEncoding().initDictionary(parentStatus.columnDescriptor, dictPage);
}
 
Example 19
Source Project: flink   Source File: AbstractColumnReader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a column reader and eagerly decodes the dictionary page, if the page reader
 * supplies one.
 *
 * @param descriptor descriptor of the column being read
 * @param pageReader source of the dictionary page and of the total value count
 * @throws IOException if the dictionary cannot be decoded or the page reader reports a
 *                     total value count of 0
 */
public AbstractColumnReader(
		ColumnDescriptor descriptor,
		PageReader pageReader) throws IOException {
	this.descriptor = descriptor;
	this.pageReader = pageReader;
	this.maxDefLevel = descriptor.getMaxDefinitionLevel();

	final DictionaryPage dictPage = pageReader.readDictionaryPage();
	if (dictPage == null) {
		this.dictionary = null;
		this.isCurrentPageDictionaryEncoded = false;
	} else {
		try {
			this.dictionary = dictPage.getEncoding().initDictionary(descriptor, dictPage);
			this.isCurrentPageDictionaryEncoded = true;
		} catch (IOException e) {
			throw new IOException("could not decode the dictionary for " + descriptor, e);
		}
	}

	// Total number of values in this column (in this row group); zero is rejected.
	if (pageReader.getTotalValueCount() == 0) {
		throw new IOException("totalValueCount == 0");
	}
}
 
Example 20
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the binary entries that were actually used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code binaryDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final PlainValuesWriter plainWriter = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final Iterator<Binary> keys = binaryDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    plainWriter.writeBytes(keys.next());
  }
  return dictPage(plainWriter);
}
 
Example 21
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the fixed-length binary entries that were actually
 * used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code binaryDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final FixedLenByteArrayPlainValuesWriter fixedWriter = new FixedLenByteArrayPlainValuesWriter(length, lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final Iterator<Binary> keys = binaryDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    fixedWriter.writeBytes(keys.next());
  }
  return dictPage(fixedWriter);
}
 
Example 22
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the long entries that were actually used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code longDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final PlainValuesWriter plainWriter = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final LongIterator keys = longDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    final long entry = keys.nextLong();
    plainWriter.writeLong(entry);
  }
  return dictPage(plainWriter);
}
 
Example 23
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the double entries that were actually used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code doubleDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final PlainValuesWriter plainWriter = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final DoubleIterator keys = doubleDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    final double entry = keys.nextDouble();
    plainWriter.writeDouble(entry);
  }
  return dictPage(plainWriter);
}
 
Example 24
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the int entries that were actually used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code intDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final PlainValuesWriter plainWriter = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final it.unimi.dsi.fastutil.ints.IntIterator keys = intDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    final int entry = keys.nextInt();
    plainWriter.writeInteger(entry);
  }
  return dictPage(plainWriter);
}
 
Example 25
Source Project: parquet-mr   Source File: DictionaryValuesWriter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Builds the dictionary page from the float entries that were actually used.
 *
 * @return the page produced by {@code dictPage} from the first
 *         {@code lastUsedDictionarySize} keys (in iteration order) of
 *         {@code floatDictionaryContent}, or {@code null} when
 *         {@code lastUsedDictionarySize} is not positive
 */
@Override
public DictionaryPage toDictPageAndClose() {
  // A dictionary is returned only if it was actually used.
  if (lastUsedDictionarySize <= 0) {
    return null;
  }
  final PlainValuesWriter plainWriter = new PlainValuesWriter(lastUsedDictionaryByteSize, maxDictionaryByteSize, allocator);
  final FloatIterator keys = floatDictionaryContent.keySet().iterator();
  // Only the used part of the dictionary is written.
  for (int written = 0; written < lastUsedDictionarySize; written++) {
    final float entry = keys.nextFloat();
    plainWriter.writeFloat(entry);
  }
  return dictPage(plainWriter);
}
 
Example 26
Source Project: parquet-mr   Source File: PlainValuesDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * @param dictionaryPage the PLAIN encoded content of the dictionary
 * @throws IOException if there is an exception while decoding the dictionary page
 */
protected PlainValuesDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage.getEncoding());
  if (dictionaryPage.getEncoding() != PLAIN_DICTIONARY
      && dictionaryPage.getEncoding() != PLAIN) {
    throw new ParquetDecodingException("Dictionary data encoding type not supported: " + dictionaryPage.getEncoding());
  }
}
 
Example 27
Source Project: parquet-mr   Source File: PlainValuesDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes {@link Binary} values from a {@link DictionaryPage}.
 *
 * <p>With a non-null {@code length}, every value is read as a fixed-length array of that
 * many bytes. With a null {@code length}, each value is read as a 4-byte little-endian
 * length followed by that many bytes.
 *
 * @param dictionaryPage a {@code DictionaryPage} of encoded binary values
 * @param length a fixed length of binary arrays, or null if not fixed
 * @throws IOException if there is an exception while decoding the dictionary page
 * @throws IllegalArgumentException presumably raised by the precondition check when
 *                                  {@code length} is non-null but not positive -- confirm
 */
public PlainBinaryDictionary(DictionaryPage dictionaryPage, Integer length) throws IOException {
  super(dictionaryPage);
  final ByteBuffer buffer = dictionaryPage.getBytes().toByteBuffer();
  binaryDictionaryContent = new Binary[dictionaryPage.getDictionarySize()];

  // Decoding starts at the buffer's current position and advances value by value.
  int cursor = buffer.position();
  if (length != null) {
    // dictionary values are stored as fixed-length arrays
    final int fixedLength = length;
    Preconditions.checkArgument(fixedLength > 0,
        "Invalid byte array length: " + fixedLength);
    for (int slot = 0; slot < binaryDictionaryContent.length; slot++) {
      binaryDictionaryContent[slot] = Binary.fromConstantByteBuffer(buffer, cursor, fixedLength);
      cursor += fixedLength;
    }
  } else {
    // dictionary values are stored in order: size (4 bytes LE) followed by {size} bytes
    for (int slot = 0; slot < binaryDictionaryContent.length; slot++) {
      final int valueLength = readIntLittleEndian(buffer, cursor);
      cursor += 4; // skip the length prefix
      binaryDictionaryContent[slot] = Binary.fromConstantByteBuffer(buffer, cursor, valueLength);
      cursor += valueLength; // move on to the next value
    }
  }
}
 
Example 28
Source Project: parquet-mr   Source File: PlainValuesDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes every long value of the dictionary page into {@code longDictionaryContent}.
 *
 * @param dictionaryPage a dictionary page of encoded long values
 * @throws IOException if there is an exception while decoding the dictionary page
 */
public PlainLongDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final ByteBufferInputStream stream = dictionaryPage.getBytes().toInputStream();
  final int entryCount = dictionaryPage.getDictionarySize();
  longDictionaryContent = new long[entryCount];

  final LongPlainValuesReader reader = new LongPlainValuesReader();
  reader.initFromPage(entryCount, stream);
  int slot = 0;
  while (slot < longDictionaryContent.length) {
    longDictionaryContent[slot] = reader.readLong();
    slot++;
  }
}
 
Example 29
Source Project: parquet-mr   Source File: PlainValuesDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes every double value of the dictionary page into {@code doubleDictionaryContent}.
 *
 * @param dictionaryPage a dictionary page of encoded double values
 * @throws IOException if there is an exception while decoding the dictionary page
 */
public PlainDoubleDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final ByteBufferInputStream stream = dictionaryPage.getBytes().toInputStream();
  final int entryCount = dictionaryPage.getDictionarySize();
  doubleDictionaryContent = new double[entryCount];

  final DoublePlainValuesReader reader = new DoublePlainValuesReader();
  reader.initFromPage(entryCount, stream);
  int slot = 0;
  while (slot < doubleDictionaryContent.length) {
    doubleDictionaryContent[slot] = reader.readDouble();
    slot++;
  }
}
 
Example 30
Source Project: parquet-mr   Source File: PlainValuesDictionary.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Decodes every integer value of the dictionary page into {@code intDictionaryContent}.
 *
 * @param dictionaryPage a dictionary page of encoded integer values
 * @throws IOException if there is an exception while decoding the dictionary page
 */
public PlainIntegerDictionary(DictionaryPage dictionaryPage) throws IOException {
  super(dictionaryPage);
  final ByteBufferInputStream stream = dictionaryPage.getBytes().toInputStream();
  final int entryCount = dictionaryPage.getDictionarySize();
  intDictionaryContent = new int[entryCount];

  final IntegerPlainValuesReader reader = new IntegerPlainValuesReader();
  reader.initFromPage(entryCount, stream);
  int slot = 0;
  while (slot < intDictionaryContent.length) {
    intDictionaryContent[slot] = reader.readInteger();
    slot++;
  }
}