Java Code Examples for org.apache.parquet.column.ColumnDescriptor

The following examples show how to use org.apache.parquet.column.ColumnDescriptor. They are extracted from open source projects; the project, source file, and license are noted above each example.
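
A ColumnDescriptor identifies one leaf (primitive) column of a Parquet schema: its path as a String[], its physical type, and its maximum repetition and definition levels. As a quick orientation before the project examples, here is a minimal, self-contained sketch (the class name and schema string are illustrative, not taken from any of the projects below) showing how descriptors are normally obtained from a MessageType and what they expose:

import java.util.Arrays;

import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class ColumnDescriptorTour {
  public static void main(String[] args) {
    // Descriptors are obtained from a schema rather than constructed directly.
    MessageType schema = MessageTypeParser.parseMessageType(
        "message example { optional group owner { repeated binary phone (UTF8); } }");
    for (ColumnDescriptor col : schema.getColumns()) {
      System.out.printf("path=%s type=%s R:%d D:%d%n",
          Arrays.toString(col.getPath()),                // leaf path as String[]
          col.getPrimitiveType().getPrimitiveTypeName(), // physical type
          col.getMaxRepetitionLevel(),                   // max repetition level
          col.getMaxDefinitionLevel());                  // max definition level
    }
  }
}

For the single leaf above this prints path=[owner, phone] type=BINARY R:1 D:2, the same R/D values that Examples 6 and 14 below report via MetadataUtils.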
Example 1
Source Project: parquet-mr   Source File: ColumnWriteStoreBase.java    License: Apache License 2.0
@Deprecated
ColumnWriteStoreBase(
    final PageWriteStore pageWriteStore,
    final ParquetProperties props) {
  this.props = props;
  this.thresholdTolerance = (long) (props.getPageSizeThreshold() * THRESHOLD_TOLERANCE_RATIO);

  this.columns = new TreeMap<>();

  this.rowCountForNextSizeCheck = min(props.getMinRowCountForPageSizeCheck(), props.getPageRowCountLimit());

  columnWriterProvider = new ColumnWriterProvider() {
    @Override
    public ColumnWriter getColumnWriter(ColumnDescriptor path) {
      ColumnWriterBase column = columns.get(path);
      if (column == null) {
        column = createColumnWriter(path, pageWriteStore.getPageWriter(path), null, props);
        columns.put(path, column);
      }
      return column;
    }
  };
}
 
Example 2
Source Project: parquet-mr   Source File: ParquetFileReader.java    License: Apache License 2.0
/**
 * @param conf the Hadoop Configuration
 * @param file Path to a parquet file
 * @param footer a {@link ParquetMetadata} footer already read from the file
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer) throws IOException {
  this.converter = new ParquetMetadataConverter(conf);
  this.file = HadoopInputFile.fromPath(file, conf);
  this.f = this.file.newStream();
  this.options = HadoopReadOptions.builder(conf).build();
  this.footer = footer;
  this.fileMetaData = footer.getFileMetaData();
  this.blocks = filterRowGroups(footer.getBlocks());
  this.blockIndexStores = listWithNulls(this.blocks.size());
  this.blockRowRanges = listWithNulls(this.blocks.size());
  for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
    paths.put(ColumnPath.get(col.getPath()), col);
  }
  this.crc = options.usePageChecksumVerification() ? new CRC32() : null;
}
 
Example 3
Source Project: parquet-mr   Source File: Util.java    License: Apache License 2.0
public static String encodingsAsString(Set<Encoding> encodings, ColumnDescriptor desc) {
  StringBuilder sb = new StringBuilder();
  if (encodings.contains(RLE) || encodings.contains(BIT_PACKED)) {
    sb.append(desc.getMaxDefinitionLevel() == 0 ? "B" : "R");
    sb.append(desc.getMaxRepetitionLevel() == 0 ? "B" : "R");
    if (encodings.contains(PLAIN_DICTIONARY)) {
      sb.append("R");
    }
    if (encodings.contains(PLAIN)) {
      sb.append("_");
    }
  } else {
    sb.append("RR");
    if (encodings.contains(RLE_DICTIONARY)) {
      sb.append("R");
    }
    if (encodings.contains(PLAIN)) {
      sb.append("_");
    }
    if (encodings.contains(DELTA_BYTE_ARRAY) ||
        encodings.contains(DELTA_BINARY_PACKED) ||
        encodings.contains(DELTA_LENGTH_BYTE_ARRAY)) {
      sb.append("D");
    }
  }
  return sb.toString();
}
 
Example 4
Source Project: parquet-mr   Source File: DefaultV2ValuesWriterFactory.java    License: Apache License 2.0
private ValuesWriter getDoubleValuesWriter(ColumnDescriptor path) {
  ValuesWriter fallbackWriter = null;
  if (this.parquetProperties.isByteStreamSplitEnabled()) {
    fallbackWriter = new ByteStreamSplitValuesWriter.DoubleByteStreamSplitValuesWriter(
        parquetProperties.getInitialSlabSize(), parquetProperties.getPageSizeThreshold(),
        parquetProperties.getAllocator());
  } else {
    fallbackWriter = new PlainValuesWriter(
        parquetProperties.getInitialSlabSize(), parquetProperties.getPageSizeThreshold(),
        parquetProperties.getAllocator());
  }
  return DefaultValuesWriterFactory.dictWriterWithFallBack(
      path, parquetProperties, getEncodingForDictionaryPage(), getEncodingForDataPage(),
      fallbackWriter);
}
 
Example 5
Source Project: parquet-mr   Source File: FileEncodingsIT.java    License: Apache License 2.0
private static void validateFirstToLast(int rowGroupID, DictionaryPage dictPage, List<DataPage> pageGroup, ColumnDescriptor desc, List<?> expectedValues) {
  int rowsRead = 0, pageID = 0;
  for (DataPage page : pageGroup) {
    List<?> expectedPageValues = expectedValues.subList(rowsRead, rowsRead + page.getValueCount());
    PageValuesValidator.validateValuesForPage(rowGroupID, pageID, dictPage, page, desc, expectedPageValues);
    rowsRead += page.getValueCount();
    pageID++;
  }
}
 
Example 6
Source Project: parquet-mr   Source File: MetadataUtils.java    License: Apache License 2.0
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String name = Strings.repeat(".", depth) + type.getName();
  Repetition rep = type.getRepetition();
  PrimitiveTypeName ptype = type.getPrimitiveTypeName();

  out.format("%s: %s %s", name, rep, ptype);
  if (showOriginalTypes) {
    OriginalType otype;
    try {
      otype = type.getOriginalType();
    } catch (Exception e) {
      otype = null;
    }
    if (otype != null) out.format(" O:%s", otype);
  } else {
    LogicalTypeAnnotation ltype = type.getLogicalTypeAnnotation();
    if (ltype != null) out.format(" L:%s", ltype);
  }

  if (container != null) {
    cpath.add(type.getName());
    String[] paths = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor desc = container.getColumnDescription(paths);

    int defl = desc.getMaxDefinitionLevel();
    int repl = desc.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", repl, defl);
  }
  out.println();
}
 
Example 7
Source Project: presto   Source File: TestTupleDomainParquetPredicate.java    License: Apache License 2.0
@Test
public void testVarcharMatchesWithDictionaryDescriptor()
{
    ColumnDescriptor columnDescriptor = new ColumnDescriptor(new String[] {"path"}, BINARY, 0, 0);
    RichColumnDescriptor column = new RichColumnDescriptor(columnDescriptor, new PrimitiveType(OPTIONAL, BINARY, "Test column"));
    TupleDomain<ColumnDescriptor> effectivePredicate = getEffectivePredicate(column, createVarcharType(255), EMPTY_SLICE);
    TupleDomainParquetPredicate parquetPredicate = new TupleDomainParquetPredicate(effectivePredicate, singletonList(column));
    DictionaryPage page = new DictionaryPage(Slices.wrappedBuffer(new byte[] {0, 0, 0, 0}), 1, PLAIN_DICTIONARY);
    assertTrue(parquetPredicate.matches(new DictionaryDescriptor(column, Optional.of(page))));
}
 
Example 8
Source Project: Bats   Source File: ReadState.java    License: Apache License 2.0
/**
 * Create the readers needed to read columns: fixed-length or variable length.
 *
 * @param reader the Parquet record reader that owns the column readers
 * @param output the output mutator used to create the value vectors
 * @throws Exception if a column reader or its vector cannot be created
 */
@SuppressWarnings("unchecked")
public void buildReader(ParquetRecordReader reader, OutputMutator output) throws Exception {
  final ArrayList<VarLengthColumn<? extends ValueVector>> varLengthColumns = new ArrayList<>();
  // initialize all of the column read status objects
  BlockMetaData rowGroupMetadata = schema.getRowGroupMetadata();
  Map<String, Integer> columnChunkMetadataPositionsInList = schema.buildChunkMap(rowGroupMetadata);
  for (ParquetColumnMetadata columnMetadata : schema.getColumnMetadata()) {
    ColumnDescriptor column = columnMetadata.column;
    columnMetadata.columnChunkMetaData = rowGroupMetadata.getColumns().get(
        columnChunkMetadataPositionsInList.get(Arrays.toString(column.getPath())));
    columnMetadata.buildVector(output);
    if (!columnMetadata.isFixedLength()) {
      // create a reader and add it to the appropriate list
      varLengthColumns.add(columnMetadata.makeVariableWidthReader(reader));
    } else if (columnMetadata.isRepeated()) {
      varLengthColumns.add(columnMetadata.makeRepeatedFixedWidthReader(reader));
    } else {
      fixedLenColumnReaders.add(columnMetadata.makeFixedWidthReader(reader));
    }
  }
  varLengthReader = new VarLenBinaryReader(reader, varLengthColumns);
  if (!schema.isStarQuery()) {
    schema.createNonExistentColumns(output, nullFilledVectors);
  }
}
 
Example 9
Source Project: parquet-mr   Source File: ColumnChunkPageWriteStore.java    License: Apache License 2.0
public ColumnChunkPageWriteStore(BytesCompressor compressor, MessageType schema, ByteBufferAllocator allocator,
    int columnIndexTruncateLength, boolean pageWriteChecksumEnabled) {
  this.schema = schema;
  for (ColumnDescriptor path : schema.getColumns()) {
    writers.put(path, new ColumnChunkPageWriter(path, compressor, allocator, columnIndexTruncateLength, pageWriteChecksumEnabled));
  }
}
 
Example 10
Source Project: parquet-mr   Source File: TestColumnReaderImpl.java    License: Apache License 2.0
@Test
public void testOptional() throws Exception {
  MessageType schema = MessageTypeParser.parseMessageType("message test { optional binary foo; }");
  ColumnDescriptor col = schema.getColumns().get(0);
  MemPageWriter pageWriter = new MemPageWriter();
  ColumnWriterV2 columnWriterV2 = new ColumnWriterV2(col, pageWriter,
      ParquetProperties.builder()
          .withDictionaryPageSize(1024).withWriterVersion(PARQUET_2_0)
          .withPageSize(2048).build());
  for (int i = 0; i < rows; i++) {
    columnWriterV2.writeNull(0, 0);
    if ((i + 1) % 1000 == 0) {
      columnWriterV2.writePage();
    }
  }
  columnWriterV2.writePage();
  columnWriterV2.finalizeColumnChunk();
  List<DataPage> pages = pageWriter.getPages();
  int valueCount = 0;
  int rowCount = 0;
  for (DataPage dataPage : pages) {
    valueCount += dataPage.getValueCount();
    rowCount += ((DataPageV2)dataPage).getRowCount();
  }
  assertEquals(rows, rowCount);
  assertEquals(rows, valueCount);
  MemPageReader pageReader = new MemPageReader(rows, pages.iterator(), pageWriter.getDictionaryPage());
  ValidatingConverter converter = new ValidatingConverter();
  ColumnReader columnReader = new ColumnReaderImpl(col, pageReader, converter, VersionParser.parse(Version.FULL_VERSION));
  for (int i = 0; i < rows; i++) {
    assertEquals(0, columnReader.getCurrentRepetitionLevel());
    assertEquals(0, columnReader.getCurrentDefinitionLevel());
    columnReader.consume();
  }
  assertEquals(0, converter.count);
}
 
Example 11
Source Project: presto   Source File: ParquetReader.java    License: Apache License 2.0
private ColumnChunkMetaData getColumnChunkMetaData(BlockMetaData blockMetaData, ColumnDescriptor columnDescriptor)
        throws IOException
{
    for (ColumnChunkMetaData metadata : blockMetaData.getColumns()) {
        if (metadata.getPath().equals(ColumnPath.get(columnDescriptor.getPath()))) {
            return metadata;
        }
    }
    throw new ParquetCorruptionException("Metadata is missing for column: %s", columnDescriptor);
}
 
Example 12
public IncrementallyUpdatedFilterPredicateBuilderBase(List<PrimitiveColumnIO> leaves) {
  for (PrimitiveColumnIO leaf : leaves) {
    ColumnDescriptor descriptor = leaf.getColumnDescriptor();
    ColumnPath path = ColumnPath.get(descriptor.getPath());
    PrimitiveComparator<?> comparator = descriptor.getPrimitiveType().comparator();
    comparatorsByColumn.put(path, comparator);
  }
}
 
Example 13
Source Project: parquet-mr   Source File: TestStatistics.java    License: Apache License 2.0
public void validate(MessageType schema, PageReadStore store) {
  for (ColumnDescriptor desc : schema.getColumns()) {
    PageReader reader = store.getPageReader(desc);
    DictionaryPage dict = reader.readDictionaryPage();
    DataPage page;
    while ((page = reader.readPage()) != null) {
      validateStatsForPage(page, dict, desc);
    }
  }
}
 
Example 14
Source Project: parquet-mr   Source File: MetadataUtils.java    License: Apache License 2.0
public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
  String path = Joiner.on(".").skipNulls().join(desc.getPath());
  PrimitiveTypeName type = desc.getType();
  int defl = desc.getMaxDefinitionLevel();
  int repl = desc.getMaxRepetitionLevel();

  out.format("column desc: %s T:%s R:%d D:%d%n", path, type, repl, defl);
}
 
Example 15
Source Project: Bats   Source File: ColumnChunkIncReadStore.java    License: Apache License 2.0
public void addColumn(ColumnDescriptor descriptor, ColumnChunkMetaData metaData) throws IOException {
  FSDataInputStream in = fs.open(path);
  streams.add(in);
  in.seek(metaData.getStartingPos());
  ColumnChunkIncPageReader reader = new ColumnChunkIncPageReader(metaData, descriptor, in);

  columns.put(descriptor, reader);
}
 
Example 16
Source Project: dremio-oss   Source File: LocalDictionariesReader.java    License: Apache License 2.0
/**
 * Return the dictionary per row group for all binary columns in the given parquet file.
 * @param fs filesystem object.
 * @param filePath parquet file to scan
 * @param codecFactory factory providing decompressors for the dictionary pages
 * @return pair of the dictionaries found for binary fields and the set of binary fields which are not dictionary encoded.
 * @throws IOException if the footer or a dictionary page cannot be read
 */
public static Pair<Map<ColumnDescriptor, Dictionary>, Set<ColumnDescriptor>> readDictionaries(FileSystem fs, Path filePath, CompressionCodecFactory codecFactory) throws IOException {
  // Passing the max footer length is not required in this case as the parquet reader would already have failed.
  final ParquetMetadata parquetMetadata = SingletonParquetFooterCache.readFooter(fs, filePath, ParquetMetadataConverter.NO_FILTER,
    ExecConstants.PARQUET_MAX_FOOTER_LEN_VALIDATOR.getDefault().getNumVal());
  if (parquetMetadata.getBlocks().size() > 1) {
    throw new IOException(
      format("Global dictionaries can only be built on a parquet file with a single row group, found %d row groups for file %s",
        parquetMetadata.getBlocks().size(), filePath));
  }
  final BlockMetaData rowGroupMetadata = parquetMetadata.getBlocks().get(0);
  final Map<ColumnPath, ColumnDescriptor> columnDescriptorMap = Maps.newHashMap();

  for (ColumnDescriptor columnDescriptor : parquetMetadata.getFileMetaData().getSchema().getColumns()) {
    columnDescriptorMap.put(ColumnPath.get(columnDescriptor.getPath()), columnDescriptor);
  }

  final Set<ColumnDescriptor> columnsToSkip = Sets.newHashSet(); // columns which are found in parquet file but are not dictionary encoded
  final Map<ColumnDescriptor, Dictionary> dictionaries = Maps.newHashMap();
  try (final FSInputStream in = fs.open(filePath)) {
    for (ColumnChunkMetaData columnChunkMetaData : rowGroupMetadata.getColumns()) {
      if (isBinaryType(columnChunkMetaData.getType())) {
        final ColumnDescriptor column = columnDescriptorMap.get(columnChunkMetaData.getPath());
        // if first page is dictionary encoded then load dictionary, otherwise skip this column.
        final PageHeaderWithOffset pageHeader = columnChunkMetaData.getPageHeaders().get(0);
        if (PageType.DICTIONARY_PAGE == pageHeader.getPageHeader().getType()) {
          dictionaries.put(column, readDictionary(in, column, pageHeader, codecFactory.getDecompressor(columnChunkMetaData.getCodec())));
        } else {
          columnsToSkip.add(column);
        }
      }
    }
  }
  return new ImmutablePair<>(dictionaries, columnsToSkip);
}
 
Example 17
Source Project: dremio-oss   Source File: VarLengthValuesColumn.java    License: Apache License 2.0
VarLengthValuesColumn(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                      ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v,
                      SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
  variableWidthVector = (VariableWidthVector) valueVec;
  if (columnChunkMetaData.getEncodings().contains(Encoding.PLAIN_DICTIONARY)) {
    usingDictionary = true;
  } else {
    usingDictionary = false;
  }
}
 
Example 18
Source Project: dremio-oss   Source File: ColumnReaderFactory.java    License: Apache License 2.0
static VarLengthValuesColumn<?> getReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                                          ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, ValueVector v,
                                          SchemaElement schemaElement
) throws ExecutionSetupException {
  ConvertedType convertedType = schemaElement.getConverted_type();
  switch (descriptor.getMaxDefinitionLevel()) {
    case 0:
      if (convertedType == null) {
        return new VarLengthColumnReaders.VarBinaryColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement);
      }
      switch (convertedType) {
        case UTF8:
          return new VarLengthColumnReaders.VarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarCharVector) v, schemaElement);
        case DECIMAL:
          return new VarLengthColumnReaders.Decimal28Column(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (DecimalVector) v, schemaElement);
        default:
          return new VarLengthColumnReaders.VarBinaryColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement);
      }
    default:
      if (convertedType == null) {
        return new VarLengthColumnReaders.NullableVarBinaryColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement);
      }

      switch (convertedType) {
        case UTF8:
          return new VarLengthColumnReaders.NullableVarCharColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarCharVector) v, schemaElement);
        case DECIMAL:
          return new NullableDecimalColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (DecimalVector) v, schemaElement);
        default:
          return new VarLengthColumnReaders.NullableVarBinaryColumn(parentReader, allocateSize, descriptor, columnChunkMetaData, fixedLength, (VarBinaryVector) v, schemaElement);
      }
  }
}
 
Example 19
Source Project: dremio-oss   Source File: GlobalDictionaryBuilder.java    License: Apache License 2.0
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
  final VectorContainer input = new VectorContainer(bufferAllocator);
  final BigIntVector longVector = input.addOrGet(field);
  longVector.allocateNew();
  SortedSet<Long> values = Sets.newTreeSet();
  for (Dictionary dictionary : dictionaries) {
    for (int i = 0; i <= dictionary.getMaxId(); ++i) {
      values.add(dictionary.decodeToLong(i));
    }
  }
  if (existingDict != null) {
    final BigIntVector existingDictValues = existingDict.getValueAccessorById(BigIntVector.class, 0).getValueVector();
    for (int i = 0; i < existingDict.getRecordCount(); ++i) {
      values.add(existingDictValues.get(i));
    }
  }
  final Iterator<Long> iter = values.iterator();
  int recordCount = 0;
  while (iter.hasNext()) {
    longVector.setSafe(recordCount++, iter.next());
  }
  longVector.setValueCount(recordCount);
  input.setRecordCount(recordCount);
  input.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return input;
}
 
Example 20
Source Project: presto   Source File: PredicateUtils.java    License: Apache License 2.0
public static boolean predicateMatches(Predicate parquetPredicate, BlockMetaData block, ParquetDataSource dataSource, Map<List<String>, RichColumnDescriptor> descriptorsByPath, TupleDomain<ColumnDescriptor> parquetTupleDomain, boolean failOnCorruptedParquetStatistics)
        throws ParquetCorruptionException
{
    Map<ColumnDescriptor, Statistics<?>> columnStatistics = getStatistics(block, descriptorsByPath);
    if (!parquetPredicate.matches(block.getRowCount(), columnStatistics, dataSource.getId(), failOnCorruptedParquetStatistics)) {
        return false;
    }

    return dictionaryPredicatesMatch(parquetPredicate, block, dataSource, descriptorsByPath, parquetTupleDomain);
}
 
Example 21
Source Project: parquet-mr   Source File: DictionaryFilter.java    License: Apache License 2.0
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
  ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
  DictionaryPage page = dictionaries.readDictionaryPage(col);

  // the chunk may not be dictionary-encoded
  if (page == null) {
    return null;
  }

  Dictionary dict = page.getEncoding().initDictionary(col, page);

  IntFunction<Object> dictValueProvider;
  PrimitiveTypeName type = meta.getPrimitiveType().getPrimitiveTypeName();
  switch (type) {
  case FIXED_LEN_BYTE_ARRAY: // Same as BINARY
  case BINARY:
    dictValueProvider = dict::decodeToBinary;
    break;
  case INT32:
    dictValueProvider = dict::decodeToInt;
    break;
  case INT64:
    dictValueProvider = dict::decodeToLong;
    break;
  case FLOAT:
    dictValueProvider = dict::decodeToFloat;
    break;
  case DOUBLE:
    dictValueProvider = dict::decodeToDouble;
    break;
  default:
    LOG.warn("Unsupported dictionary type: {}", type);
    return null;
  }

  Set<T> dictSet = new HashSet<>();
  for (int i = 0; i <= dict.getMaxId(); i++) {
    dictSet.add((T) dictValueProvider.apply(i));
  }
  
  return dictSet;
}
 
Example 22
Source Project: iceberg   Source File: ArrowVectorAccessors.java    License: Apache License 2.0
@NotNull
private static ArrowVectorAccessor getDictionaryVectorAccessor(
    Dictionary dictionary,
    ColumnDescriptor desc,
    FieldVector vector, PrimitiveType primitive) {
  Preconditions.checkState(vector instanceof IntVector, "Dictionary ids should be stored in IntVectors only");
  if (primitive.getOriginalType() != null) {
    switch (desc.getPrimitiveType().getOriginalType()) {
      case ENUM:
      case JSON:
      case UTF8:
      case BSON:
        return new DictionaryStringAccessor((IntVector) vector, dictionary);
      case INT_64:
      case TIMESTAMP_MILLIS:
      case TIMESTAMP_MICROS:
        return new DictionaryLongAccessor((IntVector) vector, dictionary);
      case DECIMAL:
        switch (primitive.getPrimitiveTypeName()) {
          case BINARY:
          case FIXED_LEN_BYTE_ARRAY:
            return new DictionaryDecimalBinaryAccessor(
                (IntVector) vector,
                dictionary);
          case INT64:
            return new DictionaryDecimalLongAccessor(
                (IntVector) vector,
                dictionary);
          case INT32:
            return new DictionaryDecimalIntAccessor(
                (IntVector) vector,
                dictionary);
          default:
            throw new UnsupportedOperationException(
                "Unsupported base type for decimal: " + primitive.getPrimitiveTypeName());
        }
      default:
        throw new UnsupportedOperationException(
            "Unsupported logical type: " + primitive.getOriginalType());
    }
  } else {
    switch (primitive.getPrimitiveTypeName()) {
      case FIXED_LEN_BYTE_ARRAY:
      case BINARY:
        return new DictionaryBinaryAccessor((IntVector) vector, dictionary);
      case FLOAT:
        return new DictionaryFloatAccessor((IntVector) vector, dictionary);
      case INT64:
        return new DictionaryLongAccessor((IntVector) vector, dictionary);
      case DOUBLE:
        return new DictionaryDoubleAccessor((IntVector) vector, dictionary);
      default:
        throw new UnsupportedOperationException("Unsupported type: " + primitive);
    }
  }
}
 
Example 23
Source Project: flink   Source File: BytesColumnReader.java    License: Apache License 2.0
public BytesColumnReader(
		ColumnDescriptor descriptor,
		PageReader pageReader) throws IOException {
	super(descriptor, pageReader);
	checkTypeName(PrimitiveType.PrimitiveTypeName.BINARY);
}
 
Example 24
Source Project: iceberg   Source File: GenericParquetWriter.java    License: Apache License 2.0
private TimestampWriter(ColumnDescriptor desc) {
  super(desc);
}
 
Example 25
Source Project: Bats   Source File: BitReader.java    License: Apache License 2.0
BitReader(ParquetRecordReader parentReader, ColumnDescriptor descriptor, ColumnChunkMetaData columnChunkMetaData,
          boolean fixedLength, BitVector v, SchemaElement schemaElement) throws ExecutionSetupException {
  super(parentReader, descriptor, columnChunkMetaData, fixedLength, v, schemaElement);
}
 
Example 26
Source Project: iceberg   Source File: ParquetValueReaders.java    License: Apache License 2.0
public LongAsDecimalReader(ColumnDescriptor desc, int scale) {
  super(desc);
  this.scale = scale;
}
 
Example 27
Source Project: iceberg   Source File: BasePageIterator.java    License: Apache License 2.0
protected abstract void initDefinitionLevelsReader(DataPageV1 dataPageV1, ColumnDescriptor descriptor,
    ByteBufferInputStream in, int count) throws IOException;
 
Example 28
Source Project: parquet-mr   Source File: DefaultValuesWriterFactoryTest.java    License: Apache License 2.0
private void validateFactory(ValuesWriterFactory factory, PrimitiveTypeName typeName, String colName,
    Class<? extends ValuesWriter> initialWriterClass, Class<? extends ValuesWriter> fallbackWriterClass) {
  ColumnDescriptor column = createColumnDescriptor(typeName, colName);
  ValuesWriter writer = factory.newValuesWriter(column);
  validateFallbackWriter(writer, initialWriterClass, fallbackWriterClass);
}
 
Example 29
Source Project: parquet-mr   Source File: DefaultValuesWriterFactory.java    License: Apache License 2.0
@Override
public ValuesWriter newValuesWriter(ColumnDescriptor descriptor) {
  return delegateFactory.newValuesWriter(descriptor);
}
 
Example 30
Source Project: iceberg   Source File: ParquetValueWriters.java    License: Apache License 2.0
public static PrimitiveWriter<BigDecimal> decimalAsLong(ColumnDescriptor desc,
                                                        int precision, int scale) {
  return new LongDecimalWriter(desc, precision, scale);
}