Java Code Examples for parquet.column.ColumnDescriptor#getMaxDefinitionLevel()

The following examples show how to use parquet.column.ColumnDescriptor#getMaxDefinitionLevel(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParquetReader.java    From paraflow with Apache License 2.0 6 votes vote down vote up
/**
 * Sizes the next batch of rows and tells every requested column reader to
 * prepare for it.
 *
 * The batch is capped at {@code MAX_VECTOR_LENGTH} and never extends past the
 * rows remaining in the current row group.
 *
 * @return the number of rows in the prepared batch, or {@code -1} when the
 *         current row group is used up and {@code advanceToNextRowGroup()}
 *         reports that no further group could be entered
 */
public int nextBatch()
{
    // Only try to move on to another row group once this one is fully consumed.
    boolean groupExhausted = nextRowInGroup >= currentGroupRowCount;
    if (groupExhausted && !advanceToNextRowGroup()) {
        return -1;
    }

    batchSize = checkedCast(min(MAX_VECTOR_LENGTH, currentGroupRowCount - nextRowInGroup));

    currentPosition += batchSize;
    nextRowInGroup += batchSize;

    for (PrimitiveColumnIO primitiveIO : getColumns(fileSchema, requestedSchema)) {
        ColumnDescriptor plain = primitiveIO.getColumnDescriptor();
        // Readers are looked up by a RichColumnDescriptor built from the plain descriptor.
        RichColumnDescriptor key = new RichColumnDescriptor(
                plain.getPath(),
                primitiveIO.getType().asPrimitiveType(),
                plain.getMaxRepetitionLevel(),
                plain.getMaxDefinitionLevel());
        columnReadersMap.get(key).prepareNextRead(batchSize);
    }
    return batchSize;
}
 
Example 2
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 6 votes vote down vote up
/**
 * Prints a one-line description of a primitive field.
 *
 * The line holds the field name prefixed with {@code depth} dots, its
 * repetition and primitive type name, the original type when one is set, and,
 * when a containing message type is supplied, the column's maximum repetition
 * and definition levels.
 *
 * @param out       writer receiving the formatted line
 * @param type      primitive field to describe
 * @param depth     number of leading dots placed before the field name
 * @param container message type used to resolve the column descriptor; when
 *                  {@code null} the level information is omitted
 * @param cpath     path of the enclosing fields; the field's own name is
 *                  appended temporarily and removed again before returning
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String label = Strings.repeat(".", depth) + type.getName();
  OriginalType original = type.getOriginalType();

  out.format("%s: %s %s", label, type.getRepetition(), type.getPrimitiveTypeName());
  if (original != null) {
    out.format(" O:%s", original);
  }

  if (container != null) {
    // Extend the path with this field just long enough to snapshot it.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[cpath.size()]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);

    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }
  out.println();
}
 
Example 3
Source File: ParquetReader.java    From paraflow with Apache License 2.0 5 votes vote down vote up
/**
 * Creates one column reader per requested primitive column and registers it in
 * {@code columnReadersMap}, keyed by a {@code RichColumnDescriptor} built from
 * the column's path, primitive type and maximum repetition/definition levels.
 */
private void initializeColumnReaders()
{
    for (PrimitiveColumnIO primitiveIO : getColumns(fileSchema, requestedSchema)) {
        ColumnDescriptor plain = primitiveIO.getColumnDescriptor();
        RichColumnDescriptor key = new RichColumnDescriptor(
                plain.getPath(),
                primitiveIO.getType().asPrimitiveType(),
                plain.getMaxRepetitionLevel(),
                plain.getMaxDefinitionLevel());
        ParquetColumnReader reader = ParquetColumnReader.createReader(key);
        columnReadersMap.put(key, reader);
    }
}
 
Example 4
Source File: MetadataUtils.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a single line describing a column: its dot-joined path (null path
 * elements skipped), its type, and its maximum repetition and definition
 * levels.
 *
 * @param out  writer receiving the formatted line
 * @param desc column descriptor to describe
 */
public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
  String dottedPath = Joiner.on(".").skipNulls().join(desc.getPath());
  PrimitiveTypeName primitive = desc.getType();
  int maxDefinition = desc.getMaxDefinitionLevel();
  int maxRepetition = desc.getMaxRepetitionLevel();

  out.format("column desc: %s T:%s R:%d D:%d%n", dottedPath, primitive, maxRepetition, maxDefinition);
}
 
Example 5
Source File: DumpCommand.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a summary of one column within a page store: a header line with the
 * column path, total value count and maximum levels (plus dictionary size and
 * encoding when a dictionary page exists), a rule, and then one line per page
 * with its encodings, uncompressed size and value count.
 *
 * @param out    writer receiving the output
 * @param store  page store the column's page reader is obtained from
 * @param column column whose pages are listed
 * @throws IOException declared by the signature
 */
public static void dump(PrettyPrintWriter out, PageReadStore store, ColumnDescriptor column) throws IOException {
    PageReader pages = store.getPageReader(column);

    long totalValues = pages.getTotalValueCount();
    int maxRepetition = column.getMaxRepetitionLevel();
    int maxDefinition = column.getMaxDefinitionLevel();
    String dottedPath = Joiner.on('.').skipNulls().join(column.getPath());
    out.format("%s TV=%d RL=%d DL=%d", dottedPath, totalValues, maxRepetition, maxDefinition);

    DictionaryPage dictionary = pages.readDictionaryPage();
    if (dictionary != null) {
        out.format(" DS:%d", dictionary.getDictionarySize());
        out.format(" DE:%s", dictionary.getEncoding());
    }

    out.println();
    out.rule('-');

    // Keep reading until the reader returns null, numbering pages from zero.
    long pageIndex = 0;
    Page current = pages.readPage();
    while (current != null) {
        out.format("page %d:", pageIndex);
        out.format(" DLE:%s", current.getDlEncoding());
        out.format(" RLE:%s", current.getRlEncoding());
        out.format(" VLE:%s", current.getValueEncoding());
        out.format(" SZ:%d", current.getUncompressedSize());
        out.format(" VC:%d", current.getValueCount());
        out.println();

        current = pages.readPage();
        pageIndex++;
    }
}
 
Example 6
Source File: DumpCommand.java    From parquet-tools with Apache License 2.0 5 votes vote down vote up
/**
 * Prints every value of one column in a row group, one line per value, with the
 * value's repetition level, definition level and content.
 *
 * A value whose definition level differs from the column's maximum definition
 * level is printed as {@code <null>} and nothing is read from the reader for
 * it.
 *
 * @param out     writer receiving the output
 * @param crstore column read store the column reader is obtained from
 * @param column  column whose values are dumped
 * @param page    row group number shown in the header line
 * @param total   row group count shown in the header line
 * @param offset  index of the first value, used for the header range and for
 *                numbering each value line
 * @throws IOException declared by the signature
 */
public static void dump(PrettyPrintWriter out, ColumnReadStoreImpl crstore, ColumnDescriptor column, long page, long total, long offset) throws IOException {
    int dmax = column.getMaxDefinitionLevel();
    ColumnReader creader = crstore.getColumnReader(column);
    long valueCount = creader.getTotalValueCount();
    out.format("*** row group %d of %d, values %d to %d ***%n", page, total, offset, offset + valueCount - 1);

    for (long i = 0; i < valueCount; ++i) {
        // The repetition level must come from its own accessor; reading the
        // definition level twice made the "R:" field echo the "D:" field.
        int rlvl = creader.getCurrentRepetitionLevel();
        int dlvl = creader.getCurrentDefinitionLevel();

        out.format("value %d: R:%d D:%d V:", offset+i, rlvl, dlvl);
        if (dlvl == dmax) {
            // Pick the typed accessor that matches the column's primitive type.
            switch (column.getType()) {
            case BINARY:  out.format("%s", binaryToString(creader.getBinary())); break;
            case BOOLEAN: out.format("%s", creader.getBoolean()); break;
            case DOUBLE:  out.format("%s", creader.getDouble()); break;
            case FLOAT:   out.format("%s", creader.getFloat()); break;
            case INT32:   out.format("%s", creader.getInteger()); break;
            case INT64:   out.format("%s", creader.getLong()); break;
            case INT96:   out.format("%s", binaryToBigInteger(creader.getBinary())); break;
            case FIXED_LEN_BYTE_ARRAY: out.format("%s", binaryToString(creader.getBinary())); break;
            }
        } else {
            out.format("<null>");
        }

        out.println();
        // Advance the reader to the next value.
        creader.consume();
    }
}