Java Code Examples for org.apache.parquet.column.ColumnDescriptor#getMaxRepetitionLevel()

The following examples show how to use org.apache.parquet.column.ColumnDescriptor#getMaxRepetitionLevel(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out the related API usage on the sidebar.
Example 1
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 6 votes vote down vote up
/**
 * Prints a one-line summary of a primitive field: its dot-indented name, repetition and
 * primitive type name, followed by the original type when one is set. When a containing
 * schema is supplied, the column's max repetition and definition levels are appended.
 *
 * @param out       writer that receives the formatted line
 * @param type      primitive field to describe
 * @param depth     nesting depth; the field name is prefixed with this many dots
 * @param container schema used to look up the column descriptor, or {@code null} to skip the levels
 * @param cpath     names of the enclosing fields; this field's name is appended and removed again
 *                  while the full path is captured
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  OriginalType originalType = type.getOriginalType();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", indentedName, repetition, primitiveName);
  if (originalType != null) {
    out.format(" O:%s", originalType);
  }

  if (container != null) {
    // Push this field's name just long enough to snapshot the full path, then pop it.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);

    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }
  out.println();
}
 
Example 2
Source File: RichColumnDescriptor.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a descriptor that takes its path and max repetition/definition levels from
 * {@code descriptor} and its type from {@code primitiveType}.
 *
 * @param descriptor    source of the column path and the max repetition/definition levels
 * @param primitiveType primitive type passed to the superclass and used to derive {@code required}
 */
public RichColumnDescriptor(
        ColumnDescriptor descriptor,
        PrimitiveType primitiveType)
{
    super(
            descriptor.getPath(),
            primitiveType,
            descriptor.getMaxRepetitionLevel(),
            descriptor.getMaxDefinitionLevel());
    // Any repetition other than OPTIONAL marks the column as required.
    this.required = OPTIONAL != primitiveType.getRepetition();
}
 
Example 3
Source File: DeprecatedParquetVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the data type length for a given {@link ColumnDescriptor} and its corresponding
 * {@link SchemaElement}. Neither is enough information alone, as the max repetition level
 * (indicating if it is an array type) is in the ColumnDescriptor and the length of a fixed
 * width field is stored at the schema level.
 *
 * @param column descriptor supplying the primitive type and the max repetition level
 * @param se     schema element supplying the declared length of fixed-length byte arrays
 * @return the length if fixed width, else -1
 */
private int getDataTypeLength(ColumnDescriptor column, SchemaElement se) {
  // BINARY columns are never reported as fixed width.
  if (column.getType() == PrimitiveType.PrimitiveTypeName.BINARY) {
    return -1;
  }
  // Neither are repeated columns, whatever their primitive type.
  if (column.getMaxRepetitionLevel() > 0) {
    return -1;
  }
  // Fixed-length byte arrays take their length from the schema element, scaled by 8
  // (presumably bytes to bits, matching getTypeLengthInBits -- confirm against callers).
  return column.getType() == PrimitiveType.PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY
      ? se.getType_length() * 8
      : getTypeLengthInBits(column.getType());
}
 
Example 4
Source File: DeprecatedParquetVectorizedReader.java    From dremio-oss with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the data mode of a column from its max repetition and definition levels.
 *
 * @param column descriptor whose levels are inspected
 * @return REPEATED when the max repetition level is positive; otherwise REQUIRED when the
 *         max definition level is 0, and OPTIONAL in every remaining case
 */
private TypeProtos.DataMode getDataMode(ColumnDescriptor column) {
  // Repetition takes precedence over the definition level.
  if (column.getMaxRepetitionLevel() > 0) {
    return TypeProtos.DataMode.REPEATED;
  }
  return column.getMaxDefinitionLevel() == 0
      ? TypeProtos.DataMode.REQUIRED
      : TypeProtos.DataMode.OPTIONAL;
}
 
Example 5
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a single line describing a column descriptor: its dot-joined path (null path
 * elements are skipped), its primitive type, and its max repetition and definition levels.
 *
 * @param out  writer that receives the formatted line
 * @param desc column descriptor to describe
 */
public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
  String dottedPath = Joiner.on(".").skipNulls().join(desc.getPath());
  PrimitiveTypeName primitiveName = desc.getType();
  int maxDefinition = desc.getMaxDefinitionLevel();
  int maxRepetition = desc.getMaxRepetitionLevel();

  // Repetition is printed before definition, matching the "R:.. D:.." layout.
  out.format(
      "column desc: %s T:%s R:%d D:%d%n",
      dottedPath,
      primitiveName,
      maxRepetition,
      maxDefinition);
}
 
Example 6
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Prints a one-line summary of a primitive field: its dot-indented name, repetition and
 * primitive type name, followed by either its original type or its logical type annotation.
 * When a containing schema is supplied, the column's max repetition and definition levels
 * are appended.
 *
 * @param out               writer that receives the formatted line
 * @param type              primitive field to describe
 * @param depth             nesting depth; the field name is prefixed with this many dots
 * @param container         schema used to look up the column descriptor, or {@code null} to skip the levels
 * @param cpath             names of the enclosing fields; this field's name is appended and removed
 *                          again while the full path is captured
 * @param showOriginalTypes {@code true} to print the original type ({@code O:}), {@code false}
 *                          to print the logical type annotation ({@code L:})
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String indentedName = Strings.repeat(".", depth) + type.getName();
  Repetition repetition = type.getRepetition();
  PrimitiveTypeName primitiveName = type.getPrimitiveTypeName();

  out.format("%s: %s %s", indentedName, repetition, primitiveName);
  if (!showOriginalTypes) {
    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
    if (logicalType != null) {
      out.format(" L:%s", logicalType);
    }
  } else {
    OriginalType originalType = null;
    try {
      originalType = type.getOriginalType();
    } catch (Exception ignored) {
      // Best effort: a failure to obtain the original type is treated as "none set".
    }
    if (originalType != null) {
      out.format(" O:%s", originalType);
    }
  }

  if (container != null) {
    // Push this field's name just long enough to snapshot the full path, then pop it.
    cpath.add(type.getName());
    String[] fullPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor descriptor = container.getColumnDescription(fullPath);

    int maxDefinition = descriptor.getMaxDefinitionLevel();
    int maxRepetition = descriptor.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetition, maxDefinition);
  }
  out.println();
}
 
Example 7
Source File: SchemaCompatibilityValidator.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Validates a column referenced by a predicate: it must be used with a single type
 * throughout the predicate, must not be repeated, and its type must be valid for the
 * schema's primitive type. Columns absent from the schema are accepted without further checks.
 *
 * @param column the predicate column to validate
 * @throws IllegalArgumentException if the column was already seen with a different type,
 *         or if the column is repeated
 */
private <T extends Comparable<T>> void validateColumn(Column<T> column) {
  ColumnPath path = column.getColumnPath();
  Class<?> previousType = columnTypesEncountered.get(path);

  if (previousType == null) {
    // First sighting of this column: remember the type it was used with.
    columnTypesEncountered.put(path, column.getColumnType());
  } else if (!previousType.equals(column.getColumnType())) {
    throw new IllegalArgumentException("Column: "
        + path.toDotString()
        + " was provided with different types in the same predicate."
        + " Found both: (" + previousType + ", " + column.getColumnType() + ")");
  }

  ColumnDescriptor descriptor = getColumnDescriptor(path);
  if (descriptor == null) {
    // The column is missing from the schema. Evaluation calls updateNull() when
    // a value is missing, so this case is handled correctly.
    return;
  }

  boolean repeated = descriptor.getMaxRepetitionLevel() > 0;
  if (repeated) {
    throw new IllegalArgumentException("FilterPredicates do not currently support repeated columns. "
        + "Column " + path.toDotString() + " is repeated.");
  }

  ValidTypeMap.assertTypeValid(column, descriptor.getType());
}
 
Example 8
Source File: ParquetReaderUtility.java    From Bats with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether any of the columns in the given list is either nested or repetitive.
 * For a star query every field and column of the file schema is inspected instead.
 *
 * @param footer  Parquet file metadata providing the schema
 * @param columns list of query SchemaPath objects
 * @return {@code true} if at least one inspected column is non-primitive, non-leaf or has a
 *         max repetition level above zero; {@code false} otherwise
 */
public static boolean containsComplexColumn(ParquetMetadata footer, List<SchemaPath> columns) {

  MessageType schema = footer.getFileMetaData().getSchema();

  if (Utilities.isStarQuery(columns)) {
    // Star query: any non-primitive top-level field or any repeated column makes it complex.
    for (Type field : schema.getFields()) {
      if (!field.isPrimitive()) {
        return true;
      }
    }
    for (ColumnDescriptor descriptor : schema.getColumns()) {
      if (descriptor.getMaxRepetitionLevel() > 0) {
        return true;
      }
    }
    return false;
  }

  Map<String, ColumnDescriptor> descriptorsByName = ParquetReaderUtility.getColNameToColumnDescriptorMapping(footer);
  Map<String, SchemaElement> elementsByName = ParquetReaderUtility.getColNameToSchemaElementMapping(footer);

  for (SchemaPath schemaPath : columns) {
    // A schema path that is not a leaf is a complex column.
    if (!schemaPath.isLeaf()) {
      logger.trace("rowGroupScan contains complex column: {}", schemaPath.getUnIndexed().toString());
      return true;
    }

    // Both maps are queried with the same lower-cased, un-indexed path.
    String lookupKey = schemaPath.getUnIndexed().toString().toLowerCase();
    ColumnDescriptor descriptor = descriptorsByName.get(lookupKey);

    if (descriptor != null) {
      if (descriptor.getMaxRepetitionLevel() > 0) {
        logger.trace("rowGroupScan contains repetitive column: {}", schemaPath.getUnIndexed().toString());
        return true;
      }
      continue;
    }

    // No column descriptor was found, so the SchemaElement lookup decides:
    // 1. found: the queried column is complex, i.e. a GroupType
    // 2. not found: the queried column is not in the schema and thus is non-complex
    if (elementsByName.get(lookupKey) != null) {
      return true;
    }
  }
  return false;
}
 
Example 9
Source File: ColumnWriterV2.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the repetition-level writer for a column.
 *
 * @param props properties that supply the repetition-level encoder
 * @param path  descriptor of the column being written
 * @return {@code NULL_WRITER} when the column's max repetition level is 0, otherwise a new
 *         {@code RLEWriterForV2} wrapping the encoder obtained from {@code props}
 */
@Override
ValuesWriter createRLWriter(ParquetProperties props, ColumnDescriptor path) {
  if (path.getMaxRepetitionLevel() == 0) {
    return NULL_WRITER;
  }
  return new RLEWriterForV2(props.newRepetitionLevelEncoder(path));
}