Java Code Examples for org.apache.parquet.schema.PrimitiveType#getLogicalTypeAnnotation()

The following examples show how to use org.apache.parquet.schema.PrimitiveType#getLogicalTypeAnnotation(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to the original project or source file. You may also check out related API usage in the sidebar.
Example 1
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Writes a one-line description of a primitive field to {@code out}.
 *
 * <p>The line has the form {@code <dots><name>: <repetition> <primitive type>}, optionally
 * followed by {@code O:<original type>} or {@code L:<logical type annotation>}, and, when a
 * container schema is supplied, by {@code R:<max repetition level> D:<max definition level>}.
 *
 * @param out               writer receiving the formatted line
 * @param type              primitive field to describe
 * @param depth             number of {@code "."} characters prepended to the field name
 * @param container         schema used to resolve the column descriptor; may be {@code null},
 *                          in which case the R/D levels are omitted
 * @param cpath             path of the enclosing fields; the field name is appended temporarily
 *                          and removed again before the descriptor lookup
 * @param showOriginalTypes if {@code true} print the original type, otherwise print the logical
 *                          type annotation
 */
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  out.format("%s: %s %s",
      Strings.repeat(".", depth) + type.getName(),
      type.getRepetition(),
      type.getPrimitiveTypeName());

  if (!showOriginalTypes) {
    LogicalTypeAnnotation logicalType = type.getLogicalTypeAnnotation();
    if (logicalType != null) {
      out.format(" L:%s", logicalType);
    }
  } else {
    OriginalType originalType = null;
    try {
      originalType = type.getOriginalType();
    } catch (Exception ignored) {
      // Best effort: a failure to resolve the original type just means nothing is printed.
      // NOTE(review): presumably raised for annotations without a legacy equivalent - confirm.
    }
    if (originalType != null) {
      out.format(" O:%s", originalType);
    }
  }

  if (container != null) {
    // Build the full column path for this field, then restore cpath for the caller.
    cpath.add(type.getName());
    String[] columnPath = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor column = container.getColumnDescription(columnPath);
    int maxDefinitionLevel = column.getMaxDefinitionLevel();
    int maxRepetitionLevel = column.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", maxRepetitionLevel, maxDefinitionLevel);
  }
  out.println();
}
 
Example 2
Source File: BinaryTruncator.java    From parquet-mr with Apache License 2.0 5 votes vote down vote up
/**
 * Selects the truncator to use for values of the given primitive type.
 *
 * <ul>
 *   <li>{@code null} type or {@code INT96}: {@code NO_OP_TRUNCATOR}</li>
 *   <li>{@code BINARY} / {@code FIXED_LEN_BYTE_ARRAY} without a logical type annotation, or
 *       annotated as string, enum, JSON or BSON: {@code DEFAULT_UTF8_TRUNCATOR}</li>
 *   <li>{@code BINARY} / {@code FIXED_LEN_BYTE_ARRAY} with an annotation for which the visitor
 *       yields no result: {@code NO_OP_TRUNCATOR}</li>
 * </ul>
 *
 * @param type the primitive type to pick a truncator for; may be {@code null}
 * @return the truncator matching the type
 * @throws IllegalArgumentException if the primitive type name is none of the above
 */
public static BinaryTruncator getTruncator(PrimitiveType type) {
  if (type == null) {
    return NO_OP_TRUNCATOR;
  }

  // First classify by physical type; only the byte-array types continue past the switch.
  switch (type.getPrimitiveTypeName()) {
    case INT96:
      return NO_OP_TRUNCATOR;
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
      break;
    default:
      throw new IllegalArgumentException("No truncator is available for the type: " + type);
  }

  LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();
  if (annotation == null) {
    return DEFAULT_UTF8_TRUNCATOR;
  }

  Optional<BinaryTruncator> selected = annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<BinaryTruncator>() {
    @Override
    public Optional<BinaryTruncator> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringType) {
      return Optional.of(DEFAULT_UTF8_TRUNCATOR);
    }

    @Override
    public Optional<BinaryTruncator> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumType) {
      return Optional.of(DEFAULT_UTF8_TRUNCATOR);
    }

    @Override
    public Optional<BinaryTruncator> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonType) {
      return Optional.of(DEFAULT_UTF8_TRUNCATOR);
    }

    @Override
    public Optional<BinaryTruncator> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonType) {
      return Optional.of(DEFAULT_UTF8_TRUNCATOR);
    }
  });
  return selected.orElse(NO_OP_TRUNCATOR);
}
 
Example 3
Source File: ParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Determines the sort order applications assume for a primitive type, based on its logical
 * type annotation.
 *
 * <p>When the type carries no annotation, or the annotation is one the visitor below produces
 * no result for, the order returned by {@code defaultSortOrder} for the primitive type name is
 * used instead.
 *
 * @param primitive a primitive type, optionally with a logical type annotation
 * @return the "correct" sort order of the type that applications assume
 */
private static SortOrder sortOrder(PrimitiveType primitive) {
  LogicalTypeAnnotation annotation = primitive.getLogicalTypeAnnotation();
  if (annotation == null) {
    return defaultSortOrder(primitive.getPrimitiveTypeName());
  }

  Optional<SortOrder> annotated = annotation.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<SortOrder>() {
    // --- order depends on the signedness of the integer annotation ---

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.IntLogicalTypeAnnotation intType) {
      return of(intType.isSigned() ? SortOrder.SIGNED : SortOrder.UNSIGNED);
    }

    // --- SIGNED ---

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.DateLogicalTypeAnnotation dateType) {
      return of(SortOrder.SIGNED);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.TimeLogicalTypeAnnotation timeType) {
      return of(SortOrder.SIGNED);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.TimestampLogicalTypeAnnotation timestampType) {
      return of(SortOrder.SIGNED);
    }

    // --- UNSIGNED ---

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.StringLogicalTypeAnnotation stringType) {
      return of(SortOrder.UNSIGNED);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.EnumLogicalTypeAnnotation enumType) {
      return of(SortOrder.UNSIGNED);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.JsonLogicalTypeAnnotation jsonType) {
      return of(SortOrder.UNSIGNED);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.BsonLogicalTypeAnnotation bsonType) {
      return of(SortOrder.UNSIGNED);
    }

    @Override
    public Optional<SortOrder> visit(UUIDLogicalTypeAnnotation uuidType) {
      return of(SortOrder.UNSIGNED);
    }

    // --- UNKNOWN ---

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation intervalType) {
      return of(SortOrder.UNKNOWN);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.DecimalLogicalTypeAnnotation decimalType) {
      return of(SortOrder.UNKNOWN);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.MapKeyValueTypeAnnotation mapKeyValueType) {
      return of(SortOrder.UNKNOWN);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.MapLogicalTypeAnnotation mapType) {
      return of(SortOrder.UNKNOWN);
    }

    @Override
    public Optional<SortOrder> visit(LogicalTypeAnnotation.ListLogicalTypeAnnotation listType) {
      return of(SortOrder.UNKNOWN);
    }
  });

  // Annotations the visitor does not cover fall back to the primitive type's default order.
  return annotated.orElse(defaultSortOrder(primitive.getPrimitiveTypeName()));
}
 
Example 4
Source File: ShowDictionaryCommand.java    From parquet-mr with Apache License 2.0 4 votes vote down vote up
/**
 * Prints, for each row group of a single Parquet file, the dictionary entries of the selected
 * column.
 *
 * <p>Exactly one target file must be supplied; zero or several targets are rejected through
 * {@code Preconditions.checkArgument}. Row groups whose column chunk has no dictionary page are
 * reported as such instead of failing. The file reader is closed when the method exits,
 * whether normally or through an exception.
 *
 * @return {@code 0} after all row groups have been processed
 * @throws IOException propagated from opening or reading the Parquet file
 * @throws IllegalArgumentException if the column's primitive type is not one of
 *         {@code BINARY}, {@code INT32}, {@code INT64}, {@code FLOAT} or {@code DOUBLE}
 */
@Override
@SuppressWarnings("unchecked")
public int run() throws IOException {
  Preconditions.checkArgument(targets != null && targets.size() >= 1,
      "A Parquet file is required.");
  Preconditions.checkArgument(targets.size() == 1,
      "Cannot process multiple Parquet files.");

  String source = targets.get(0);

  // try-with-resources: the reader holds an open file and must be closed on every exit path.
  try (ParquetFileReader reader = ParquetFileReader.open(getConf(), qualifiedPath(source))) {
    MessageType schema = reader.getFileMetaData().getSchema();
    ColumnDescriptor descriptor = Util.descriptor(column, schema);
    PrimitiveType type = Util.primitive(column, schema);
    Preconditions.checkNotNull(type);

    DictionaryPageReadStore dictionaryReader;
    int rowGroup = 0;
    while ((dictionaryReader = reader.getNextDictionaryReader()) != null) {
      DictionaryPage page = dictionaryReader.readDictionaryPage(descriptor);

      if (page == null) {
        // The column chunk in this row group is not dictionary-encoded; nothing to decode.
        console.info("\nRow group {} has no dictionary for \"{}\"", rowGroup, column);
      } else {
        Dictionary dict = page.getEncoding().initDictionary(descriptor, page);

        console.info("\nRow group {} dictionary for \"{}\":", rowGroup, column);
        for (int i = 0; i <= dict.getMaxId(); i += 1) {
          switch (type.getPrimitiveTypeName()) {
            case BINARY:
              // Only string-annotated binaries are rendered as UTF-8 text; others as raw bytes.
              if (type.getLogicalTypeAnnotation() instanceof LogicalTypeAnnotation.StringLogicalTypeAnnotation) {
                console.info("{}: {}", String.format("%6d", i),
                    Util.humanReadable(dict.decodeToBinary(i).toStringUsingUTF8(), 70));
              } else {
                console.info("{}: {}", String.format("%6d", i),
                    Util.humanReadable(dict.decodeToBinary(i).getBytesUnsafe(), 70));
              }
              break;
            case INT32:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToInt(i));
              break;
            case INT64:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToLong(i));
              break;
            case FLOAT:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToFloat(i));
              break;
            case DOUBLE:
              console.info("{}: {}", String.format("%6d", i),
                  dict.decodeToDouble(i));
              break;
            default:
              throw new IllegalArgumentException(
                  "Unknown dictionary type: " + type.getPrimitiveTypeName());
          }
        }
      }

      // Advance the reader so the next getNextDictionaryReader() call targets the next row group.
      reader.skipNextRowGroup();

      rowGroup += 1;
    }
  }

  console.info("");

  return 0;
}
 
Example 5
Source File: ParquetMetadataConverter.java    From parquet-mr with Apache License 2.0 3 votes vote down vote up
/**
 * Decides whether signed-order min and max values may be used for a type.
 *
 * <p>Signed min/max are safe for string types that contain only ASCII characters (sign bit
 * always 0). The {@code useSignedStringMinMax} flag states whether only ASCII characters were
 * written; this method additionally verifies that the type is string-like.
 *
 * @param type a primitive type, optionally with a logical type annotation
 * @return true if signed order min/max can be used with this type
 */
private boolean overrideSortOrderToSigned(PrimitiveType type) {
  LogicalTypeAnnotation annotation = type.getLogicalTypeAnnotation();

  // The override must be switched on before any type is considered.
  if (!useSignedStringMinMax) {
    return false;
  }

  // Even with the override set, only string-ish types qualify, and those are BINARY.
  if (PrimitiveTypeName.BINARY != type.getPrimitiveTypeName()) {
    return false;
  }

  // A missing annotation counts as string-ish because some writers
  // failed to use the UTF8 annotation.
  if (annotation == null) {
    return true;
  }
  return STRING_TYPES.contains(annotation.getClass());
}