org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName Java Examples

The following examples show how to use org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName. Each example notes the project and source file it was taken from, so you can refer back to the original code for full context.
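Before the individual examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of how PrimitiveTypeName is commonly combined with the org.apache.parquet.schema.Types builder to declare columns; the class name and the field names "id", "name", and "User" are purely illustrative.

import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.OriginalType;
import org.apache.parquet.schema.PrimitiveType;
import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName;
import org.apache.parquet.schema.Types;

public class PrimitiveTypeNameSketch {
  public static void main(String[] args) {
    // A standalone required INT64 column, the same pattern several tests below use.
    PrimitiveType id = Types.required(PrimitiveTypeName.INT64).named("id");

    // A small message schema mixing a required INT64 with an optional UTF8-annotated BINARY.
    MessageType schema = Types.buildMessage()
        .required(PrimitiveTypeName.INT64).named("id")
        .optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("name")
        .named("User");

    System.out.println(id);      // e.g. "required int64 id"
    System.out.println(schema);  // prints the full message type
  }
}

The same fluent calls (Types.required(...), .as(...), .named(...)) appear throughout the test examples that follow.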
Example #1
Source File: TestMemPageStore.java    From parquet-mr with Apache License 2.0
@Test
public void test() throws IOException {
  MemPageStore memPageStore = new MemPageStore(10);
  ColumnDescriptor col = new ColumnDescriptor(path, PrimitiveTypeName.INT64, 2, 2);
  LongStatistics stats = new LongStatistics();
  PageWriter pageWriter = memPageStore.getPageWriter(col);
  pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[743]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  pageWriter.writePage(BytesInput.from(new byte[735]), 209, stats, BIT_PACKED, BIT_PACKED, PLAIN);
  PageReader pageReader = memPageStore.getPageReader(col);
  long totalValueCount = pageReader.getTotalValueCount();
  System.out.println(totalValueCount);
  int total = 0;
  do {
    DataPage readPage = pageReader.readPage();
    total += readPage.getValueCount();
    System.out.println(readPage);
    // TODO: assert
  } while (total < totalValueCount);
}
 
Example #2
Source File: ValidatingRecordConsumer.java    From parquet-mr with Apache License 2.0
private void validate(PrimitiveTypeName p) {
  Type currentType = types.peek().asGroupType().getType(fields.peek());
  int c = fieldValueCount.pop() + 1;
  fieldValueCount.push(c);
  LOG.debug("validate {} for {}",p ,currentType.getName());
  switch (currentType.getRepetition()) {
    case OPTIONAL:
    case REQUIRED:
      if (c > 1) {
        throw new InvalidRecordException("repeated value when the type is not repeated in " + currentType);
      }
      break;
    case REPEATED:
      break;
    default:
      throw new InvalidRecordException("unknown repetition " + currentType.getRepetition() + " in " + currentType);
  }
  if (!currentType.isPrimitive() || currentType.asPrimitiveType().getPrimitiveTypeName() != p) {
    throw new InvalidRecordException("expected type " + p + " but got "+ currentType);
  }
}
 
Example #3
Source File: TestTypeBuildersWithLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testInt64AnnotationsRejectNonInt64() {
  LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
    timeType(true, MICROS), timeType(false, MICROS),
    timeType(true, NANOS), timeType(false, NANOS),
    timestampType(true, MILLIS), timestampType(false, MILLIS),
    timestampType(true, MICROS), timestampType(false, MICROS),
    timestampType(true, NANOS), timestampType(false, NANOS),
    intType(64, true), intType(64, false)};
  for (final LogicalTypeAnnotation logicalType : types) {
    PrimitiveTypeName[] nonInt64 = new PrimitiveTypeName[]{
        BOOLEAN, INT32, INT96, DOUBLE, FLOAT, BINARY
    };
    for (final PrimitiveTypeName type : nonInt64) {
      assertThrows("Should reject non-int64 type: " + type,
          IllegalStateException.class, (Callable<Type>) () -> Types.required(type).as(logicalType).named("col"));
    }
    assertThrows("Should reject non-int64 type: FIXED_LEN_BYTE_ARRAY",
        IllegalStateException.class, (Callable<Type>) () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
            .as(logicalType).named("col"));
  }
}
 
Example #4
Source File: ColumnReader.java    From dremio-oss with Apache License 2.0
protected ColumnReader(DeprecatedParquetVectorizedReader parentReader, int allocateSize, ColumnDescriptor descriptor,
                       ColumnChunkMetaData columnChunkMetaData, boolean fixedLength, V v, SchemaElement schemaElement) throws ExecutionSetupException {
  this.parentReader = parentReader;
  this.columnDescriptor = descriptor;
  this.columnChunkMetaData = columnChunkMetaData;
  this.isFixedLength = fixedLength;
  this.schemaElement = schemaElement;
  this.valueVec = v;
  this.pageReader = (parentReader.getSingleStream() != null) ?
    new DeprecatedSingleStreamPageReader(this, parentReader.getSingleStream(), parentReader.getFsPath(), columnChunkMetaData) :
    new PageReader(this, parentReader.getFileSystem(), parentReader.getFsPath(), columnChunkMetaData);

  if (columnDescriptor.getType() != PrimitiveType.PrimitiveTypeName.BINARY) {
    if (columnDescriptor.getType() == PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY) {
      dataTypeLengthInBits = columnDescriptor.getTypeLength() * 8;
    } else if (columnDescriptor.getType() == PrimitiveTypeName.INT96
      && valueVec instanceof TimeStampMilliVector) {
      // If an INT96 column is being read as a timestamp, this truncates the time format used by Impala.
      // dataTypeLengthInBits is only ever used when computing offsets into the destination vector, so it
      // needs to be set to the bit width of the resulting Arrow type; usually this matches the input length.
      dataTypeLengthInBits = 64;
    } else {
      dataTypeLengthInBits = DeprecatedParquetVectorizedReader.getTypeLengthInBits(columnDescriptor.getType());
    }
  }
}
 
Example #5
Source File: TestTypeBuildersWithLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testInt32AnnotationsRejectNonInt32() {
  LogicalTypeAnnotation[] types = new LogicalTypeAnnotation[] {
    dateType(), timeType(true, MILLIS), timeType(false, MILLIS),
    intType(8, false), intType(16, false), intType(32, false),
    intType(8, true), intType(16, true), intType(32, true)};
  for (final LogicalTypeAnnotation logicalType : types) {
    PrimitiveTypeName[] nonInt32 = new PrimitiveTypeName[]{
        BOOLEAN, INT64, INT96, DOUBLE, FLOAT, BINARY
    };
    for (final PrimitiveTypeName type : nonInt32) {
      assertThrows("Should reject non-int32 type: " + type,
          IllegalStateException.class, () -> Types.required(type).as(logicalType).named("col"));
    }
    assertThrows("Should reject non-int32 type: FIXED_LEN_BYTE_ARRAY",
        IllegalStateException.class, () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
            .as(logicalType).named("col"));
  }
}
 
Example #6
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testIgnoreStatsWithSignedSortOrder() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("z"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY)
      .as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      StatsHelper.V1.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not include min/max: " + convertedStats, convertedStats.hasNonNullValue());
  Assert.assertTrue("Stats should have null count: " + convertedStats, convertedStats.isNumNullsSet());
  Assert.assertEquals("Stats should have 3 nulls: " + convertedStats, 3L, convertedStats.getNumNulls());
}
 
Example #7
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0
@Test
public void testInt32AnnotationsRejectNonInt32() {
  OriginalType[] types = new OriginalType[] {
      DATE, TIME_MILLIS, UINT_8, UINT_16, UINT_32, INT_8, INT_16, INT_32};
  for (final OriginalType logicalType : types) {
    PrimitiveTypeName[] nonInt32 = new PrimitiveTypeName[]{
        BOOLEAN, INT64, INT96, DOUBLE, FLOAT, BINARY
    };
    for (final PrimitiveTypeName type : nonInt32) {
      assertThrows("Should reject non-int32 type: " + type,
          IllegalStateException.class, (Callable<Type>) () -> Types.required(type).as(logicalType).named("col"));
    }
    assertThrows("Should reject non-int32 type: FIXED_LEN_BYTE_ARRAY",
        IllegalStateException.class, (Callable<Type>) () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
            .as(logicalType).named("col"));
  }
}
 
Example #8
Source File: TestParquetFileWriter.java    From parquet-mr with Apache License 2.0
@Test
public void testConvertToThriftStatistics() throws Exception {
  long[] longArray = new long[] {39L, 99L, 12L, 1000L, 65L, 542L, 2533461316L, -253346131996L, Long.MAX_VALUE, Long.MIN_VALUE};
  LongStatistics parquetMRstats = new LongStatistics();

  for (long l: longArray) {
    parquetMRstats.updateStats(l);
  }
  final String createdBy =
      "parquet-mr version 1.8.0 (build d4d5a07ec9bd262ca1e93c309f1d7d4a74ebda4c)";
  Statistics thriftStats =
      org.apache.parquet.format.converter.ParquetMetadataConverter.toParquetStatistics(parquetMRstats);
  LongStatistics convertedBackStats =
      (LongStatistics) org.apache.parquet.format.converter.ParquetMetadataConverter.fromParquetStatistics(
          createdBy, thriftStats, PrimitiveTypeName.INT64);

  assertEquals(parquetMRstats.getMax(), convertedBackStats.getMax());
  assertEquals(parquetMRstats.getMin(), convertedBackStats.getMin());
  assertEquals(parquetMRstats.getNumNulls(), convertedBackStats.getNumNulls());
}
 
Example #9
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
private void testStillUseStatsWithSignedSortOrderIfSingleValue(StatsHelper helper) {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  BinaryStatistics stats = new BinaryStatistics();
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();
  stats.updateStats(Binary.fromString("A"));
  stats.incrementNumNulls();

  PrimitiveType binaryType = Types.required(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named("b");
  Statistics convertedStats = converter.fromParquetStatistics(
      Version.FULL_VERSION,
      ParquetMetadataConverter.toParquetStatistics(stats),
      binaryType);

  Assert.assertFalse("Stats should not be empty: " + convertedStats, convertedStats.isEmpty());
  Assert.assertArrayEquals("min == max: " + convertedStats, convertedStats.getMaxBytes(), convertedStats.getMinBytes());
}
 
Example #10
Source File: TestTypeBuilders.java    From parquet-mr with Apache License 2.0
@Test
public void testBinaryAnnotationsRejectsNonBinary() {
  OriginalType[] types = new OriginalType[] {
      UTF8, JSON, BSON};
  for (final OriginalType logicalType : types) {
    PrimitiveTypeName[] nonBinary = new PrimitiveTypeName[]{
        BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT
    };
    for (final PrimitiveTypeName type : nonBinary) {
      assertThrows("Should reject non-binary type: " + type,
          IllegalStateException.class, (Callable<Type>) () -> Types.required(type).as(logicalType).named("col"));
    }
    assertThrows("Should reject non-binary type: FIXED_LEN_BYTE_ARRAY",
        IllegalStateException.class, (Callable<Type>) () -> Types.required(FIXED_LEN_BYTE_ARRAY).length(1)
            .as(logicalType).named("col"));
  }
}
 
Example #11
Source File: TestTypeBuildersWithLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testDECIMALAnnotationRejectsUnsupportedTypes() {
  PrimitiveTypeName[] unsupported = new PrimitiveTypeName[]{
      BOOLEAN, INT96, DOUBLE, FLOAT
  };
  for (final PrimitiveTypeName type : unsupported) {
    assertThrows("Should reject non-binary type: " + type,
        IllegalStateException.class, () -> Types.required(type)
            .as(decimalType(2, 9))
            .named("d"));
  }
}
 
Example #12
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testBinary_V2() {
  doTestValueWriter(
    PrimitiveTypeName.BINARY,
    WriterVersion.PARQUET_2_0,
    true,
    false,
    PlainBinaryDictionaryValuesWriter.class, DeltaByteArrayWriter.class);
}
 
Example #13
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testColumnIndexConversion() {
  PrimitiveType type = Types.required(PrimitiveTypeName.INT64).named("test_int64");
  ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(type, Integer.MAX_VALUE);
  Statistics<?> stats = Statistics.createStats(type);
  stats.incrementNumNulls(16);
  stats.updateStats(-100L);
  stats.updateStats(100L);
  builder.add(stats);
  stats = Statistics.createStats(type);
  stats.incrementNumNulls(111);
  builder.add(stats);
  stats = Statistics.createStats(type);
  stats.updateStats(200L);
  stats.updateStats(500L);
  builder.add(stats);
  org.apache.parquet.format.ColumnIndex parquetColumnIndex =
      ParquetMetadataConverter.toParquetColumnIndex(type, builder.build());
  ColumnIndex columnIndex = ParquetMetadataConverter.fromParquetColumnIndex(type, parquetColumnIndex);
  assertEquals(BoundaryOrder.ASCENDING, columnIndex.getBoundaryOrder());
  assertTrue(Arrays.asList(false, true, false).equals(columnIndex.getNullPages()));
  assertTrue(Arrays.asList(16L, 111L, 0L).equals(columnIndex.getNullCounts()));
  assertTrue(Arrays.asList(
      ByteBuffer.wrap(BytesUtils.longToBytes(-100L)),
      ByteBuffer.allocate(0),
      ByteBuffer.wrap(BytesUtils.longToBytes(200L))).equals(columnIndex.getMinValues()));
  assertTrue(Arrays.asList(
      ByteBuffer.wrap(BytesUtils.longToBytes(100L)),
      ByteBuffer.allocate(0),
      ByteBuffer.wrap(BytesUtils.longToBytes(500L))).equals(columnIndex.getMaxValues()));

  assertNull("Should handle null column index", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT32).named("test_int32"), null));
  assertNull("Should ignore unsupported types", ParquetMetadataConverter
      .toParquetColumnIndex(Types.required(PrimitiveTypeName.INT96).named("test_int96"), columnIndex));
  assertNull("Should ignore unsupported types",
      ParquetMetadataConverter.fromParquetColumnIndex(Types.required(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY)
          .length(12).as(OriginalType.INTERVAL).named("test_interval"), parquetColumnIndex));
}
 
Example #14
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testBoolean_V2() {
  doTestValueWriter(
    PrimitiveTypeName.BOOLEAN,
    WriterVersion.PARQUET_2_0,
    true,
    false,
    RunLengthBitPackingHybridValuesWriter.class);
}
 
Example #15
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testDouble_V2() {
  doTestValueWriter(
    PrimitiveTypeName.DOUBLE,
    WriterVersion.PARQUET_2_0,
    true,
    false,
    PlainDoubleDictionaryValuesWriter.class, PlainValuesWriter.class);
}
 
Example #16
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
private ColumnChunkMetaData createColumnChunkMetaData() {
  Set<org.apache.parquet.column.Encoding> e = new HashSet<org.apache.parquet.column.Encoding>();
  PrimitiveTypeName t = PrimitiveTypeName.BINARY;
  ColumnPath p = ColumnPath.get("foo");
  CompressionCodecName c = CompressionCodecName.GZIP;
  BinaryStatistics s = new BinaryStatistics();
  ColumnChunkMetaData md = ColumnChunkMetaData.get(p, t, c, e, s,
          0, 0, 0, 0, 0);
  return md;
}
 
Example #17
Source File: TestDictionary.java    From parquet-mr with Apache License 2.0
private DictionaryValuesReader initDicReader(ValuesWriter cw, PrimitiveTypeName type)
    throws IOException {
  final DictionaryPage dictionaryPage = cw.toDictPageAndClose().copy();
  final ColumnDescriptor descriptor = new ColumnDescriptor(new String[] {"foo"}, type, 0, 0);
  final Dictionary dictionary = PLAIN.initDictionary(descriptor, dictionaryPage);
  final DictionaryValuesReader cr = new DictionaryValuesReader(dictionary);
  return cr;
}
 
Example #18
Source File: ValidTypeMap.java    From parquet-mr with Apache License 2.0
private static void add(Class<?> c, PrimitiveTypeName p) {
  Set<PrimitiveTypeName> descriptors = classToParquetType.get(c);
  if (descriptors == null) {
    descriptors = new HashSet<>();
    classToParquetType.put(c, descriptors);
  }
  descriptors.add(p);

  Set<Class<?>> classes = parquetTypeToClass.get(p);
  if (classes == null) {
    classes = new HashSet<>();
    parquetTypeToClass.put(p, classes);
  }
  classes.add(c);
}
 
Example #19
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
private static ParquetMetadata createParquetMetaData(Encoding dicEncoding,
  Encoding dataEncoding) {
  MessageType schema =
    parseMessageType("message schema { optional int32 col (INT_32); }");
  org.apache.parquet.hadoop.metadata.FileMetaData fileMetaData =
    new org.apache.parquet.hadoop.metadata.FileMetaData(schema,
      new HashMap<String, String>(), null);
  List<BlockMetaData> blockMetaDataList = new ArrayList<BlockMetaData>();
  BlockMetaData blockMetaData = new BlockMetaData();
  EncodingStats.Builder builder = new EncodingStats.Builder();
  if (dicEncoding != null) {
    builder.addDictEncoding(dicEncoding).build();
  }
  builder.addDataEncoding(dataEncoding);
  EncodingStats es = builder.build();
  Set<org.apache.parquet.column.Encoding> e =
    new HashSet<org.apache.parquet.column.Encoding>();
  PrimitiveTypeName t = PrimitiveTypeName.INT32;
  ColumnPath p = ColumnPath.get("col");
  CompressionCodecName c = CompressionCodecName.UNCOMPRESSED;
  BinaryStatistics s = new BinaryStatistics();
  ColumnChunkMetaData md =
    ColumnChunkMetaData.get(p, t, c, es, e, s, 20, 30, 0, 0, 0);
  blockMetaData.addColumn(md);
  blockMetaDataList.add(blockMetaData);
  return new ParquetMetadata(fileMetaData, blockMetaDataList);
}
 
Example #20
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testInt64_V2() {
  doTestValueWriter(
    PrimitiveTypeName.INT64,
    WriterVersion.PARQUET_2_0,
    true,
    false,
    PlainLongDictionaryValuesWriter.class, DeltaBinaryPackingValuesWriterForLong.class);
}
 
Example #21
Source File: TestParquetMetadataConverter.java    From parquet-mr with Apache License 2.0
@Test
public void testMissingValuesFromStats() {
  ParquetMetadataConverter converter = new ParquetMetadataConverter();
  PrimitiveType type = Types.required(PrimitiveTypeName.INT32).named("test_int32");

  org.apache.parquet.format.Statistics formatStats = new org.apache.parquet.format.Statistics();
  Statistics<?> stats = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, type);
  assertFalse(stats.isNumNullsSet());
  assertFalse(stats.hasNonNullValue());
  assertTrue(stats.isEmpty());
  assertEquals(-1, stats.getNumNulls());

  formatStats.clear();
  formatStats.setMin(BytesUtils.intToBytes(-100));
  formatStats.setMax(BytesUtils.intToBytes(100));
  stats = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, type);
  assertFalse(stats.isNumNullsSet());
  assertTrue(stats.hasNonNullValue());
  assertFalse(stats.isEmpty());
  assertEquals(-1, stats.getNumNulls());
  assertEquals(-100, stats.genericGetMin());
  assertEquals(100, stats.genericGetMax());

  formatStats.clear();
  formatStats.setNull_count(2000);
  stats = converter.fromParquetStatistics(Version.FULL_VERSION, formatStats, type);
  assertTrue(stats.isNumNullsSet());
  assertFalse(stats.hasNonNullValue());
  assertFalse(stats.isEmpty());
  assertEquals(2000, stats.getNumNulls());
}
 
Example #22
Source File: TestTypeBuildersWithLogicalTypes.java    From parquet-mr with Apache License 2.0
@Test
public void testIntervalAnnotationRejectsNonFixed() {
  PrimitiveTypeName[] nonFixed = new PrimitiveTypeName[]{
      BOOLEAN, INT32, INT64, INT96, DOUBLE, FLOAT, BINARY
  };
  for (final PrimitiveTypeName type : nonFixed) {
    assertThrows("Should reject non-fixed type: " + type,
        IllegalStateException.class, () -> Types.required(type)
            .as(LogicalTypeAnnotation.IntervalLogicalTypeAnnotation.getInstance()).named("interval"));
  }
}
 
Example #23
Source File: TestMessageType.java    From parquet-mr with Apache License 2.0
@Test
public void testNestedTypes() {
  MessageType schema = MessageTypeParser.parseMessageType(Paper.schema.toString());
  Type type = schema.getType("Links", "Backward");
  assertEquals(PrimitiveTypeName.INT64,
      type.asPrimitiveType().getPrimitiveTypeName());
  assertEquals(0, schema.getMaxRepetitionLevel("DocId"));
  assertEquals(1, schema.getMaxRepetitionLevel("Name"));
  assertEquals(2, schema.getMaxRepetitionLevel("Name", "Language"));
  assertEquals(0, schema.getMaxDefinitionLevel("DocId"));
  assertEquals(1, schema.getMaxDefinitionLevel("Links"));
  assertEquals(2, schema.getMaxDefinitionLevel("Links", "Backward"));
}
 
Example #24
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testDouble_V2_WithByteStreamSplitAndDictionary() {
  doTestValueWriter(
    PrimitiveTypeName.DOUBLE,
    WriterVersion.PARQUET_2_0,
    true,
    true,
    PlainDoubleDictionaryValuesWriter.class, ByteStreamSplitValuesWriter.class);
}
 
Example #25
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
private void doTestValueWriter(PrimitiveTypeName typeName, WriterVersion version, boolean enableDictionary, boolean enableByteStreamSplit, Class<? extends ValuesWriter> expectedValueWriterClass) {
  ColumnDescriptor mockPath = createColumnDescriptor(typeName);
  ValuesWriterFactory factory = getDefaultFactory(version, enableDictionary, enableByteStreamSplit);
  ValuesWriter writer = factory.newValuesWriter(mockPath);

  validateWriterType(writer, expectedValueWriterClass);
}
 
Example #26
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0
private static void showDetails(PrettyPrintWriter out, PrimitiveType type, int depth, MessageType container, List<String> cpath, boolean showOriginalTypes) {
  String name = Strings.repeat(".", depth) + type.getName();
  Repetition rep = type.getRepetition();
  PrimitiveTypeName ptype = type.getPrimitiveTypeName();

  out.format("%s: %s %s", name, rep, ptype);
  if (showOriginalTypes) {
    OriginalType otype;
    try {
      otype = type.getOriginalType();
    } catch (Exception e) {
      otype = null;
    }
    if (otype != null) out.format(" O:%s", otype);
  } else {
    LogicalTypeAnnotation ltype = type.getLogicalTypeAnnotation();
    if (ltype != null) out.format(" L:%s", ltype);
  }

  if (container != null) {
    cpath.add(type.getName());
    String[] paths = cpath.toArray(new String[0]);
    cpath.remove(cpath.size() - 1);

    ColumnDescriptor desc = container.getColumnDescription(paths);

    int defl = desc.getMaxDefinitionLevel();
    int repl = desc.getMaxRepetitionLevel();
    out.format(" R:%d D:%d", repl, defl);
  }
  out.println();
}
 
Example #27
Source File: MetadataUtils.java    From parquet-mr with Apache License 2.0
public static void showDetails(PrettyPrintWriter out, ColumnDescriptor desc) {
  String path = Joiner.on(".").skipNulls().join(desc.getPath());
  PrimitiveTypeName type = desc.getType();
  int defl = desc.getMaxDefinitionLevel();
  int repl = desc.getMaxRepetitionLevel();

  out.format("column desc: %s T:%s R:%d D:%d%n", path, type, repl, defl);
}
 
Example #28
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
private void doTestValueWriter(PrimitiveTypeName typeName, WriterVersion version, boolean enableDictionary, boolean enableByteStreamSplit, Class<? extends ValuesWriter> initialValueWriterClass, Class<? extends ValuesWriter> fallbackValueWriterClass) {
  ColumnDescriptor mockPath = createColumnDescriptor(typeName);
  ValuesWriterFactory factory = getDefaultFactory(version, enableDictionary, enableByteStreamSplit);
  ValuesWriter writer = factory.newValuesWriter(mockPath);

  validateFallbackWriter(writer, initialValueWriterClass, fallbackValueWriterClass);
}
 
Example #29
Source File: JsonElementConversionFactory.java    From incubator-gobblin with Apache License 2.0
/**
 * @param jsonSchema the JSON schema element describing the field being converted
 * @param repeated   whether the field is repeated
 * @param outputType the Parquet {@link PrimitiveTypeName} the field is converted to
 */
public PrimitiveConverter(JsonSchema jsonSchema, boolean repeated, PrimitiveTypeName outputType) {
  super(jsonSchema);
  this.repeated = repeated;
  this.outputType = outputType;
  this.schema = buildSchema();
}
 
Example #30
Source File: DefaultValuesWriterFactoryTest.java    From parquet-mr with Apache License 2.0
@Test
public void testDouble_V2_WithByteStreamSplit() {
  doTestValueWriter(
    PrimitiveTypeName.DOUBLE,
    WriterVersion.PARQUET_2_0,
    false,
    true,
    ByteStreamSplitValuesWriter.class);
}