Java Code Examples for org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe

The following examples show how to use org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: presto   Source File: TestDataWritableWriter.java    License: Apache License 2.0 6 votes vote down vote up
private Binary decimalToBinary(HiveDecimal hiveDecimal, DecimalTypeInfo decimalTypeInfo)
{
    int prec = decimalTypeInfo.precision();
    int scale = decimalTypeInfo.scale();
    byte[] decimalBytes = hiveDecimal.setScale(scale).unscaledValue().toByteArray();

    // Estimated number of bytes needed.
    int precToBytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
    if (precToBytes == decimalBytes.length) {
        // No padding needed.
        return Binary.fromByteArray(decimalBytes);
    }

    byte[] tgt = new byte[precToBytes];
    if (hiveDecimal.signum() == -1) {
        // For negative number, initializing bits to 1
        for (int i = 0; i < precToBytes; i++) {
            tgt[i] |= 0xFF;
        }
    }

    System.arraycopy(decimalBytes, 0, tgt, precToBytes - decimalBytes.length, decimalBytes.length); // Padding leading zeroes/ones.
    return Binary.fromByteArray(tgt);
}
 
Example 2
Source Project: presto   Source File: TestParquetDecimalScaling.java    License: Apache License 2.0 6 votes vote down vote up
public String parquetStorage()
{
    if (!forceFixedLengthArray && precision > 0 && precision < 10) {
        return "INT32";
    }

    if (!forceFixedLengthArray && precision >= 10 && precision < 18) {
        return "INT64";
    }

    if (precision > 38 || precision < 0) {
        throw new IllegalArgumentException("Scale cannot be greater than 38 or less than 0");
    }

    return format("fixed_len_byte_array(%d)", ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[precision - 1]);
}
 
Example 3
Source Project: parquet-mr   Source File: TestParquetSerDe.java    License: Apache License 2.0 6 votes vote down vote up
private void deserializeAndSerializeLazySimple(final ParquetHiveSerDe serDe, final ArrayWritable t) throws SerDeException {

    // Get the row structure
    final StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();

    // Deserialize
    final Object row = serDe.deserialize(t);
    assertEquals("deserialization gives the wrong object class", row.getClass(), ArrayWritable.class);
    assertEquals("size correct after deserialization", serDe.getSerDeStats().getRawDataSize(), t.get().length);
    assertEquals("deserialization gives the wrong object", t, row);

    // Serialize
    final ArrayWritable serializedArr = (ArrayWritable) serDe.serialize(row, oi);
    assertEquals("size correct after serialization", serDe.getSerDeStats().getRawDataSize(), serializedArr.get().length);
    assertTrue("serialized object should be equal to starting object", arrayWritableEquals(t, serializedArr));
  }
 
Example 4
Source Project: presto   Source File: ParquetTester.java    License: Apache License 2.0 5 votes vote down vote up
private static void writeParquetColumn(
        JobConf jobConf,
        File outputFile,
        CompressionCodecName compressionCodecName,
        Properties tableProperties,
        SettableStructObjectInspector objectInspector,
        Iterator<?>[] valuesByField,
        Optional<MessageType> parquetSchema,
        boolean singleLevelArray)
        throws Exception
{
    RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray)
            .getHiveRecordWriter(
                    jobConf,
                    new Path(outputFile.toURI()),
                    Text.class,
                    compressionCodecName != UNCOMPRESSED,
                    tableProperties,
                    () -> {});
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            objectInspector.setStructFieldData(row, fields.get(field), value);
        }
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, tableProperties, null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
}
 
Example 5
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(),
            typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
            typeInfo.getMapValueTypeInfo());
    return mapType(repetition, name, "map", keyType, valueType);
}
 
Example 6
Source Project: presto   Source File: SingleLevelArraySchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
private static GroupType convertMapType(String name, MapTypeInfo typeInfo, Repetition repetition)
{
    Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(),
            typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
            typeInfo.getMapValueTypeInfo());
    return ConversionPatterns.mapType(repetition, name, keyType, valueType);
}
 
Example 7
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
private static GroupType convertMapType(String name, MapTypeInfo typeInfo)
{
    Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(),
            typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
    Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
            typeInfo.getMapValueTypeInfo());
    return mapType(Repetition.OPTIONAL, name, "map", keyType, valueType);
}
 
Example 8
Source Project: hudi   Source File: HoodieInputFormatUtils.java    License: Apache License 2.0 5 votes vote down vote up
public static String getSerDeClassName(HoodieFileFormat baseFileFormat) {
  switch (baseFileFormat) {
    case PARQUET:
      return ParquetHiveSerDe.class.getName();
    default:
      throw new HoodieIOException("No SerDe for base file format " + baseFileFormat);
  }
}
 
Example 9
Source Project: tajo   Source File: HiveCatalogUtil.java    License: Apache License 2.0 5 votes vote down vote up
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
 
Example 10
Source Project: parquet-mr   Source File: HiveSchemaConverter.java    License: Apache License 2.0 5 votes vote down vote up
private static GroupType convertMapType(final String name, final MapTypeInfo typeInfo) {
  final Type keyType = convertType(ParquetHiveSerDe.MAP_KEY.toString(),
      typeInfo.getMapKeyTypeInfo(), Repetition.REQUIRED);
  final Type valueType = convertType(ParquetHiveSerDe.MAP_VALUE.toString(),
      typeInfo.getMapValueTypeInfo());
  return ConversionPatterns.mapType(Repetition.OPTIONAL, name, keyType, valueType);
}
 
Example 11
private static Type convertType(String name, TypeInfo typeInfo, Repetition repetition)
{
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
                    .named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
            throw new UnsupportedOperationException("Void type not implemented");
        }
        if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.CHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.VARCHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        if (typeInfo instanceof DecimalTypeInfo) {
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            int prec = decimalTypeInfo.precision();
            int scale = decimalTypeInfo.scale();
            int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
            return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        }
        if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
            throw new UnsupportedOperationException("Unknown type not implemented");
        }
        throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
    if (typeInfo.getCategory() == Category.LIST) {
        return convertArrayType(name, (ListTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.STRUCT) {
        return convertStructType(name, (StructTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.MAP) {
        return convertMapType(name, (MapTypeInfo) typeInfo, repetition);
    }
    if (typeInfo.getCategory() == Category.UNION) {
        throw new UnsupportedOperationException("Union type not implemented");
    }
    throw new IllegalArgumentException("Unknown type: " + typeInfo);
}
 
Example 12
Source Project: presto   Source File: SingleLevelArraySchemaConverter.java    License: Apache License 2.0 4 votes vote down vote up
private static Type convertType(String name, TypeInfo typeInfo,
        Repetition repetition)
{
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
                    .named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
            throw new UnsupportedOperationException("Void type not implemented");
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.CHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.VARCHAR_TYPE_NAME)) {
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
            }
        }
        else if (typeInfo instanceof DecimalTypeInfo) {
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            int prec = decimalTypeInfo.precision();
            int scale = decimalTypeInfo.scale();
            int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
            if (repetition == Repetition.OPTIONAL) {
                return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
            else {
                return Types.repeated(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
            }
        }
        else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
            throw new UnsupportedOperationException("Unknown type not implemented");
        }
        else {
            throw new IllegalArgumentException("Unknown type: " + typeInfo);
        }
    }
    else if (typeInfo.getCategory() == Category.LIST) {
        return convertArrayType(name, (ListTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.STRUCT) {
        return convertStructType(name, (StructTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.MAP) {
        return convertMapType(name, (MapTypeInfo) typeInfo, repetition);
    }
    else if (typeInfo.getCategory() == Category.UNION) {
        throw new UnsupportedOperationException("Union type not implemented");
    }
    else {
        throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
}
 
Example 13
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 4 votes vote down vote up
private static Type convertType(String name, TypeInfo typeInfo, Repetition repetition)
{
    if (typeInfo.getCategory() == Category.PRIMITIVE) {
        if (typeInfo.equals(TypeInfoFactory.stringTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).as(OriginalType.UTF8)
                    .named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.intTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.shortTypeInfo) ||
                typeInfo.equals(TypeInfoFactory.byteTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.longTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT64, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.doubleTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.DOUBLE, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.floatTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.FLOAT, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BOOLEAN, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.BINARY, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT96, repetition).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {
            throw new UnsupportedOperationException("Void type not implemented");
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.CHAR_TYPE_NAME)) {
            return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        else if (typeInfo.getTypeName().toLowerCase(Locale.ENGLISH).startsWith(
                serdeConstants.VARCHAR_TYPE_NAME)) {
            return Types.optional(PrimitiveTypeName.BINARY).as(OriginalType.UTF8).named(name);
        }
        else if (typeInfo instanceof DecimalTypeInfo) {
            DecimalTypeInfo decimalTypeInfo = (DecimalTypeInfo) typeInfo;
            int prec = decimalTypeInfo.precision();
            int scale = decimalTypeInfo.scale();
            int bytes = ParquetHiveSerDe.PRECISION_TO_BYTE_COUNT[prec - 1];
            return Types.optional(PrimitiveTypeName.FIXED_LEN_BYTE_ARRAY).length(bytes).as(OriginalType.DECIMAL).scale(scale).precision(prec).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
            return Types.primitive(PrimitiveTypeName.INT32, repetition).as(OriginalType.DATE).named(name);
        }
        else if (typeInfo.equals(TypeInfoFactory.unknownTypeInfo)) {
            throw new UnsupportedOperationException("Unknown type not implemented");
        }
        else {
            throw new IllegalArgumentException("Unknown type: " + typeInfo);
        }
    }
    else if (typeInfo.getCategory() == Category.LIST) {
        return convertArrayType(name, (ListTypeInfo) typeInfo);
    }
    else if (typeInfo.getCategory() == Category.STRUCT) {
        return convertStructType(name, (StructTypeInfo) typeInfo);
    }
    else if (typeInfo.getCategory() == Category.MAP) {
        return convertMapType(name, (MapTypeInfo) typeInfo);
    }
    else if (typeInfo.getCategory() == Category.UNION) {
        throw new UnsupportedOperationException("Union type not implemented");
    }
    else {
        throw new IllegalArgumentException("Unknown type: " + typeInfo);
    }
}
 
Example 14
Source Project: presto   Source File: MapKeyValuesSchemaConverter.java    License: Apache License 2.0 4 votes vote down vote up
private static GroupType convertArrayType(String name, ListTypeInfo typeInfo)
{
    TypeInfo subType = typeInfo.getListElementTypeInfo();
    return listWrapper(name, OriginalType.LIST, new GroupType(Repetition.REPEATED,
            ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
}
 
Example 15
Source Project: presto   Source File: TestParquetDecimalScaling.java    License: Apache License 2.0 4 votes vote down vote up
private static void createParquetFile(
        Path path,
        StandardStructObjectInspector inspector,
        Iterator<?>[] iterators,
        MessageType parquetSchema,
        List<String> columnNames)
{
    Properties tableProperties = createTableProperties(columnNames, Collections.singletonList(inspector));

    JobConf jobConf = new JobConf();
    jobConf.setEnum(COMPRESSION, UNCOMPRESSED);
    jobConf.setBoolean(ENABLE_DICTIONARY, false);
    jobConf.setEnum(WRITER_VERSION, PARQUET_2_0);

    try {
        FileSinkOperator.RecordWriter recordWriter = new TestMapredParquetOutputFormat(Optional.of(parquetSchema), true)
                .getHiveRecordWriter(
                        jobConf,
                        path,
                        Text.class,
                        false,
                        tableProperties,
                        () -> {});

        Object row = inspector.create();
        List<StructField> fields = ImmutableList.copyOf(inspector.getAllStructFieldRefs());

        while (stream(iterators).allMatch(Iterator::hasNext)) {
            for (int i = 0; i < fields.size(); i++) {
                Object value = iterators[i].next();
                inspector.setStructFieldData(row, fields.get(i), value);
            }

            ParquetHiveSerDe serde = new ParquetHiveSerDe();
            serde.initialize(jobConf, tableProperties, null);
            Writable record = serde.serialize(row, inspector);
            recordWriter.write(record);
        }

        recordWriter.close(false);
    }
    catch (IOException | SerDeException e) {
        throw new RuntimeException(e);
    }
}
 
Example 16
Source Project: parquet-mr   Source File: HiveSchemaConverter.java    License: Apache License 2.0 4 votes vote down vote up
private static GroupType convertArrayType(final String name, final ListTypeInfo typeInfo) {
  final TypeInfo subType = typeInfo.getListElementTypeInfo();
  return listWrapper(name, listType(), new GroupType(Repetition.REPEATED,
      ParquetHiveSerDe.ARRAY.toString(), convertType("array_element", subType)));
}
 
Example 17
Source Project: parquet-mr   Source File: TestParquetSerDe.java    License: Apache License 2.0 4 votes vote down vote up
public void testParquetHiveSerDe() throws Throwable {
  try {
    // Create the SerDe
    System.out.println("test: testParquetHiveSerDe");

    final ParquetHiveSerDe serDe = new ParquetHiveSerDe();
    final Configuration conf = new Configuration();
    final Properties tbl = createProperties();
    serDe.initialize(conf, tbl);

    // Data
    final Writable[] arr = new Writable[8];

    arr[0] = new ByteWritable((byte) 123);
    arr[1] = new ShortWritable((short) 456);
    arr[2] = new IntWritable(789);
    arr[3] = new LongWritable(1000l);
    arr[4] = new DoubleWritable((double) 5.3);
    arr[5] = new BinaryWritable(Binary.fromString("hive and hadoop and parquet. Big family."));

    final Writable[] mapContainer = new Writable[1];
    final Writable[] map = new Writable[3];
    for (int i = 0; i < 3; ++i) {
      final Writable[] pair = new Writable[2];
      pair[0] = new BinaryWritable(Binary.fromString("key_" + i));
      pair[1] = new IntWritable(i);
      map[i] = new ArrayWritable(Writable.class, pair);
    }
    mapContainer[0] = new ArrayWritable(Writable.class, map);
    arr[6] = new ArrayWritable(Writable.class, mapContainer);

    final Writable[] arrayContainer = new Writable[1];
    final Writable[] array = new Writable[5];
    for (int i = 0; i < 5; ++i) {
      array[i] = new BinaryWritable(Binary.fromString("elem_" + i));
    }
    arrayContainer[0] = new ArrayWritable(Writable.class, array);
    arr[7] = new ArrayWritable(Writable.class, arrayContainer);

    final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
    // Test
    deserializeAndSerializeLazySimple(serDe, arrWritable);
    System.out.println("test: testParquetHiveSerDe - OK");

  } catch (final Throwable e) {
    e.printStackTrace();
    throw e;
  }
}