org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat. Each example is taken from an open-source project; the originating project and source file are noted above the code.
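Before the project examples, here is a minimal sketch of calling the class directly as a HiveOutputFormat. The output path, the column properties, and the compression flag are placeholder assumptions for illustration, not taken from any of the projects below.

import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class MapredParquetOutputFormatSketch {

    public static RecordWriter openWriter(JobConf conf) throws Exception {
        // Minimal table schema the Parquet serde needs to build the file schema;
        // the column names and types here are illustrative assumptions.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "id,name");
        tableProperties.setProperty("columns.types", "int:string");

        // Ask the output format for a Hive record writer targeting a placeholder path.
        return new MapredParquetOutputFormat().getHiveRecordWriter(
                conf,
                new Path("/tmp/example/part-00000.parquet"), // placeholder output path
                Text.class,
                false, // isCompressed
                tableProperties,
                Reporter.NULL);
    }
}

The returned RecordWriter accepts rows via write(Writable) and is finished with close(boolean abort).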
Example #1
Source File: HiveWriteUtils.java    From presto with Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
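        // Fall back to any other HiveOutputFormat named by the table: instantiate it
        // reflectively and ask it for its own record writer.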
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
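A hypothetical call site might look like the following; the JobConf, table properties, and ConnectorSession are assumed to be supplied by the surrounding Presto code.

// Hypothetical invocation; conf, tableProperties, and session are assumptions.
RecordWriter writer = HiveWriteUtils.createRecordWriter(
        new Path("hdfs:///warehouse/db/orders/part-00000.parquet"), // placeholder path
        conf,
        tableProperties,
        MapredParquetOutputFormat.class.getName(),
        session);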
 
Example #2
Source File: IntegrationTestHelper.java    From circus-train with Apache License 2.0
Table createParquetPartitionedTable(
        URI tableUri,
        String database,
        String table,
        Schema schema,
        String fieldName,
        Object fieldData,
        int version) throws Exception {
  List<FieldSchema> columns = new ArrayList<>();
  AvroObjectInspectorGenerator schemaInspector = new AvroObjectInspectorGenerator(schema);
  for (int i = 0; i < schemaInspector.getColumnNames().size(); i++) {
    columns.add(new FieldSchema(
            schemaInspector.getColumnNames().get(i), schemaInspector.getColumnTypes().get(i).toString(), ""
    ));
  }
  List<FieldSchema> partitionKeys = Arrays.asList(new FieldSchema("hour", "string", ""));
  Table parquetTable = TestUtils
          .createPartitionedTable(metaStoreClient, database, table, tableUri, columns, partitionKeys,
                  "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", MapredParquetInputFormat.class.getName(),
                  MapredParquetOutputFormat.class.getName());
  URI partition = createData(tableUri, schema, Integer.toString(version), version, fieldName, fieldData);
  metaStoreClient.add_partitions(Arrays.asList(newTablePartition(parquetTable,
          Arrays.asList(Integer.toString(version)), partition)));
  return metaStoreClient.getTable(database, table);
}
 
Example #3
Source File: HoodieInputFormatUtils.java    From hudi with Apache License 2.0
public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
  switch (baseFileFormat) {
    case PARQUET:
      return MapredParquetOutputFormat.class.getName();
    default:
      throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
  }
}
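Only PARQUET is mapped here; any other base file format raises HoodieIOException. A hypothetical usage of this helper:

// Resolves to "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat".
String outputFormatClass =
        HoodieInputFormatUtils.getOutputFormatClassName(HoodieFileFormat.PARQUET);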
 
Example #4
Source File: ParquetFileWriterFactory.java    From presto with Apache License 2.0
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .build();

    CompressionCodecName compressionCodecName = getCompression(conf);

    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName));
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}