Java Code Examples for org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat

The following examples show how to use org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat. They are extracted from open source projects; the originating project and source file are noted above each example.
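Before the project examples, here is a minimal, self-contained sketch of the basic call: obtaining a Hive RecordWriter from MapredParquetOutputFormat through getHiveRecordWriter, the same HiveOutputFormat call pattern the reflective fallback in Example 1 uses. It is not taken from any of the projects below; the class name, output path, and column property values are illustrative assumptions.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class ParquetRecordWriterSketch {

    public static RecordWriter openParquetWriter(JobConf conf) throws IOException {
        // MapredParquetOutputFormat builds the Parquet schema from the Hive
        // column metadata carried in the table properties (illustrative values).
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "id,name");
        tableProperties.setProperty("columns.types", "int:string");

        Path target = new Path("/tmp/example/000000_0"); // illustrative output path

        // Same call pattern as the HiveOutputFormat fallback in Example 1 below.
        return new MapredParquetOutputFormat().getHiveRecordWriter(
                conf, target, Text.class, false /* isCompressed */, tableProperties, Reporter.NULL);
    }
}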
Example 1
Source Project: presto   Source File: HiveWriteUtils.java   License: Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        // Well-known Hive output formats get purpose-built writers.
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        // Fall back to instantiating the named output format reflectively and
        // asking it for a Hive record writer.
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
 
Example 2
Source Project: circus-train   Source File: IntegrationTestHelper.java   License: Apache License 2.0
Table createParquetPartitionedTable(
        URI tableUri,
        String database,
        String table,
        Schema schema,
        String fieldName,
        Object fieldData,
        int version) throws Exception {
  List<FieldSchema> columns = new ArrayList<>();
  // Derive the Hive column definitions from the Avro schema.
  AvroObjectInspectorGenerator schemaInspector = new AvroObjectInspectorGenerator(schema);
  for (int i = 0; i < schemaInspector.getColumnNames().size(); i++) {
    columns.add(new FieldSchema(
            schemaInspector.getColumnNames().get(i), schemaInspector.getColumnTypes().get(i).toString(), ""
    ));
  }
  List<FieldSchema> partitionKeys = Arrays.asList(new FieldSchema("hour", "string", ""));
  // Register a Parquet-backed partitioned table (ParquetHiveSerDe with the
  // MapredParquet input/output formats) in the metastore.
  Table parquetTable = TestUtils
          .createPartitionedTable(metaStoreClient, database, table, tableUri, columns, partitionKeys,
                  "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe", MapredParquetInputFormat.class.getName(),
                  MapredParquetOutputFormat.class.getName());
  URI partition = createData(tableUri, schema, Integer.toString(version), version, fieldName, fieldData);
  metaStoreClient.add_partitions(Arrays.asList(newTablePartition(parquetTable,
          Arrays.asList(Integer.toString(version)), partition)));
  return metaStoreClient.getTable(database, table);
}
 
Example 3
Source Project: hudi   Source File: HoodieInputFormatUtils.java   License: Apache License 2.0
public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
  switch (baseFileFormat) {
    case PARQUET:
      return MapredParquetOutputFormat.class.getName();
    default:
      throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
  }
}
 
Example 4
Source Project: presto   Source File: ParquetFileWriterFactory.java   License: Apache License 2.0
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }

    // Only handle tables written with MapredParquetOutputFormat.
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .build();

    CompressionCodecName compressionCodecName = getCompression(conf);

    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());

    // Map each file column to its index in the input columns (-1 when absent).
    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        // On rollback, delete the partially written file.
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName));
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}