org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat.
Each example is taken from an open-source project; the source file, originating project, and license are noted above the code.
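Before the examples, a minimal sketch of how the class is most commonly used: Hive (and engines that reuse Hive table metadata, as in the examples below) marks a table as Parquet by storing the MapredParquetOutputFormat class name in the table's StorageDescriptor, alongside MapredParquetInputFormat and ParquetHiveSerDe. The sketch assumes the standard Hive Metastore Thrift classes (Table, StorageDescriptor, SerDeInfo, FieldSchema); the database, table, and column names are placeholders, not part of any example on this page.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.SerDeInfo;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
import org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;

public class ParquetTableDescriptorSketch
{
    // Illustrative only: builds a metastore Table definition whose storage descriptor
    // points at the Parquet SerDe and the Parquet input/output format classes.
    public static Table newParquetTable(String dbName, String tableName, String location)
    {
        List<FieldSchema> columns = new ArrayList<>();
        columns.add(new FieldSchema("id", "bigint", ""));
        columns.add(new FieldSchema("name", "string", ""));

        SerDeInfo serDeInfo = new SerDeInfo();
        serDeInfo.setSerializationLib(ParquetHiveSerDe.class.getName());

        StorageDescriptor sd = new StorageDescriptor();
        sd.setCols(columns);
        sd.setLocation(location);
        sd.setSerdeInfo(serDeInfo);
        // Parquet tables pair MapredParquetInputFormat with MapredParquetOutputFormat.
        sd.setInputFormat(MapredParquetInputFormat.class.getName());
        sd.setOutputFormat(MapredParquetOutputFormat.class.getName());

        Table table = new Table();
        table.setDbName(dbName);
        table.setTableName(tableName);
        table.setSd(sd);
        return table;
    }
}

Engines that consume this metadata typically compare the stored output format string against MapredParquetOutputFormat.class.getName() to decide whether to take a Parquet-specific write path, which is exactly the pattern in Examples #1, #3, and #4 below.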
Example #1
Source File: HiveWriteUtils.java From presto with Apache License 2.0
Presto selects a RecordWriter by comparing the table's output format name against known classes; a match on MapredParquetOutputFormat routes the write to a dedicated Parquet writer, while unrecognized formats fall back to reflective instantiation of the named HiveOutputFormat.
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        // Dispatch on the output format class name; Parquet gets its own writer.
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        // Unknown formats: instantiate the named class reflectively and use the generic Hive path.
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
Example #2
Source File: IntegrationTestHelper.java From circus-train with Apache License 2.0
This test helper registers a partitioned Parquet table in the metastore by passing the ParquetHiveSerDe, MapredParquetInputFormat, and MapredParquetOutputFormat class names, then adds a partition of generated data.
Table createParquetPartitionedTable(
    URI tableUri,
    String database,
    String table,
    Schema schema,
    String fieldName,
    Object fieldData,
    int version) throws Exception {
  List<FieldSchema> columns = new ArrayList<>();
  AvroObjectInspectorGenerator schemaInspector = new AvroObjectInspectorGenerator(schema);
  for (int i = 0; i < schemaInspector.getColumnNames().size(); i++) {
    columns.add(new FieldSchema(
        schemaInspector.getColumnNames().get(i),
        schemaInspector.getColumnTypes().get(i).toString(),
        ""));
  }

  List<FieldSchema> partitionKeys = Arrays.asList(new FieldSchema("hour", "string", ""));
  Table parquetTable = TestUtils
      .createPartitionedTable(metaStoreClient, database, table, tableUri, columns, partitionKeys,
          "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe",
          MapredParquetInputFormat.class.getName(),
          MapredParquetOutputFormat.class.getName());
  URI partition = createData(tableUri, schema, Integer.toString(version), version, fieldName, fieldData);
  metaStoreClient.add_partitions(Arrays.asList(
      newTablePartition(parquetTable, Arrays.asList(Integer.toString(version)), partition)));
  return metaStoreClient.getTable(database, table);
}
Example #3
Source File: HoodieInputFormatUtils.java From hudi with Apache License 2.0
Hudi maps its base file format to the corresponding Hive output format class name; Parquet base files map to MapredParquetOutputFormat.
public static String getOutputFormatClassName(HoodieFileFormat baseFileFormat) {
  switch (baseFileFormat) {
    case PARQUET:
      return MapredParquetOutputFormat.class.getName();
    default:
      throw new HoodieIOException("No OutputFormat for base file format " + baseFileFormat);
  }
}
Example #4
Source File: ParquetFileWriterFactory.java From presto with Apache License 2.0
Presto's optimized Parquet writer is used only when the session property enables it and the table's output format is MapredParquetOutputFormat; otherwise the factory returns Optional.empty() so the generic record-writer path is taken.
@Override
public Optional<FileWriter> createFileWriter(
        Path path,
        List<String> inputColumnNames,
        StorageFormat storageFormat,
        Properties schema,
        JobConf conf,
        ConnectorSession session)
{
    if (!HiveSessionProperties.isParquetOptimizedWriterEnabled(session)) {
        return Optional.empty();
    }
    // Only handle tables whose output format is MapredParquetOutputFormat.
    if (!MapredParquetOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
        return Optional.empty();
    }

    ParquetWriterOptions parquetWriterOptions = ParquetWriterOptions.builder()
            .setMaxPageSize(HiveSessionProperties.getParquetWriterPageSize(session))
            .setMaxBlockSize(HiveSessionProperties.getParquetWriterBlockSize(session))
            .build();

    CompressionCodecName compressionCodecName = getCompression(conf);

    List<String> fileColumnNames = getColumnNames(schema);
    List<Type> fileColumnTypes = getColumnTypes(schema).stream()
            .map(hiveType -> hiveType.getType(typeManager))
            .collect(toList());

    int[] fileInputColumnIndexes = fileColumnNames.stream()
            .mapToInt(inputColumnNames::indexOf)
            .toArray();

    try {
        FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, conf);

        // Rollback deletes the partially written file if the write fails.
        Callable<Void> rollbackAction = () -> {
            fileSystem.delete(path, false);
            return null;
        };

        return Optional.of(new ParquetFileWriter(
                fileSystem.create(path),
                rollbackAction,
                fileColumnNames,
                fileColumnTypes,
                parquetWriterOptions,
                fileInputColumnIndexes,
                compressionCodecName));
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_OPEN_ERROR, "Error creating Parquet file", e);
    }
}