Java Code Examples for org.apache.hadoop.hive.ql.io.HiveOutputFormat

The following examples show how to use org.apache.hadoop.hive.ql.io.HiveOutputFormat. The examples are extracted from open source projects; the originating project, source file, and license are listed above each example.
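Before the project-specific examples, here is a minimal, self-contained sketch of the pattern they all share: instantiate a HiveOutputFormat implementation, ask it for a record writer via getHiveRecordWriter, and close the writer when done. The concrete output format (OrcOutputFormat from hive-exec), the target path, and the table properties are illustrative placeholders rather than code taken from any of the projects below.

import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class HiveOutputFormatSketch {

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();

        // Table-level properties normally come from the Hive metastore; these values are placeholders.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "x,y");
        tableProperties.setProperty("columns.types", "int:string");

        // Any HiveOutputFormat implementation can be used here; ORC is only an example.
        HiveOutputFormat<?, ?> outputFormat = new OrcOutputFormat();

        FileSinkOperator.RecordWriter writer = outputFormat.getHiveRecordWriter(
                conf,
                new Path("/tmp/hive-output-format-example"), // placeholder target path
                Text.class,                                   // value class handed to the writer
                false,                                        // isCompressed
                tableProperties,
                Reporter.NULL);                               // Progressable; Reporter.NULL is a common stand-in

        // A SerDe/Serializer would normally produce Writable records to pass to writer.write(...);
        // the examples below show that step in context. Closing with 'false' means the write is not aborted.
        writer.close(false);
    }
}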
Example 1
Source Project: presto    Source File: HiveWriteUtils.java    License: Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
 
Example 2
Source Project: flink    Source File: HiveWriterFactory.java    License: Apache License 2.0
public HiveWriterFactory(
		JobConf jobConf,
		Class hiveOutputFormatClz,
		SerDeInfo serDeInfo,
		TableSchema schema,
		String[] partitionColumns,
		Properties tableProperties,
		HiveShim hiveShim,
		boolean isCompressed) {
	Preconditions.checkArgument(HiveOutputFormat.class.isAssignableFrom(hiveOutputFormatClz),
			"The output format should be an instance of HiveOutputFormat");
	this.confWrapper = new JobConfWrapper(jobConf);
	this.hiveOutputFormatClz = hiveOutputFormatClz;
	this.serDeInfo = serDeInfo;
	this.allColumns = schema.getFieldNames();
	this.allTypes = schema.getFieldDataTypes();
	this.partitionColumns = partitionColumns;
	this.tableProperties = tableProperties;
	this.hiveShim = hiveShim;
	this.isCompressed = isCompressed;
}
 
Example 3
private RecordWriter getWriter() throws IOException {
  try {
    HiveOutputFormat<?, ?> outputFormat = HiveOutputFormat.class
        .cast(Class.forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_OUTPUT_FORMAT_CLASS))
            .newInstance());

    @SuppressWarnings("unchecked")
    Class<? extends Writable> writableClass = (Class<? extends Writable>) Class
        .forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_WRITABLE_CLASS));

    // Merging Job Properties into JobConf for easy tuning
    JobConf loadedJobConf = new JobConf();
    for (Object key : this.properties.getProperties().keySet()) {
      loadedJobConf.set((String)key, this.properties.getProp((String)key));
    }

    return outputFormat.getHiveRecordWriter(loadedJobConf, this.stagingFile, writableClass, true,
        this.properties.getProperties(), null);
  } catch (Throwable t) {
    throw new IOException("Failed to create writer", t);
  }
}
 
Example 4
Source Project: flink    Source File: HiveShimV100.java    License: Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
		Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
	try {
		Class utilClass = HiveFileFormatUtils.class;
		HiveOutputFormat outputFormat = (HiveOutputFormat) outputFormatClz.newInstance();
		Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, HiveOutputFormat.class,
				Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
		return (FileSinkOperator.RecordWriter) utilMethod.invoke(null,
				jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
	} catch (Exception e) {
		throw new CatalogException("Failed to create Hive RecordWriter", e);
	}
}
 
Example 5
Source Project: hive-dwrf    Source File: TestInputOutputFormat.java    License: Apache License 2.0
@Test
public void testEmptyFile() throws Exception {
  JobConf job = new JobConf(conf);
  Properties properties = new Properties();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
          properties, Reporter.NULL);
  writer.close(true);
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  SerDe serde = new OrcSerde();
  serde.initialize(conf, properties);
  InputFormat<?,?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);

  // read the whole file
  conf.set("hive.io.file.readcolumn.ids", "0,1");
  org.apache.hadoop.mapred.RecordReader reader =
      in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  assertEquals(0.0, reader.getProgress(), 0.00001);
  assertEquals(0, reader.getPos());
  assertEquals(false, reader.next(key, value));
  reader.close();
  assertEquals(null, serde.getSerDeStats());
}
 
Example 6
Source Project: presto    Source File: AbstractTestHiveFileFormats.java    License: Apache License 2.0
public static FileSplit createTestFile(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);

    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    serializer.initialize(new Configuration(false), tableProperties);

    JobConf jobConf = new JobConf();
    configureCompression(jobConf, compressionCodec);

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
            jobConf,
            new Path(filePath),
            Text.class,
            compressionCodec != HiveCompressionCodec.NONE,
            tableProperties,
            () -> {});

    try {
        serializer.initialize(new Configuration(false), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    // todo to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration(false)).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example 7
Source Project: HiveJdbcStorageHandler    Source File: JdbcStorageHandler.java    License: Apache License 2.0
@SuppressWarnings("rawtypes")
@Override
public Class<? extends HiveOutputFormat> getOutputFormatClass() {
    // NOTE: must return a subclass of HiveOutputFormat
    return JdbcOutputFormat.class;
}