org.apache.hadoop.hive.ql.io.HiveOutputFormat Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.HiveOutputFormat. You can vote up the examples you like or vote down the ones you don't like, and you can go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: HiveWriteUtils.java    From presto with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a {@link RecordWriter} for the given Hive output format class name.
 * Formats with dedicated Presto writer implementations (Parquet, text,
 * sequence file, Avro) are handled specially; anything else is instantiated
 * reflectively and asked for its own Hive record writer.
 */
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        // Whether the session/job configuration requests compressed output.
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);

        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        else if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        else if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        else if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }

        // Generic fallback: reflectively construct the named output format and
        // delegate to its HiveOutputFormat contract.
        HiveOutputFormat<?, ?> format =
                (HiveOutputFormat<?, ?>) Class.forName(outputFormatName).getConstructor().newInstance();
        return format.getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
 
Example #2
Source File: HiveWriterFactory.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a writer factory for the given Hive table.
 *
 * @param jobConf             Hadoop job configuration, wrapped for serializability
 * @param hiveOutputFormatClz the output format class; must implement {@link HiveOutputFormat}
 * @param serDeInfo           serializer/deserializer metadata for the table
 * @param schema              full table schema (column names and data types)
 * @param partitionColumns    names of the partition columns
 * @param tableProperties     Hive table properties passed through to the writer
 * @param hiveShim            version-specific Hive shim
 * @param isCompressed        whether output should be compressed
 */
public HiveWriterFactory(
		JobConf jobConf,
		Class hiveOutputFormatClz,
		SerDeInfo serDeInfo,
		TableSchema schema,
		String[] partitionColumns,
		Properties tableProperties,
		HiveShim hiveShim,
		boolean isCompressed) {
	// Fail fast and include the offending class name so a misconfigured
	// output format is easy to diagnose from the exception message alone.
	Preconditions.checkArgument(HiveOutputFormat.class.isAssignableFrom(hiveOutputFormatClz),
			"The output format should be an instance of HiveOutputFormat, but was %s",
			hiveOutputFormatClz.getName());
	this.confWrapper = new JobConfWrapper(jobConf);
	this.hiveOutputFormatClz = hiveOutputFormatClz;
	this.serDeInfo = serDeInfo;
	this.allColumns = schema.getFieldNames();
	this.allTypes = schema.getFieldDataTypes();
	this.partitionColumns = partitionColumns;
	this.tableProperties = tableProperties;
	this.hiveShim = hiveShim;
	this.isCompressed = isCompressed;
}
 
Example #3
Source File: HiveWritableHdfsDataWriter.java    From incubator-gobblin with Apache License 2.0 6 votes vote down vote up
/**
 * Reflectively instantiates the configured {@link HiveOutputFormat} and asks it
 * for a {@link RecordWriter} targeting the staging file.
 *
 * @return a record writer for {@code this.stagingFile}
 * @throws IOException if the output format or writable class cannot be loaded,
 *         instantiated, or fails to produce a writer
 */
private RecordWriter getWriter() throws IOException {
  try {
    // getDeclaredConstructor().newInstance() replaces the deprecated
    // Class.newInstance(), which silently rethrows checked exceptions
    // thrown by the constructor.
    HiveOutputFormat<?, ?> outputFormat = HiveOutputFormat.class
        .cast(Class.forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_OUTPUT_FORMAT_CLASS))
            .getDeclaredConstructor().newInstance());

    @SuppressWarnings("unchecked")
    Class<? extends Writable> writableClass = (Class<? extends Writable>) Class
        .forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_WRITABLE_CLASS));

    // Merging Job Properties into JobConf for easy tuning
    JobConf loadedJobConf = new JobConf();
    for (Object key : this.properties.getProperties().keySet()) {
      loadedJobConf.set((String) key, this.properties.getProp((String) key));
    }

    // NOTE(review): a null Reporter is passed here; most Hive output formats
    // tolerate this, but Reporter.NULL would be the conventional sentinel.
    return outputFormat.getHiveRecordWriter(loadedJobConf, this.stagingFile, writableClass, true,
        this.properties.getProperties(), null);
  } catch (Throwable t) {
    // Broad catch is intentional: any failure here (including LinkageError from
    // a bad class name) is surfaced to the caller as an IOException.
    throw new IOException("Failed to create writer", t);
  }
}
 
Example #4
Source File: HiveShimV100.java    From flink with Apache License 2.0 5 votes vote down vote up
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
		Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
	try {
		Class utilClass = HiveFileFormatUtils.class;
		HiveOutputFormat outputFormat = (HiveOutputFormat) outputFormatClz.newInstance();
		Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, HiveOutputFormat.class,
				Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
		return (FileSinkOperator.RecordWriter) utilMethod.invoke(null,
				jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
	} catch (Exception e) {
		throw new CatalogException("Failed to create Hive RecordWriter", e);
	}
}
 
Example #5
Source File: TestInputOutputFormat.java    From hive-dwrf with Apache License 2.0 5 votes vote down vote up
@Test
public void testEmptyFile() throws Exception {
  JobConf job = new JobConf(conf);
  Properties properties = new Properties();
  // Write an ORC file and close it immediately: no rows are ever written,
  // so the file on disk is a valid but empty ORC file.
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
          properties, Reporter.NULL);
  writer.close(true);
  // Declare a two-column schema so the SerDe and reader have something to project.
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  SerDe serde = new OrcSerde();
  serde.initialize(conf, properties);
  InputFormat<?,?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  // Even an empty file should yield exactly one split.
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);

  // read the whole file
  conf.set("hive.io.file.readcolumn.ids", "0,1");
  org.apache.hadoop.mapred.RecordReader reader =
      in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  // An empty file: progress and position stay at zero, and the very first
  // next() call reports end-of-input without touching key/value.
  assertEquals(0.0, reader.getProgress(), 0.00001);
  assertEquals(0, reader.getPos());
  assertEquals(false, reader.next(key, value));
  reader.close();
  // No rows were serialized, so the SerDe has accumulated no statistics.
  assertEquals(null, serde.getSerDeStats());
}
 
Example #6
Source File: AbstractTestHiveFileFormats.java    From presto with Apache License 2.0 4 votes vote down vote up
/**
 * Writes {@code numRows} of test data to {@code filePath} using the given Hive
 * storage format and compression codec, then returns a {@link FileSplit}
 * covering the whole file.
 *
 * <p>Partition-key columns are excluded: Hive never writes partition keys into
 * the data file itself.
 *
 * @param filePath         path of the file to create
 * @param storageFormat    storage format supplying output format and SerDe class names
 * @param compressionCodec codec to configure on the job (NONE disables compression)
 * @param testColumns      columns to write; partition keys are filtered out
 * @param numRows          number of identical-schema rows to write
 * @return a split spanning the entire written file
 */
public static FileSplit createTestFile(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);

    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    // Initialize the SerDe once with the column schema (a redundant second
    // initialize with identical arguments has been removed).
    serializer.initialize(new Configuration(false), tableProperties);

    JobConf jobConf = new JobConf();
    configureCompression(jobConf, compressionCodec);

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
            jobConf,
            new Path(filePath),
            Text.class,
            compressionCodec != HiveCompressionCodec.NONE,
            tableProperties,
            () -> {});

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                // Slices are written as their raw byte contents.
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        // abort=false: flush and commit the file even if the loop threw.
        recordWriter.close(false);
    }

    // todo to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration(false)).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example #7
Source File: JdbcStorageHandler.java    From HiveJdbcStorageHandler with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the output format class for this storage handler.
 *
 * @return {@link JdbcOutputFormat}, a {@link HiveOutputFormat} subclass
 */
@SuppressWarnings("rawtypes")
@Override
public Class<? extends HiveOutputFormat> getOutputFormatClass() {
    // NOTE that must return subclass of HiveOutputFormat
    return JdbcOutputFormat.class;
}