Java Code Examples for org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter

The following examples show how to use org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter. They are extracted from open source projects; where available, the source project, source file, and license are noted above each example.
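Across the examples, the RecordWriter contract is the same: a writer is obtained (typically from a HiveOutputFormat via getHiveRecordWriter), each serialized row is passed to write(Writable), and the file is finalized with close(boolean abort). The snippet below is a minimal sketch of that pattern, not code from any of the projects listed; the choice of OrcOutputFormat, the empty table properties, and the class name RecordWriterUsageSketch are illustrative placeholders.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class RecordWriterUsageSketch {
    public static void writeRecords(Path target, Iterable<Writable> records) throws IOException {
        JobConf conf = new JobConf();
        // Real callers normally populate "columns" and "columns.types" here (see the examples below).
        Properties tableProperties = new Properties();
        HiveOutputFormat<?, ?> outputFormat = new OrcOutputFormat();

        // Obtain the Hive RecordWriter for the target path.
        RecordWriter writer = outputFormat.getHiveRecordWriter(
                conf, target, Text.class, false /* isCompressed */, tableProperties, Reporter.NULL);
        try {
            for (Writable record : records) {
                writer.write(record);  // one serialized row per call
            }
        }
        finally {
            writer.close(false);       // abort = false: flush and finalize the output file
        }
    }
}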
Example 1
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
public static DataSize writeOrcFileColumnHive(File outputFile, RecordWriter recordWriter, Type type, Iterator<?> values)
        throws Exception
{
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", type);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = new OrcSerde();

    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueHive(type, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
 
Example 2
Source Project: presto   Source File: HiveWriteUtils.java    License: Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
 
Example 3
private RecordWriter getWriter() throws IOException {
  try {
    HiveOutputFormat<?, ?> outputFormat = HiveOutputFormat.class
        .cast(Class.forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_OUTPUT_FORMAT_CLASS))
            .newInstance());

    @SuppressWarnings("unchecked")
    Class<? extends Writable> writableClass = (Class<? extends Writable>) Class
        .forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_WRITABLE_CLASS));

    // Merging Job Properties into JobConf for easy tuning
    JobConf loadedJobConf = new JobConf();
    for (Object key : this.properties.getProperties().keySet()) {
      loadedJobConf.set((String)key, this.properties.getProp((String)key));
    }

    return outputFormat.getHiveRecordWriter(loadedJobConf, this.stagingFile, writableClass, true,
        this.properties.getProperties(), null);
  } catch (Throwable t) {
    throw new IOException("Failed to create writer", t);
  }
}
 
Example 4
Source Project: HiveJdbcStorageHandler   Source File: JdbcOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress)
        throws IOException {
    if(LOG.isDebugEnabled()) {
        LOG.debug("jobConf: " + jobConf);
        LOG.debug("tableProperties: " + tableProperties);
    }

    DBRecordWriter dbwriter = (DBRecordWriter) super.getRecordWriter(null, jobConf, null, progress);

    /*
    DBInputFormat.setInput(jobConf, DbRecordWritable.class, inputClass, inputQuery, inputCountQuery);
    DBInputFormat.setInput(jobConf, DbRecordWritable.class, tableName, conditions, orderBy, fieldNames);        
    DBOutputFormat.setOutput(jobConf, tableName, fieldNames);
    */

    return new JdbcRecordWriter(dbwriter);
}
 
Example 5
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 6
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 7
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, Type type)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
 
Example 8
Source Project: presto   Source File: RcFileTester.java    License: Apache License 2.0
private static DataSize writeRcFileColumnOld(File outputFile, Format format, Compression compression, Type type, Iterator<?> values)
        throws Exception
{
    ObjectInspector columnObjectInspector = getJavaObjectInspector(type);
    RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnObjectInspector);

    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = format.createSerializer();

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", objectInspector.getTypeName());
    serializer.initialize(new JobConf(false), tableProperties);

    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(type, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return DataSize.ofBytes(outputFile.length()).succinct();
}
 
Example 9
Source Project: presto   Source File: RcFileTester.java    License: Apache License 2.0
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf(false);
    Optional<String> codecName = compression.getCodecName();
    codecName.ifPresent(s -> jobConf.set(COMPRESS_CODEC, s));

    return new RCFileOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            codecName.isPresent(),
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
 
Example 10
Source Project: presto   Source File: ParquetRecordWriterUtil.java    License: Apache License 2.0
private static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties)
        throws IOException
{
    if (conf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA) == null) {
        List<String> columnNames = Splitter.on(',').splitToList(properties.getProperty(IOConstants.COLUMNS));
        List<TypeInfo> columnTypes = getTypeInfosFromTypeString(properties.getProperty(IOConstants.COLUMNS_TYPES));
        MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
        setParquetSchema(conf, schema);
    }

    ParquetOutputFormat<ParquetHiveRecord> outputFormat = new ParquetOutputFormat<>(new DataWritableWriteSupport());

    return new ParquetRecordWriterWrapper(outputFormat, conf, target.toString(), Reporter.NULL, properties);
}
 
Example 11
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
private static RecordWriter createRecordWriter(Path target, Configuration conf)
{
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader())) {
        WriterOptions options = OrcFile.writerOptions(conf)
                .memory(new NullMemoryManager())
                .compress(ZLIB);

        try {
            return WRITER_CONSTRUCTOR.newInstance(target, options);
        }
        catch (ReflectiveOperationException e) {
            throw new RuntimeException(e);
        }
    }
}
 
Example 12
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(ORC_RECORD_WRITER)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
 
Example 13
Source Project: presto   Source File: ParquetTester.java    License: Apache License 2.0
private static void writeParquetColumn(
        JobConf jobConf,
        File outputFile,
        CompressionCodecName compressionCodecName,
        Properties tableProperties,
        SettableStructObjectInspector objectInspector,
        Iterator<?>[] valuesByField,
        Optional<MessageType> parquetSchema,
        boolean singleLevelArray)
        throws Exception
{
    RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray)
            .getHiveRecordWriter(
                    jobConf,
                    new Path(outputFile.toURI()),
                    Text.class,
                    compressionCodecName != UNCOMPRESSED,
                    tableProperties,
                    () -> {});
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            objectInspector.setStructFieldData(row, fields.get(field), value);
        }
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, tableProperties, null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
}
 
Example 14
Source Project: hive-solr   Source File: FusionHiveOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath,
                                        Class valueClass, boolean isCompressed, Properties tableProperties,
                                        Progressable progressable) throws IOException {

  final Text text = new Text();
  final FusionRecordWriter writer = new FusionRecordWriter(jobConf, "FusionHiveWriter", progressable);
  LOG.info("Got new FusionRecordWriter for Hive");

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof LWDocumentWritable) {
        writer.write(text, (LWDocumentWritable) w);
      } else {
        throw new IOException(
            "Expected LWDocumentWritable type, but found "
                + w.getClass().getName());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      LOG.info("Closing FusionRecordWriter for Hive");
      writer.close(Reporter.NULL);
    }
  };
}
 
Example 15
Source Project: hive-solr   Source File: LWHiveOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
        Class valueClass, boolean isCompressed, Properties tableProperties,
        Progressable progress) throws IOException {

  final Text text = new Text();
  final LucidWorksWriter writer = new LucidWorksWriter(progress);

  writer.open(jc, "HiveWriter");
  LOG.info("Got new LucidWorksWriter for Hive");

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof LWDocumentWritable) {
        writer.write(text, (LWDocumentWritable) w);
      } else {
        throw new IOException(
                "Expected LWDocumentWritable type, but found "
                        + w.getClass().getName());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      LOG.info("Closing LucidWorksWriter for Hive");
      writer.close();
    }
  };
}
 
Example 16
Source Project: bigdata-tutorial   Source File: Base64TextOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed,
    Properties tableProperties, Progressable progress) throws IOException {

  Base64RecordWriter writer = new Base64RecordWriter(super
      .getHiveRecordWriter(jc, finalOutPath, BytesWritable.class,
      isCompressed, tableProperties, progress));
  writer.configure(jc);
  return writer;
}
 
Example 17
Source Project: flink   Source File: HiveWriterFactory.java    License: Apache License 2.0
/**
 * Create a {@link RecordWriter} from path.
 */
public RecordWriter createRecordWriter(Path path) {
	try {
		checkInitialize();
		JobConf conf = new JobConf(confWrapper.conf());

		if (isCompressed) {
			String codecStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
			if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
				//noinspection unchecked
				Class<? extends CompressionCodec> codec =
						(Class<? extends CompressionCodec>) Class.forName(codecStr, true,
								Thread.currentThread().getContextClassLoader());
				FileOutputFormat.setOutputCompressorClass(conf, codec);
			}
			String typeStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
			if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
				SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
				SequenceFileOutputFormat.setOutputCompressionType(conf, style);
			}
		}

		return hiveShim.getHiveRecordWriter(
				conf,
				hiveOutputFormatClz,
				recordSerDe.getSerializedClass(),
				isCompressed,
				tableProperties,
				path);
	} catch (Exception e) {
		throw new FlinkHiveException(e);
	}
}
 
Example 18
Source Project: Hive-Cassandra   Source File: HiveCassandraOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {

  final String cassandraKeySpace = jc.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  final String cassandraHost = jc.get(AbstractColumnSerDe.CASSANDRA_HOST);
  final int cassandraPort = Integer.parseInt(jc.get(AbstractColumnSerDe.CASSANDRA_PORT));

  final CassandraProxyClient client;
  try {
    client = new CassandraProxyClient(
      cassandraHost, cassandraPort, true, true);
  } catch (CassandraException e) {
    throw new IOException(e);
  }

  return new RecordWriter() {

    @Override
    public void close(boolean abort) throws IOException {
      if (client != null) {
        client.close();
      }
    }

    @Override
    public void write(Writable w) throws IOException {
      Put put = (Put) w;
      put.write(cassandraKeySpace, client, jc);
    }

  };
}
 
Example 19
@Override
public RecordWriter getHiveRecordWriter(
        JobConf jobConf,
        Path path,
        Class<? extends Writable> aClass,
        boolean b,
        Properties properties,
        final Progressable progressable)
  throws IOException {
    throw new UnsupportedOperationException("INSERT not yet supported to Accumulo");
}
 
Example 20
@Override
public org.apache.hadoop.mapred.RecordWriter<Text, Mutation>
getRecordWriter(FileSystem fileSystem,
                JobConf jobConf,
                String s,
                Progressable progressable) throws IOException {
    throw new RuntimeException("Hive should not invoke this method");
}
 
Example 21
static DataSize writeOrcColumn(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception
{
    RecordWriter recordWriter;
    if (DWRF == format) {
        recordWriter = createDwrfRecordWriter(outputFile, compression, columnObjectInspector);
    }
    else {
        recordWriter = createOrcRecordWriter(outputFile, format, compression, columnObjectInspector);
    }
    return writeOrcFileColumnOld(outputFile, format, recordWriter, columnObjectInspector, values);
}
 
Example 22
public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception
{
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();

    List<org.apache.hadoop.hive.serde2.objectinspector.StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    int i = 0;
    TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(typeInfo, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        @SuppressWarnings("deprecation") Serializer serde;
        if (DWRF == format) {
            serde = new org.apache.hadoop.hive.ql.io.orc.OrcSerde();
            if (i == 142_345) {
                setDwrfLowMemoryFlag(recordWriter);
            }
        }
        else {
            serde = new OrcSerde();
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
 
Example 23
private static void setDwrfLowMemoryFlag(RecordWriter recordWriter)
{
    Object writer = getFieldValue(recordWriter, "writer");
    Object memoryManager = getFieldValue(writer, "memoryManager");
    setFieldValue(memoryManager, "lowMemoryMode", true);
    try {
        writer.getClass().getMethod("enterLowMemoryMode").invoke(writer);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
 
Example 24
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
public static DataSize writeOrcColumnHive(File outputFile, Format format, CompressionKind compression, Type type, Iterator<?> values)
        throws Exception
{
    RecordWriter recordWriter = createOrcRecordWriter(outputFile, format, compression, type);
    return writeOrcFileColumnHive(outputFile, recordWriter, type, values);
}
 
Example 25
Source Project: presto   Source File: ParquetRecordWriterUtil.java    License: Apache License 2.0
public static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties, ConnectorSession session)
        throws IOException, ReflectiveOperationException
{
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, getParquetWriterBlockSize(session).toBytes());
    conf.setLong(ParquetOutputFormat.PAGE_SIZE, getParquetWriterPageSize(session).toBytes());

    RecordWriter recordWriter = createParquetWriter(target, conf, properties);

    Object realWriter = REAL_WRITER_FIELD.get(recordWriter);
    Object internalWriter = INTERNAL_WRITER_FIELD.get(realWriter);
    ParquetFileWriter fileWriter = (ParquetFileWriter) FILE_WRITER_FIELD.get(internalWriter);

    return new ExtendedRecordWriter()
    {
        private long length;

        @Override
        public long getWrittenBytes()
        {
            return length;
        }

        @Override
        public void write(Writable value)
                throws IOException
        {
            recordWriter.write(value);
            length = fileWriter.getPos();
        }

        @Override
        public void close(boolean abort)
                throws IOException
        {
            recordWriter.close(abort);
            if (!abort) {
                length = fileWriter.getPos();
            }
        }
    };
}
 
Example 26
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
public static FileSplit createTestFile(
        String filePath,
        Serializer serializer,
        String compressionCodec,
        List<TestColumn> testColumns,
        int numRows,
        int stripeRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));

    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));

    serializer.initialize(CONFIGURATION, tableProperties);

    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % stripeRows == stripeRows - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example 27
Source Project: presto   Source File: AbstractTestHiveFileFormats.java    License: Apache License 2.0
public static FileSplit createTestFile(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);

    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    serializer.initialize(new Configuration(false), tableProperties);

    JobConf jobConf = new JobConf();
    configureCompression(jobConf, compressionCodec);

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
            jobConf,
            new Path(filePath),
            Text.class,
            compressionCodec != HiveCompressionCodec.NONE,
            tableProperties,
            () -> {});

    try {
        serializer.initialize(new Configuration(false), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    // todo to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration(false)).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example 28
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends
    Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable
    progressable) throws IOException {
  return new HiveDynamoDBRecordWriter(jc, progressable);
}
 
Example 29
@Override
public org.apache.hadoop.mapred.RecordWriter<Text, DynamoDBItemWritable> getRecordWriter(
    FileSystem fs, JobConf conf, String str, Progressable progressable) throws IOException {
  throw new RuntimeException("This class implements HiveRecordReader. It is not compatible with"
      + " Hadoop Record Reader.");
}
 
Example 30
Source Project: bigdata-tutorial   Source File: Base64TextOutputFormat.java    License: Apache License 2.0
public Base64RecordWriter(RecordWriter writer) {
  this.writer = writer;
  bytesWritable = new BytesWritable();
}