Java Code Examples for org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter

The following examples show how to use org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter. They are extracted from open source projects; where available, the source project, source file, and license are noted above each example.
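Across the examples, the RecordWriter contract is the same: a writer is obtained (typically from a HiveOutputFormat via getHiveRecordWriter), each serialized row is passed to write(Writable), and the file is finalized with close(boolean abort). The snippet below is a minimal sketch of that pattern, not code from any of the projects listed; the choice of OrcOutputFormat, the empty table properties, and the class name RecordWriterUsageSketch are illustrative placeholders.

import java.io.IOException;
import java.util.Properties;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;

public class RecordWriterUsageSketch {
    public static void writeRecords(Path target, Iterable<Writable> records) throws IOException {
        JobConf conf = new JobConf();
        // Real callers normally populate "columns" and "columns.types" here (see the examples below).
        Properties tableProperties = new Properties();
        HiveOutputFormat<?, ?> outputFormat = new OrcOutputFormat();

        // Obtain the Hive RecordWriter for the target path.
        RecordWriter writer = outputFormat.getHiveRecordWriter(
                conf, target, Text.class, false /* isCompressed */, tableProperties, Reporter.NULL);
        try {
            for (Writable record : records) {
                writer.write(record);  // one serialized row per call
            }
        }
        finally {
            writer.close(false);       // abort = false: flush and finalize the output file
        }
    }
}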
Example 1
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
public static DataSize writeOrcFileColumnHive(File outputFile, RecordWriter recordWriter, Type type, Iterator<?> values)
        throws Exception
{
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", type);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = new OrcSerde();

    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueHive(type, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
 
Example 2
Source Project: presto   Source File: HiveWriteUtils.java    License: Apache License 2.0
public static RecordWriter createRecordWriter(Path target, JobConf conf, Properties properties, String outputFormatName, ConnectorSession session)
{
    try {
        boolean compress = HiveConf.getBoolVar(conf, COMPRESSRESULT);
        if (outputFormatName.equals(MapredParquetOutputFormat.class.getName())) {
            return createParquetWriter(target, conf, properties, session);
        }
        if (outputFormatName.equals(HiveIgnoreKeyTextOutputFormat.class.getName())) {
            return new TextRecordWriter(target, conf, properties, compress);
        }
        if (outputFormatName.equals(HiveSequenceFileOutputFormat.class.getName())) {
            return new SequenceFileRecordWriter(target, conf, Text.class, compress);
        }
        if (outputFormatName.equals(AvroContainerOutputFormat.class.getName())) {
            return new AvroRecordWriter(target, conf, compress, properties);
        }
        Object writer = Class.forName(outputFormatName).getConstructor().newInstance();
        return ((HiveOutputFormat<?, ?>) writer).getHiveRecordWriter(conf, target, Text.class, compress, properties, Reporter.NULL);
    }
    catch (IOException | ReflectiveOperationException e) {
        throw new PrestoException(HIVE_WRITER_DATA_ERROR, e);
    }
}
 
Example 3
private RecordWriter getWriter() throws IOException {
  try {
    HiveOutputFormat<?, ?> outputFormat = HiveOutputFormat.class
        .cast(Class.forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_OUTPUT_FORMAT_CLASS))
            .newInstance());

    @SuppressWarnings("unchecked")
    Class<? extends Writable> writableClass = (Class<? extends Writable>) Class
        .forName(this.properties.getProp(HiveWritableHdfsDataWriterBuilder.WRITER_WRITABLE_CLASS));

    // Merging Job Properties into JobConf for easy tuning
    JobConf loadedJobConf = new JobConf();
    for (Object key : this.properties.getProperties().keySet()) {
      loadedJobConf.set((String)key, this.properties.getProp((String)key));
    }

    return outputFormat.getHiveRecordWriter(loadedJobConf, this.stagingFile, writableClass, true,
        this.properties.getProperties(), null);
  } catch (Throwable t) {
    throw new IOException("Failed to create writer", t);
  }
}
 
Example 4
Source Project: HiveJdbcStorageHandler   Source File: JdbcOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath, Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable progress)
        throws IOException {
    if(LOG.isDebugEnabled()) {
        LOG.debug("jobConf: " + jobConf);
        LOG.debug("tableProperties: " + tableProperties);
    }

    DBRecordWriter dbwriter = (DBRecordWriter) super.getRecordWriter(null, jobConf, null, progress);

    /*
    DBInputFormat.setInput(jobConf, DbRecordWritable.class, inputClass, inputQuery, inputCountQuery);
    DBInputFormat.setInput(jobConf, DbRecordWritable.class, tableName, conditions, orderBy, fieldNames);        
    DBOutputFormat.setOutput(jobConf, tableName, fieldNames);
    */

    return new JdbcRecordWriter(dbwriter);
}
 
Example 5
static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 6
private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
    jobConf.set("hive.exec.orc.compress", compressionCodec.name());
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
    OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
    OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compressionCodec != NONE,
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> { }
    );
}
 
Example 7
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
static RecordWriter createOrcRecordWriter(File outputFile, Format format, CompressionKind compression, Type type)
        throws IOException
{
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            createTableProperties("test", getJavaObjectInspector(type).getTypeName()),
            () -> {});
}
 
Example 8
Source Project: presto   Source File: RcFileTester.java    License: Apache License 2.0
private static DataSize writeRcFileColumnOld(File outputFile, Format format, Compression compression, Type type, Iterator<?> values)
        throws Exception
{
    ObjectInspector columnObjectInspector = getJavaObjectInspector(type);
    RecordWriter recordWriter = createRcFileWriterOld(outputFile, compression, columnObjectInspector);

    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();

    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    Serializer serializer = format.createSerializer();

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", objectInspector.getTypeName());
    serializer.initialize(new JobConf(false), tableProperties);

    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(type, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        Writable record = serializer.serialize(row, objectInspector);
        recordWriter.write(record);
    }

    recordWriter.close(false);
    return DataSize.ofBytes(outputFile.length()).succinct();
}
 
Example 9
Source Project: presto   Source File: RcFileTester.java    License: Apache License 2.0
private static RecordWriter createRcFileWriterOld(File outputFile, Compression compression, ObjectInspector columnObjectInspector)
        throws IOException
{
    JobConf jobConf = new JobConf(false);
    Optional<String> codecName = compression.getCodecName();
    codecName.ifPresent(s -> jobConf.set(COMPRESS_CODEC, s));

    return new RCFileOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            codecName.isPresent(),
            createTableProperties("test", columnObjectInspector.getTypeName()),
            () -> {});
}
 
Example 10
Source Project: presto   Source File: ParquetRecordWriterUtil.java    License: Apache License 2.0
private static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties)
        throws IOException
{
    if (conf.get(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA) == null) {
        List<String> columnNames = Splitter.on(',').splitToList(properties.getProperty(IOConstants.COLUMNS));
        List<TypeInfo> columnTypes = getTypeInfosFromTypeString(properties.getProperty(IOConstants.COLUMNS_TYPES));
        MessageType schema = HiveSchemaConverter.convert(columnNames, columnTypes);
        setParquetSchema(conf, schema);
    }

    ParquetOutputFormat<ParquetHiveRecord> outputFormat = new ParquetOutputFormat<>(new DataWritableWriteSupport());

    return new ParquetRecordWriterWrapper(outputFormat, conf, target.toString(), Reporter.NULL, properties);
}
 
Example 11
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
private static RecordWriter createRecordWriter(Path target, Configuration conf)
{
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(FileSystem.class.getClassLoader())) {
        WriterOptions options = OrcFile.writerOptions(conf)
                .memory(new NullMemoryManager())
                .compress(ZLIB);

        try {
            return WRITER_CONSTRUCTOR.newInstance(target, options);
        }
        catch (ReflectiveOperationException e) {
            throw new RuntimeException(e);
        }
    }
}
 
Example 12
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
private static Constructor<? extends RecordWriter> getOrcWriterConstructor()
{
    try {
        Constructor<? extends RecordWriter> constructor = OrcOutputFormat.class.getClassLoader()
                .loadClass(ORC_RECORD_WRITER)
                .asSubclass(RecordWriter.class)
                .getDeclaredConstructor(Path.class, WriterOptions.class);
        constructor.setAccessible(true);
        return constructor;
    }
    catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
    }
}
 
Example 13
Source Project: presto   Source File: ParquetTester.java    License: Apache License 2.0
private static void writeParquetColumn(
        JobConf jobConf,
        File outputFile,
        CompressionCodecName compressionCodecName,
        Properties tableProperties,
        SettableStructObjectInspector objectInspector,
        Iterator<?>[] valuesByField,
        Optional<MessageType> parquetSchema,
        boolean singleLevelArray)
        throws Exception
{
    RecordWriter recordWriter = new TestMapredParquetOutputFormat(parquetSchema, singleLevelArray)
            .getHiveRecordWriter(
                    jobConf,
                    new Path(outputFile.toURI()),
                    Text.class,
                    compressionCodecName != UNCOMPRESSED,
                    tableProperties,
                    () -> {});
    Object row = objectInspector.create();
    List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());
    while (stream(valuesByField).allMatch(Iterator::hasNext)) {
        for (int field = 0; field < fields.size(); field++) {
            Object value = valuesByField[field].next();
            objectInspector.setStructFieldData(row, fields.get(field), value);
        }
        ParquetHiveSerDe serde = new ParquetHiveSerDe();
        serde.initialize(jobConf, tableProperties, null);
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
    }
    recordWriter.close(false);
}
 
Example 14
Source Project: hive-solr   Source File: FusionHiveOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jobConf, Path finalOutPath,
                                        Class valueClass, boolean isCompressed, Properties tableProperties,
                                        Progressable progressable) throws IOException {

  final Text text = new Text();
  final FusionRecordWriter writer = new FusionRecordWriter(jobConf, "FusionHiveWriter", progressable);
  LOG.info("Got new FusionRecordWriter for Hive");

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof LWDocumentWritable) {
        writer.write(text, (LWDocumentWritable) w);
      } else {
        throw new IOException(
            "Expected LWDocumentWritable type, but found "
                + w.getClass().getName());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      LOG.info("Closing FusionRecordWriter for Hive");
      writer.close(Reporter.NULL);
    }
  };
}
 
Example 15
Source Project: hive-solr   Source File: LWHiveOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
        Class valueClass, boolean isCompressed, Properties tableProperties,
        Progressable progress) throws IOException {

  final Text text = new Text();
  final LucidWorksWriter writer = new LucidWorksWriter(progress);

  writer.open(jc, "HiveWriter");
  LOG.info("Got new LucidWorksWriter for Hive");

  return new RecordWriter() {
    @Override
    public void write(Writable w) throws IOException {
      if (w instanceof LWDocumentWritable) {
        writer.write(text, (LWDocumentWritable) w);
      } else {
        throw new IOException(
                "Expected LWDocumentWritable type, but found "
                        + w.getClass().getName());
      }
    }

    @Override
    public void close(boolean abort) throws IOException {
      LOG.info("Closing LucidWorksWriter for Hive");
      writer.close();
    }
  };
}
 
Example 16
Source Project: bigdata-tutorial   Source File: Base64TextOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed,
    Properties tableProperties, Progressable progress) throws IOException {

  Base64RecordWriter writer = new Base64RecordWriter(super
      .getHiveRecordWriter(jc, finalOutPath, BytesWritable.class,
      isCompressed, tableProperties, progress));
  writer.configure(jc);
  return writer;
}
 
Example 17
Source Project: flink   Source File: HiveWriterFactory.java    License: Apache License 2.0
/**
 * Create a {@link RecordWriter} from path.
 */
public RecordWriter createRecordWriter(Path path) {
	try {
		checkInitialize();
		JobConf conf = new JobConf(confWrapper.conf());

		if (isCompressed) {
			String codecStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
			if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
				//noinspection unchecked
				Class<? extends CompressionCodec> codec =
						(Class<? extends CompressionCodec>) Class.forName(codecStr, true,
								Thread.currentThread().getContextClassLoader());
				FileOutputFormat.setOutputCompressorClass(conf, codec);
			}
			String typeStr = conf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
			if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
				SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
				SequenceFileOutputFormat.setOutputCompressionType(conf, style);
			}
		}

		return hiveShim.getHiveRecordWriter(
				conf,
				hiveOutputFormatClz,
				recordSerDe.getSerializedClass(),
				isCompressed,
				tableProperties,
				path);
	} catch (Exception e) {
		throw new FlinkHiveException(e);
	}
}
 
Example 18
Source Project: Hive-Cassandra   Source File: HiveCassandraOutputFormat.java    License: Apache License 2.0
@Override
public RecordWriter getHiveRecordWriter(final JobConf jc, Path finalOutPath,
    Class<? extends Writable> valueClass, boolean isCompressed, Properties tableProperties,
    Progressable progress) throws IOException {

  final String cassandraKeySpace = jc.get(AbstractColumnSerDe.CASSANDRA_KEYSPACE_NAME);
  final String cassandraHost = jc.get(AbstractColumnSerDe.CASSANDRA_HOST);
  final int cassandraPort = Integer.parseInt(jc.get(AbstractColumnSerDe.CASSANDRA_PORT));

  final CassandraProxyClient client;
  try {
    client = new CassandraProxyClient(
      cassandraHost, cassandraPort, true, true);
  } catch (CassandraException e) {
    throw new IOException(e);
  }

  return new RecordWriter() {

    @Override
    public void close(boolean abort) throws IOException {
      if (client != null) {
        client.close();
      }
    }

    @Override
    public void write(Writable w) throws IOException {
      Put put = (Put) w;
      put.write(cassandraKeySpace, client, jc);
    }

  };
}
 
Example 19
@Override
public RecordWriter getHiveRecordWriter(
        JobConf jobConf,
        Path path,
        Class<? extends Writable> aClass,
        boolean b,
        Properties properties,
        final Progressable progressable)
  throws IOException {
    throw new UnsupportedOperationException("INSERT not yet supported to Accumulo");
}
 
Example 20
@Override
public org.apache.hadoop.mapred.RecordWriter<Text, Mutation>
getRecordWriter(FileSystem fileSystem,
                JobConf jobConf,
                String s,
                Progressable progressable) throws IOException {
    throw new RuntimeException("Hive should not invoke this method");
}
 
Example 21
static DataSize writeOrcColumn(File outputFile, Format format, Compression compression, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception
{
    RecordWriter recordWriter;
    if (DWRF == format) {
        recordWriter = createDwrfRecordWriter(outputFile, compression, columnObjectInspector);
    }
    else {
        recordWriter = createOrcRecordWriter(outputFile, format, compression, columnObjectInspector);
    }
    return writeOrcFileColumnOld(outputFile, format, recordWriter, columnObjectInspector, values);
}
 
Example 22
public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter, ObjectInspector columnObjectInspector, Iterator<?> values)
        throws Exception
{
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", columnObjectInspector);
    Object row = objectInspector.create();

    List<org.apache.hadoop.hive.serde2.objectinspector.StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

    int i = 0;
    TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
    while (values.hasNext()) {
        Object value = values.next();
        value = preprocessWriteValueOld(typeInfo, value);
        objectInspector.setStructFieldData(row, fields.get(0), value);

        @SuppressWarnings("deprecation") Serializer serde;
        if (DWRF == format) {
            serde = new org.apache.hadoop.hive.ql.io.orc.OrcSerde();
            if (i == 142_345) {
                setDwrfLowMemoryFlag(recordWriter);
            }
        }
        else {
            serde = new OrcSerde();
        }
        Writable record = serde.serialize(row, objectInspector);
        recordWriter.write(record);
        i++;
    }

    recordWriter.close(false);
    return succinctBytes(outputFile.length());
}
 
Example 23
private static void setDwrfLowMemoryFlag(RecordWriter recordWriter)
{
    Object writer = getFieldValue(recordWriter, "writer");
    Object memoryManager = getFieldValue(writer, "memoryManager");
    setFieldValue(memoryManager, "lowMemoryMode", true);
    try {
        writer.getClass().getMethod("enterLowMemoryMode").invoke(writer);
    }
    catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
 
Example 24
Source Project: presto   Source File: OrcTester.java    License: Apache License 2.0
public static DataSize writeOrcColumnHive(File outputFile, Format format, CompressionKind compression, Type type, Iterator<?> values)
        throws Exception
{
    RecordWriter recordWriter = createOrcRecordWriter(outputFile, format, compression, type);
    return writeOrcFileColumnHive(outputFile, recordWriter, type, values);
}
 
Example 25
Source Project: presto   Source File: ParquetRecordWriterUtil.java    License: Apache License 2.0
public static RecordWriter createParquetWriter(Path target, JobConf conf, Properties properties, ConnectorSession session)
        throws IOException, ReflectiveOperationException
{
    conf.setLong(ParquetOutputFormat.BLOCK_SIZE, getParquetWriterBlockSize(session).toBytes());
    conf.setLong(ParquetOutputFormat.PAGE_SIZE, getParquetWriterPageSize(session).toBytes());

    RecordWriter recordWriter = createParquetWriter(target, conf, properties);

    Object realWriter = REAL_WRITER_FIELD.get(recordWriter);
    Object internalWriter = INTERNAL_WRITER_FIELD.get(realWriter);
    ParquetFileWriter fileWriter = (ParquetFileWriter) FILE_WRITER_FIELD.get(internalWriter);

    return new ExtendedRecordWriter()
    {
        private long length;

        @Override
        public long getWrittenBytes()
        {
            return length;
        }

        @Override
        public void write(Writable value)
                throws IOException
        {
            recordWriter.write(value);
            length = fileWriter.getPos();
        }

        @Override
        public void close(boolean abort)
                throws IOException
        {
            recordWriter.close(abort);
            if (!abort) {
                length = fileWriter.getPos();
            }
        }
    };
}
 
Example 26
Source Project: presto   Source File: TestOrcPageSourceMemoryTracking.java    License: Apache License 2.0
public static FileSplit createTestFile(
        String filePath,
        Serializer serializer,
        String compressionCodec,
        List<TestColumn> testColumns,
        int numRows,
        int stripeRows)
        throws Exception
{
    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));

    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));

    serializer.initialize(CONFIGURATION, tableProperties);

    JobConf jobConf = new JobConf();
    if (compressionCodec != null) {
        CompressionCodec codec = new CompressionCodecFactory(CONFIGURATION).getCodecByName(compressionCodec);
        jobConf.set(COMPRESS_CODEC, codec.getClass().getName());
        jobConf.set(COMPRESS_TYPE, SequenceFile.CompressionType.BLOCK.toString());
    }

    RecordWriter recordWriter = createRecordWriter(new Path(filePath), CONFIGURATION);

    try {
        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
            if (rowNumber % stripeRows == stripeRows - 1) {
                flushStripe(recordWriter);
            }
        }
    }
    finally {
        recordWriter.close(false);
    }

    Path path = new Path(filePath);
    path.getFileSystem(CONFIGURATION).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example 27
Source Project: presto   Source File: AbstractTestHiveFileFormats.java    License: Apache License 2.0
public static FileSplit createTestFile(
        String filePath,
        HiveStorageFormat storageFormat,
        HiveCompressionCodec compressionCodec,
        List<TestColumn> testColumns,
        int numRows)
        throws Exception
{
    HiveOutputFormat<?, ?> outputFormat = newInstance(storageFormat.getOutputFormat(), HiveOutputFormat.class);
    Serializer serializer = newInstance(storageFormat.getSerDe(), Serializer.class);

    // filter out partition keys, which are not written to the file
    testColumns = testColumns.stream()
            .filter(column -> !column.isPartitionKey())
            .collect(toImmutableList());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(
            "columns",
            testColumns.stream()
                    .map(TestColumn::getName)
                    .collect(Collectors.joining(",")));
    tableProperties.setProperty(
            "columns.types",
            testColumns.stream()
                    .map(TestColumn::getType)
                    .collect(Collectors.joining(",")));
    serializer.initialize(new Configuration(false), tableProperties);

    JobConf jobConf = new JobConf();
    configureCompression(jobConf, compressionCodec);

    RecordWriter recordWriter = outputFormat.getHiveRecordWriter(
            jobConf,
            new Path(filePath),
            Text.class,
            compressionCodec != HiveCompressionCodec.NONE,
            tableProperties,
            () -> {});

    try {
        serializer.initialize(new Configuration(false), tableProperties);

        SettableStructObjectInspector objectInspector = getStandardStructObjectInspector(
                testColumns.stream()
                        .map(TestColumn::getName)
                        .collect(toImmutableList()),
                testColumns.stream()
                        .map(TestColumn::getObjectInspector)
                        .collect(toImmutableList()));

        Object row = objectInspector.create();

        List<StructField> fields = ImmutableList.copyOf(objectInspector.getAllStructFieldRefs());

        for (int rowNumber = 0; rowNumber < numRows; rowNumber++) {
            for (int i = 0; i < testColumns.size(); i++) {
                Object writeValue = testColumns.get(i).getWriteValue();
                if (writeValue instanceof Slice) {
                    writeValue = ((Slice) writeValue).getBytes();
                }
                objectInspector.setStructFieldData(row, fields.get(i), writeValue);
            }

            Writable record = serializer.serialize(row, objectInspector);
            recordWriter.write(record);
        }
    }
    finally {
        recordWriter.close(false);
    }

    // todo to test with compression, the file must be renamed with the compression extension
    Path path = new Path(filePath);
    path.getFileSystem(new Configuration(false)).setVerifyChecksum(true);
    File file = new File(filePath);
    return new FileSplit(path, 0, file.length(), new String[0]);
}
 
Example 28
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class<? extends
    Writable> valueClass, boolean isCompressed, Properties tableProperties, Progressable
    progressable) throws IOException {
  return new HiveDynamoDBRecordWriter(jc, progressable);
}
 
Example 29
@Override
public org.apache.hadoop.mapred.RecordWriter<Text, DynamoDBItemWritable> getRecordWriter(
    FileSystem fs, JobConf conf, String str, Progressable progressable) throws IOException {
  throw new RuntimeException("This class implements HiveRecordReader. It is not compatible with"
      + " Hadoop Record Reader.");
}
 
Example 30
Source Project: bigdata-tutorial   Source File: Base64TextOutputFormat.java    License: Apache License 2.0
public Base64RecordWriter(RecordWriter writer) {
  this.writer = writer;
  bytesWritable = new BytesWritable();
}