org.apache.hadoop.hive.ql.exec.FileSinkOperator Java Examples
The following examples show how to use
org.apache.hadoop.hive.ql.exec.FileSinkOperator.
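Most of the ORC-based examples below share one write loop: obtain a FileSinkOperator.RecordWriter from a Hive output format, serialize one reusable row object per record with an OrcSerde and a settable object inspector, then close the writer. A minimal sketch of that pattern follows; note that createOrcRecordWriter and createSettableStructObjectInspector are the test helpers shown in Examples #4 and #9, not part of the Hive API.

import java.io.File;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.objectinspector.SettableStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.io.Writable;

// Sketch only: createOrcRecordWriter(...) and createSettableStructObjectInspector(...)
// are the test helpers from Examples #4 and #9 below.
private static void writeLongColumn(File file, int count) throws Exception {
    FileSinkOperator.RecordWriter writer =
            createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector inspector =
            createSettableStructObjectInspector("test", BIGINT);
    Object row = inspector.create();                        // one reusable row object
    StructField field = inspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        inspector.setStructFieldData(row, field, (long) i); // mutate the row in place
        Writable record = serde.serialize(row, inspector);  // the SerDe produces the Writable
        writer.write(record);
    }
    writer.close(false);                                    // false = commit; true = abort
}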
Example #1
Source File: TestOrcReaderMemoryUsage.java From presto with Apache License 2.0
/**
 * Write a file that contains a number of rows with 1 VARCHAR column, and all values are not null.
 */
private static TempFile createSingleColumnVarcharFile(int count, int length)
        throws Exception {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, VARCHAR);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, Strings.repeat("0", length));
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Example #2
Source File: TestOrcReaderPositions.java From spliceengine with GNU Affero General Public License v3.0
private static void createSequentialFile(File file, int count)
        throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);
    @SuppressWarnings("deprecation")
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Example #3
Source File: TestOrcReaderPositions.java From spliceengine with GNU Affero General Public License v3.0
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, OrcTester.Compression.NONE, javaLongObjectInspector);
    @SuppressWarnings("deprecation")
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", javaLongObjectInspector);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < 300; i += 3) {
        if ((i > 0) && (i % 60 == 0)) {
            flushWriter(writer);
        }
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Example #4
Source File: TestCachingOrcDataSource.java From spliceengine with GNU Affero General Public License v3.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
        Compression compression, ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
    jobConf.set("hive.exec.orc.default.compress", compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty("columns", "test");
    tableProperties.setProperty("columns.types", columnObjectInspector.getTypeName());
    tableProperties.setProperty("orc.stripe.size", "1200000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> { });
}
Example #5
Source File: TestMapredParquetOutputFormat.java From presto with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        JobConf jobConf,
        Path finalOutPath,
        Class<? extends Writable> valueClass,
        boolean isCompressed,
        Properties tableProperties,
        Progressable progress)
        throws IOException {
    if (schema.isPresent()) {
        DataWritableWriteSupport.setSchema(schema.get(), jobConf);
        return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress, tableProperties);
    }
    return super.getHiveRecordWriter(jobConf, finalOutPath, valueClass, isCompressed, tableProperties, progress);
}
Example #6
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void createSequentialFile(File file, int count)
        throws IOException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < count; i++) {
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Example #7
Source File: TestOrcReaderMemoryUsage.java From presto with Apache License 2.0
/**
 * Write a file that contains a number of rows with 1 BIGINT column, and some rows have null values.
 */
private static TempFile createSingleColumnFileWithNullValues(int rows)
        throws IOException, SerDeException {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, BIGINT);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < rows; i++) {
        if (i % 10 == 0) {
            objectInspector.setStructFieldData(row, field, null);
        } else {
            objectInspector.setStructFieldData(row, field, (long) i);
        }
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Example #8
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void createMultiStripeFile(File file)
        throws IOException, ReflectiveOperationException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, BIGINT);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", BIGINT);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 0; i < 300; i += 3) {
        if ((i > 0) && (i % 60 == 0)) {
            flushWriter(writer);
        }
        objectInspector.setStructFieldData(row, field, (long) i);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Example #9
Source File: TestCachingOrcDataSource.java From presto with Apache License 2.0
private static FileSinkOperator.RecordWriter createOrcRecordWriter(File outputFile, Format format,
        CompressionKind compression, ObjectInspector columnObjectInspector) throws IOException {
    JobConf jobConf = new JobConf();
    OrcConf.WRITE_FORMAT.setString(jobConf, format == ORC_12 ? "0.12" : "0.11");
    OrcConf.COMPRESS.setString(jobConf, compression.name());

    Properties tableProperties = new Properties();
    tableProperties.setProperty(IOConstants.COLUMNS, "test");
    tableProperties.setProperty(IOConstants.COLUMNS_TYPES, columnObjectInspector.getTypeName());
    tableProperties.setProperty(OrcConf.STRIPE_SIZE.getAttribute(), "120000");

    return new OrcOutputFormat().getHiveRecordWriter(
            jobConf,
            new Path(outputFile.toURI()),
            Text.class,
            compression != NONE,
            tableProperties,
            () -> {});
}
Example #10
Source File: HiveShimV110.java From flink with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
        Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
    try {
        Class utilClass = HiveFileFormatUtils.class;
        OutputFormat outputFormat = (OutputFormat) outputFormatClz.newInstance();
        Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, OutputFormat.class,
                Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
        return (FileSinkOperator.RecordWriter) utilMethod.invoke(null,
                jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
    } catch (Exception e) {
        throw new CatalogException("Failed to create Hive RecordWriter", e);
    }
}
Example #11
Source File: TestOrcReaderPositions.java From spliceengine with GNU Affero General Public License v3.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException {
    // OrcOutputFormat$OrcRecordWriter is package-private, so reach its underlying
    // ORC Writer via reflection and force a stripe flush with an intermediate footer.
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
Example #12
Source File: TestOrcReaderMemoryUsage.java From presto with Apache License 2.0
/**
 * Write a file that contains a given number of maps where each row has 10 entries in total
 * and some entries have null keys/values.
 */
private static TempFile createSingleColumnMapFileWithNullValues(Type mapType, int rows)
        throws IOException, SerDeException {
    Serializer serde = new OrcSerde();
    TempFile tempFile = new TempFile();
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(tempFile.getFile(), ORC_12, CompressionKind.NONE, mapType);
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", mapType);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);
    for (int i = 1; i <= rows; i++) {
        HashMap<Long, Long> map = new HashMap<>();
        for (int j = 1; j <= 8; j++) {
            Long value = (long) j;
            map.put(value, value);
        }
        // Add null values so that the StreamReader nullVectors are not empty.
        map.put(null, 0L);
        map.put(0L, null);
        objectInspector.setStructFieldData(row, field, map);
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
    return tempFile;
}
Example #13
Source File: TestInputOutputFormat.java From hive-dwrf with Apache License 2.0
@Test
public void testEmptyFile() throws Exception {
    JobConf job = new JobConf(conf);
    Properties properties = new Properties();
    HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
    FileSinkOperator.RecordWriter writer =
            outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
    writer.close(true);
    properties.setProperty("columns", "x,y");
    properties.setProperty("columns.types", "int:int");
    SerDe serde = new OrcSerde();
    serde.initialize(conf, properties);
    InputFormat<?, ?> in = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = in.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // read the whole file
    conf.set("hive.io.file.readcolumn.ids", "0,1");
    org.apache.hadoop.mapred.RecordReader reader =
            in.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = reader.createKey();
    Object value = reader.createValue();
    assertEquals(0.0, reader.getProgress(), 0.00001);
    assertEquals(0, reader.getPos());
    assertEquals(false, reader.next(key, value));
    reader.close();
    assertEquals(null, serde.getSerDeStats());
}
Example #14
Source File: MapredParquetOutputFormat.java From parquet-mr with Apache License 2.0
/**
 * Create the parquet schema from the hive schema, and return the RecordWriterWrapper which
 * contains the real output format.
 */
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(
        final JobConf jobConf,
        final Path finalOutPath,
        final Class<? extends Writable> valueClass,
        final boolean isCompressed,
        final Properties tableProperties,
        final Progressable progress) throws IOException {

    LOG.info("creating new record writer...{}", this);

    final String columnNameProperty = tableProperties.getProperty(IOConstants.COLUMNS);
    final String columnTypeProperty = tableProperties.getProperty(IOConstants.COLUMNS_TYPES);
    List<String> columnNames;
    List<TypeInfo> columnTypes;

    if (columnNameProperty.length() == 0) {
        columnNames = new ArrayList<String>();
    } else {
        columnNames = Arrays.asList(columnNameProperty.split(","));
    }

    if (columnTypeProperty.length() == 0) {
        columnTypes = new ArrayList<TypeInfo>();
    } else {
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    }

    DataWritableWriteSupport.setSchema(HiveSchemaConverter.convert(columnNames, columnTypes), jobConf);

    return getParquerRecordWriterWrapper(realOutputFormat, jobConf, finalOutPath.toString(), progress);
}
Example #15
Source File: HiveOutputFormatFactoryTest.java From flink with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jc, Path finalOutPath, Class valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    URI uri = finalOutPath.toUri();
    assertEquals(TEST_URI_SCHEME, uri.getScheme());
    assertEquals(TEST_URI_AUTHORITY, uri.getAuthority());
    return null;
}
Example #16
Source File: HiveBulkWriterFactory.java From flink with Apache License 2.0
@Override
public HadoopPathBasedBulkWriter<RowData> create(Path targetPath, Path inProgressPath) throws IOException {
    FileSinkOperator.RecordWriter recordWriter = factory.createRecordWriter(inProgressPath);
    Function<RowData, Writable> rowConverter = factory.createRowDataConverter();
    FileSystem fs = FileSystem.get(inProgressPath.toUri(), factory.getJobConf());
    return new HadoopPathBasedBulkWriter<RowData>() {

        @Override
        public long getSize() throws IOException {
            return fs.getFileStatus(inProgressPath).getLen();
        }

        @Override
        public void dispose() {
            // close silently.
            try {
                recordWriter.close(true);
            } catch (IOException ignored) {
            }
        }

        @Override
        public void addElement(RowData element) throws IOException {
            recordWriter.write(rowConverter.apply(element));
        }

        @Override
        public void flush() {
        }

        @Override
        public void finish() throws IOException {
            recordWriter.close(false);
        }
    };
}
Example #17
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void flushWriter(FileSinkOperator.RecordWriter writer)
        throws IOException, ReflectiveOperationException {
    Field field = OrcOutputFormat.class.getClassLoader()
            .loadClass(OrcOutputFormat.class.getName() + "$OrcRecordWriter")
            .getDeclaredField("writer");
    field.setAccessible(true);
    ((Writer) field.get(writer)).writeIntermediateFooter();
}
Example #18
Source File: HiveShimV100.java From flink with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
        Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath) {
    try {
        Class utilClass = HiveFileFormatUtils.class;
        HiveOutputFormat outputFormat = (HiveOutputFormat) outputFormatClz.newInstance();
        Method utilMethod = utilClass.getDeclaredMethod("getRecordWriter", JobConf.class, HiveOutputFormat.class,
                Class.class, boolean.class, Properties.class, Path.class, Reporter.class);
        return (FileSinkOperator.RecordWriter) utilMethod.invoke(null,
                jobConf, outputFormat, outValClz, isCompressed, tableProps, outPath, Reporter.NULL);
    } catch (Exception e) {
        throw new CatalogException("Failed to create Hive RecordWriter", e);
    }
}
Example #19
Source File: HiveTableOutputFormat.java From flink with Apache License 2.0
HivePartitionWriter(JobConf jobConf, OutputFormat outputFormat, FileSinkOperator.RecordWriter recordWriter,
        OutputCommitter outputCommitter) {
    this.jobConf = jobConf;
    this.outputFormat = outputFormat;
    this.recordWriter = recordWriter;
    this.outputCommitter = outputCommitter;
}
Example #20
Source File: HiveMetadata.java From presto with Apache License 2.0
private static void writeEmptyFile(ConnectorSession session, Path target, JobConf conf,
        Properties properties, String serDe, String outputFormatName) {
    // Some serializers such as Avro set a property in the schema.
    initializeSerializer(conf, properties, serDe);

    // The code below is not a try with resources because RecordWriter is not Closeable.
    FileSinkOperator.RecordWriter recordWriter =
            HiveWriteUtils.createRecordWriter(target, conf, properties, outputFormatName, session);
    try {
        recordWriter.close(false);
    } catch (IOException e) {
        throw new PrestoException(HIVE_WRITER_CLOSE_ERROR, "Error write empty file to Hive", e);
    }
}
Example #21
Source File: TestOrcReaderPositions.java From presto with Apache License 2.0
private static void createGrowingSequentialFile(File file, int count, int step, int initialLength)
        throws IOException, SerDeException {
    FileSinkOperator.RecordWriter writer = createOrcRecordWriter(file, ORC_12, CompressionKind.NONE, VARCHAR);
    Serializer serde = new OrcSerde();
    SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test", VARCHAR);
    Object row = objectInspector.create();
    StructField field = objectInspector.getAllStructFieldRefs().get(0);

    StringBuilder builder = new StringBuilder();
    for (int i = 0; i < initialLength; i++) {
        builder.append("0");
    }
    String seedString = builder.toString();

    // gradually grow the length of a cell
    int previousLength = initialLength;
    for (int i = 0; i < count; i++) {
        if ((i / step + 1) * initialLength > previousLength) {
            previousLength = (i / step + 1) * initialLength;
            builder.append(seedString);
        }
        objectInspector.setStructFieldData(row, field, builder.toString());
        Writable record = serde.serialize(row, objectInspector);
        writer.write(record);
    }
    writer.close(false);
}
Example #22
Source File: SolrHiveOutputFormat.java From hive-solr with MIT License
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path, Class aClass,
        boolean b, Properties properties, Progressable progressable) throws IOException {
    // Hive invokes this writer
    return new SolrHiveWriter(jobConf);
}
Example #23
Source File: OrcOutputFormat.java From hive-dwrf with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf conf, Path path,
        Class<? extends Writable> valueClass, boolean isCompressed,
        Properties tableProperties, Progressable reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);

    // Table properties take precedence; the job configuration supplies the defaults.
    String stripeSizeStr = tableProperties.getProperty(OrcFile.STRIPE_SIZE);
    long stripeSize;
    if (stripeSizeStr != null) {
        stripeSize = Long.valueOf(stripeSizeStr);
    } else {
        stripeSize = OrcConf.getLongVar(conf, OrcConf.ConfVars.HIVE_ORC_STRIPE_SIZE);
    }

    String compression = tableProperties.getProperty(OrcFile.COMPRESSION);
    if (compression == null) {
        compression = OrcConf.getVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION);
    }

    String compressionSizeStr = tableProperties.getProperty(OrcFile.COMPRESSION_BLOCK_SIZE);
    int compressionSize;
    if (compressionSizeStr != null) {
        compressionSize = Integer.valueOf(compressionSizeStr);
    } else {
        compressionSize = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_COMPRESSION_BLOCK_SIZE);
    }

    String rowIndexStrideStr = tableProperties.getProperty(OrcFile.ROW_INDEX_STRIDE);
    int rowIndexStride;
    if (rowIndexStrideStr != null) {
        rowIndexStride = Integer.valueOf(rowIndexStrideStr);
    } else {
        rowIndexStride = OrcConf.getIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ROW_INDEX_STRIDE);
    }

    String enableIndexesStr = tableProperties.getProperty(OrcFile.ENABLE_INDEXES);
    boolean enableIndexes;
    if (enableIndexesStr != null) {
        enableIndexes = Boolean.valueOf(enableIndexesStr);
    } else {
        enableIndexes = OrcConf.getBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_CREATE_INDEX);
    }

    // A stride of 0 disables row indexes entirely.
    if (!enableIndexes) {
        rowIndexStride = 0;
    }

    return new OrcRecordWriter(path.getFileSystem(conf), path, conf,
            stripeSize, compression, compressionSize, rowIndexStride);
}
Example #24
Source File: TSFHiveOutputFormat.java From incubator-iotdb with Apache License 2.0
@Override
public FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Path path,
        Class<? extends Writable> aClass, boolean b, Properties properties,
        Progressable progressable) throws IOException {
    return new TSFHiveRecordWriter(jobConf, path, null);
}
Example #25
Source File: TestParquetDecimalScaling.java From presto with Apache License 2.0
private static void createParquetFile(
        Path path,
        StandardStructObjectInspector inspector,
        Iterator<?>[] iterators,
        MessageType parquetSchema,
        List<String> columnNames) {
    Properties tableProperties = createTableProperties(columnNames, Collections.singletonList(inspector));

    JobConf jobConf = new JobConf();
    jobConf.setEnum(COMPRESSION, UNCOMPRESSED);
    jobConf.setBoolean(ENABLE_DICTIONARY, false);
    jobConf.setEnum(WRITER_VERSION, PARQUET_2_0);

    try {
        FileSinkOperator.RecordWriter recordWriter = new TestMapredParquetOutputFormat(Optional.of(parquetSchema), true)
                .getHiveRecordWriter(jobConf, path, Text.class, false, tableProperties, () -> {});

        Object row = inspector.create();
        List<StructField> fields = ImmutableList.copyOf(inspector.getAllStructFieldRefs());

        while (stream(iterators).allMatch(Iterator::hasNext)) {
            for (int i = 0; i < fields.size(); i++) {
                Object value = iterators[i].next();
                inspector.setStructFieldData(row, fields.get(i), value);
            }

            ParquetHiveSerDe serde = new ParquetHiveSerDe();
            serde.initialize(jobConf, tableProperties, null);
            Writable record = serde.serialize(row, inspector);
            recordWriter.write(record);
        }

        recordWriter.close(false);
    } catch (IOException | SerDeException e) {
        throw new RuntimeException(e);
    }
}
Example #26
Source File: HiveShim.java From flink with Apache License 2.0
/**
 * Get Hive's FileSinkOperator.RecordWriter.
 */
FileSinkOperator.RecordWriter getHiveRecordWriter(JobConf jobConf, Class outputFormatClz,
        Class<? extends Writable> outValClz, boolean isCompressed, Properties tableProps, Path outPath);
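For context, a caller-side sketch of how a shim like those in Examples #10 and #18 might be used. This is an illustrative assumption, not code from Flink: the property values, the output-format choice, and the target path are made up, though HiveShimLoader, HiveIgnoreKeyTextOutputFormat, and the method signatures are real.

// Hypothetical caller; property values, output format, and path are illustrative only.
JobConf jobConf = new JobConf();
Properties tableProps = new Properties();
tableProps.setProperty("columns", "id,name");
tableProps.setProperty("columns.types", "bigint:string");

HiveShim shim = HiveShimLoader.loadHiveShim(HiveShimLoader.getHiveVersion());
FileSinkOperator.RecordWriter writer = shim.getHiveRecordWriter(
        jobConf,
        HiveIgnoreKeyTextOutputFormat.class, // plain-text Hive output format
        Text.class,                          // Writable class produced by the SerDe
        false,                               // isCompressed
        tableProps,
        new Path("/tmp/example-part-0"));
writer.write(new Text("1\u0001alice"));      // '\u0001' is Hive's default field delimiter
writer.close(false);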