org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable Java Examples
The following examples show how to use
org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable.
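Before the per-project examples, a minimal standalone sketch may help orient: BytesRefArrayWritable is a resizable array of BytesRefWritable byte ranges, one per column, and serves as the row value type for RCFile readers and writers. The sketch below uses only calls that recur in the examples that follow (the capacity constructor, resetValid, set, get, and BytesRefWritable.set); the read-back accessors (getData, getStart, getLength) are from the Hive BytesRefWritable API, and the column values are invented for illustration.

import java.io.IOException;

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

public class BytesRefArrayWritableSketch {

    public static void main(String[] args) throws IOException {
        // One row with three columns; the constructor argument is the capacity.
        BytesRefArrayWritable row = new BytesRefArrayWritable(3);

        // Mark three entries as valid before filling them (see Example #12).
        row.resetValid(3);

        // Each column is a byte range held by a BytesRefWritable.
        String[] columns = { "id-1", "alice", "42" }; // invented sample values
        for (int i = 0; i < columns.length; i++) {
            byte[] data = columns[i].getBytes();
            BytesRefWritable col = new BytesRefWritable();
            col.set(data, 0, data.length);
            row.set(i, col);
        }

        // Read the columns back out of the row.
        for (int i = 0; i < row.size(); i++) {
            BytesRefWritable col = row.get(i);
            System.out.println(new String(col.getData(), col.getStart(),
                    col.getLength()));
        }
    }
}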
Example #1
Source File: HiveRCRecordReader.java (from spork, Apache License 2.0)
@SuppressWarnings("deprecation") @Override public void initialize(InputSplit split, TaskAttemptContext ctx) throws IOException, InterruptedException { FileSplit fileSplit = (FileSplit) split; Configuration conf = ctx.getConfiguration(); splitPath = fileSplit.getPath(); rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>( conf, new org.apache.hadoop.mapred.FileSplit(splitPath, fileSplit.getStart(), fileSplit.getLength(), new org.apache.hadoop.mapred.JobConf(conf))); key = rcFileRecordReader.createKey(); value = rcFileRecordReader.createValue(); }
Example #2
Source File: HiveColumnarLoader.java (from spork, Apache License 2.0)
@Override
public Tuple getNext() throws IOException {
    Tuple tuple = null;

    try {
        if (reader.nextKeyValue()) {
            BytesRefArrayWritable buff = reader.getCurrentValue();
            ColumnarStruct struct = readColumnarStruct(buff);
            tuple = readColumnarTuple(struct, reader.getSplitPath());
        }
    } catch (InterruptedException e) {
        throw new IOException(e.toString(), e);
    }

    return tuple;
}
Example #3
Source File: TestHiveColumnarStorage.java (from spork, Apache License 2.0)
private ColumnarStruct readRow(File outputFile, Path outputPath, String schema)
        throws IOException, InterruptedException, SerDeException {

    FileSplit fileSplit = new FileSplit(outputPath, 0L, outputFile.length(),
            (String[]) null);
    Path splitPath = fileSplit.getPath();

    RCFileRecordReader<LongWritable, BytesRefArrayWritable> rcFileRecordReader =
            new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
                    new Configuration(false),
                    new org.apache.hadoop.mapred.FileSplit(splitPath,
                            fileSplit.getStart(), fileSplit.getLength(),
                            new org.apache.hadoop.mapred.JobConf(conf)));

    LongWritable key = rcFileRecordReader.createKey();
    BytesRefArrayWritable value = rcFileRecordReader.createValue();

    // Read the first row, then hand the raw column bytes to the ColumnarSerDe.
    rcFileRecordReader.next(key, value);
    rcFileRecordReader.close();

    ColumnarStruct struct = readColumnarStruct(value, schema);
    return struct;
}
Example #4
Source File: TestHiveColumnarStorage.java (from spork, Apache License 2.0)
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema)
        throws SerDeException {
    Pattern pcols = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    List<String> types = HiveRCSchemaUtil.parseSchemaTypes(schema);
    List<String> cols = HiveRCSchemaUtil.parseSchema(pcols, schema);

    List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(cols.size());

    for (int i = 0; i < cols.size(); i++) {
        fieldSchemaList.add(new FieldSchema(cols.get(i),
                HiveRCSchemaUtil.findPigDataType(types.get(i))));
    }

    // The ColumnarSerDe needs the column names and types before it can deserialize rows.
    Properties props = new Properties();
    props.setProperty(Constants.LIST_COLUMNS,
            HiveRCSchemaUtil.listToString(cols));
    props.setProperty(Constants.LIST_COLUMN_TYPES,
            HiveRCSchemaUtil.listToString(types));

    Configuration hiveConf = new HiveConf(conf, SessionState.class);

    ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(hiveConf, props);

    return (ColumnarStruct) serde.deserialize(buff);
}
Example #5
Source File: HiveRCInputFormat.java (from spork, Apache License 2.0)
/**
 * Initialises an instance of HiveRCRecordReader.
 */
@Override
public RecordReader<LongWritable, BytesRefArrayWritable> createRecordReader(
        InputSplit split, TaskAttemptContext ctx) throws IOException,
        InterruptedException {
    HiveRCRecordReader reader = new HiveRCRecordReader();
    return reader;
}
Example #6
Source File: HiveColumnarLoader.java (from spork, Apache License 2.0)
/**
 * Uses the ColumnarSerde to deserialize the buff:BytesRefArrayWritable into
 * a ColumnarStruct instance.
 *
 * @param buff
 *            BytesRefArrayWritable
 * @return ColumnarStruct
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff) {
    // use ColumnarSerDe to deserialize row
    ColumnarStruct struct = null;

    try {
        struct = (ColumnarStruct) serde.deserialize(buff);
    } catch (SerDeException e) {
        LOG.error(e.toString(), e);
        throw new RuntimeException(e.toString(), e);
    }

    return struct;
}
Example #7
Source File: TestHiveColumnarLoader.java (from spork, Apache License 2.0)
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file,
        int columnNum, CompressionCodec codec, int columnCount)
        throws IOException {
    fs.delete(file, true);

    int rowsWritten = 0;

    resetRandomGenerators();

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];

    // One reusable BytesRefWritable per column; nextRandomRow refills them each row.
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }

    writer.close();

    return rowsWritten;
}
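Example #7 only covers the write side. For orientation, a read-back counterpart might look like the sketch below. It is not part of the original test: the helper name readRCFileTest is invented, it assumes the same static conf field the test class uses, and it relies on the RCFile.Reader next/getCurrentRow calls that also appear in Example #10 further down.

// A hedged read-back sketch for the file written above; counts rows
// rather than decoding column bytes.
private static int readRCFileTest(FileSystem fs, Path file) throws IOException {
    RCFile.Reader reader = new RCFile.Reader(fs, file, conf);

    LongWritable rowID = new LongWritable();
    BytesRefArrayWritable row = new BytesRefArrayWritable();

    int rowsRead = 0;
    while (reader.next(rowID)) {
        // Fills one BytesRefWritable per column for the current row.
        reader.getCurrentRow(row);
        rowsRead++;
    }

    reader.close();
    return rowsRead;
}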
Example #8
Source File: TestHiveColumnarLoader.java (from spork, Apache License 2.0)
private static void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes,
        int columnCount) {
    bytes.resetValid(row.length);

    for (int i = 0; i < row.length; i++) {
        row[i] = new byte[columnCount];
        for (int j = 0; j < columnCount; j++)
            row[i][j] = getRandomChar(randomCharGenerator);
        bytes.get(i).set(row[i], 0, columnCount);
    }
}
Example #9
Source File: TestHiveColumnarStorage.java (from spork, Apache License 2.0)
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file,
        int columnNum, CompressionCodec codec, int columnCount)
        throws IOException {
    fs.delete(file, true);

    int rowsWritten = 0;

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;
    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];

    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }

    writer.close();

    return rowsWritten;
}
Example #10
Source File: ReadRCFileBuilder.java (from kite, Apache License 2.0)
private boolean readRowWise(final RCFile.Reader reader, final Record record)
        throws IOException {

    LongWritable rowID = new LongWritable();

    while (true) {
        boolean next;
        try {
            next = reader.next(rowID);
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }

        if (!next) {
            break;
        }

        incrementNumRecords();
        Record outputRecord = record.copy();

        BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
        rowBatchBytes.resetValid(columns.size());
        reader.getCurrentRow(rowBatchBytes);

        // Read all the columns configured and set it in the output record
        for (RCFileColumn rcColumn : columns) {
            BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
            outputRecord.put(rcColumn.getOutputField(),
                    updateColumnValue(rcColumn, columnBytes));
        }

        // pass record to next command in chain:
        if (!getChild().process(outputRecord)) {
            return false;
        }
    }
    return true;
}
Example #11
Source File: ReadRCFileBuilder.java (from kite, Apache License 2.0)
private boolean readColumnWise(RCFile.Reader reader, Record record)
        throws IOException {

    while (true) {
        boolean next;
        try {
            next = reader.nextBlock();
        } catch (EOFException ex) {
            // We have hit EOF of the stream
            break;
        }

        if (!next) {
            break;
        }

        for (RCFileColumn rcColumn : columns) {
            BytesRefArrayWritable rowBatchBytes =
                    reader.getColumn(rcColumn.getInputField(), null);

            List<Writable> rowValues = new ArrayList<Writable>(rowBatchBytes.size());
            incrementNumRecords();
            Record outputRecord = record.copy();

            for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
                BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
                rowValues.add(updateColumnValue(rcColumn, rowBytes));
            }

            outputRecord.put(rcColumn.getOutputField(), rowValues);

            // pass record to next command in chain:
            if (!getChild().process(outputRecord)) {
                return false;
            }
        }
    }
    return true;
}
Example #12
Source File: ReadRCFileTest.java (from kite, Apache License 2.0)
private void createRCFile(final String fileName, final int numRecords,
        final int maxColumns, boolean addNullValue) throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
            metadata, null);

    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Writable sampleText = new Text(
                    "ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            // Set the last column of the last row as null
            if (addNullValue && column == maxColumns - 1 && row == numRecords - 1) {
                sampleText = NullWritable.get();
            }
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}
Example #13
Source File: HiveRCRecordReader.java (from spork, Apache License 2.0)
@Override
public BytesRefArrayWritable getCurrentValue() throws IOException,
        InterruptedException {
    return value;
}
Example #14
Source File: HiveColumnarLoader.java (from spork, Apache License 2.0)
@Override
public InputFormat<LongWritable, BytesRefArrayWritable> getInputFormat()
        throws IOException {
    LOG.info("Signature: " + signature);
    return new HiveRCInputFormat(signature);
}
Example #15
Source File: TestAllLoader.java (from spork, Apache License 2.0)
@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {

    // write random test data
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    RCFileOutputFormat.setColumnNumber(conf, columnCount);

    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path(
            file.getAbsolutePath()));

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnCount);

    for (int c = 0; c < columnCount; c++) {
        bytes.set(c, new BytesRefWritable());
    }

    try {
        for (int r = 0; r < recordCounts; r++) {
            // foreach row write n columns
            for (int c = 0; c < columnCount; c++) {
                byte[] stringbytes = String.valueOf(Math.random()).getBytes();
                bytes.get(c).set(stringbytes, 0, stringbytes.length);
            }
            writer.append(bytes);
        }
    } finally {
        writer.close();
    }
}