org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable. Each example is drawn from an open source project, identified in the Source File line above its code.
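Before the examples, here is a minimal sketch of the class's basic pattern: a BytesRefArrayWritable holds one BytesRefWritable per column of a row, which is exactly how the write-side examples below build rows. The helper name, the two-column layout, and the values are made up for illustration:

import java.io.IOException;

import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;

// A minimal sketch, not taken from the projects below.
static BytesRefArrayWritable buildRow() throws IOException {
    BytesRefArrayWritable row = new BytesRefArrayWritable(2);
    row.resetValid(2);                       // the row holds two valid columns
    byte[] id = "42".getBytes();
    byte[] name = "sample".getBytes();
    row.set(0, new BytesRefWritable(id, 0, id.length));
    row.set(1, new BytesRefWritable(name, 0, name.length));

    // Reading back: get(i) returns the BytesRefWritable for column i.
    byte[] nameCopy = row.get(1).getBytesCopy();   // "sample" as bytes
    return row;
}
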
Example #1
Source File: HiveRCRecordReader.java    From spork with Apache License 2.0
@SuppressWarnings("deprecation")
@Override
public void initialize(InputSplit split, TaskAttemptContext ctx)
        throws IOException, InterruptedException {

    FileSplit fileSplit = (FileSplit) split;
    Configuration conf = ctx.getConfiguration();
    splitPath = fileSplit.getPath();

    // Wrap the new-API split in an old-API (mapred) FileSplit so the
    // mapred-based RCFileRecordReader can consume it.
    rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
            conf, new org.apache.hadoop.mapred.FileSplit(splitPath,
                    fileSplit.getStart(), fileSplit.getLength(),
                    new org.apache.hadoop.mapred.JobConf(conf)));

    key = rcFileRecordReader.createKey();
    value = rcFileRecordReader.createValue();
}
 
Example #2
Source File: HiveColumnarLoader.java    From spork with Apache License 2.0
@Override
public Tuple getNext() throws IOException {
    Tuple tuple = null;

    try {
        if (reader.nextKeyValue()) {
            BytesRefArrayWritable buff = reader.getCurrentValue();
            ColumnarStruct struct = readColumnarStruct(buff);
            tuple = readColumnarTuple(struct, reader.getSplitPath());
        }
    } catch (InterruptedException e) {
        throw new IOException(e.toString(), e);
    }

    return tuple;
}
 
Example #3
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0
private ColumnarStruct readRow(File outputFile, Path outputPath, String schema) throws IOException,
        InterruptedException, SerDeException {

    FileSplit fileSplit = new FileSplit(outputPath, 0L, outputFile.length(), (String[]) null);

    Path splitPath = fileSplit.getPath();

    RCFileRecordReader<LongWritable, BytesRefArrayWritable> rcFileRecordReader = new RCFileRecordReader<LongWritable, BytesRefArrayWritable>(
        new Configuration(false), new org.apache.hadoop.mapred.FileSplit(splitPath,
            fileSplit.getStart(), fileSplit.getLength(),
            new org.apache.hadoop.mapred.JobConf(conf)));

    LongWritable key = rcFileRecordReader.createKey();
    BytesRefArrayWritable value = rcFileRecordReader.createValue();
    rcFileRecordReader.next(key, value);
    rcFileRecordReader.close();

    ColumnarStruct struct = readColumnarStruct(value, schema);
    return struct;
}
 
Example #4
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
     Pattern pcols = Pattern.compile("[a-zA-Z_0-9]*[ ]");
     List<String> types = HiveRCSchemaUtil.parseSchemaTypes(schema);
     List<String> cols = HiveRCSchemaUtil.parseSchema(pcols, schema);

     List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(
         cols.size());

     for (int i = 0; i < cols.size(); i++) {
         fieldSchemaList.add(new FieldSchema(cols.get(i), HiveRCSchemaUtil
             .findPigDataType(types.get(i))));
     }

     Properties props = new Properties();

     // The "columns" / "columns.types" table properties; newer Hive releases
     // expose these constants via serdeConstants rather than Constants.
     props.setProperty(Constants.LIST_COLUMNS,
         HiveRCSchemaUtil.listToString(cols));
     props.setProperty(Constants.LIST_COLUMN_TYPES,
         HiveRCSchemaUtil.listToString(types));

     Configuration hiveConf = new HiveConf(conf, SessionState.class);
     ColumnarSerDe serde = new ColumnarSerDe();
     serde.initialize(hiveConf, props);

     return (ColumnarStruct) serde.deserialize(buff);
}
 
Example #5
Source File: HiveRCInputFormat.java    From spork with Apache License 2.0
/**
 * Initialises an instance of HiveRCRecordReader.
 */
@Override
public RecordReader<LongWritable, BytesRefArrayWritable> createRecordReader(
        InputSplit split, TaskAttemptContext ctx) throws IOException,
        InterruptedException {

    HiveRCRecordReader reader = new HiveRCRecordReader();

    return reader;
}
 
Example #6
Source File: HiveColumnarLoader.java    From spork with Apache License 2.0
/**
 * Uses the ColumnarSerDe to deserialize the given BytesRefArrayWritable
 * buffer into a ColumnarStruct instance.
 *
 * @param buff the serialized row
 * @return the deserialized ColumnarStruct
 */
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff) {
    // use ColumnarSerDe to deserialize the row
    ColumnarStruct struct = null;
    try {
        struct = (ColumnarStruct) serde.deserialize(buff);
    } catch (SerDeException e) {
        LOG.error(e.toString(), e);
        throw new RuntimeException(e.toString(), e);
    }

    return struct;
}
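
The struct produced here is subsequently unpacked field by field (HiveColumnarLoader does this in readColumnarTuple). As a rough sketch of that consumption step, assuming the getField(int) and getFieldsAsList() accessors that Hive's ColumnarStruct inherits from its base class, and reusing buff and LOG from the example above:

// Unpack the lazily deserialized column values of one row.
ColumnarStruct struct = readColumnarStruct(buff);
int numFields = struct.getFieldsAsList().size();
for (int i = 0; i < numFields; i++) {
    Object field = struct.getField(i);    // null when the column value was null
    if (field != null) {
        LOG.info("column " + i + " = " + field);
    }
}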
 
Example #7
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;

    resetRandomGenerators();

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {
        nextRandomRow(columnRandom, bytes, columnCount);
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}
 
Example #8
Source File: TestHiveColumnarLoader.java    From spork with Apache License 2.0
private static void nextRandomRow(byte[][] row, BytesRefArrayWritable bytes, int columnCount) {
    bytes.resetValid(row.length);
    for (int i = 0; i < row.length; i++) {

        row[i] = new byte[columnCount];
        for (int j = 0; j < columnCount; j++)
            row[i][j] = getRandomChar(randomCharGenerator);
        bytes.get(i).set(row[i], 0, columnCount);
    }
}
 
Example #9
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0
private static int writeRCFileTest(FileSystem fs, int rowCount, Path file, int columnNum,
        CompressionCodec codec, int columnCount) throws IOException {
    fs.delete(file, true);
    int rowsWritten = 0;

    RCFileOutputFormat.setColumnNumber(conf, columnNum);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, file, null, codec);

    byte[][] columnRandom;

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnNum);
    columnRandom = new byte[columnNum][];
    for (int i = 0; i < columnNum; i++) {
        BytesRefWritable cu = new BytesRefWritable();
        bytes.set(i, cu);
    }

    for (int i = 0; i < rowCount; i++) {

        bytes.resetValid(columnRandom.length);
        for (int j = 0; j < columnRandom.length; j++) {
            columnRandom[j] = "Sample value".getBytes();
            bytes.get(j).set(columnRandom[j], 0, columnRandom[j].length);
        }
        rowsWritten++;
        writer.append(bytes);
    }
    writer.close();

    return rowsWritten;
}
 
Example #10
Source File: ReadRCFileBuilder.java    From kite with Apache License 2.0
private boolean readRowWise(final RCFile.Reader reader, final Record record)
    throws IOException {
  
  LongWritable rowID = new LongWritable();      

  while (true) {
    boolean next;
    try {
      next = reader.next(rowID);
    } catch (EOFException ex) {
      // We have hit EOF of the stream
      break;
    }

    if (!next) {
      break;
    }

    incrementNumRecords();
    Record outputRecord = record.copy();        
    BytesRefArrayWritable rowBatchBytes = new BytesRefArrayWritable();
    rowBatchBytes.resetValid(columns.size());
    reader.getCurrentRow(rowBatchBytes);

    // Read all the configured columns and set them in the output record
    for (RCFileColumn rcColumn : columns) {
      BytesRefWritable columnBytes = rowBatchBytes.get(rcColumn.getInputField());
      outputRecord.put(rcColumn.getOutputField(), updateColumnValue(rcColumn, columnBytes));
    }
    
    // pass record to next command in chain:
    if (!getChild().process(outputRecord)) {
      return false;
    }
  }
  return true;
}
 
Example #11
Source File: ReadRCFileBuilder.java    From kite with Apache License 2.0
private boolean readColumnWise(RCFile.Reader reader, Record record)
    throws IOException {

  while (true) {
    boolean next;
    try {
      next = reader.nextBlock();
    } catch (EOFException ex) {
      // We have hit EOF of the stream
      break;
    }
    if (!next) {
      break;
    }

    for (RCFileColumn rcColumn : columns) {
      BytesRefArrayWritable rowBatchBytes = reader.getColumn(rcColumn.getInputField(), null);
      List<Writable> rowValues = new ArrayList<Writable>(rowBatchBytes.size());
      incrementNumRecords();
      Record outputRecord = record.copy();
      for (int rowIndex = 0; rowIndex < rowBatchBytes.size(); rowIndex++) {
        BytesRefWritable rowBytes = rowBatchBytes.get(rowIndex);
        rowValues.add(updateColumnValue(rcColumn, rowBytes));
      }
      outputRecord.put(rcColumn.getOutputField(), rowValues);
      
      // pass record to next command in chain:
      if (!getChild().process(outputRecord)) {
        return false;
      }
    }
  }
  return true;
}
 
Example #12
Source File: ReadRCFileTest.java    From kite with Apache License 2.0
private void createRCFile(final String fileName, final int numRecords,
    final int maxColumns, boolean addNullValue) throws IOException {
  // Write the RCFile; RCFile carries SequenceFile-style metadata in its header
  SequenceFile.Metadata metadata = getMetadataForRCFile();
  Configuration conf = new Configuration();
  conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
  Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
  RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null,
      metadata, null);
  for (int row = 0; row < numRecords; row++) {
    BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
    dataWrite.resetValid(maxColumns);
    for (int column = 0; column < maxColumns; column++) {
      Writable sampleText = new Text(
          "ROW-NUM:" + row + ", COLUMN-NUM:" + column);
      // Set the last column of the last row as null
      if (addNullValue && column == maxColumns - 1 && row == numRecords - 1) {
        sampleText = NullWritable.get();
      }
      ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
      sampleText.write(dataOutput);
      dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
    }
    rcFileWriter.append(dataWrite);
  }
  rcFileWriter.close();
}
 
Example #13
Source File: HiveRCRecordReader.java    From spork with Apache License 2.0
@Override
public BytesRefArrayWritable getCurrentValue() throws IOException,
        InterruptedException {
    return value;
}
 
Example #14
Source File: HiveColumnarLoader.java    From spork with Apache License 2.0
@Override
public InputFormat<LongWritable, BytesRefArrayWritable> getInputFormat()
        throws IOException {
    LOG.info("Signature: " + signature);
    return new HiveRCInputFormat(signature);
}
 
Example #15
Source File: TestAllLoader.java    From spork with Apache License 2.0
@Override
public void writeTestData(File file, int recordCounts, int columnCount,
        String colSeparator) throws IOException {

    // write random test data

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);

    RCFileOutputFormat.setColumnNumber(conf, columnCount);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path(
            file.getAbsolutePath()));

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(columnCount);

    for (int c = 0; c < columnCount; c++) {
        bytes.set(c, new BytesRefWritable());
    }

    try {
        for (int r = 0; r < recordCounts; r++) {
            // for each row write n columns
            for (int c = 0; c < columnCount; c++) {
                byte[] stringbytes = String.valueOf(Math.random()).getBytes();
                bytes.get(c).set(stringbytes, 0, stringbytes.length);
            }
            writer.append(bytes);
        }
    } finally {
        writer.close();
    }
}
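
To round out the write-side tests above, here is a minimal read-side sketch that iterates an RCFile with RCFile.Reader, the same reader the kite examples drive. The path is hypothetical, fs and conf are assumed to be in scope, and the snippet is assumed to run inside a method that throws IOException:

// Iterate every row of an RCFile and walk its columns.
RCFile.Reader reader = new RCFile.Reader(fs, new Path("/tmp/sample.rc"), conf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable cols = new BytesRefArrayWritable();
try {
    while (reader.next(rowID)) {
        reader.getCurrentRow(cols);        // fills cols with the current row
        for (int c = 0; c < cols.size(); c++) {
            BytesRefWritable col = cols.get(c);
            // column bytes: col.getData(), starting at col.getStart(),
            // col.getLength() bytes long
        }
    }
} finally {
    reader.close();
}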