org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe. Each example notes its source file, originating project, and license.
Example #1
Source File: TestHiveColumnarStorage.java    From spork with Apache License 2.0
private ColumnarStruct readColumnarStruct(BytesRefArrayWritable buff, String schema) throws SerDeException {
    // Split the schema string into column names and type names.
    Pattern pcols = Pattern.compile("[a-zA-Z_0-9]*[ ]");
    List<String> types = HiveRCSchemaUtil.parseSchemaTypes(schema);
    List<String> cols = HiveRCSchemaUtil.parseSchema(pcols, schema);

    List<FieldSchema> fieldSchemaList = new ArrayList<FieldSchema>(cols.size());

    for (int i = 0; i < cols.size(); i++) {
        fieldSchemaList.add(new FieldSchema(cols.get(i),
            HiveRCSchemaUtil.findPigDataType(types.get(i))));
    }

    // ColumnarSerDe reads the column names and types from the table properties.
    Properties props = new Properties();
    props.setProperty(Constants.LIST_COLUMNS,
        HiveRCSchemaUtil.listToString(cols));
    props.setProperty(Constants.LIST_COLUMN_TYPES,
        HiveRCSchemaUtil.listToString(types));

    Configuration hiveConf = new HiveConf(conf, SessionState.class);
    ColumnarSerDe serde = new ColumnarSerDe();
    serde.initialize(hiveConf, props);

    // Deserialize the raw RCFile row into a lazily decoded columnar struct.
    return (ColumnarStruct) serde.deserialize(buff);
}
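
For context, a caller could fill the BytesRefArrayWritable from an RCFile and unpack the returned ColumnarStruct through the SerDe's ObjectInspector. The sketch below is a guess at how this helper might be driven; the file path, the schema string, and the reader setup are hypothetical and are not part of the test above.

// Hypothetical driver for readColumnarStruct; the path and schema are illustrative.
// RCFile.Reader lives in org.apache.hadoop.hive.ql.io.RCFile.
Path path = new Path("/tmp/test.rc");                       // assumed RCFile location
RCFile.Reader rcReader = new RCFile.Reader(fs, path, conf);
LongWritable rowID = new LongWritable();
BytesRefArrayWritable buff = new BytesRefArrayWritable();

while (rcReader.next(rowID)) {
    rcReader.getCurrentRow(buff);
    // Schema format assumed to follow HiveRCSchemaUtil's "name type" pairs.
    ColumnarStruct struct = readColumnarStruct(buff, "f1 string,f2 string");
    // Fields decode lazily; consume them via the SerDe's StructObjectInspector.
}
rcReader.close();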
 
Example #2
Source File: RcFileTester.java    From presto with Apache License 2.0
@Override
public Serializer createSerializer()
{
    try {
        ColumnarSerDe columnarSerDe = new ColumnarSerDe();
        // A single string column named "test", declared via the standard
        // Hive table properties that ColumnarSerDe reads at initialization.
        Properties tableProperties = new Properties();
        tableProperties.setProperty("columns", "test");
        tableProperties.setProperty("columns.types", "string");
        columnarSerDe.initialize(new JobConf(false), tableProperties);
        return columnarSerDe;
    }
    catch (SerDeException e) {
        throw new RuntimeException(e);
    }
}
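
As a hedged illustration of how the returned Serializer might then be used, the snippet below serializes one single-column row with a standard struct ObjectInspector. The row shape and the inspector are assumptions, not code from RcFileTester.

// Illustrative only: serialize one row matching the "test: string" schema above.
Serializer serializer = createSerializer();
ObjectInspector rowInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("test"),
        Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaStringObjectInspector));
Writable row = serializer.serialize(Arrays.asList("hello rcfile"), rowInspector); // throws SerDeException
// The resulting Writable can then be handed to an RCFile writer.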
 
Example #3
Source File: HiveCatalogUtil.java    From tajo with Apache License 2.0
public static String getDataFormat(StorageDescriptor descriptor) {
  Preconditions.checkNotNull(descriptor);

  String serde = descriptor.getSerdeInfo().getSerializationLib();
  String inputFormat = descriptor.getInputFormat();

  if (LazySimpleSerDe.class.getName().equals(serde)) {
    if (TextInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.TEXT;
    } else if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinarySerDe.class.getName().equals(serde)) {
    if (SequenceFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.SEQUENCE_FILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (LazyBinaryColumnarSerDe.class.getName().equals(serde) || ColumnarSerDe.class.getName().equals(serde)) {
    if (RCFileInputFormat.class.getName().equals(inputFormat)) {
      return BuiltinStorages.RCFILE;
    } else {
      throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
    }
  } else if (ParquetHiveSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.PARQUET;
  } else if (AvroSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.AVRO;
  } else if (OrcSerde.class.getName().equals(serde)) {
    return BuiltinStorages.ORC;
  } else if (RegexSerDe.class.getName().equals(serde)) {
    return BuiltinStorages.REGEX;
  } else {
    throw new TajoRuntimeException(new UnknownDataFormatException(inputFormat));
  }
}
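
A caller would typically obtain the StorageDescriptor from the Hive metastore. The fragment below is a sketch under that assumption; the client instance, database, and table names are hypothetical.

// Hypothetical lookup: resolve a Hive table's storage format for Tajo.
// `metaStoreClient` is an assumed, already-connected HiveMetaStoreClient.
Table table = metaStoreClient.getTable("default", "clicks_rc");
String dataFormat = HiveCatalogUtil.getDataFormat(table.getSd());
// A table using ColumnarSerDe with RCFileInputFormat resolves to BuiltinStorages.RCFILE.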
 
Example #4
Source File: HiveColumnarLoader.java    From spork with Apache License 2.0
@Override
public void prepareToRead(
        @SuppressWarnings("rawtypes") RecordReader reader, PigSplit split)
        throws IOException {

    this.reader = (HiveRCRecordReader) reader;

    // Determine which column indexes must be read. Guaranteeing that this
    // array is always defined simplifies the readColumnarTuple logic.
    int[] requiredIndexes = getRequiredColumns();
    if (requiredIndexes == null) {

        int fieldLen = pigSchema.getFields().length;

        // If partition keys exist, they extend the field count.
        String[] partitionKeys = getPartitionKeys(null, null);
        if (partitionKeys != null) {
            fieldLen += partitionKeys.length;
        }

        // No projection was pushed down, so read every column.
        requiredIndexes = new int[fieldLen];
        for (int i = 0; i < fieldLen; i++) {
            requiredIndexes[i] = i;
        }

        this.requiredColumns = requiredIndexes;
    }

    try {
        serde = new ColumnarSerDe();
        serde.initialize(hiveConf, props);
    } catch (SerDeException e) {
        LOG.error(e.toString(), e);
        throw new IOException(e);
    }
}
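
The `props` passed to serde.initialize(...) above are built elsewhere in the loader. As a hedged sketch, they would carry the same column-list properties seen in Example #1; the column names and types below are placeholders, not values from HiveColumnarLoader.

// Assumed shape of the loader's table properties (placeholder names and types).
Properties props = new Properties();
props.setProperty(Constants.LIST_COLUMNS, "f1,f2,f3");
props.setProperty(Constants.LIST_COLUMN_TYPES, "string,int,double");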