org.apache.hadoop.hive.serde2.SerDe Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.SerDe. They are drawn from open source projects; the source file, project, and license are listed above each example.
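Across the examples, the same lifecycle recurs: instantiate a SerDe, call initialize with the table's Properties (at minimum the column names and types), then use the ObjectInspector it exposes to read fields out of deserialized rows. The following is a minimal standalone sketch of that lifecycle; it assumes the stock LazySimpleSerDe and a made-up two-column layout, and is not taken from any of the projects below.

import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.Text;

public class SerDeUsageSketch {
  public static void main(String[] args) throws Exception {
    // Table properties: column names and types, as Hive would pass them.
    Properties props = new Properties();
    props.setProperty("columns", "id,name");
    props.setProperty("columns.types", "int:string");

    SerDe serde = new LazySimpleSerDe();
    serde.initialize(new Configuration(), props);

    // Deserialize one delimited line (LazySimpleSerDe's default field
    // delimiter is Ctrl-A).
    Object row = serde.deserialize(new Text("1\u0001alice"));

    // The struct ObjectInspector gives typed access to the row's fields.
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    for (StructField field : oi.getAllStructFieldRefs()) {
      System.out.println(field.getFieldName() + " = "
          + oi.getStructFieldData(row, field));
    }
  }
}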
Example #1
Source File: HiveAbstractReader.java    From dremio-oss with Apache License 2.0
public HiveAbstractReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
                          final List<SchemaPath> projectedColumns, final OperatorContext context, final JobConf jobConf,
                          final SerDe tableSerDe, final StructObjectInspector tableOI, final SerDe partitionSerDe,
                          final StructObjectInspector partitionOI, final ScanFilter filter,
                          final Collection<List<String>> referencedTables, final UserGroupInformation readerUgi) {
  super(context, projectedColumns);
  this.tableAttr = tableAttr;
  this.split = split;
  this.jobConf = jobConf;
  this.tableSerDe = tableSerDe;
  this.tableOI = tableOI;
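  // When the split carries no partition-specific SerDe/ObjectInspector, fall
  // back to the table-level ones.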
  this.partitionSerDe = partitionSerDe == null ? tableSerDe : partitionSerDe;
  this.partitionOI = partitionOI == null ? tableOI : partitionOI;
  this.filter = filter;
  this.referencedTables = referencedTables;
  this.readerUgi = readerUgi;
}
 
Example #2
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0
public static StructObjectInspector getStructOI(final SerDe serDe) throws Exception {
  ObjectInspector oi = serDe.getObjectInspector();
  if (oi.getCategory() != Category.STRUCT) {
    throw new UnsupportedOperationException(String.format("%s category not supported", oi.getCategory()));
  }
  return (StructObjectInspector) oi;
}
 
Example #3
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0
public HiveORCVectorizedReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
    final List<SchemaPath> projectedColumns, final OperatorContext context, final JobConf jobConf,
    final SerDe tableSerDe, final StructObjectInspector tableOI, final SerDe partitionSerDe,
    final StructObjectInspector partitionOI, final ScanFilter filter, final Collection<List<String>> referencedTables,
    final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI, partitionSerDe, partitionOI, filter,
    referencedTables, readerUgi);
}
 
Example #4
Source File: HiveTextReader.java    From dremio-oss with Apache License 2.0
public HiveTextReader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
    final List<SchemaPath> projectedColumns, final OperatorContext context, final JobConf jobConf,
    final SerDe tableSerDe, final StructObjectInspector tableOI, final SerDe partitionSerDe,
    final StructObjectInspector partitionOI, final ScanFilter filter, final Collection<List<String>> referencedTables,
    final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI, partitionSerDe, partitionOI, filter,
    referencedTables, readerUgi);
}
 
Example #5
Source File: TestInputOutputFormat.java    From hive-dwrf with Apache License 2.0
@Test
public void testEmptyFile() throws Exception {
  JobConf job = new JobConf(conf);
  Properties properties = new Properties();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true,
          properties, Reporter.NULL);
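  // Close the writer immediately so the ORC file on disk contains no rows.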
  writer.close(true);
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  SerDe serde = new OrcSerde();
  serde.initialize(conf, properties);
  InputFormat<?,?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);

  // read the whole file
  conf.set("hive.io.file.readcolumn.ids", "0,1");
  org.apache.hadoop.mapred.RecordReader reader =
      in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  assertEquals(0.0, reader.getProgress(), 0.00001);
  assertEquals(0, reader.getPos());
  assertEquals(false, reader.next(key, value));
  reader.close();
  assertEquals(null, serde.getSerDeStats());
}
 
Example #6
Source File: HiveRecordReaders.java    From dremio-oss with Apache License 2.0
Reader(final HiveTableXattr tableAttr, final SplitAndPartitionInfo split,
    final List<SchemaPath> projectedColumns, final OperatorContext context, final JobConf jobConf,
    final SerDe tableSerDe, final StructObjectInspector tableOI, final SerDe partitionSerDe,
    final StructObjectInspector partitionOI, final ScanFilter filter, final Collection<List<String>> referencedTables,
    final UserGroupInformation readerUgi) {
  super(tableAttr, split, projectedColumns, context, jobConf, tableSerDe, tableOI, partitionSerDe, partitionOI, filter,
    referencedTables, readerUgi);
}
 
Example #7
Source File: HiveSerDeWrapper.java    From incubator-gobblin with Apache License 2.0
/**
 * Get the {@link SerDe} instance associated with this {@link HiveSerDeWrapper}.
 * This method performs lazy initialization.
 */
public SerDe getSerDe() throws IOException {
  if (!this.serDe.isPresent()) {
    try {
      this.serDe = Optional.of(SerDe.class.cast(Class.forName(this.serDeClassName).newInstance()));
    } catch (Throwable t) {
      throw new IOException("Failed to instantiate SerDe " + this.serDeClassName, t);
    }
  }
  return this.serDe.get();
}
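The same lazy-loading idea can be written against java.util.Optional and without the deprecated Class.newInstance(). The sketch below is a hypothetical standalone variant, not part of Gobblin's HiveSerDeWrapper.

import java.io.IOException;
import java.util.Optional;

import org.apache.hadoop.hive.serde2.SerDe;

public class LazySerDeHolder {
  private final String serDeClassName;
  private Optional<SerDe> serDe = Optional.empty();

  public LazySerDeHolder(String serDeClassName) {
    this.serDeClassName = serDeClassName;
  }

  public SerDe getSerDe() throws IOException {
    if (!serDe.isPresent()) {
      try {
        // Load and instantiate the SerDe class reflectively, failing with an
        // IOException that names the offending class.
        SerDe instance = Class.forName(serDeClassName)
            .asSubclass(SerDe.class)
            .getDeclaredConstructor()
            .newInstance();
        serDe = Optional.of(instance);
      } catch (ReflectiveOperationException | ClassCastException e) {
        throw new IOException("Failed to instantiate SerDe " + serDeClassName, e);
      }
    }
    return serDe.get();
  }
}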
 
Example #8
Source File: KuduStorageHandler.java    From HiveKudu-Handler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
    return HiveKuduSerDe.class;
}
 
Example #9
Source File: SMStorageHandler.java    From spliceengine with GNU Affero General Public License v3.0
@Override
public Class<? extends SerDe> getSerDeClass() {
    return SMSerDe.class;
}
 
Example #10
Source File: JdbcStorageHandler.java    From HiveJdbcStorageHandler with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
    return JdbcSerDe.class;
}
 
Example #11
Source File: EsStorageHandler.java    From elasticsearch-hadoop with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
    return EsSerDe.class;
}
 
Example #12
Source File: AccumuloStorageHandler.java    From accumulo-hive-storage-manager with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
    return AccumuloSerde.class;
}
 
Example #13
Source File: CassandraStorageHandler.java    From Hive-Cassandra with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return CassandraColumnSerDe.class;
}
 
Example #14
Source File: BlurHiveStorageHandler.java    From incubator-retired-blur with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return BlurSerDe.class;
}
 
Example #15
Source File: KafkaStorageHandler.java    From HiveKa with Apache License 2.0
@Override
public Class<? extends SerDe> getSerDeClass() {
  return AvroSerDe.class;
}
 
Example #16
Source File: SolrStorageHandler.java    From hive-solr with MIT License
@Override
public Class<? extends SerDe> getSerDeClass() {
    return SolrSerde.class;
}
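Examples #8 through #16 all override the same HiveStorageHandler hook: getSerDeClass() tells Hive which SerDe to instantiate for tables declared with STORED BY that handler. A minimal hypothetical handler is sketched below, against a Hive version where HiveStorageHandler still uses the SerDe interface (as these projects do); MyStorageHandler is made up, and LazySimpleSerDe merely stands in for the handler's own SerDe implementation.

import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;

public class MyStorageHandler extends DefaultStorageHandler {
    @Override
    public Class<? extends SerDe> getSerDeClass() {
        // Hive instantiates and initializes this class for every table
        // backed by the handler.
        return LazySimpleSerDe.class; // placeholder for the handler's own SerDe
    }
}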
 
Example #17
Source File: ScanWithHiveReader.java    From dremio-oss with Apache License 2.0
private static RecordReader getRecordReader(HiveSplitXattr splitXattr, HiveTableXattr tableXattr,
                                            OperatorContext context, HiveConf hiveConf,
                                            SplitAndPartitionInfo split, CompositeReaderConfig compositeReader,
                                            HiveProxyingSubScan config, UserGroupInformation readerUgi)
  throws Exception {

  final JobConf baseJobConf = new JobConf(hiveConf);
  final Properties tableProperties = new Properties();
  addProperties(baseJobConf, tableProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

  final boolean isTransactional = AcidUtils.isTablePropertyTransactional(baseJobConf);
  final boolean isPartitioned = config.getPartitionColumns() != null && config.getPartitionColumns().size() > 0;
  final Optional<String> tableInputFormat = HiveReaderProtoUtil.getTableInputFormat(tableXattr);
  final JobConf jobConf = new JobConf(baseJobConf);

  final SerDe tableSerDe = createSerDe(jobConf, HiveReaderProtoUtil.getTableSerializationLib(tableXattr).get(),
      tableProperties);
  final StructObjectInspector tableOI = getStructOI(tableSerDe);
  final SerDe partitionSerDe;
  final StructObjectInspector partitionOI;

  boolean hasDeltas = false;
  if (isTransactional) {
    InputSplit inputSplit = HiveUtilities.deserializeInputSplit(splitXattr.getInputSplit());
    if (inputSplit instanceof OrcSplit) {
      hasDeltas = hasDeltas((OrcSplit) inputSplit);
    }
  }

  final Class<? extends HiveAbstractReader> tableReaderClass =
    getNativeReaderClass(tableInputFormat, context.getOptions(), hiveConf, false, isTransactional && hasDeltas);

  final Constructor<? extends HiveAbstractReader> tableReaderCtor = getNativeReaderCtor(tableReaderClass);

  Constructor<? extends HiveAbstractReader> readerCtor = tableReaderCtor;
  // It is possible for a partition to have a different input format than the table input format.
  if (isPartitioned) {
    final List<Prop> partitionPropertiesList;
    final Properties partitionProperties = new Properties();
    final Optional<String> partitionInputFormat;
    final Optional<String> partitionStorageHandlerName;
    // First add table properties and then add partition properties. Partition properties override table properties.
    addProperties(jobConf, partitionProperties, HiveReaderProtoUtil.getTableProperties(tableXattr));

    // If Partition Properties are stored in DatasetMetadata (Pre 3.2.0)
    if (HiveReaderProtoUtil.isPreDremioVersion3dot2dot0LegacyFormat(tableXattr)) {
      logger.debug("Reading partition properties from DatasetMetadata");
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, splitXattr.getPartitionId());
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe =
        createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, splitXattr.getPartitionId()).get(),
          partitionProperties
        );
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, splitXattr.getPartitionId());
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, splitXattr.getPartitionId());

    } else {
      logger.debug("Reading partition properties from PartitionChunk");
      final PartitionXattr partitionXattr = HiveReaderProtoUtil.getPartitionXattr(split);
      partitionPropertiesList = HiveReaderProtoUtil.getPartitionProperties(tableXattr, partitionXattr);
      addProperties(jobConf, partitionProperties, partitionPropertiesList);
      partitionSerDe =
        createSerDe(jobConf,
          HiveReaderProtoUtil.getPartitionSerializationLib(tableXattr, partitionXattr),
          partitionProperties
        );
      partitionInputFormat = HiveReaderProtoUtil.getPartitionInputFormat(tableXattr, partitionXattr);
      partitionStorageHandlerName = HiveReaderProtoUtil.getPartitionStorageHandler(tableXattr, partitionXattr);
    }

    jobConf.setInputFormat(getInputFormatClass(jobConf, partitionInputFormat, partitionStorageHandlerName));
    partitionOI = getStructOI(partitionSerDe);

    final boolean mixedSchema = !tableOI.equals(partitionOI);
    if (!partitionInputFormat.equals(tableInputFormat) || mixedSchema || isTransactional && hasDeltas) {
      final Class<? extends HiveAbstractReader> partitionReaderClass = getNativeReaderClass(
        partitionInputFormat, context.getOptions(), jobConf, mixedSchema, isTransactional);
      readerCtor = getNativeReaderCtor(partitionReaderClass);
    }
  } else {
    partitionSerDe = null;
    partitionOI = null;
    jobConf.setInputFormat(getInputFormatClass(jobConf, tableInputFormat, HiveReaderProtoUtil.getTableStorageHandler(tableXattr)));
  }

  return readerCtor.newInstance(tableXattr, split,
      compositeReader.getInnerColumns(), context, jobConf, tableSerDe, tableOI, partitionSerDe,
      partitionOI, config.getFilter(), config.getReferencedTables(), readerUgi);
}
 
Example #18
Source File: ScanWithHiveReader.java    From dremio-oss with Apache License 2.0
private static Constructor<? extends HiveAbstractReader> getNativeReaderCtor(Class<? extends HiveAbstractReader> clazz)
    throws NoSuchMethodException {
  return clazz.getConstructor(HiveTableXattr.class, SplitAndPartitionInfo.class, List.class, OperatorContext.class,
                              JobConf.class, SerDe.class, StructObjectInspector.class, SerDe.class, StructObjectInspector.class,
                              ScanFilter.class, Collection.class, UserGroupInformation.class);
}
 
Example #19
Source File: HiveTextReader.java    From dremio-oss with Apache License 2.0
@Override
public int populateData() throws IOException, SerDeException {
  final SkipRecordsInspector skipRecordsInspector = this.skipRecordsInspector;
  final RecordReader<Object, Object> reader = this.reader;
  final Converter partTblObjectInspectorConverter = this.partTblObjectInspectorConverter;
  final Object key = this.key;

  final int numRowsPerBatch = (int) this.numRowsPerBatch;

  final StructField[] selectedStructFieldRefs = this.selectedStructFieldRefs;
  final SerDe partitionSerDe = this.partitionSerDe;
  final StructObjectInspector finalOI = this.finalOI;
  final ObjectInspector[] selectedColumnObjInspectors = this.selectedColumnObjInspectors;
  final HiveFieldConverter[] selectedColumnFieldConverters = this.selectedColumnFieldConverters;
  final ValueVector[] vectors = this.vectors;

  skipRecordsInspector.reset();
  Object value;

  int recordCount = 0;

  while (recordCount < numRowsPerBatch) {
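    // Each iteration: read the next value (skipping header lines and buffering
    // values through skipRecordsInspector), deserialize it with the partition
    // SerDe, and copy the projected fields into the output value vectors.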
    try (OperatorStats.WaitRecorder recorder = OperatorStats.getWaitRecorder(this.context.getStats())) {
      boolean hasNext = reader.next(key, value = skipRecordsInspector.getNextValue());
      if (!hasNext) {
        break;
      }
    }
    catch(FSError e) {
      throw HadoopFileSystemWrapper.propagateFSError(e);
    }
    if (skipRecordsInspector.doSkipHeader(recordCount++)) {
      continue;
    }
    Object bufferedValue = skipRecordsInspector.bufferAdd(value);
    if (bufferedValue != null) {
      Object deSerializedValue = partitionSerDe.deserialize((Writable) bufferedValue);
      if (partTblObjectInspectorConverter != null) {
        deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
      }

      for (int i = 0; i < selectedStructFieldRefs.length; i++) {
        Object hiveValue = finalOI.getStructFieldData(deSerializedValue, selectedStructFieldRefs[i]);
        if (hiveValue != null) {
          selectedColumnFieldConverters[i].setSafeValue(selectedColumnObjInspectors[i], hiveValue, vectors[i], skipRecordsInspector.getActualCount());
        }
      }
      skipRecordsInspector.incrementActualCount();
    }
    skipRecordsInspector.incrementTempCount();
  }
  for (int i = 0; i < selectedStructFieldRefs.length; i++) {
    vectors[i].setValueCount(skipRecordsInspector.getActualCount());
  }

  skipRecordsInspector.updateContinuance();
  return skipRecordsInspector.getActualCount();
}
 
Example #20
Source File: HiveUtilities.java    From dremio-oss with Apache License 2.0
/**
 * Utility method which creates and initializes a {@link SerDe} for the given SerDe class name and properties.
 *
 * @param jobConf Configuration to use when creating the SerDe
 * @param sLib {@link SerDe} class name
 * @param properties SerDe properties
 * @return an initialized {@link SerDe} instance
 * @throws Exception if the SerDe class cannot be loaded, instantiated, or initialized
 */
public static final SerDe createSerDe(final JobConf jobConf, final String sLib, final Properties properties) throws Exception {
  final Class<? extends SerDe> c = Class.forName(sLib).asSubclass(SerDe.class);
  final SerDe serde = c.getConstructor().newInstance();
  serde.initialize(jobConf, properties);

  return serde;
}
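A hypothetical call site for this helper, combined with getStructOI from Example #2; the JobConf and the two-column layout are illustrative, and LazySimpleSerDe is simply the stock Hive text SerDe.

  // Illustrative only: build minimal table properties and obtain an
  // initialized SerDe plus its struct ObjectInspector.
  final JobConf jobConf = new JobConf();
  final Properties tableProperties = new Properties();
  tableProperties.setProperty("columns", "id,name");
  tableProperties.setProperty("columns.types", "int:string");

  final SerDe serde = HiveUtilities.createSerDe(jobConf,
      "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", tableProperties);
  final StructObjectInspector tableOI = HiveUtilities.getStructOI(serde);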