Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.StructField#getFieldObjectInspector()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.StructField#getFieldObjectInspector(). They are drawn from open-source projects; the source file, project, and license are noted above each example.
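
As a quick orientation, here is a minimal, self-contained sketch of the pattern every example below shares: look up a StructField on a StructObjectInspector, read the field's data with getStructFieldData(), and interpret that data with the inspector returned by getFieldObjectInspector(). The class name and the struct<name:string,age:int> layout are illustrative only, not taken from any of the projects below.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class GetFieldObjectInspectorSketch {
    public static void main(String[] args) {
        // Build an inspector for struct<name:string,age:int> over plain Java objects.
        StandardStructObjectInspector structOI =
                ObjectInspectorFactory.getStandardStructObjectInspector(
                        Arrays.asList("name", "age"),
                        Arrays.asList(
                                (ObjectInspector) PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                                PrimitiveObjectInspectorFactory.javaIntObjectInspector));

        // In the standard representation, a row is simply a List of field values.
        List<Object> row = Arrays.asList((Object) "alice", 42);

        // The shared pattern: pair each field's data with the inspector
        // returned by getFieldObjectInspector().
        for (StructField field : structOI.getAllStructFieldRefs()) {
            Object fieldData = structOI.getStructFieldData(row, field);
            ObjectInspector fieldOI = field.getFieldObjectInspector();
            System.out.println(field.getFieldName() + " (" + fieldOI.getTypeName() + "): " + fieldData);
        }
    }
}
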
Example 1
Source File: ObjectInspectorTest.java    From Hive-XML-SerDe with Apache License 2.0
@SuppressWarnings("rawtypes")
public void testSimpleXmlNotMap() throws SerDeException {
    XmlSerDe xmlSerDe = new XmlSerDe();
    Configuration configuration = new Configuration();
    Properties properties = new Properties();
    properties.put(LIST_COLUMNS, "test");
    properties.put(LIST_COLUMN_TYPES, "map<string,string>");
    properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]/text()");
    xmlSerDe.initialize(configuration, properties);
    Text text = new Text();
    text.set("<root><test1>string1</test1><test2>string2</test2></root>");
    Object o = xmlSerDe.deserialize(text);
    XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector());
    StructField structField = structInspector.getStructFieldRef("test");
    Object data = structInspector.getStructFieldData(o, structField);
    XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector();
    Map map = fieldInspector.getMap(data);
    assertEquals(0, map.size());
}
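
Note: the XPath expression above selects text() nodes rather than elements, so there is nothing that can serve as map keys; presumably that is why the deserialized field yields an empty map, which is what this test asserts (compare Example 6, where the XPath selects the elements themselves).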
 
Example 2
Source File: XmlStructObjectInspector.java    From Hive-XML-SerDe with Apache License 2.0
/**
 * @see org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector#getStructFieldData(java.lang.Object,
 *      org.apache.hadoop.hive.serde2.objectinspector.StructField)
 */
@SuppressWarnings("unchecked")
@Override
public Object getStructFieldData(Object data, StructField structField) {
    if ((data instanceof List) && !(data instanceof SerDeArray)) {
        MyField f = (MyField) structField;
        int fieldID = f.getFieldID();
        return ((List<Object>) data).get(fieldID);
    } else {
        ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
        Category category = fieldObjectInspector.getCategory();
        Object fieldData = this.xmlProcessor.getObjectValue(data, structField.getFieldName());
        switch (category) {
            case PRIMITIVE: {
                PrimitiveObjectInspector primitiveObjectInspector = (PrimitiveObjectInspector) fieldObjectInspector;
                PrimitiveCategory primitiveCategory = primitiveObjectInspector.getPrimitiveCategory();
                return this.xmlProcessor.getPrimitiveObjectValue(fieldData, primitiveCategory);
            }
            default:
                return fieldData;
        }
    }
}
 
Example 3
Source File: JSONCDHSerDe.java    From bigdata-tutorial with Apache License 2.0
/**
 * Deparses struct data into a serializable JSON object.
 *
 * @param obj      - Hive struct data
 * @param structOI - ObjectInspector for the struct
 * @param isRow    - Whether or not this struct represents a top-level row
 * @return - A deparsed struct
 */
private Object deparseStruct(Object obj,
							 StructObjectInspector structOI,
							 boolean isRow) {
	Map<Object, Object> struct = new HashMap<Object, Object>();
	List<? extends StructField> fields = structOI.getAllStructFieldRefs();
	for (int i = 0; i < fields.size(); i++) {
		StructField field = fields.get(i);
		// The top-level row object is treated slightly differently from other
		// structs, because the field names for the row do not correctly reflect
		// the Hive column names. For lower-level structs, we can get the field
		// name from the associated StructField object.
		String fieldName = isRow ? colNames.get(i) : field.getFieldName();
		ObjectInspector fieldOI = field.getFieldObjectInspector();
		Object fieldObj = structOI.getStructFieldData(obj, field);
		struct.put(fieldName, deparseObject(fieldObj, fieldOI));
	}
	return struct;
}
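
The deparseObject(...) helper invoked in the last line is not part of this excerpt. The following is only a plausible sketch of such a dispatcher, assuming it recurses by inspector category; it is not JSONCDHSerDe's actual code (the MAP and UNION branches are omitted):

private Object deparseObject(Object obj, ObjectInspector oi) {
	switch (oi.getCategory()) {
		case PRIMITIVE:
			// unwrap writables into plain Java objects (String, Integer, ...)
			return ((PrimitiveObjectInspector) oi).getPrimitiveJavaObject(obj);
		case STRUCT:
			return deparseStruct(obj, (StructObjectInspector) oi, false);
		case LIST: {
			ListObjectInspector listOI = (ListObjectInspector) oi;
			List<Object> list = new ArrayList<Object>();
			for (Object element : listOI.getList(obj)) {
				list.add(deparseObject(element, listOI.getListElementObjectInspector()));
			}
			return list;
		}
		default:
			return null; // MAP and UNION omitted in this sketch
	}
}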
 
Example 4
Source File: MaxRowUDAF.java    From incubator-hivemall with Apache License 2.0
private ObjectInspector initReduceSide(StructObjectInspector inputStructOI)
        throws HiveException {
    List<? extends StructField> fields = inputStructOI.getAllStructFieldRefs();
    int length = fields.size();
    this.inputStructOI = inputStructOI;
    this.inputOIs = new ObjectInspector[length];
    this.outputOIs = new ObjectInspector[length];

    for (int i = 0; i < length; i++) {
        StructField field = fields.get(i);
        ObjectInspector oi = field.getFieldObjectInspector();
        inputOIs[i] = oi;
        outputOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(oi);
    }

    return ObjectInspectorUtils.getStandardObjectInspector(inputStructOI);
}
 
Example 5
Source File: JSONSerDe.java    From searchanalytics-bigdata with MIT License
/**
 * Deparses struct data into a serializable JSON object.
 *
 * @param obj
 *            - Hive struct data
 * @param structOI
 *            - ObjectInspector for the struct
 * @param isRow
 *            - Whether or not this struct represents a top-level row
 * @return - A deparsed struct
 */
private Object deparseStruct(final Object obj,
		final StructObjectInspector structOI, final boolean isRow) {
	final Map<Object, Object> struct = new HashMap<Object, Object>();
	final List<? extends StructField> fields = structOI
			.getAllStructFieldRefs();
	for (int i = 0; i < fields.size(); i++) {
		final StructField field = fields.get(i);
		// The top-level row object is treated slightly differently from other
		// structs, because the field names for the row do not correctly reflect
		// the Hive column names. For lower-level structs, we can get the field
		// name from the associated StructField object.
		final String fieldName = isRow ? colNames.get(i) : field.getFieldName();
		final ObjectInspector fieldOI = field.getFieldObjectInspector();
		final Object fieldObj = structOI.getStructFieldData(obj, field);
		struct.put(fieldName, deparseObject(fieldObj, fieldOI));
	}
	return struct;
}
 
Example 6
Source File: ObjectInspectorTest.java    From Hive-XML-SerDe with Apache License 2.0
@SuppressWarnings("rawtypes")
public void testSimpleXmlMap() throws SerDeException {
    XmlSerDe xmlSerDe = new XmlSerDe();
    Configuration configuration = new Configuration();
    Properties properties = new Properties();
    properties.put(LIST_COLUMNS, "test");
    properties.put(LIST_COLUMN_TYPES, "map<string,string>");
    properties.setProperty("column.xpath.test", "//*[contains(name(),'test')]");
    xmlSerDe.initialize(configuration, properties);
    Text text = new Text();
    text.set("<root><test1>string1</test1><test2>string2</test2></root>");
    Object o = xmlSerDe.deserialize(text);
    XmlStructObjectInspector structInspector = ((XmlStructObjectInspector) xmlSerDe.getObjectInspector());
    StructField structField = structInspector.getStructFieldRef("test");
    Object data = structInspector.getStructFieldData(o, structField);
    XmlMapObjectInspector fieldInspector = (XmlMapObjectInspector) structField.getFieldObjectInspector();
    Map map = fieldInspector.getMap(data);
    PrimitiveObjectInspector valueObjectInspector = (PrimitiveObjectInspector) fieldInspector.getMapValueObjectInspector();
    String test = (String) valueObjectInspector.getPrimitiveJavaObject(map.get("test1"));
    assertEquals("string1", test);
}
 
Example 7
Source File: BlurSerializer.java    From incubator-retired-blur with Apache License 2.0
private String getFieldData(String columnName, Object data, StructObjectInspector structObjectInspector,
    Map<String, StructField> allStructFieldRefs, String name) throws SerDeException {
  StructField structField = allStructFieldRefs.get(name);
  ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
  Object structFieldData = structObjectInspector.getStructFieldData(data, structField);
  if (fieldObjectInspector instanceof PrimitiveObjectInspector) {
    return toString(columnName, structFieldData, (PrimitiveObjectInspector) fieldObjectInspector);
  } else {
    throw new SerDeException("Embedded non-primitive type is not supported columnName [" + columnName
        + "] objectInspector [" + fieldObjectInspector + "].");
  }
}
 
Example 8
Source File: HiveFieldExtractor.java    From elasticsearch-hadoop with Apache License 2.0
@Override
protected Object extractField(Object target) {
    List<String> flNames = fieldNames;

    for (int i = 0; i < flNames.size(); i++) {
        String fl = flNames.get(i);
        if (target instanceof HiveType) {
            HiveType type = (HiveType) target;
            ObjectInspector inspector = type.getObjectInspector();
            if (inspector instanceof StructObjectInspector) {
                StructObjectInspector soi = (StructObjectInspector) inspector;
                StructField field = soi.getStructFieldRef(fl);
                ObjectInspector foi = field.getFieldObjectInspector();
                Assert.isTrue(foi.getCategory() == ObjectInspector.Category.PRIMITIVE,
                        String.format("Field [%s] needs to be a primitive; found [%s]", fl, foi.getTypeName()));

                // expecting a Writable - simply do a toString
                target = soi.getStructFieldData(type.getObject(), field);
            }
            else {
                return FieldExtractor.NOT_FOUND;
            }
        }
        else {
            return FieldExtractor.NOT_FOUND;
        }
    }

    if (target == null || target instanceof NullWritable) {
        return StringUtils.EMPTY;
    }
    return target.toString();
}
 
Example 9
Source File: ParquetHiveSerDe.java    From parquet-mr with Apache License 2.0
private ArrayWritable createStruct(final Object obj, final StructObjectInspector inspector)
    throws SerDeException {
  final List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  final Writable[] arr = new Writable[fields.size()];
  for (int i = 0; i < fields.size(); i++) {
    final StructField field = fields.get(i);
    final Object subObj = inspector.getStructFieldData(obj, field);
    final ObjectInspector subInspector = field.getFieldObjectInspector();
    arr[i] = createObject(subObj, subInspector);
  }
  return new ArrayWritable(Writable.class, arr);
}
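
The createObject(...) helper used above is likewise not shown. A rough sketch of what such a dispatcher could look like, assuming primitives are copied to their Writable form via ObjectInspectorUtils; this is an illustration, not ParquetHiveSerDe's actual implementation:

private Writable createObject(final Object obj, final ObjectInspector inspector) throws SerDeException {
  if (obj == null) {
    return null;
  }
  switch (inspector.getCategory()) {
    case STRUCT:
      return createStruct(obj, (StructObjectInspector) inspector);
    case PRIMITIVE:
      // e.g. a Java Integer becomes an IntWritable, a String becomes a Text
      return (Writable) ObjectInspectorUtils.copyToStandardObject(
          obj, inspector, ObjectInspectorCopyOption.WRITABLE);
    default:
      throw new SerDeException("Unsupported category: " + inspector.getCategory());
  }
}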
 
Example 10
Source File: BitcoinTransactionHashSegwitUDF.java    From hadoopcryptoledger with Apache License 2.0
/**
* Reads a list of BitcoinScriptWitness items from a Hive table in any format (e.g. ORC, Parquet)
*
* @param loi ObjectInspector for processing the Object containing a list
* @param listOfScriptWitnessItemObject object containing the list of scriptwitnessitems of a Bitcoin Transaction
*
* @return a list of BitcoinScriptWitnessItem 
*
*/
private List<BitcoinScriptWitnessItem> readListOfBitcoinScriptWitnessFromTable(ListObjectInspector loi, Object listOfScriptWitnessItemObject) {
    int listLength = loi.getListLength(listOfScriptWitnessItemObject);
    List<BitcoinScriptWitnessItem> result = new ArrayList<>(listLength);
    StructObjectInspector listOfScriptwitnessItemElementObjectInspector = (StructObjectInspector) loi.getListElementObjectInspector();
    for (int i = 0; i < listLength; i++) {
        Object currentlistofscriptwitnessitemObject = loi.getListElement(listOfScriptWitnessItemObject, i);
        StructField stackitemcounterSF = listOfScriptwitnessItemElementObjectInspector.getStructFieldRef("stackitemcounter");
        StructField scriptwitnesslistSF = listOfScriptwitnessItemElementObjectInspector.getStructFieldRef("scriptwitnesslist");
        boolean scriptwitnessitemNull = (stackitemcounterSF == null) || (scriptwitnesslistSF == null);
        if (scriptwitnessitemNull) {
            LOG.warn("Invalid BitcoinScriptWitnessItem detected at position " + i);
            return new ArrayList<>();
        }
        byte[] stackItemCounter = wboi.getPrimitiveJavaObject(listOfScriptwitnessItemElementObjectInspector.getStructFieldData(currentlistofscriptwitnessitemObject, stackitemcounterSF));
        Object listofscriptwitnessObject = listOfScriptwitnessItemElementObjectInspector.getStructFieldData(currentlistofscriptwitnessitemObject, scriptwitnesslistSF);
        ListObjectInspector loiScriptWitness = (ListObjectInspector) scriptwitnesslistSF.getFieldObjectInspector();
        StructObjectInspector listOfScriptwitnessElementObjectInspector = (StructObjectInspector) loiScriptWitness.getListElementObjectInspector();
        int listWitnessLength = loiScriptWitness.getListLength(listofscriptwitnessObject);
        List<BitcoinScriptWitness> currentScriptWitnessList = new ArrayList<>(listWitnessLength);
        for (int j = 0; j < listWitnessLength; j++) {
            Object currentlistofscriptwitnessObject = loiScriptWitness.getListElement(listofscriptwitnessObject, j);
            StructField witnessscriptlengthSF = listOfScriptwitnessElementObjectInspector.getStructFieldRef("witnessscriptlength");
            StructField witnessscriptSF = listOfScriptwitnessElementObjectInspector.getStructFieldRef("witnessscript");
            boolean scriptwitnessNull = (witnessscriptlengthSF == null) || (witnessscriptSF == null);
            if (scriptwitnessNull) {
                LOG.warn("Invalid BitcoinScriptWitness detected at position " + j + " for BitcoinScriptWitnessItem " + i);
                return new ArrayList<>();
            }
            byte[] scriptWitnessLength = wboi.getPrimitiveJavaObject(listOfScriptwitnessElementObjectInspector.getStructFieldData(currentlistofscriptwitnessObject, witnessscriptlengthSF));
            byte[] scriptWitness = wboi.getPrimitiveJavaObject(listOfScriptwitnessElementObjectInspector.getStructFieldData(currentlistofscriptwitnessObject, witnessscriptSF));
            currentScriptWitnessList.add(new BitcoinScriptWitness(scriptWitnessLength, scriptWitness));
        }
        BitcoinScriptWitnessItem currentBitcoinScriptWitnessItem = new BitcoinScriptWitnessItem(stackItemCounter, currentScriptWitnessList);
        result.add(currentBitcoinScriptWitnessItem);
    }
    return result;
}
 
Example 11
Source File: MapProcessor.java    From hive-solr with Apache License 2.0
public static void resolve(boolean enableFieldMapping, LWDocument doc, String docFieldName, Object data,
  StructField structField, StructObjectInspector inspector) {
  MapObjectInspector moi = (MapObjectInspector) structField.getFieldObjectInspector();
  Object mapValue = inspector.getStructFieldData(data, structField);
  Map<Object, Object> map = (Map<Object, Object>) copyToStandardJavaObject(mapValue, moi);
  Map<String, Object> entries = new HashMap<>();
  resolveMap(enableFieldMapping, entries, docFieldName, map);

  for (Map.Entry<String, Object> entry : entries.entrySet()) {
    doc.addField(entry.getKey(), entry.getValue());
  }

  entries.clear();
}
 
Example 12
Source File: ArrayProcessor.java    From hive-solr with Apache License 2.0
public static void resolve(boolean enableFieldMapping, LWDocument doc, String docFieldName, Object data,
  StructField structField, StructObjectInspector inspector) throws Exception {
  ListObjectInspector loi = (ListObjectInspector) structField.getFieldObjectInspector();
  Object listValue = inspector.getStructFieldData(data, structField);
  List<Object> list = (List<Object>) copyToStandardJavaObject(listValue, loi);
  Map<String, Object> entries = new HashMap<>();
  resolveList(enableFieldMapping, entries, docFieldName, list, -1);

  for (Map.Entry<String, Object> entry : entries.entrySet()) {
    doc.addField(entry.getKey(), entry.getValue());
  }

  entries.clear();
}
 
Example 13
Source File: HiveKuduSerDe.java    From HiveKudu-Handler with Apache License 2.0
@Override
public HiveKuduWritable serialize(Object row, ObjectInspector inspector)
    throws SerDeException {

    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != fieldCount) {
        throw new SerDeException(String.format(
                "Required %d columns, received %d.", fieldCount,
                fields.size()));
    }

    cachedWritable.clear();

    for (int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if (structField != null) {
            Object field = structInspector.getStructFieldData(row,
                    structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();

            Object javaObject = HiveKuduBridgeUtils.deparseObject(field,
                    fieldOI);
            LOG.warn("Column value of " + i + " is " + javaObject.toString());
            cachedWritable.set(i, javaObject);
        }
    }
    return cachedWritable;
}
 
Example 14
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI, WritableFloatObjectInspector covarOI)
        throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI,
                    covarOI);
            }
        } else {
            LazySimpleSerDe serde =
                    HiveUtils.getLineSerde(labelOI, featureOI, weightOI, covarOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            StructField c4ref = lineOI.getStructFieldRef("c4");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();
            FloatObjectInspector c4refOI =
                    (FloatObjectInspector) c4ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    Object f3 = fields.get(3);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid unexpected case
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    float cov =
                            (f3 == null) ? WeightValueWithCovar.DEFAULT_COVAR : c4refOI.get(f3);
                    model.set(k, new WeightValueWithCovar(v, cov, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example 15
Source File: GenericHiveRecordCursor.java    From presto with Apache License 2.0
public GenericHiveRecordCursor(
        Configuration configuration,
        Path path,
        RecordReader<K, V> recordReader,
        long totalBytes,
        Properties splitSchema,
        List<HiveColumnHandle> columns,
        DateTimeZone hiveStorageTimeZone)
{
    requireNonNull(path, "path is null");
    requireNonNull(recordReader, "recordReader is null");
    checkArgument(totalBytes >= 0, "totalBytes is negative");
    requireNonNull(splitSchema, "splitSchema is null");
    requireNonNull(columns, "columns is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    this.path = path;
    this.recordReader = recordReader;
    this.totalBytes = totalBytes;
    this.key = recordReader.createKey();
    this.value = recordReader.createValue();
    this.hiveStorageTimeZone = hiveStorageTimeZone;

    this.deserializer = getDeserializer(configuration, splitSchema);
    this.rowInspector = getTableObjectInspector(deserializer);

    int size = columns.size();

    this.types = new Type[size];
    this.hiveTypes = new HiveType[size];

    this.structFields = new StructField[size];
    this.fieldInspectors = new ObjectInspector[size];

    this.loaded = new boolean[size];
    this.booleans = new boolean[size];
    this.longs = new long[size];
    this.doubles = new double[size];
    this.slices = new Slice[size];
    this.objects = new Object[size];
    this.nulls = new boolean[size];

    // initialize data columns
    for (int i = 0; i < columns.size(); i++) {
        HiveColumnHandle column = columns.get(i);
        checkState(column.getColumnType() == REGULAR, "column type must be regular");

        types[i] = column.getType();
        hiveTypes[i] = column.getHiveType();

        StructField field = rowInspector.getStructFieldRef(column.getName());
        structFields[i] = field;
        fieldInspectors[i] = field.getFieldObjectInspector();
    }
}
 
Example 16
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0
private long loadPredictionModel(Map<Object, PredictionModel> label2model, File file,
        PrimitiveObjectInspector labelOI, PrimitiveObjectInspector featureOI,
        WritableFloatObjectInspector weightOI) throws IOException, SerDeException {
    long count = 0L;
    if (!file.exists()) {
        return count;
    }
    if (!file.getName().endsWith(".crc")) {
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                count += loadPredictionModel(label2model, f, labelOI, featureOI, weightOI);
            }
        } else {
            LazySimpleSerDe serde = HiveUtils.getLineSerde(labelOI, featureOI, weightOI);
            StructObjectInspector lineOI = (StructObjectInspector) serde.getObjectInspector();
            StructField c1ref = lineOI.getStructFieldRef("c1");
            StructField c2ref = lineOI.getStructFieldRef("c2");
            StructField c3ref = lineOI.getStructFieldRef("c3");
            PrimitiveObjectInspector c1refOI =
                    (PrimitiveObjectInspector) c1ref.getFieldObjectInspector();
            PrimitiveObjectInspector c2refOI =
                    (PrimitiveObjectInspector) c2ref.getFieldObjectInspector();
            FloatObjectInspector c3refOI =
                    (FloatObjectInspector) c3ref.getFieldObjectInspector();

            BufferedReader reader = null;
            try {
                reader = HadoopUtils.getBufferedReader(file);
                String line;
                while ((line = reader.readLine()) != null) {
                    count++;
                    Text lineText = new Text(line);
                    Object lineObj = serde.deserialize(lineText);
                    List<Object> fields = lineOI.getStructFieldsDataAsList(lineObj);
                    Object f0 = fields.get(0);
                    Object f1 = fields.get(1);
                    Object f2 = fields.get(2);
                    if (f0 == null || f1 == null || f2 == null) {
                        continue; // avoid the case that key or value is null
                    }
                    Object label = c1refOI.getPrimitiveWritableObject(c1refOI.copyObject(f0));
                    PredictionModel model = label2model.get(label);
                    if (model == null) {
                        model = createModel();
                        label2model.put(label, model);
                    }
                    Object k = c2refOI.getPrimitiveWritableObject(c2refOI.copyObject(f1));
                    float v = c3refOI.get(f2);
                    model.set(k, new WeightValue(v, false));
                }
            } finally {
                IOUtils.closeQuietly(reader);
            }
        }
    }
    return count;
}
 
Example 17
Source File: DynamoDBSerDe.java    From emr-dynamodb-connector with Apache License 2.0
@Override
public Writable serialize(Object obj, ObjectInspector objInspector) throws SerDeException {
  // Prepare the field ObjectInspectors
  StructObjectInspector soi = (StructObjectInspector) objInspector;
  List<? extends StructField> fields = soi.getAllStructFieldRefs();
  List<Object> rowData = soi.getStructFieldsDataAsList(obj);
  Map<String, AttributeValue> item = Maps.newHashMap();

  validateData(fields, rowData);

  for (int i = 0; i < fields.size(); i++) {
    StructField field = fields.get(i);
    Object data = rowData.get(i);
    String columnName = columnNames.get(i);
    ObjectInspector fieldOI = field.getFieldObjectInspector();

    // Get the Hive to DynamoDB mapper
    HiveDynamoDBType ddType = typeMappings.get(columnName);

    // Check if this column maps a DynamoDB item.
    if (HiveDynamoDBTypeFactory.isHiveDynamoDBItemMapType(ddType)) {
      HiveDynamoDBItemType ddItemType = (HiveDynamoDBItemType) ddType;
      Map<String, AttributeValue> backupItem = ddItemType.parseDynamoDBData(data, fieldOI);

      // We give higher priority to attributes directly mapped to
      // columns. So we do not update the value of an attribute if
      // it already exists. This can happen in case of partial schemas
      // when there is a full backup column and attribute mapped
      // columns.
      for (Map.Entry<String, AttributeValue> entry : backupItem.entrySet()) {
        if (!columnMappings.containsValue(entry.getKey())) {
          item.put(entry.getKey(), entry.getValue());
        }
      }
    } else {
      // User has mapped individual attribute in DynamoDB to
      // corresponding Hive columns.
      AttributeValue attributeValue = data == null ?
          DynamoDBDataParser.getNullAttribute(nullSerialization) :
          ddType.getDynamoDBData(data, fieldOI, nullSerialization);

      if (attributeValue != null) {
        item.put(columnMappings.get(columnName), attributeValue);
      }
    }
  }

  return new DynamoDBItemWritable(item);
}
 
Example 18
Source File: BitcoinTransactionHashSegwitUDF.java    From hadoopcryptoledger with Apache License 2.0
/**
* This method evaluates a given Object (of type BitcoinTransaction) or a struct which has all necessary fields corresponding to a BitcoinTransaction. The first case occurs if the UDF evaluates data represented in a table provided by the HiveSerde that is part of the hadoopcryptoledger library. The second case occurs if BitcoinTransaction data has been imported into a table in another format, such as ORC or Parquet.
* 
* @param arguments array of length 1 containing one object of type BitcoinTransaction or a Struct representing a BitcoinTransaction
*
* @return BytesWritable containing a byte array with the double hash of the BitcoinTransaction
*
* @throws org.apache.hadoop.hive.ql.metadata.HiveException in case an internal Hive error occurred
*/
@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
	if ((arguments==null) || (arguments.length!=1)) { 
		return null;
	}
	BitcoinTransaction bitcoinTransaction;
	if (arguments[0].get() instanceof HiveBitcoinTransaction) { // this happens if the table is in the original file format
		 bitcoinTransaction = BitcoinUDFUtil.convertToBitcoinTransaction((HiveBitcoinTransaction)arguments[0].get());
	} else { // this happens if the table has been imported into a more optimized analytics format, such as ORC. However, usually we expect that the first case will be used mostly (the hash is generated during extraction from the input format)
		// check if all bitcointransaction fields are available <struct<version:int,incounter:binary,outcounter:binary,listofinputs:array<struct<prevtransactionhash:binary,previoustxoutindex:bigint,txinscriptlength:binary,txinscript:binary,seqno:bigint>>,listofoutputs:array<struct<value:bigint,txoutscriptlength:binary,txoutscript:binary>>,locktime:int>
		Object originalObject=arguments[0].get();
		StructField versionSF=soi.getStructFieldRef("version");
		StructField markerSF=soi.getStructFieldRef("marker");
		StructField flagSF=soi.getStructFieldRef("flag");
		StructField incounterSF=soi.getStructFieldRef("incounter");
		StructField outcounterSF=soi.getStructFieldRef("outcounter");
		StructField listofinputsSF=soi.getStructFieldRef("listofinputs");
		StructField listofoutputsSF=soi.getStructFieldRef("listofoutputs");
		StructField listofscriptwitnessitemSF=soi.getStructFieldRef("listofscriptwitnessitem");
		StructField locktimeSF=soi.getStructFieldRef("locktime");
		boolean inputsNull =  (incounterSF==null) || (listofinputsSF==null);
		boolean outputsNull = (outcounterSF==null) || (listofoutputsSF==null);
		boolean otherAttributeNull = (versionSF==null) || (locktimeSF==null);
		boolean segwitInformationNull = (markerSF==null) || (flagSF==null) || (listofscriptwitnessitemSF==null);
		if (inputsNull || outputsNull || otherAttributeNull || segwitInformationNull) {
			LOG.info("Structure does not correspond to BitcoinTransaction");
			return null;
		} 
		int version = wioi.get(soi.getStructFieldData(originalObject,versionSF));
		byte marker = wbyoi.get(soi.getStructFieldData(originalObject, markerSF));
		byte flag =  wbyoi.get(soi.getStructFieldData(originalObject, flagSF));
		byte[] inCounter = wboi.getPrimitiveJavaObject(soi.getStructFieldData(originalObject,incounterSF));
		byte[] outCounter = wboi.getPrimitiveJavaObject(soi.getStructFieldData(originalObject,outcounterSF));
		int locktime = wioi.get(soi.getStructFieldData(originalObject,locktimeSF));
		
		Object listofinputsObject = soi.getStructFieldData(originalObject,listofinputsSF);
		ListObjectInspector loiInputs=(ListObjectInspector)listofinputsSF.getFieldObjectInspector();
		List<BitcoinTransactionInput> listOfInputsArray = readListOfInputsFromTable(loiInputs,listofinputsObject);
		
		Object listofoutputsObject = soi.getStructFieldData(originalObject,listofoutputsSF);
		ListObjectInspector loiOutputs=(ListObjectInspector)listofoutputsSF.getFieldObjectInspector();
		List<BitcoinTransactionOutput> listOfOutputsArray = readListOfOutputsFromTable(loiOutputs,listofoutputsObject);
		
		Object listofscriptwitnessitemObject =  soi.getStructFieldData(originalObject,listofscriptwitnessitemSF);
		ListObjectInspector loiScriptWitnessItem=(ListObjectInspector)listofscriptwitnessitemSF.getFieldObjectInspector();
		List<BitcoinScriptWitnessItem> listOfScriptWitnessitemArray = readListOfBitcoinScriptWitnessFromTable(loiScriptWitnessItem,listofscriptwitnessitemObject);
		bitcoinTransaction = new BitcoinTransaction(marker, flag, version,inCounter,listOfInputsArray,outCounter,listOfOutputsArray,listOfScriptWitnessitemArray,locktime);

	}
	byte[] transactionHash=null;
	try {
		 transactionHash = BitcoinUtil.getTransactionHashSegwit(bitcoinTransaction);
	}  catch (IOException ioe) {
		LOG.error(ioe);
		throw new HiveException(ioe.toString());
	}
	return new BytesWritable(transactionHash);
}
 
Example 19
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0
private static boolean searchAllFields(final ObjectInspector rootOI,
                                       final String name,
                                       final int[] childCounts,
                                       SearchResult position
) {
  Category category = rootOI.getCategory();
  if (category == Category.STRUCT) {
    position.index++; // first child is immediately next to parent
    StructObjectInspector sOi = (StructObjectInspector) rootOI;
    for (StructField sf : sOi.getAllStructFieldRefs()) {
      // We rely on the caller invoking this method once per segment of the
      // selected column path, so the searched field should always be an
      // immediate child.
      if (sf.getFieldName().equalsIgnoreCase(name)) {
        position.oI = sf.getFieldObjectInspector();
        return true;
      } else {
        if (position.index >= childCounts.length) {
          return false;
        }
        position.index += childCounts[position.index];
      }
    }
  } else if (category == Category.MAP) {
    position.index++; // first child is immediately next to parent
    if (name.equalsIgnoreCase(HiveUtilities.MAP_KEY_FIELD_NAME)) {
      ObjectInspector kOi = ((MapObjectInspector) rootOI).getMapKeyObjectInspector();
      position.oI = kOi;
      return true;
    }
    if (position.index >= childCounts.length) {
      return false;
    }
    position.index += childCounts[position.index];
    if (name.equalsIgnoreCase(HiveUtilities.MAP_VALUE_FIELD_NAME)) {
      ObjectInspector vOi = ((MapObjectInspector) rootOI).getMapValueObjectInspector();
      position.oI = vOi;
      return true;
    }
  }
  return false;
}
 
Example 20
Source File: HiveORCVectorizedReader.java    From dremio-oss with Apache License 2.0
private static boolean searchAllFields(final ObjectInspector rootOI,
                                       final String name,
                                       final int[] childCounts,
                                       SearchResult position
                                       ) {
  Category category = rootOI.getCategory();
  if (category == Category.STRUCT) {
    position.index++; // first child is immediately next to parent
    StructObjectInspector sOi = (StructObjectInspector) rootOI;
    for (StructField sf : sOi.getAllStructFieldRefs()) {
      // We rely on the caller invoking this method once per segment of the
      // selected column path, so the searched field should always be an
      // immediate child.
      if (position.index >= childCounts.length) {
        // input schema has more columns than what reader can read
        return false;
      }
      if (sf.getFieldName().equalsIgnoreCase(name)) {
        position.oI = sf.getFieldObjectInspector();
        return true;
      } else {
        position.index += childCounts[position.index];
      }
    }
  } else if (category == Category.MAP) {
    position.index++; // first child is immediately next to parent
    if (position.index >= childCounts.length) {
      // input schema has more columns than what reader can read
      return false;
    }
    if (name.equalsIgnoreCase(HiveUtilities.MAP_KEY_FIELD_NAME)) {
      ObjectInspector kOi = ((MapObjectInspector) rootOI).getMapKeyObjectInspector();
      position.oI = kOi;
      return true;
    }
    position.index += childCounts[position.index];
    if (position.index >= childCounts.length) {
      // input schema has more columns than what reader can read
      return false;
    }
    if (name.equalsIgnoreCase(HiveUtilities.MAP_VALUE_FIELD_NAME)) {
      ObjectInspector vOi = ((MapObjectInspector) rootOI).getMapValueObjectInspector();
      position.oI = vOi;
      return true;
    }
  }
  return false;
}