Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldData()

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector#getStructFieldData(). Each example is taken from an open-source project; the source file and license are listed above each snippet.
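Every snippet below follows the same core pattern: obtain the row's StructObjectInspector, look up a StructField with getStructFieldRef() (or getAllStructFieldRefs()), then extract the raw column value with getStructFieldData(). As a quick orientation before the project examples, here is a minimal sketch of that pattern; the SerDe choice, column names, and input record are hypothetical stand-ins, and imports are omitted as in the examples below.

// Minimal sketch (not from any of the projects below). Assumes a
// LazySimpleSerDe with its default Ctrl-A field delimiter.
AbstractSerDe serde = new LazySimpleSerDe();
Properties tbl = new Properties();
tbl.setProperty(serdeConstants.LIST_COLUMNS, "id,name");
tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, "int,string");
serde.initialize(new Configuration(), tbl);

StructObjectInspector rowOI = (StructObjectInspector) serde.getObjectInspector();
Object row = serde.deserialize(new Text("7\u0001alice"));

// Resolve the field reference once; it can be reused for every row.
StructField idField = rowOI.getStructFieldRef("id");
Object idData = rowOI.getStructFieldData(row, idField);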
Example 1
Source File: TestGeoJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestDateParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new GeoJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    value.set("{\"properties\":{\"when\":\"2020-02-20\"}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2020-02-20", ((DateWritable) fieldData).get().toString());

    value.set("{\"properties\":{\"when\":\"2017-05-05\"}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2017-05-05", ((DateWritable) fieldData).get().toString());
}
 
Example 2
Source File: TestEsriJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestDateParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    value.set("{\"attributes\":{\"when\":\"2020-02-20\"}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2020-02-20", ((DateWritable) fieldData).get().toString());

    value.set("{\"attributes\":{\"when\":\"2017-05-05\"}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals("2017-05-05", ((DateWritable) fieldData).get().toString());
}
 
Example 3
Source File: TestEsriJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestIntParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "num");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "int");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    //value.set("{\"attributes\":{\"num\":7},\"geometry\":null}");
    value.set("{\"attributes\":{\"num\":7}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("num");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(7, ((IntWritable) fieldData).get());

    value.set("{\"attributes\":{\"num\":9}}");
    row = jserde.deserialize(value);
    f0 = rowOI.getStructFieldRef("num");
    fieldData = rowOI.getStructFieldData(row, f0);
    Assert.assertEquals(9, ((IntWritable) fieldData).get());
}
 
Example 4
Source File: IndexRSerde.java    From indexr with Apache License 2.0
@Override
public Writable serialize(Object obj, ObjectInspector objectInspector) throws SerDeException {
    if (!objectInspector.getCategory().equals(ObjectInspector.Category.STRUCT)) {
        throw new SerDeException("Cannot serialize " + objectInspector.getCategory() + ". Can only serialize a struct");
    }

    StructObjectInspector inspector = (StructObjectInspector) objectInspector;
    List<? extends StructField> fields = inspector.getAllStructFieldRefs();
    Writable[] arr = new Writable[fields.size()];
    for (int i = 0; i < fields.size(); i++) {
        StructField field = fields.get(i);
        Object subObj = inspector.getStructFieldData(obj, field);
        ObjectInspector subInspector = field.getFieldObjectInspector();
        arr[i] = createPrimitive(subObj, (PrimitiveObjectInspector) subInspector);
    }
    serdeSize = arr.length;
    return new ArrayWritable(Writable.class, arr);
}
 
Example 5
Source File: TestEsriJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestEpochParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new EsriJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    value.set("{\"attributes\":{\"when\":147147147147}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(147147147147L, ((DateWritable)fieldData).get().getTime());
    Assert.assertEquals(new java.sql.Date(147147147147L).toString(),
            ((DateWritable) fieldData).get().toString());

    value.set("{\"attributes\":{\"when\":142857142857}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(142857142857L, ((DateWritable)fieldData).get());
    Assert.assertEquals(new java.sql.Date(142857142857L).toString(),
            ((DateWritable) fieldData).get().toString());
}
 
Example 6
Source File: TestGeoJsonSerDe.java    From spatial-framework-for-hadoop with Apache License 2.0
@Test
public void TestEpochParse() throws Exception {
    Configuration config = new Configuration();
    Text value = new Text();

    AbstractSerDe jserde = new GeoJsonSerDe();
    Properties proptab = new Properties();
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMNS, "when");
    proptab.setProperty(HiveShims.serdeConstants.LIST_COLUMN_TYPES, "date");
    jserde.initialize(config, proptab);
    StructObjectInspector rowOI = (StructObjectInspector) jserde.getObjectInspector();

    value.set("{\"properties\":{\"when\":147147147147}}");
    Object row = jserde.deserialize(value);
    StructField f0 = rowOI.getStructFieldRef("when");
    Object fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(147147147147L, ((DateWritable)fieldData).get().getTime());
    Assert.assertEquals(new java.sql.Date(147147147147L).toString(),
            ((DateWritable) fieldData).get().toString());

    value.set("{\"properties\":{\"when\":142857142857}}");
    row = jserde.deserialize(value);
    fieldData = rowOI.getStructFieldData(row, f0);
    //Assert.assertEquals(142857142857L, ((DateWritable)fieldData).get());
    Assert.assertEquals(new java.sql.Date(142857142857L).toString(),
            ((DateWritable) fieldData).get().toString());
}
 
Example 7
Source File: JdbcSerDe.java    From HiveJdbcStorageHandler with Apache License 2.0
/**
 * This method takes an object representing a row of data from Hive, and uses
 * the ObjectInspector to get the data for each column and serialize it.
 */
@Override
public DbRecordWritable serialize(Object row, ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if(fields.size() != fieldCount) {
        throw new SerDeException(String.format("Required %d columns, received %d.", fieldCount, fields.size()));
    }

    cachedWritable.clear();

    for(int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if(structField != null) {
            Object field = structInspector.getStructFieldData(row, structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();
            Object javaObject = HiveJdbcBridgeUtils.deparseObject(field, fieldOI);
            cachedWritable.set(i, javaObject);
        }
    }

    return cachedWritable;
}
 
Example 8
Source File: OrcTester.java    From presto with Apache License 2.0
private static void assertFileContentsOrcHive(
        Type type,
        TempFile tempFile,
        Iterable<?> expectedValues)
        throws Exception
{
    JobConf configuration = new JobConf(new Configuration(false));
    configuration.set(READ_COLUMN_IDS_CONF_STR, "0");
    configuration.setBoolean(READ_ALL_COLUMNS, false);

    Reader reader = OrcFile.createReader(
            new Path(tempFile.getFile().getAbsolutePath()),
            new ReaderOptions(configuration));
    RecordReader recordReader = reader.rows();

    StructObjectInspector rowInspector = (StructObjectInspector) reader.getObjectInspector();
    StructField field = rowInspector.getStructFieldRef("test");

    Iterator<?> iterator = expectedValues.iterator();
    Object rowData = null;
    while (recordReader.hasNext()) {
        rowData = recordReader.next(rowData);
        Object expectedValue = iterator.next();

        Object actualValue = rowInspector.getStructFieldData(rowData, field);
        actualValue = decodeRecordReaderValue(type, actualValue);
        assertColumnValueEquals(type, actualValue, expectedValue);
    }
    assertFalse(iterator.hasNext());
}
 
Example 9
Source File: BlurSerializer.java    From incubator-retired-blur with Apache License 2.0
private String getFieldData(String columnName, Object data, StructObjectInspector structObjectInspector,
    Map<String, StructField> allStructFieldRefs, String name) throws SerDeException {
  StructField structField = allStructFieldRefs.get(name);
  ObjectInspector fieldObjectInspector = structField.getFieldObjectInspector();
  Object structFieldData = structObjectInspector.getStructFieldData(data, structField);
  if (fieldObjectInspector instanceof PrimitiveObjectInspector) {
    return toString(columnName, structFieldData, (PrimitiveObjectInspector) fieldObjectInspector);
  } else {
    throw new SerDeException("Embedded non-primitive type is not supported columnName [" + columnName
        + "] objectInspector [" + fieldObjectInspector + "].");
  }
}
 
Example 10
Source File: HiveKuduSerDe.java    From HiveKudu-Handler with Apache License 2.0
@Override
public HiveKuduWritable serialize(Object row, ObjectInspector inspector)
    throws SerDeException {

    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != fieldCount) {
        throw new SerDeException(String.format(
                "Required %d columns, received %d.", fieldCount,
                fields.size()));
    }

    cachedWritable.clear();

    for (int i = 0; i < fieldCount; i++) {
        StructField structField = fields.get(i);
        if (structField != null) {
            Object field = structInspector.getStructFieldData(row,
                    structField);
            ObjectInspector fieldOI = structField.getFieldObjectInspector();

            Object javaObject = HiveKuduBridgeUtils.deparseObject(field,
                    fieldOI);
            LOG.warn("Column value of " + i + " is " + javaObject.toString());
            cachedWritable.set(i, javaObject);
        }
    }
    return cachedWritable;
}
 
Example 11
Source File: TestAzureEntitySerDe.java    From azure-tables-hadoop with Apache License 2.0
@SuppressWarnings("serial")
public void testPropertyNotFound(boolean requireFieldExists) throws Exception {
	WritableEntity entity = new WritableEntity();
	entity.setProperties(new LinkedHashMap<String, EntityProperty>() {{
		put("a", new EntityProperty(7));
		put("b", new EntityProperty("hello"));
	}});
	AzureEntitySerDe serDe = new AzureEntitySerDe();
	Properties tbl = new Properties();
	tbl.put(LIST_COLUMNS, "a,b,c");
	tbl.put(LIST_COLUMN_TYPES, "int,string,int");
	Configuration conf = new Configuration();
	if (requireFieldExists) {
		conf.setBoolean(Keys.REQUIRE_FIELD_EXISTS.getKey(), true);
	}
	serDe.initialize(conf, tbl);
	StructObjectInspector inspector = (StructObjectInspector)serDe.getObjectInspector();
	assertEquals(7, inspector.getStructFieldData(entity,
			inspector.getStructFieldRef("a")));
	try {
		Object returned = inspector.getStructFieldData(entity,
				inspector.getStructFieldRef("c"));
		if (requireFieldExists) {
			fail("Should've thrown here.");
		} else {
			assertNull(returned);
		}
	} catch (IllegalArgumentException ex) {
		if (requireFieldExists) {
			assertEquals("No property found with name c. Properties found: a,b",
					ex.getMessage());
		} else {
			throw ex;
		}
	}
}
 
Example 12
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);

    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);

    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
            "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should also be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
 
Example 13
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {

    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};

    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " +
            "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());

    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
 
Example 14
Source File: JsonSerDeTestingBase.java    From spatial-framework-for-hadoop with Apache License 2.0
protected Object getField(String col, Object row, StructObjectInspector rowOI) {
	StructField f0 = rowOI.getStructFieldRef(col);
	return rowOI.getStructFieldData(row, f0);
}
 
Example 15
Source File: SMSerDe.java    From spliceengine with GNU Affero General Public License v3.0
/**
   * This method takes an object representing a row of data from Hive, and
   * uses the ObjectInspector to get the data for each column and serialize
   * it.
   */
  //@Override
  public Writable serialize(Object obj, ObjectInspector oi)
          throws SerDeException {
      ExecRow row = null;
      int[] execRowFormatIds = null;
      try {
          List<NameType> nameTypes = sqlUtil.getTableStructure(tableName);
          execRowFormatIds = sqlUtil.getExecRowFormatIds(colNames, nameTypes);
          row = sqlUtil.getExecRow(execRowFormatIds);
          if (row == null)
              throw new SerDeException("ExecRow Cannot be Null");
      } catch (SQLException | StandardException | IOException e1) {
          throw new SerDeException(e1);
      }
      if (Log.isTraceEnabled())
          SpliceLogUtils.trace(Log, "serialize with obj=%s, oi=%s", obj, oi);
      if (oi.getCategory() != ObjectInspector.Category.STRUCT) {
          throw new SerDeException(getClass().toString()
                  + " can only serialize struct types, but we got: "
                  + oi.getTypeName());
      }

      StructObjectInspector soi = (StructObjectInspector) oi;
      List<? extends StructField> fields = soi.getAllStructFieldRefs();

      try {

          DataValueDescriptor dvd;
          for (int i = 0; i < fields.size(); i++) {
              StructField field = fields.get(i);
              dvd = row.getColumn(i+1);
              ObjectInspector fieldOI = field.getFieldObjectInspector();
              Object fieldObj = soi.getStructFieldData(obj, field);
              PrimitiveObjectInspector primOI = (PrimitiveObjectInspector) fieldOI;
              Object data = primOI.getPrimitiveJavaObject(fieldObj);

              PrimitiveCategory primitiveCategory = primOI.getPrimitiveCategory();
              switch (primitiveCategory) {
                  case BYTE:
                      dvd.setValue(((Byte) data).byteValue());
                      break;
                  case INT:
                      dvd.setValue(((Integer) data).intValue());
                      break;
                  case VARCHAR:
                      dvd.setValue(((HiveVarchar)data).getValue());
                      break;
                  case CHAR:
                      dvd.setValue(((HiveChar)data).getValue());
                      break;
                  case STRING:
                      dvd.setValue((String) data);
                      break;
                  case BINARY:
                      dvd.setValue((SerializationUtils.serialize((Serializable) data))); // is this right?  Should just be a byte[]
                      break;
                  case BOOLEAN:
                      dvd.setValue(((Boolean) data).booleanValue());
                      break;
                  case DECIMAL:
                      dvd.setValue(((HiveDecimal) data).doubleValue());
                      break;
                  case DOUBLE:
                      dvd.setValue(((Double) data).doubleValue());
                      break;
                  case FLOAT:
                      dvd.setValue(((Float) data).floatValue());
                      break;
                  case LONG:
                      dvd.setValue(((Long) data).longValue());
                      break;
                  case SHORT:
                      dvd.setValue(((Short) data).shortValue());
                      break;
                  case TIMESTAMP:
                      dvd.setValue((Timestamp) data);
                      break;
                  case DATE:
                      dvd.setValue((java.sql.Date) data);
                      break;
                  default:
                      throw new SerDeException(String.format("Hive Type %s Not Supported Yet",primOI.getPrimitiveCategory()));
              }
          }

      } catch (StandardException e) {
          throw new RuntimeException("Serialized Object To Java Type Error", e);
      }
      ExecRowWritable rowWritable = new ExecRowWritable(WriteReadUtils.getExecRowFromTypeFormatIds(execRowFormatIds));
      rowWritable.set(row);
      return rowWritable;
  }
 
Example 16
Source File: ExcelSpreadSheetCellDAOSerde.java    From hadoopoffice with Apache License 2.0
/** 
 * Writes a row in Hive containing exactly 5 elements (String) to a SpreadSheetCellDAO. Order: "formattedValue","comment","formula","address","sheetName"
 * 
 */
@Override
public Writable serialize(Object arg0, ObjectInspector arg1) throws SerDeException {
	if (!(arg1 instanceof StructObjectInspector)) {
		throw new SerDeException("Expect a row of Strings for serialization");
	}
	final StructObjectInspector outputOI = (StructObjectInspector) arg1;
	final List<? extends StructField> outputFields = outputOI.getAllStructFieldRefs();
	if (outputFields.size()!=ExcelSpreadSheetCellDAOSerde.columnNames.length) {
		throw new SerDeException("Expected "+ExcelSpreadSheetCellDAOSerde.columnNames.length+" fields characterizing a cell: \"formattedValue\",\"comment\",\"formula\",\"address\",\"sheetName\", but found "+outputFields.size()+" fields");
	}
	if (arg0==null) {
		return null;
	}
	// get field data
	// formattedValue
	int columnNum=0;
	final Object foFormattedValue = outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
	final ObjectInspector oiFormattedValue = outputFields.get(columnNum).getFieldObjectInspector();
	String formattedValue = String.valueOf(((PrimitiveObjectInspector) oiFormattedValue).getPrimitiveJavaObject(foFormattedValue));
	// comment
	columnNum=1;
	final Object foComment= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
	final ObjectInspector oiComment = outputFields.get(columnNum).getFieldObjectInspector();
	String comment = String.valueOf(((PrimitiveObjectInspector) oiComment).getPrimitiveJavaObject(foComment));
	// formula
	columnNum=2;
	final Object foFormula= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
	final ObjectInspector oiFormula = outputFields.get(columnNum).getFieldObjectInspector();
	String formula = String.valueOf(((PrimitiveObjectInspector) oiFormula).getPrimitiveJavaObject(foFormula));
	// address
	columnNum=3;
	final Object foAddress= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
	final ObjectInspector oiAddress = outputFields.get(columnNum).getFieldObjectInspector();
	String address = String.valueOf(((PrimitiveObjectInspector) oiAddress).getPrimitiveJavaObject(foAddress));
	// sheetName
	columnNum=4;
	final Object foSheetName= outputOI.getStructFieldData(arg0, outputFields.get(columnNum));
	final ObjectInspector oiSheetName = outputFields.get(columnNum).getFieldObjectInspector();
	String sheetName = String.valueOf(((PrimitiveObjectInspector) oiSheetName).getPrimitiveJavaObject(foSheetName));
	return new SpreadSheetCellDAO(formattedValue,comment,formula,address,sheetName);
}
 
Example 17
Source File: HiveTextReader.java    From dremio-oss with Apache License 2.0
@Override
public int populateData() throws IOException, SerDeException {
  final SkipRecordsInspector skipRecordsInspector = this.skipRecordsInspector;
  final RecordReader<Object, Object> reader = this.reader;
  final Converter partTblObjectInspectorConverter = this.partTblObjectInspectorConverter;
  final Object key = this.key;

  final int numRowsPerBatch = (int) this.numRowsPerBatch;

  final StructField[] selectedStructFieldRefs = this.selectedStructFieldRefs;
  final AbstractSerDe partitionSerDe = this.partitionSerDe;
  final StructObjectInspector finalOI = this.finalOI;
  final ObjectInspector[] selectedColumnObjInspectors = this.selectedColumnObjInspectors;
  final HiveFieldConverter[] selectedColumnFieldConverters = this.selectedColumnFieldConverters;
  final ValueVector[] vectors = this.vectors;

  skipRecordsInspector.reset();
  Object value;

  int recordCount = 0;

  while (recordCount < numRowsPerBatch) {
    try (OperatorStats.WaitRecorder recorder = OperatorStats.getWaitRecorder(this.context.getStats())) {
      boolean hasNext = reader.next(key, value = skipRecordsInspector.getNextValue());
      if (!hasNext) {
        break;
      }
    }
    catch(FSError e) {
      throw HadoopFileSystemWrapper.propagateFSError(e);
    }
    if (skipRecordsInspector.doSkipHeader(recordCount++)) {
      continue;
    }
    Object bufferedValue = skipRecordsInspector.bufferAdd(value);
    if (bufferedValue != null) {
      Object deSerializedValue = partitionSerDe.deserialize((Writable) bufferedValue);
      if (partTblObjectInspectorConverter != null) {
        deSerializedValue = partTblObjectInspectorConverter.convert(deSerializedValue);
      }

      for (int i = 0; i < selectedStructFieldRefs.length; i++) {
        Object hiveValue = finalOI.getStructFieldData(deSerializedValue, selectedStructFieldRefs[i]);
        if (hiveValue != null) {
          selectedColumnFieldConverters[i].setSafeValue(selectedColumnObjInspectors[i], hiveValue, vectors[i], skipRecordsInspector.getActualCount());
        }
      }
      skipRecordsInspector.incrementActualCount();
    }
    skipRecordsInspector.incrementTempCount();
  }
  for (int i = 0; i < selectedStructFieldRefs.length; i++) {
    vectors[i].setValueCount(skipRecordsInspector.getActualCount());
  }

  skipRecordsInspector.updateContinuance();
  return skipRecordsInspector.getActualCount();
}
 
Example 18
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);

    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);

    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
            "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should also be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
 
Example 19
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {

    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};

    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " +
            "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());

    // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are writables (in this case Text)
    // and so are the values (DoubleWritables in this case).
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
 
Example 20
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_primitive_record() throws Exception {
    GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    // Put another record in
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
    fileWriter.append(record);
    // And one more
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
    assertTrue(stringFieldObject instanceof Text);
    assertEquals("World", stringFieldObject.toString());

}