org.apache.hadoop.hive.ql.io.orc.OrcStruct Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.io.orc.OrcStruct. Each example is drawn from an open-source project; the source file, project, and license are noted above it.
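Most of the examples below share one basic pattern: open a Reader via OrcFile.createReader, iterate rows as OrcStruct instances, and extract field values through a StructObjectInspector obtained from OrcStruct.createObjectInspector. Here is a minimal sketch of that pattern; the file path and schema string are placeholders, not taken from any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

public class OrcStructReadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Reader reader = OrcFile.createReader(new Path("example.orc"), OrcFile.readerOptions(conf));

        // Build an inspector matching the file's schema (placeholder type string).
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,age:int>");
        StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(typeInfo);

        RecordReader rows = reader.rows();
        Object row = null;
        while (rows.hasNext()) {
            row = rows.next(row); // each row is materialized as an OrcStruct
            // Field values come back as Hadoop Writables (Text, IntWritable, ...).
            System.out.println(inspector.getStructFieldsDataAsList(row));
        }
        rows.close();
    }
}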
Example #1
Source File: OrcTestTools.java    From incubator-gobblin with Apache License 2.0
/**
 * Since the fields of {@link OrcStruct} are not easily accessible, this method uses
 * reflection to bypass the access modifier; it is intended for integration tests only.
 * @param realRow A row containing a list of Java objects.
 * @param struct An {@link OrcStruct}, which is essentially a list of {@link Writable} objects.
 */
private boolean compareJavaRowAndOrcStruct(Object realRow, OrcStruct struct) {
  boolean isIdentical = true;
  ArrayList<Object> javaObjRow = (ArrayList<Object>) realRow;

  try {
    Field objectArr = OrcStruct.class.getDeclaredField("fields");
    objectArr.setAccessible(true);
    Object[] dataArr = (Object[]) objectArr.get(struct);

    int index = 0;
    for (Object dataField : dataArr) {
      if (dataField instanceof OrcStruct) {
        isIdentical = isIdentical && compareJavaRowAndOrcStruct(javaObjRow.get(index), (OrcStruct) dataField);
      } else {
        isIdentical = isIdentical && objCastHelper(javaObjRow.get(index), (Writable) dataField);
      }
      index++;
    }
  } catch (NoSuchFieldException | IllegalAccessException e) {
    throw new RuntimeException("Failed to compare a Java object row and an OrcStruct", e);
  }

  return isIdentical;
}
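The objCastHelper method called above is not shown on this page. A hypothetical sketch of such a helper, comparing a plain Java value against its Writable counterpart (the actual incubator-gobblin implementation may differ):

// Hypothetical sketch only, not the actual incubator-gobblin helper.
private boolean objCastHelper(Object javaObj, Writable writable) {
  if (writable instanceof IntWritable) {
    return javaObj.equals(((IntWritable) writable).get());
  } else if (writable instanceof LongWritable) {
    return javaObj.equals(((LongWritable) writable).get());
  } else if (writable instanceof Text) {
    return writable.toString().equals(javaObj);
  }
  // Fall back to string comparison for other Writable types.
  return String.valueOf(writable).equals(String.valueOf(javaObj));
}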
 
Example #2
Source File: OrcTestTools.java    From incubator-gobblin with Apache License 2.0
/**
 * Checks that two sets of ORC files contain the same files and records.
 * Note that this comparison can be order-sensitive; when drafting an ORC integration
 * test, give all JSON files distinct names.
 * @param expected Expected files, keyed by file name, each with an iterator over its rows.
 * @param observed Observed files, keyed by file name, each with an iterator over its rows.
 * @param allowDifferentOrder Not currently used by the ORC tools.
 * @param blacklistRecordFields Not currently used by the ORC tools.
 * @param allowDifferentSchema Not referenced in this implementation; TypeInfo equality is always enforced.
 * @return Whether the two sets of files are identical.
 */
@Override
public boolean checkSameFilesAndRecords(TreeMap<String, OrcRowIterator> expected,
    TreeMap<String, OrcRowIterator> observed, boolean allowDifferentOrder, Collection<String> blacklistRecordFields,
    boolean allowDifferentSchema) {
  Iterator<String> keys1 = expected.navigableKeySet().iterator();
  Iterator<String> keys2 = observed.navigableKeySet().iterator();

  return compareIterators(keys1, keys2, (key1, key2) -> {
    // ORC files do not have an extension, per LinkedIn's convention.
    if (!removeExtension(key1).equals(key2)) {
      log.error(String.format("Mismatched files: %s and %s", key1, key2));
      return false;
    }

    OrcRowIterator it1 = expected.get(key1);
    OrcRowIterator it2 = observed.get(key2);

    if (!it1.getTypeInfo().equals(it2.getTypeInfo())) {
      log.error(String.format("Mismatched Typeinfo: %s and %s", key1, key2));
      return false;
    }

    boolean result = true;
    while (it1.hasNext()) {
      if (!it2.hasNext() || !result) {
        return false;
      }
      result = compareJavaRowAndOrcStruct(((AvroRow) it1.next()).getRow(), (OrcStruct) it2.next());
    }
    return result;
  });
}
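compareIterators is likewise a helper that is not shown here. A plausible sketch (hypothetical, not the actual Gobblin code) walks both iterators in lock-step and requires them to be exhausted together:

// Hypothetical sketch only; the real helper may differ.
private static <T> boolean compareIterators(Iterator<T> it1, Iterator<T> it2,
    BiFunction<T, T, Boolean> comparer) {
  while (it1.hasNext() && it2.hasNext()) {
    if (!comparer.apply(it1.next(), it2.next())) {
      return false;
    }
  }
  // Identical only if both iterators finish at the same time.
  return !it1.hasNext() && !it2.hasNext();
}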
 
Example #3
Source File: OrcStorage.java    From spork with Apache License 2.0
@Override
public void setLocation(String location, Job job) throws IOException {
    Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
    if (!UDFContext.getUDFContext().isFrontend()) {
        typeInfo = (TypeInfo)ObjectSerializer.deserialize(p.getProperty(signature + SchemaSignatureSuffix));
    } else if (typeInfo == null) {
        typeInfo = getTypeInfo(location, job);
    }
    if (typeInfo != null && oi == null) {
        oi = OrcStruct.createObjectInspector(typeInfo);
    }
    if (!UDFContext.getUDFContext().isFrontend()) {
        if (p.getProperty(signature + RequiredColumnsSuffix) != null) {
            mRequiredColumns = (boolean[]) ObjectSerializer.deserialize(p
                    .getProperty(signature + RequiredColumnsSuffix));
            job.getConfiguration().setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false);
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR,
                    getReqiredColumnIdString(mRequiredColumns));
            if (p.getProperty(signature + SearchArgsSuffix) != null) {
                // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
                job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                        getReqiredColumnNamesString(getSchema(location, job), mRequiredColumns));
            }
        } else if (p.getProperty(signature + SearchArgsSuffix) != null) {
            // Bug in setSearchArgument which always expects READ_COLUMN_NAMES_CONF_STR to be set
            job.getConfiguration().set(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR,
                    getReqiredColumnNamesString(getSchema(location, job)));
        }
        if (p.getProperty(signature + SearchArgsSuffix) != null) {
            job.getConfiguration().set(SARG_PUSHDOWN, p.getProperty(signature + SearchArgsSuffix));
        }

    }
    FileInputFormat.setInputPaths(job, location);
}
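The non-frontend branch above deserializes a TypeInfo that must have been stored during planning. A hypothetical sketch of the frontend-side counterpart, reusing the names from the example (not necessarily how Pig's OrcStorage actually does it):

// Hypothetical frontend-side counterpart: stash the schema in the UDF properties
// so backend tasks can restore it without re-reading the file footer.
Properties p = UDFContext.getUDFContext().getUDFProperties(this.getClass());
p.setProperty(signature + SchemaSignatureSuffix, ObjectSerializer.serialize(typeInfo));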
 
Example #4
Source File: PutORCTest.java    From nifi with Apache License 2.0
private void verifyORCUsers(final Path orcUsers, final int numExpectedUsers, BiFunction<List<Object>, Integer, Void> assertFunction) throws IOException {
    Reader reader = OrcFile.createReader(orcUsers, OrcFile.readerOptions(testConf));
    RecordReader recordReader = reader.rows();

    TypeInfo typeInfo =
            TypeInfoUtils.getTypeInfoFromTypeString("struct<name:string,favorite_number:int,favorite_color:string,scale:double>");
    StructObjectInspector inspector = (StructObjectInspector)
            OrcStruct.createObjectInspector(typeInfo);

    int currUser = 0;
    Object nextRecord = null;
    while ((nextRecord = recordReader.next(nextRecord)) != null) {
        Assert.assertNotNull(nextRecord);
        Assert.assertTrue("Not an OrcStruct", nextRecord instanceof OrcStruct);
        List<Object> x = inspector.getStructFieldsDataAsList(nextRecord);

        if (assertFunction == null) {
            assertEquals("name" + currUser, x.get(0).toString());
            assertEquals(currUser, ((IntWritable) x.get(1)).get());
            assertEquals("blue" + currUser, x.get(2).toString());
            assertEquals(10.0 * currUser, ((DoubleWritable) x.get(3)).get(), Double.MIN_VALUE);
        } else {
            assertFunction.apply(x, currUser);
        }
        currUser++;
    }

    assertEquals(numExpectedUsers, currUser);
}
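A hypothetical call site for this helper, supplying a custom assertion function instead of the built-in checks (the path and expected values are illustrative only):

// Hypothetical usage of verifyORCUsers with a custom assertion function.
verifyORCUsers(new Path("target/users.orc"), 3, (fields, userIndex) -> {
    assertEquals("name" + userIndex, fields.get(0).toString());
    assertEquals("blue" + userIndex, fields.get(2).toString());
    return null; // the BiFunction<List<Object>, Integer, Void> must return a value
});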
 
Example #5
Source File: OrcTester.java    From presto with Apache License 2.0
private static Object decodeRecordReaderValue(Type type, Object actualValue)
{
    if (actualValue instanceof BooleanWritable) {
        actualValue = ((BooleanWritable) actualValue).get();
    }
    else if (actualValue instanceof ByteWritable) {
        actualValue = ((ByteWritable) actualValue).get();
    }
    else if (actualValue instanceof BytesWritable) {
        actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes());
    }
    else if (actualValue instanceof DateWritable) {
        actualValue = new SqlDate(((DateWritable) actualValue).getDays());
    }
    else if (actualValue instanceof DoubleWritable) {
        actualValue = ((DoubleWritable) actualValue).get();
    }
    else if (actualValue instanceof FloatWritable) {
        actualValue = ((FloatWritable) actualValue).get();
    }
    else if (actualValue instanceof IntWritable) {
        actualValue = ((IntWritable) actualValue).get();
    }
    else if (actualValue instanceof HiveCharWritable) {
        actualValue = ((HiveCharWritable) actualValue).getPaddedValue().toString();
    }
    else if (actualValue instanceof LongWritable) {
        actualValue = ((LongWritable) actualValue).get();
    }
    else if (actualValue instanceof ShortWritable) {
        actualValue = ((ShortWritable) actualValue).get();
    }
    else if (actualValue instanceof HiveDecimalWritable) {
        DecimalType decimalType = (DecimalType) type;
        HiveDecimalWritable writable = (HiveDecimalWritable) actualValue;
        // writable messes with the scale so rescale the values to the Presto type
        BigInteger rescaledValue = rescale(writable.getHiveDecimal().unscaledValue(), writable.getScale(), decimalType.getScale());
        actualValue = new SqlDecimal(rescaledValue, decimalType.getPrecision(), decimalType.getScale());
    }
    else if (actualValue instanceof Text) {
        actualValue = actualValue.toString();
    }
    else if (actualValue instanceof TimestampWritable) {
        TimestampWritable timestamp = (TimestampWritable) actualValue;
        actualValue = sqlTimestampOf((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), SESSION);
    }
    else if (actualValue instanceof OrcStruct) {
        List<Object> fields = new ArrayList<>();
        OrcStruct structObject = (OrcStruct) actualValue;
        for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) {
            fields.add(OrcUtil.getFieldValue(structObject, fieldId));
        }
        actualValue = decodeRecordReaderStruct(type, fields);
    }
    else if (actualValue instanceof List) {
        actualValue = decodeRecordReaderList(type, ((List<?>) actualValue));
    }
    else if (actualValue instanceof Map) {
        actualValue = decodeRecordReaderMap(type, (Map<?, ?>) actualValue);
    }
    return actualValue;
}
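To illustrate the unwrapping this method performs, two hypothetical calls (INTEGER and VARCHAR are assumed to be Presto type constants in scope; they are not shown in OrcTester above):

// Hypothetical usage; the type constants are assumptions.
Object i = decodeRecordReaderValue(INTEGER, new IntWritable(42)); // -> 42 (Integer)
Object s = decodeRecordReaderValue(VARCHAR, new Text("hello"));   // -> "hello" (String)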
 
Example #6
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_primitive_record() throws Exception {
    GenericData.Record record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(10, 20L, true, 30.0f, 40, StandardCharsets.UTF_8.encode("Hello"), "World");

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);
    // Put another record in
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(1, 2L, false, 3.0f, 4L, StandardCharsets.UTF_8.encode("I am"), "another record");
    fileWriter.append(record);
    // And one more
    record = TestNiFiOrcUtils.buildPrimitiveAvroRecord(100, 200L, true, 300.0f, 400L, StandardCharsets.UTF_8.encode("Me"), "too!");
    fileWriter.append(record);
    fileWriter.flush();
    fileWriter.close();
    out.close();
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test.avro");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS test_record (int INT, long BIGINT, boolean BOOLEAN, float FLOAT, double DOUBLE, bytes BINARY, string STRING)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("3", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildPrimitiveOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("int"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());
    Object stringFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("string"));
    assertTrue(stringFieldObject instanceof Text);
    assertEquals("World", stringFieldObject.toString());

}
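These NiFi tests depend on a runner field initialized elsewhere in the test class. A typical setup, assumed here since it is not shown on this page:

// Assumed @Before setup; the actual test class may configure additional properties.
@Before
public void setUp() {
    runner = TestRunners.newTestRunner(new ConvertAvroToORC());
}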
 
Example #7
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_complex_record() throws Exception {

    Map<String, Double> mapData1 = new TreeMap<String, Double>() {{
        put("key1", 1.0);
        put("key2", 2.0);
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildComplexAvroRecord(10, mapData1, "DEF", 3.0f, Arrays.asList(10, 20));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, Double> mapData2 = new TreeMap<String, Double>() {{
        put("key1", 3.0);
        put("key2", 4.0);
    }};

    record = TestNiFiOrcUtils.buildComplexAvroRecord(null, mapData2, "XYZ", 4L, Arrays.asList(100, 200));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS complex_record " +
            "(myInt INT, myMap MAP<STRING, DOUBLE>, myEnum STRING, myLongOrFloat UNIONTYPE<BIGINT, FLOAT>, myIntList ARRAY<INT>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check some fields in the first row
    Object intFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myInt"));
    assertTrue(intFieldObject instanceof IntWritable);
    assertEquals(10, ((IntWritable) intFieldObject).get());

    // This is pretty awkward and messy. The map object is a Map (not a MapWritable) but the keys are writables (in this case Text)
    // and so are the values (DoubleWritables in this case).
    Object mapFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMap"));
    assertTrue(mapFieldObject instanceof Map);
    Map map = (Map) mapFieldObject;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(1.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);

    mapValue = map.get(new Text("key2"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof DoubleWritable);
    assertEquals(2.0, ((DoubleWritable) mapValue).get(), Double.MIN_VALUE);
}
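As the comment in the test notes, the decoded map has Writable keys and values. A short sketch (not part of the test) of normalizing it into plain Java types:

// Sketch: convert the Writable-keyed map into plain Java types.
Map<String, Double> plainMap = new TreeMap<>();
for (Object entryObj : map.entrySet()) {
    Map.Entry<?, ?> entry = (Map.Entry<?, ?>) entryObj;
    plainMap.put(entry.getKey().toString(), ((DoubleWritable) entry.getValue()).get());
}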
 
Example #8
Source File: TestConvertAvroToORC.java    From localization_nifi with Apache License 2.0
@Test
public void test_onTrigger_array_of_records() throws Exception {
    final Schema schema = new Schema.Parser().parse(new File("src/test/resources/array_of_records.avsc"));
    List<GenericRecord> innerRecords = new LinkedList<>();

    final GenericRecord outerRecord = new GenericData.Record(schema);

    Schema arraySchema = schema.getField("records").schema();
    Schema innerRecordSchema = arraySchema.getElementType();
    final GenericRecord innerRecord1 = new GenericData.Record(innerRecordSchema);
    innerRecord1.put("name", "Joe");
    innerRecord1.put("age", 42);

    innerRecords.add(innerRecord1);

    final GenericRecord innerRecord2 = new GenericData.Record(innerRecordSchema);
    innerRecord2.put("name", "Mary");
    innerRecord2.put("age", 28);

    innerRecords.add(innerRecord2);

    GenericData.Array<GenericRecord> array = new GenericData.Array<>(arraySchema, innerRecords);
    outerRecord.put("records", array);

    final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
        dataFileWriter.create(schema, out);
        dataFileWriter.append(outerRecord);
    }
    out.close();

    // Build a flow file from the Avro record
    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS org_apache_nifi_outer_record " +
            "(records ARRAY<STRUCT<name:STRING, age:INT>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("1", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(schema));

    // Verify the record contains an array
    Object arrayFieldObject = inspector.getStructFieldData(o, inspector.getStructFieldRef("records"));
    assertTrue(arrayFieldObject instanceof ArrayList);
    ArrayList<?> arrayField = (ArrayList<?>) arrayFieldObject;
    assertEquals(2, arrayField.size());

    // Verify the first element. Should be a record with two fields "name" and "age"
    Object element = arrayField.get(0);
    assertTrue(element instanceof OrcStruct);
    StructObjectInspector elementInspector = (StructObjectInspector) OrcStruct.createObjectInspector(NiFiOrcUtils.getOrcField(innerRecordSchema));
    Object nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Joe", nameObject.toString());
    Object ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(42, ((IntWritable) ageObject).get());

    // Verify the second element. Should be a record with two fields "name" and "age"
    element = arrayField.get(1);
    assertTrue(element instanceof OrcStruct);
    nameObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("name"));
    assertTrue(nameObject instanceof Text);
    assertEquals("Mary", nameObject.toString());
    ageObject = elementInspector.getStructFieldData(element, elementInspector.getStructFieldRef("age"));
    assertTrue(ageObject instanceof IntWritable);
    assertEquals(28, ((IntWritable) ageObject).get());
}
 
Example #9
Source File: TestOrcStorage.java    From spork with Apache License 2.0
@SuppressWarnings("rawtypes")
private void compareData(Object expected, Object actual) {
    if (expected instanceof Text) {
        assertEquals(String.class, actual.getClass());
        assertEquals(expected.toString(), actual);
    } else if (expected instanceof ShortWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals((int)((ShortWritable) expected).get(), actual);
    } else if (expected instanceof IntWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals(((IntWritable) expected).get(), actual);
    } else if (expected instanceof LongWritable) {
        assertEquals(Long.class, actual.getClass());
        assertEquals(((LongWritable) expected).get(), actual);
    } else if (expected instanceof FloatWritable) {
        assertEquals(Float.class, actual.getClass());
        assertEquals(((FloatWritable) expected).get(), actual);
    } else if (expected instanceof HiveDecimalWritable) {
        assertEquals(BigDecimal.class, actual.getClass());
        assertEquals(((HiveDecimalWritable) expected).toString(), actual.toString());
    } else if (expected instanceof DoubleWritable) {
        assertEquals(Double.class, actual.getClass());
        assertEquals(((DoubleWritable) expected).get(), actual);
    } else if (expected instanceof BooleanWritable) {
        assertEquals(Boolean.class, actual.getClass());
        assertEquals(((BooleanWritable) expected).get(), actual);
    } else if (expected instanceof TimestampWritable) {
        assertEquals(DateTime.class, actual.getClass());
        assertEquals(((TimestampWritable) expected).getTimestamp().getTime(),
                ((DateTime) actual).getMillis());
    } else if (expected instanceof BytesWritable) {
        assertEquals(DataByteArray.class, actual.getClass());
        BytesWritable bw = (BytesWritable) expected;
        assertEquals(new DataByteArray(bw.getBytes(), 0, bw.getLength()), actual);
    } else if (expected instanceof ByteWritable) {
        assertEquals(Integer.class, actual.getClass());
        assertEquals((int) ((ByteWritable) expected).get(), actual);
    } else if (expected instanceof OrcStruct) {
        assertEquals(BinSedesTuple.class, actual.getClass());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected instanceof ArrayList) {
        assertEquals(DefaultDataBag.class, actual.getClass());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected instanceof HashMap) {
        assertEquals(HashMap.class, actual.getClass());
        assertEquals(((HashMap) expected).size(), ((HashMap) actual).size());
        // TODO: compare actual values. No getters in OrcStruct
    } else if (expected == null) {
        assertEquals(expected, actual);
    } else {
        Assert.fail("Unknown object type: " + expected.getClass().getName());
    }
}
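Two hypothetical calls showing the Writable-to-Pig-type mapping this method asserts:

// Hypothetical usage; Pig materializes ORC columns as plain Java types.
compareData(new IntWritable(7), Integer.valueOf(7)); // IntWritable maps to Integer
compareData(new Text("pig"), "pig");                 // Text maps to String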
 
Example #10
Source File: TestConvertAvroToORC.java    From nifi with Apache License 2.0
@Test
public void test_onTrigger_nested_complex_record() throws Exception {

    Map<String, List<Double>> mapData1 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(1.0, 2.0));
        put("key2", Arrays.asList(3.0, 4.0));
    }};

    Map<String, String> arrayMap11 = new TreeMap<String, String>() {{
        put("key1", "v1");
        put("key2", "v2");
    }};
    Map<String, String> arrayMap12 = new TreeMap<String, String>() {{
        put("key3", "v3");
        put("key4", "v4");
    }};

    GenericData.Record record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData1, Arrays.asList(arrayMap11, arrayMap12));

    DatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(record.getSchema());
    DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(writer);
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    fileWriter.create(record.getSchema(), out);
    fileWriter.append(record);

    // Put another record in
    Map<String, List<Double>> mapData2 = new TreeMap<String, List<Double>>() {{
        put("key1", Arrays.asList(-1.0, -2.0));
        put("key2", Arrays.asList(-3.0, -4.0));
    }};

    Map<String, String> arrayMap21 = new TreeMap<String, String>() {{
        put("key1", "v-1");
        put("key2", "v-2");
    }};
    Map<String, String> arrayMap22 = new TreeMap<String, String>() {{
        put("key3", "v-3");
        put("key4", "v-4");
    }};

    record = TestNiFiOrcUtils.buildNestedComplexAvroRecord(mapData2, Arrays.asList(arrayMap21, arrayMap22));
    fileWriter.append(record);

    fileWriter.flush();
    fileWriter.close();
    out.close();

    Map<String, String> attributes = new HashMap<String, String>() {{
        put(CoreAttributes.FILENAME.key(), "test");
    }};
    runner.enqueue(out.toByteArray(), attributes);
    runner.run();

    runner.assertAllFlowFilesTransferred(ConvertAvroToORC.REL_SUCCESS, 1);

    // Write the flow file out to disk, since the ORC Reader needs a path
    MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(ConvertAvroToORC.REL_SUCCESS).get(0);
    assertEquals("CREATE EXTERNAL TABLE IF NOT EXISTS nested_complex_record " +
            "(myMapOfArray MAP<STRING, ARRAY<DOUBLE>>, myArrayOfMap ARRAY<MAP<STRING, STRING>>)"
            + " STORED AS ORC", resultFlowFile.getAttribute(ConvertAvroToORC.HIVE_DDL_ATTRIBUTE));
    assertEquals("2", resultFlowFile.getAttribute(ConvertAvroToORC.RECORD_COUNT_ATTRIBUTE));
    assertEquals("test.orc", resultFlowFile.getAttribute(CoreAttributes.FILENAME.key()));
    byte[] resultContents = runner.getContentAsByteArray(resultFlowFile);
    FileOutputStream fos = new FileOutputStream("target/test1.orc");
    fos.write(resultContents);
    fos.flush();
    fos.close();

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    Reader reader = OrcFile.createReader(new Path("target/test1.orc"), OrcFile.readerOptions(conf).filesystem(fs));
    RecordReader rows = reader.rows();
    Object o = rows.next(null);
    assertNotNull(o);
    assertTrue(o instanceof OrcStruct);
    TypeInfo resultSchema = TestNiFiOrcUtils.buildNestedComplexOrcSchema();
    StructObjectInspector inspector = (StructObjectInspector) OrcStruct.createObjectInspector(resultSchema);

    // Check values
    Object myMapOfArray = inspector.getStructFieldData(o, inspector.getStructFieldRef("myMapOfArray"));
    assertTrue(myMapOfArray instanceof Map);
    Map map = (Map) myMapOfArray;
    Object mapValue = map.get(new Text("key1"));
    assertNotNull(mapValue);
    assertTrue(mapValue instanceof List);
    assertEquals(Arrays.asList(new DoubleWritable(1.0), new DoubleWritable(2.0)), mapValue);

    Object myArrayOfMap = inspector.getStructFieldData(o, inspector.getStructFieldRef("myArrayOfMap"));
    assertTrue(myArrayOfMap instanceof List);
    List list = (List) myArrayOfMap;
    Object el0 = list.get(0);
    assertNotNull(el0);
    assertTrue(el0 instanceof Map);
    assertEquals(new Text("v1"), ((Map) el0).get(new Text("key1")));
}
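The second map in myArrayOfMap could be verified the same way; a sketch consistent with the arrayMap12 data built above:

// Sketch: verify the second element of myArrayOfMap.
Object el1 = list.get(1);
assertNotNull(el1);
assertTrue(el1 instanceof Map);
assertEquals(new Text("v3"), ((Map) el1).get(new Text("key3")));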