package com.facebook.hive.orc; import java.io.IOException; import java.sql.Timestamp; import junit.framework.Assert; import org.apache.hadoop.hive.serde2.ReaderWriterProfiler; import org.apache.hadoop.hive.serde2.io.ByteWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.io.TimestampWritable; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.junit.Test; import com.facebook.hive.orc.lazy.LazyBinaryTreeReader; import com.facebook.hive.orc.lazy.LazyBooleanTreeReader; import com.facebook.hive.orc.lazy.LazyByteTreeReader; import com.facebook.hive.orc.lazy.LazyDoubleTreeReader; import com.facebook.hive.orc.lazy.LazyFloatTreeReader; import com.facebook.hive.orc.lazy.LazyIntTreeReader; import com.facebook.hive.orc.lazy.LazyLongTreeReader; import com.facebook.hive.orc.lazy.LazyShortTreeReader; import com.facebook.hive.orc.lazy.LazyStringTreeReader; import com.facebook.hive.orc.lazy.LazyTimestampTreeReader; import com.facebook.hive.orc.lazy.OrcLazyBinary; import com.facebook.hive.orc.lazy.OrcLazyBoolean; import com.facebook.hive.orc.lazy.OrcLazyByte; import com.facebook.hive.orc.lazy.OrcLazyDouble; import com.facebook.hive.orc.lazy.OrcLazyFloat; import com.facebook.hive.orc.lazy.OrcLazyInt; import com.facebook.hive.orc.lazy.OrcLazyList; import com.facebook.hive.orc.lazy.OrcLazyListObjectInspector; import com.facebook.hive.orc.lazy.OrcLazyLong; import com.facebook.hive.orc.lazy.OrcLazyMap; import com.facebook.hive.orc.lazy.OrcLazyMapObjectInspector; import com.facebook.hive.orc.lazy.OrcLazyObjectInspectorUtils; import com.facebook.hive.orc.lazy.OrcLazyShort; import com.facebook.hive.orc.lazy.OrcLazyString; import com.facebook.hive.orc.lazy.OrcLazyStruct; import com.facebook.hive.orc.lazy.OrcLazyStructObjectInspector; import com.facebook.hive.orc.lazy.OrcLazyTimestamp; import com.facebook.hive.orc.lazy.OrcLazyUnionObjectInspector; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; public class TestObjectInspector { private static final String FIELD_0 = "field0"; private static final ListTypeInfo LIST_TYPE_INFO = (ListTypeInfo) TypeInfoFactory.getListTypeInfo(TypeInfoFactory.stringTypeInfo); private static final MapTypeInfo MAP_TYPE_INFO = (MapTypeInfo) TypeInfoFactory.getMapTypeInfo( TypeInfoFactory.stringTypeInfo, TypeInfoFactory.stringTypeInfo); private static final StructTypeInfo STRUCT_TYPE_INFO = (StructTypeInfo) TypeInfoFactory.getStructTypeInfo(Lists.newArrayList(FIELD_0), Lists.newArrayList((TypeInfo) TypeInfoFactory.stringTypeInfo)); private static final UnionTypeInfo UNION_TYPE_INFO = (UnionTypeInfo) TypeInfoFactory.getUnionTypeInfo( Lists.newArrayList((TypeInfo) TypeInfoFactory.stringTypeInfo)); private static final ListObjectInspector LIST_OI = new OrcLazyListObjectInspector(LIST_TYPE_INFO); private static final MapObjectInspector MAP_OI = new OrcLazyMapObjectInspector(MAP_TYPE_INFO); private static final StructObjectInspector STRUCT_OI = new OrcLazyStructObjectInspector(STRUCT_TYPE_INFO); private static final UnionObjectInspector UNION_OI = new OrcLazyUnionObjectInspector(UNION_TYPE_INFO); private static final ListObjectInspector NON_LAZY_LIST_OI = new OrcStruct.OrcListObjectInspector(LIST_TYPE_INFO); private static final MapObjectInspector NON_LAZY_MAP_OI = new OrcStruct.OrcMapObjectInspector(MAP_TYPE_INFO); private static final StructObjectInspector NON_LAZY_STRUCT_OI = new OrcStruct.OrcStructInspector(STRUCT_TYPE_INFO); private static final UnionObjectInspector NON_LAZY_UNION_OI = new OrcUnion.OrcUnionObjectInspector(UNION_TYPE_INFO); @Test public void TestNullList() { Assert.assertNull(LIST_OI.getList(null)); Assert.assertEquals(-1, LIST_OI.getListLength(null)); Assert.assertNull(LIST_OI.getListElement(null, 0)); } @Test public void TestNullMap() { Assert.assertNull(MAP_OI.getMap(null)); Assert.assertEquals(-1, MAP_OI.getMapSize(null)); Assert.assertNull(MAP_OI.getMapValueElement(null, "key")); } @Test public void TestNullStruct() { Assert.assertNull(STRUCT_OI.getStructFieldData(null, STRUCT_OI.getStructFieldRef(FIELD_0))); Assert.assertNull(STRUCT_OI.getStructFieldsDataAsList(null)); } @Test public void TestNullUnion() { Assert.assertNull(UNION_OI.getField(null)); Assert.assertEquals(-1, UNION_OI.getTag(null)); } @Test public void TestNullNonLazyList() { Assert.assertNull(NON_LAZY_LIST_OI.getList(null)); Assert.assertEquals(-1, NON_LAZY_LIST_OI.getListLength(null)); Assert.assertNull(NON_LAZY_LIST_OI.getListElement(null, 0)); } @Test public void TestNullNonLazyMap() { Assert.assertNull(NON_LAZY_MAP_OI.getMap(null)); Assert.assertEquals(-1, NON_LAZY_MAP_OI.getMapSize(null)); Assert.assertNull(NON_LAZY_MAP_OI.getMapValueElement(null, "key")); } @Test public void TestNullNonLazyStruct() { Assert.assertNull(NON_LAZY_STRUCT_OI.getStructFieldData(null, NON_LAZY_STRUCT_OI.getStructFieldRef(FIELD_0))); Assert.assertNull(NON_LAZY_STRUCT_OI.getStructFieldsDataAsList(null)); } @Test public void TestNullNonLazyUnion() { Assert.assertNull(NON_LAZY_UNION_OI.getField(null)); Assert.assertEquals(-1, NON_LAZY_UNION_OI.getTag(null)); } /** * Tests accessing indices of an array that are outside the allowed range of * [0, size - 1] */ @Test public void TestInvalidIndexArray() { OrcLazyList list = new OrcLazyList(null) { @Override public Object materialize() throws IOException { return Lists.newArrayList("a"); } }; // Test an index < 0 Assert.assertNull(LIST_OI.getListElement(list, -1)); // Test a valid index (control case) Assert.assertEquals("a", LIST_OI.getListElement(list, 0)); //Test an index >= the size of the list Assert.assertNull(LIST_OI.getListElement(list, 1)); } /** * Tests trying to get the value for a key in a map that doesn't exist */ @Test public void TestNonexistentKeyMap() { OrcLazyMap map = new OrcLazyMap(null) { @Override public Object materialize() throws IOException { return ImmutableMap.of("a", "b"); } }; // Test a key that exists (control case) Assert.assertEquals("b", MAP_OI.getMapValueElement(map, "a")); //Test a key that doesn't exist Assert.assertNull(MAP_OI.getMapValueElement(map, "z")); } /** * Tests that after copying a lazy binary object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyBinary() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyBinary lazyBinary = new OrcLazyBinary(new LazyBinaryTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { nextCalls++; return new BytesWritable("a".getBytes()); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); BinaryObjectInspector binaryOI = (BinaryObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.binaryTypeInfo); OrcLazyBinary lazyBinary2 = (OrcLazyBinary) binaryOI.copyObject(lazyBinary); Assert.assertEquals("a", new String(((BytesWritable) lazyBinary.materialize()).getBytes())); Assert.assertEquals("a", new String(((BytesWritable) lazyBinary2.materialize()).getBytes())); } /** * Tests that after copying a lazy boolean object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyBoolean() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyBoolean lazyBoolean = new OrcLazyBoolean(new LazyBooleanTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { return new BooleanWritable(true); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); BooleanObjectInspector booleanOI = (BooleanObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.booleanTypeInfo); OrcLazyBoolean lazyBoolean2 = (OrcLazyBoolean) booleanOI.copyObject(lazyBoolean); Assert.assertEquals(true, ((BooleanWritable) lazyBoolean.materialize()).get()); Assert.assertEquals(true, ((BooleanWritable) lazyBoolean2.materialize()).get()); } /** * Tests that after copying a lazy byte object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyByte() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyByte lazyByte = new OrcLazyByte(new LazyByteTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { return new ByteWritable((byte) 1); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); ByteObjectInspector byteOI = (ByteObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.byteTypeInfo); OrcLazyByte lazyByte2 = (OrcLazyByte) byteOI.copyObject(lazyByte); Assert.assertEquals(1, ((ByteWritable) lazyByte.materialize()).get()); Assert.assertEquals(1, ((ByteWritable) lazyByte2.materialize()).get()); } /** * Tests that after copying a lazy double object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyDouble() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyDouble lazyDouble = new OrcLazyDouble(new LazyDoubleTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { return new DoubleWritable(1.0); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); DoubleObjectInspector doubleOI = (DoubleObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.doubleTypeInfo); OrcLazyDouble lazyDouble2 = (OrcLazyDouble) doubleOI.copyObject(lazyDouble); Assert.assertEquals(1.0, ((DoubleWritable) lazyDouble.materialize()).get()); Assert.assertEquals(1.0, ((DoubleWritable) lazyDouble2.materialize()).get()); } /** * Tests that after copying a lazy float object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyFloat() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyFloat lazyFloat = new OrcLazyFloat(new LazyFloatTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { return new FloatWritable(1.0f); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); FloatObjectInspector floatOI = (FloatObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.floatTypeInfo); OrcLazyFloat lazyFloat2 = (OrcLazyFloat) floatOI.copyObject(lazyFloat); Assert.assertEquals(1.0f, ((FloatWritable) lazyFloat.materialize()).get()); Assert.assertEquals(1.0f, ((FloatWritable) lazyFloat2.materialize()).get()); } /** * Tests that after copying a lazy int object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyInt() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyInt lazyInt = new OrcLazyInt(new LazyIntTreeReader(0, 0) { int getCalls = 0; @Override public Object get(long currentRow, Object previous) throws IOException { if (getCalls == 0) { return new IntWritable(1); } throw new IOException("get should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); IntObjectInspector intOI = (IntObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.intTypeInfo); OrcLazyInt lazyInt2 = (OrcLazyInt) intOI.copyObject(lazyInt); Assert.assertEquals(1, ((IntWritable) lazyInt.materialize()).get()); Assert.assertEquals(1, ((IntWritable) lazyInt2.materialize()).get()); } /** * Tests that after copying a lazy long object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyLong() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyLong lazyLong = new OrcLazyLong(new LazyLongTreeReader(0, 0) { int getCalls = 0; @Override public Object get(long currentRow, Object previous) throws IOException { if (getCalls == 0) { return new LongWritable(1); } throw new IOException("get should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); LongObjectInspector longOI = (LongObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.longTypeInfo); OrcLazyLong lazyLong2 = (OrcLazyLong) longOI.copyObject(lazyLong); Assert.assertEquals(1, ((LongWritable) lazyLong.materialize()).get()); Assert.assertEquals(1, ((LongWritable) lazyLong2.materialize()).get()); } /** * Tests that after copying a lazy short object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyShort() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyShort lazyShort = new OrcLazyShort(new LazyShortTreeReader(0, 0) { int getCalls = 0; @Override public Object get(long currentRow, Object previous) throws IOException { if (getCalls == 0) { return new ShortWritable((short) 1); } throw new IOException("get should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); ShortObjectInspector shortOI = (ShortObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.shortTypeInfo); OrcLazyShort lazyShort2 = (OrcLazyShort) shortOI.copyObject(lazyShort); Assert.assertEquals(1, ((ShortWritable) lazyShort.materialize()).get()); Assert.assertEquals(1, ((ShortWritable) lazyShort2.materialize()).get()); } /** * Tests that after copying a lazy string object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyString() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyString lazyString = new OrcLazyString(new LazyStringTreeReader(0, 0) { int getCalls = 0; @Override public Object get(long currentRow, Object previous) throws IOException { if (getCalls == 0) { return new Text("a"); } throw new IOException("get should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); StringObjectInspector stringOI = (StringObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.stringTypeInfo); OrcLazyString lazyString2 = (OrcLazyString) stringOI.copyObject(lazyString); Assert.assertEquals("a", ((Text) lazyString.materialize()).toString()); Assert.assertEquals("a", ((Text) lazyString2.materialize()).toString()); } /** * Tests that after copying a lazy timestamp object, calling materialize on the original and the * copy doesn't advance the tree reader twice * @throws Exception */ @Test public void TestCopyTimestamp() throws Exception { ReaderWriterProfiler.setProfilerOptions(null); OrcLazyTimestamp lazyTimestamp = new OrcLazyTimestamp(new LazyTimestampTreeReader(0, 0) { int nextCalls = 0; @Override public Object next(Object previous) throws IOException { if (nextCalls == 0) { return new TimestampWritable(new Timestamp(1)); } throw new IOException("next should only be called once"); } @Override protected boolean seekToRow(long currentRow) throws IOException { return true; } }); TimestampObjectInspector timestampOI = (TimestampObjectInspector) OrcLazyObjectInspectorUtils.createLazyObjectInspector(TypeInfoFactory.timestampTypeInfo); OrcLazyTimestamp lazyTimestamp2 = (OrcLazyTimestamp) timestampOI.copyObject(lazyTimestamp); Assert.assertEquals(new Timestamp(1), ((TimestampWritable) lazyTimestamp.materialize()).getTimestamp()); Assert.assertEquals(new Timestamp(1), ((TimestampWritable) lazyTimestamp2.materialize()).getTimestamp()); } /** * Tests that fields can be accessed from the OrcLazyStructObjectInspector in a case * insensitive manner. * @throws Exception */ @Test public void testCaseInsensitiveFieldsLazyStruct() throws Exception { OrcLazyStruct struct = new OrcLazyStruct(null) { @Override public Object materialize() throws IOException { OrcStruct struct = new OrcStruct(Lists.newArrayList(FIELD_0)); struct.setFieldValue(0, new Text("a")); return struct; } }; // Test control case (cases match) StructField field = STRUCT_OI.getStructFieldRef(FIELD_0); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( STRUCT_OI.getStructFieldData(struct, field))); // Test upper case field = STRUCT_OI.getStructFieldRef(FIELD_0.toUpperCase()); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( STRUCT_OI.getStructFieldData(struct, field))); // Test lower case (even if someone changes the value of FIELD_0 in the future either upper // or lower case should be different from the actual case) field = STRUCT_OI.getStructFieldRef(FIELD_0.toLowerCase()); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( STRUCT_OI.getStructFieldData(struct, field))); } /** * Tests that fields can be accessed from the OrcStructObjectInspector in a case * insensitive manner. * @throws Exception */ @Test public void testCaseInsensitiveFieldsStruct() throws Exception { OrcStruct struct = new OrcStruct(Lists.newArrayList(FIELD_0)); struct.setFieldValue(0, new Text("a")); // Test control case (cases match) StructField field = NON_LAZY_STRUCT_OI.getStructFieldRef(FIELD_0); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( NON_LAZY_STRUCT_OI.getStructFieldData(struct, field))); // Test upper case field = NON_LAZY_STRUCT_OI.getStructFieldRef(FIELD_0.toUpperCase()); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( NON_LAZY_STRUCT_OI.getStructFieldData(struct, field))); // Test lower case (even if someone changes the value of FIELD_0 in the future either upper // or lower case should be different from the actual case) field = NON_LAZY_STRUCT_OI.getStructFieldRef(FIELD_0.toLowerCase()); Assert.assertEquals("a", ((StringObjectInspector) field.getFieldObjectInspector()).getPrimitiveJavaObject( NON_LAZY_STRUCT_OI.getStructFieldData(struct, field))); } }