Java Code Examples for org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: incubator-hivemall   Source File: ArgrankUDF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the single list argument of argrank and caches its inspectors.
 *
 * @param argOIs inspectors of the call arguments; exactly one LIST-category inspector is accepted
 * @return a standard list inspector whose elements use the writable int inspector
 * @throws UDFArgumentException if the argument count is not 1 or the argument is not a list
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (numArgs != 1) {
        throw new UDFArgumentLengthException(
            "argrank(array<ANY> a) takes exactly 1 argument: " + numArgs);
    }

    final ObjectInspector arrayArgOI = argOIs[0];
    final boolean isList = arrayArgOI.getCategory() == Category.LIST;
    if (!isList) {
        throw new UDFArgumentException(
            "argrank(array<ANY> a) expects array<ANY> for the first argument: "
                    + arrayArgOI.getTypeName());
    }

    // Keep the list inspector and its element inspector as instance state.
    this.listOI = HiveUtils.asListOI(arrayArgOI);
    this.elemOI = this.listOI.getListElementObjectInspector();

    final ObjectInspector rankElemOI = PrimitiveObjectInspectorFactory.writableIntObjectInspector;
    return ObjectInspectorFactory.getStandardListObjectInspector(rankElemOI);
}
 
Example 2
Source Project: incubator-hivemall   Source File: OnehotEncodingUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a struct inspector with one list-typed field per input inspector.
 * Field {@code i} (zero-based) is named {@code "f" + i} and is a standard list whose
 * element inspector is the WRITABLE standard copy of {@code inputOIs[i]}.
 *
 * @param inputOIs inspectors of the inputs; passed through {@code Preconditions.checkNotNull}
 * @return the struct inspector assembled from the generated field names and list inspectors
 */
@Nonnull
private static StructObjectInspector internalMergeOutputOI(
        @CheckForNull PrimitiveObjectInspector[] inputOIs) throws UDFArgumentException {
    Preconditions.checkNotNull(inputOIs);

    final int size = inputOIs.length;
    final List<String> names = new ArrayList<String>(size);
    final List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(size);

    int index = 0;
    for (PrimitiveObjectInspector inputOI : inputOIs) {
        names.add("f" + index);
        final ObjectInspector writableElemOI = ObjectInspectorUtils.getStandardObjectInspector(
            inputOI, ObjectInspectorCopyOption.WRITABLE);
        final ListObjectInspector fieldOI =
                ObjectInspectorFactory.getStandardListObjectInspector(writableElemOI);
        inspectors.add(fieldOI);
        index++;
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
 
Example 3
Source Project: DataLink   Source File: HdfsHelper.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the ORC writing state: column definitions, the struct inspector built from them,
 * the record writer (optionally with a compression codec) and the ORC serde.
 *
 * @param config   configuration supplying the column list and the compression setting
 * @param fileName name handed to the output format when creating the record writer
 * @throws IOException declared by the constructor for failures while creating the writer
 */
OrcWriterProxy(Configuration config, String fileName) throws IOException {
    // column definitions
    columns = config.getListConfiguration(Key.COLUMN);

    // struct inspector derived from the column names and per-column inspectors
    final List<String> names = getColumnNames(columns);
    final List<ObjectInspector> typeInspectors = getColumnTypeInspectors(columns);
    inspector = (StructObjectInspector) ObjectInspectorFactory
            .getStandardStructObjectInspector(names, typeInspectors);

    // record writer; a codec is configured only for a non-null setting other than "NONE"
    final String compress = config.getString(Key.COMPRESS, null);
    final FileOutputFormat outFormat = new OrcOutputFormat();
    final boolean compressionRequested = compress != null && !"NONE".equalsIgnoreCase(compress);
    if (compressionRequested) {
        final Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (codecClass != null) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);

    // serde used together with the inspector
    orcSerde = new OrcSerde();
}
 
Example 4
Source Project: incubator-hivemall   Source File: ArraySliceUDF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Accepts {@code (values, offset)} or {@code (values, offset, length)} and caches the
 * corresponding inspectors.
 *
 * @param argOIs inspectors of the call arguments (2 or 3 entries)
 * @return a standard list inspector over the element inspector of the first argument
 * @throws UDFArgumentException if the number of arguments is neither 2 nor 3, or a
 *         {@code HiveUtils} conversion rejects an argument
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int argc = argOIs.length;
    final boolean arityOk = (argc == 2) || (argc == 3);
    if (!arityOk) {
        throw new UDFArgumentLengthException("Expected 2 or 3 arguments, but got " + argc);
    }

    this.valuesOI = HiveUtils.asListOI(argOIs[0]);
    this.offsetOI = HiveUtils.asIntegerOI(argOIs[1]);
    // The length inspector is only set when the optional third argument is present.
    if (argc == 3) {
        this.lengthOI = HiveUtils.asIntegerOI(argOIs[2]);
    }

    return ObjectInspectorFactory.getStandardListObjectInspector(
        valuesOI.getListElementObjectInspector());
}
 
Example 5
Source Project: Cobol-to-Hive   Source File: CobolSerDe.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the SerDe from the copybook layout resolved for the table.
 *
 * <p>Builds the {@code CobolCopybook}, then derives the column count, the struct inspector,
 * the column type infos and the column properties from it.
 *
 * @param conf job configuration passed to the layout lookup
 * @param tbl  table properties passed to the layout lookup
 * @throws SerDeException if the copybook layout cannot be read; the underlying
 *         {@code IOException} is kept as the cause
 */
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {

	// final int fixedRecordlLength =
	// Integer.parseInt(tbl.getProperty("fb.length"));
	// conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH,
	// fixedRecordlLength);
	try {
		this.ccb = new CobolCopybook(
				CobolSerdeUtils.determineLayoutOrThrowException(conf, tbl));
	} catch (IOException e) {
		// Fail fast instead of printing the stack trace and returning: a silent return
		// would leave the inspector and column metadata unset on this instance.
		throw new SerDeException("Unable to load the COBOL copybook layout", e);
	}

	numCols = ccb.getFieldNames().size();
	this.inspector = ObjectInspectorFactory
			.getStandardStructObjectInspector(ccb.getFieldNames(),
					ccb.getFieldOIs());
	this.columnTypes = ccb.getFieldTypeInfos();
	this.columnProperties = ccb.getFieldProperties();

}
 
Example 6
/**
 * Validates the arguments of {@code split_to_multimap} and declares its return type.
 *
 * @param arguments inspectors of the call arguments; exactly {@code ARG_COUNT} string arguments
 * @return a standard map inspector with string keys and list-of-string values
 * @throws UDFArgumentException if the argument count differs from {@code ARG_COUNT} or any
 *         argument is not of the string type
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Reject calls whose argument count differs from ARG_COUNT.
    if (arguments.length != ARG_COUNT) {
        throw new UDFArgumentLengthException(
                "The function split_to_multimap(string, string, string) takes exactly " + ARG_COUNT + " arguments.");
    }

    final ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    // Every argument must be a string. The bound is the validated argument count rather
    // than a literal, so the loop cannot drift from the arity check above.
    for (int i = 0; i < arguments.length; i++) {
        if (!ObjectInspectorUtils.compareTypes(stringOI, arguments[i])) {
            throw new UDFArgumentTypeException(i,
                    "\"" + stringOI.getTypeName() + "\" "
                            + "expected at function split_to_multimap, but "
                            + "\"" + arguments[i].getTypeName() + "\" "
                            + "is found");
        }
    }

    ObjectInspector mapKeyOI = stringOI;
    ObjectInspector mapValueOI = ObjectInspectorFactory.getStandardListObjectInspector(stringOI);

    return ObjectInspectorFactory.getStandardMapObjectInspector(mapKeyOI, mapValueOI);
}
 
Example 7
Source Project: incubator-hivemall   Source File: CollectAllUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the input/merge inspectors for the given aggregation mode and returns the
 * inspector of this stage's output.
 *
 * <ul>
 *   <li>{@code PARTIAL1}: the first parameter is the raw input; output is a standard list of
 *       its standard inspector.</li>
 *   <li>Other modes, first parameter not a {@code StandardListObjectInspector}: the
 *       standardized parameter becomes the input inspector; output is a standard list of it.</li>
 *   <li>Other modes, first parameter a {@code StandardListObjectInspector}: it is kept as the
 *       merge inspector and the output is its standardized copy.</li>
 * </ul>
 */
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
    super.init(m, parameters);

    final ObjectInspector firstParam = parameters[0];

    if (m == Mode.PARTIAL1) {
        inputOI = firstParam;
        return ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorUtils.getStandardObjectInspector(inputOI));
    }

    if (firstParam instanceof StandardListObjectInspector) {
        internalMergeOI = (StandardListObjectInspector) firstParam;
        inputOI = internalMergeOI.getListElementObjectInspector();
        loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(
            internalMergeOI);
        return loi;
    }

    inputOI = ObjectInspectorUtils.getStandardObjectInspector(firstParam);
    return (StandardListObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(
        inputOI);
}
 
Example 8
Source Project: hive-third-functions   Source File: UDFMapConcatTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates two string maps that share "key3" and expects the right-hand value to win
 * for the shared key.
 */
@Test
public void testMapConcat() throws Exception {
    UDFMapConcat udf = new UDFMapConcat();

    final ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector leftOI = ObjectInspectorFactory.getStandardMapObjectInspector(stringOI, stringOI);
    ObjectInspector rightOI = ObjectInspectorFactory.getStandardMapObjectInspector(stringOI, stringOI);
    udf.initialize(new ObjectInspector[] {leftOI, rightOI});

    // Inputs: "key3" is present on both sides with different values.
    LinkedHashMap<String, String> left = new LinkedHashMap<String, String>(
        ImmutableMap.<String, String>of("key1", "11", "key2", "12", "key3", "13"));
    LinkedHashMap<String, String> right = new LinkedHashMap<String, String>(
        ImmutableMap.<String, String>of("key3", "21", "key4", "22", "key5", "23"));

    DeferredObject[] args = {new DeferredJavaObject(left), new DeferredJavaObject(right)};
    LinkedHashMap<String, String> output = (LinkedHashMap<String, String>) udf.evaluate(args);

    LinkedHashMap<String, String> expect = new LinkedHashMap<String, String>(
        ImmutableMap.<String, String>of("key1", "11", "key2", "12", "key3", "21", "key4", "22", "key5", "23"));

    Assert.assertEquals("map_concat() test", true, MapUtils.mapEquals(output, expect));
}
 
Example 9
Source Project: elasticsearch-hadoop   Source File: HiveUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Creates a standard struct inspector from the column names and column types stored in the
 * table properties.
 *
 * @param tableProperties properties holding the comma-separated column names under
 *        {@code HiveConstants.COLUMNS} and the type string under {@code HiveConstants.COLUMNS_TYPES}
 * @return struct inspector pairing each column name with a standard writable inspector
 */
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // Column info is read via HiveConstants rather than Hive's own constants, which were
    // renamed in 0.9 (breaking compatibility).
    final String namesProperty = tableProperties.getProperty(HiveConstants.COLUMNS);
    final String typesProperty = tableProperties.getProperty(HiveConstants.COLUMNS_TYPES);

    final List<String> names = StringUtils.tokenize(namesProperty, ",");
    final List<TypeInfo> types = TypeInfoUtils.getTypeInfosFromTypeString(typesProperty);

    // One standard writable inspector per column type, used later for (de)serialization.
    final List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>();
    for (int i = 0; i < types.size(); i++) {
        columnOIs.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(types.get(i)));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, columnOIs);
}
 
Example 10
Source Project: incubator-hivemall   Source File: OnehotEncodingUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the final output struct inspector: one map-typed field per input inspector.
 * Fields are named {@code f1, f2, ...} (one-based); each map uses the WRITABLE standard copy
 * of the input inspector as key inspector and the Java int inspector for values.
 *
 * @param inputOIs inspectors of the inputs; must be non-null with at least one element
 * @return the struct inspector assembled from the generated field names and map inspectors
 */
@Nonnull
private static StructObjectInspector terminalOutputOI(
        @CheckForNull PrimitiveObjectInspector[] inputOIs) {
    Preconditions.checkNotNull(inputOIs);
    Preconditions.checkArgument(inputOIs.length >= 1, inputOIs.length);

    final int size = inputOIs.length;
    final List<String> names = new ArrayList<>(size);
    final List<ObjectInspector> inspectors = new ArrayList<>(size);

    int fieldNo = 1;
    for (PrimitiveObjectInspector inputOI : inputOIs) {
        names.add("f" + fieldNo);
        final ObjectInspector writableKeyOI = ObjectInspectorUtils.getStandardObjectInspector(
            inputOI, ObjectInspectorCopyOption.WRITABLE);
        final MapObjectInspector fieldOI = ObjectInspectorFactory.getStandardMapObjectInspector(
            writableKeyOI, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        inspectors.add(fieldOI);
        fieldNo++;
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
 
Example 11
Source Project: hive-funnel-udf   Source File: FalloutTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the list {@code [10, 5, 0, 0, 0]} to {@code Fallout} and expects
 * {@code [0.0, 0.5, 0.0, 0.0, 0.0]} back.
 */
@Test
public void testConvertToFalloutWithZeros() throws HiveException {
    final Fallout udf = new Fallout();

    final ObjectInspector longListOI = ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    udf.initialize(new ObjectInspector[] {longListOI});

    // The deferred argument is a mock that hands back the input list.
    final List<Long> funnel = Arrays.asList(10L, 5L, 0L, 0L, 0L);
    final DeferredObject deferredFunnel = mock(DeferredObject.class);
    final DeferredObject[] callArgs = new DeferredObject[] {deferredFunnel};
    when(deferredFunnel.get()).thenReturn(funnel);

    final List<Double> expected = Arrays.asList(0.0, 0.5, 0.0, 0.0, 0.0);
    Assert.assertEquals(expected, udf.evaluate(callArgs));
}
 
Example 12
Source Project: incubator-hivemall   Source File: ConditionalEmitUDTF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the two list arguments of conditional_emit and caches their inspectors.
 *
 * @param argOIs inspectors for the conditions list and the features list, in that order
 * @return a struct inspector with the single field {@code "feature"} typed by the feature
 *         element inspector
 * @throws UDFArgumentException if the argument count is not 2 or a {@code HiveUtils}
 *         conversion rejects an argument
 */
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final boolean hasTwoArgs = argOIs.length == 2;
    if (!hasTwoArgs) {
        throw new UDFArgumentException(
            "conditional_emit takes 2 arguments: array<boolean>, array<primitive>");
    }

    // First argument: list whose elements are converted with asBooleanOI.
    this.conditionsOI = HiveUtils.asListOI(argOIs[0]);
    final ObjectInspector rawConditionElemOI = conditionsOI.getListElementObjectInspector();
    this.condElemOI = HiveUtils.asBooleanOI(rawConditionElemOI);

    // Second argument: list whose elements are converted with asPrimitiveObjectInspector.
    this.featuresOI = HiveUtils.asListOI(argOIs[1]);
    final ObjectInspector rawFeatureElemOI = featuresOI.getListElementObjectInspector();
    this.featureElemOI = HiveUtils.asPrimitiveObjectInspector(rawFeatureElemOI);

    return ObjectInspectorFactory.getStandardStructObjectInspector(
        Arrays.asList("feature"), Arrays.<ObjectInspector>asList(featureElemOI));
}
 
Example 13
/**
 * Trains the classifier on a single example without any option string and checks that the
 * thresholded prediction for the same features matches the training label.
 */
@Test
public void testNoOptions() throws Exception {
    final List<String> features = Arrays.asList("1:-2", "2:-1");
    final int label = 0;

    final GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();
    final ObjectInspector labelOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    final ObjectInspector featureOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final ListObjectInspector featureListOI =
            ObjectInspectorFactory.getStandardListObjectInspector(featureOI);

    udtf.initialize(new ObjectInspector[] {featureListOI, labelOI});
    udtf.process(new Object[] {features, label});
    udtf.finalizeTraining();

    final float score = udtf.predict(udtf.parseFeatures(features));
    // A strictly positive score maps to class 1, anything else to class 0.
    final int predicted;
    if (score > 0.f) {
        predicted = 1;
    } else {
        predicted = 0;
    }
    Assert.assertTrue(label == predicted);
}
 
Example 14
Source Project: incubator-hivemall   Source File: MapTailNUDF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the {@code (map, int)} arguments of map_tail_n and caches their inspectors.
 *
 * @param arguments inspectors of the call arguments: a map inspector followed by an int inspector
 * @return a standard map inspector using the standardized key inspector and the original
 *         value inspector of the input map
 * @throws UDFArgumentException if the argument count is not 2 or an argument has the wrong kind
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException(
            "map_tail_n only takes 2 arguments: map<object, object>, int");
    }

    final ObjectInspector mapArg = arguments[0];
    if (!(mapArg instanceof MapObjectInspector)) {
        throw new UDFArgumentException("The first argument must be a map");
    }
    this.mapObjectInspector = (MapObjectInspector) mapArg;

    final ObjectInspector countArg = arguments[1];
    if (!(countArg instanceof IntObjectInspector)) {
        throw new UDFArgumentException("The second argument must be an int");
    }
    this.intObjectInspector = (IntObjectInspector) countArg;

    // Only the key inspector is standardized; the value inspector is passed through as is.
    final ObjectInspector standardKeyOI = ObjectInspectorUtils.getStandardObjectInspector(
        mapObjectInspector.getMapKeyObjectInspector());
    final ObjectInspector passThroughValueOI = mapObjectInspector.getMapValueObjectInspector();

    return ObjectInspectorFactory.getStandardMapObjectInspector(standardKeyOI, passThroughValueOI);
}
 
Example 15
Source Project: hive-dwrf   Source File: TestFileDump.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Writes 21000 pseudo-random {@code MyRecord} rows (seed 1) with the entropy and dictionary
 * thresholds configured below, then compares the file dump against
 * {@code orc-file-dump-entropy-threshold.out}.
 */
@Test
public void testEntropyThreshold() throws Exception {
  final ObjectInspector rowInspector;
  // The reflection inspector is created while holding the TestOrcFile class lock.
  synchronized (TestOrcFile.class) {
    rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(
        MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }

  OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, 1);
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 11);
  // Make sure having too few distinct values won't turn off dictionary encoding
  OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_STRING_KEY_SIZE_THRESHOLD, 1);

  ReaderWriterProfiler.setProfilerOptions(conf);
  final Writer writer = new WriterImpl(fs, testFilePath, conf, rowInspector,
      100000, CompressionKind.SNAPPY, 10000, 10000, new MemoryManager(conf));

  final Random rng = new Random(1);
  for (int row = 0; row < 21000; row++) {
    // Draw order matters for reproducibility: int, long, then the int rendered as a string.
    final int intValue = rng.nextInt();
    final long longValue = rng.nextLong();
    final String stringValue = Integer.toString(rng.nextInt());
    writer.addRow(new MyRecord(intValue, longValue, stringValue));
  }
  writer.close();

  checkOutput("orc-file-dump-entropy-threshold.out");
}
 
Example 16
Source Project: incubator-hivemall   Source File: LastElementUDFTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Evaluates {@code LastElementUDF} on the writable double list {@code [0, 1, 2]} and expects
 * the writable value 2.0.
 */
@Test
public void test() throws IOException, HiveException {
    final LastElementUDF udf = new LastElementUDF();

    final ObjectInspector doubleListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    udf.initialize(new ObjectInspector[] {doubleListOI});

    final DeferredObject input = new GenericUDF.DeferredJavaObject(
        WritableUtils.toWritableList(new double[] {0, 1, 2}));
    final DeferredObject[] args = new DeferredObject[] {input};

    Assert.assertEquals(WritableUtils.val(2.d), udf.evaluate(args));

    udf.close();
}
 
Example 17
Source Project: flink   Source File: HiveInspectors.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Recursively maps a Hive {@code TypeInfo} to an object inspector.
 *
 * <p>Primitive types map to the primitive Java inspector; list, map and struct types map to
 * the corresponding standard inspector built from the inspectors of their nested types.
 *
 * @param type the type to convert
 * @return the inspector for {@code type}
 * @throws CatalogException for any category other than PRIMITIVE, LIST, MAP or STRUCT
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {
		case PRIMITIVE: {
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
					(PrimitiveTypeInfo) type);
		}
		case LIST: {
			TypeInfo elementType = ((ListTypeInfo) type).getListElementTypeInfo();
			return ObjectInspectorFactory.getStandardListObjectInspector(
					getObjectInspector(elementType));
		}
		case MAP: {
			MapTypeInfo mapInfo = (MapTypeInfo) type;
			ObjectInspector keyInspector = getObjectInspector(mapInfo.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(mapInfo.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		}
		case STRUCT: {
			StructTypeInfo structInfo = (StructTypeInfo) type;
			List<TypeInfo> memberTypes = structInfo.getAllStructFieldTypeInfos();

			// Convert the member types in declaration order.
			List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>();
			for (int i = 0; i < memberTypes.size(); i++) {
				memberInspectors.add(getObjectInspector(memberTypes.get(i)));
			}

			return ObjectInspectorFactory.getStandardStructObjectInspector(
					structInfo.getAllStructFieldNames(), memberInspectors);
		}
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example 18
/**
 * Initializing the regressor with the option string {@code "-loss HingeLoss"} is expected to
 * fail with {@code UDFArgumentException}.
 */
@Test(expected = UDFArgumentException.class)
public void testInvalidLossFunction() throws Exception {
    final ObjectInspector targetOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    final ObjectInspector featureOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final ListObjectInspector featureListOI =
            ObjectInspectorFactory.getStandardListObjectInspector(featureOI);
    // The options are passed as a constant string argument.
    final ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-loss HingeLoss");

    final GeneralRegressorUDTF udtf = new GeneralRegressorUDTF();
    udtf.initialize(new ObjectInspector[] {featureListOI, targetOI, optionsOI});
}
 
Example 19
/**
 * Initializing the regressor with the option string {@code "-loss UnsupportedLoss"} is
 * expected to fail with {@code UDFArgumentException}.
 */
@Test(expected = UDFArgumentException.class)
public void testUnsupportedLossFunction() throws Exception {
    final ObjectInspector targetOI = PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
    final ObjectInspector featureOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final ListObjectInspector featureListOI =
            ObjectInspectorFactory.getStandardListObjectInspector(featureOI);
    // The options are passed as a constant string argument.
    final ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-loss UnsupportedLoss");

    final GeneralRegressorUDTF udtf = new GeneralRegressorUDTF();
    udtf.initialize(new ObjectInspector[] {featureListOI, targetOI, optionsOI});
}
 
Example 20
/**
 * Vectorizes the feature names {@code ("a", "b")} with a fixed value 0.1 for "a" and several
 * string values for "b", checking the produced feature strings for each:
 * a non-numeric string, "1.0", "1" and "0".
 */
@Test
public void testCategoricalVariable() throws HiveException, IOException {
    VectorizeFeaturesUDF udf = new VectorizeFeaturesUDF();
    ObjectInspector[] argOIs = new ObjectInspector[3];
    List<String> featureNames = Arrays.asList("a", "b");
    // The feature names are supplied through a constant list inspector.
    argOIs[0] = ObjectInspectorFactory.getStandardConstantListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames);
    argOIs[1] = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    argOIs[2] = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    udf.initialize(argOIs);

    // arguments[0] is intentionally left unset in this test.
    DeferredObject[] arguments = new DeferredObject[3];
    // Double.valueOf replaces the boxing constructor, which is deprecated since Java 9.
    arguments[1] = new DeferredJavaObject(Double.valueOf(0.1));
    arguments[2] = new DeferredJavaObject("dayofweek");

    List<Text> actuals = udf.evaluate(arguments);
    List<Text> expected = WritableUtils.val("a:0.1", "b#dayofweek");
    Assert.assertEquals(expected, actuals);

    arguments[2] = new DeferredJavaObject("1.0");
    actuals = udf.evaluate(arguments);
    expected = WritableUtils.val("a:0.1", "b:1.0");
    Assert.assertEquals(expected, actuals);

    arguments[2] = new DeferredJavaObject("1");
    actuals = udf.evaluate(arguments);
    expected = WritableUtils.val("a:0.1", "b:1.0");
    Assert.assertEquals(expected, actuals);

    // A value of "0" is expected to produce no entry for feature "b".
    arguments[2] = new DeferredJavaObject("0");
    actuals = udf.evaluate(arguments);
    expected = WritableUtils.val(new String[] {"a:0.1"});
    Assert.assertEquals(expected, actuals);

    udf.close();
}
 
Example 21
/**
 * Runs {@code ConditionalEmitUDTF} on two rows and checks that only the features whose
 * matching condition is {@code true} are forwarded.
 */
@Test
public void test() throws HiveException {
    final ConditionalEmitUDTF udtf = new ConditionalEmitUDTF();

    final ObjectInspector conditionListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    final ObjectInspector featureListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    udtf.initialize(new ObjectInspector[] {conditionListOI, featureListOI});

    // Collect the single column of every forwarded row.
    final List<Object> emitted = new ArrayList<>();
    udtf.setCollector(new Collector() {
        @Override
        public void collect(Object input) throws HiveException {
            final Object[] row = (Object[]) input;
            Assert.assertEquals(1, row.length);
            emitted.add(row[0]);
        }
    });

    final List<String> features = Arrays.asList("one", "two", "three");

    udtf.process(new Object[] {Arrays.asList(true, false, true), features});
    Assert.assertEquals(Arrays.asList("one", "three"), emitted);

    emitted.clear();

    udtf.process(new Object[] {Arrays.asList(true, true, false), features});
    Assert.assertEquals(Arrays.asList("one", "two"), emitted);

    udtf.close();
}
 
Example 22
Source Project: marble   Source File: TypeInferenceUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Maps a {@code RelDataTypeHolder} to an object inspector: ARRAY types become a standard
 * list inspector over the (recursively resolved) component type, everything else is
 * delegated to {@code getPrimitiveObjectInspector}.
 *
 * @throws IllegalStateException if an ARRAY type has no component type
 */
private static ObjectInspector getObjectInspector(
    RelDataTypeHolder relDataTypeHolder) {
  final SqlTypeName typeName = relDataTypeHolder.getSqlTypeName();
  if (!typeName.equals(SqlTypeName.ARRAY)) {
    return getPrimitiveObjectInspector(relDataTypeHolder);
  }

  final RelDataTypeHolder elementType = relDataTypeHolder.getComponentType();
  if (elementType == null) {
    throw new IllegalStateException("element type of array is null !");
  }
  final ObjectInspector elementInspector =
      getObjectInspector(relDataTypeHolder.getComponentType());
  return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
}
 
Example 23
Source Project: incubator-hivemall   Source File: StoptagsExcludeUDFTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes {@code StoptagsExcludeUDF} with a writable string list inspector plus the
 * constant string "ja", evaluates it with the two tags "形容詞" and "フィラー", and compares
 * the result against the sorted {@code expected} tag list below.
 */
@Test
public void testTwoArguments() throws IOException, HiveException {
    StoptagsExcludeUDF udf = new StoptagsExcludeUDF();

    udf.initialize(new ObjectInspector[] {
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.writableStringObjectInspector),
            HiveUtils.getConstStringObjectInspector("ja")});

    // NOTE(review): only one deferred argument is passed here although two inspectors were
    // given to initialize — presumably the constant second argument is read from its
    // inspector; confirm against StoptagsExcludeUDF.
    List<String> actual = udf.evaluate(new DeferredObject[] {new GenericUDF.DeferredJavaObject(
        Arrays.asList(new Text("形容詞"), new Text("フィラー")))});
    // Expected tags. The entries commented out below ("形容詞..." and "フィラー") correspond to
    // the two tags passed to evaluate above and are therefore not part of the expectation.
    String[] expected = new String[] {"名詞", "名詞-一般", "名詞-固有名詞", "名詞-固有名詞-一般", "名詞-固有名詞-人名",
            "名詞-固有名詞-人名-一般", "名詞-固有名詞-人名-姓", "名詞-固有名詞-人名-名", "名詞-固有名詞-組織", "名詞-固有名詞-地域",
            "名詞-固有名詞-地域-一般", "名詞-固有名詞-地域-国", "名詞-代名詞", "名詞-代名詞-一般", "名詞-代名詞-縮約", "名詞-副詞可能",
            "名詞-サ変接続", "名詞-形容動詞語幹", "名詞-数", "名詞-非自立", "名詞-非自立-一般", "名詞-非自立-副詞可能",
            "名詞-非自立-助動詞語幹", "名詞-非自立-形容動詞語幹", "名詞-特殊", "名詞-特殊-助動詞語幹", "名詞-接尾", "名詞-接尾-一般",
            "名詞-接尾-人名", "名詞-接尾-地域", "名詞-接尾-サ変接続", "名詞-接尾-助動詞語幹", "名詞-接尾-形容動詞語幹", "名詞-接尾-副詞可能",
            "名詞-接尾-助数詞", "名詞-接尾-特殊", "名詞-接続詞的", "名詞-動詞非自立的", "名詞-引用文字列", "名詞-ナイ形容詞語幹", "接頭詞",
            "接頭詞-名詞接続", "接頭詞-動詞接続", "接頭詞-形容詞接続", "接頭詞-数接", "動詞", "動詞-自立", "動詞-非自立", "動詞-接尾",
            // "形容詞", "形容詞-自立", "形容詞-非自立", "形容詞-接尾", 
            "副詞", "副詞-一般", "副詞-助詞類接続", "連体詞", "接続詞", "助詞", "助詞-格助詞", "助詞-格助詞-一般", "助詞-格助詞-引用",
            "助詞-格助詞-連語", "助詞-接続助詞", "助詞-係助詞", "助詞-副助詞", "助詞-間投助詞", "助詞-並立助詞", "助詞-終助詞",
            "助詞-副助詞/並立助詞/終助詞", "助詞-連体化", "助詞-副詞化", "助詞-特殊", "助動詞", "感動詞", "記号", "記号-一般",
            "記号-読点", "記号-句点", "記号-空白", "記号-括弧開", "記号-括弧閉", "記号-アルファベット", "その他", "その他-間投",
            //"フィラー", 
            "非言語音", "語断片", "未知語"};
    // The expectation is sorted before the comparison — presumably evaluate returns the tags
    // in sorted order; TODO confirm.
    Arrays.sort(expected);
    Assert.assertEquals(Arrays.asList(expected), actual);

    udf.close();
}
 
Example 24
/**
 * Returns the Java object inspector that corresponds to a {@code java.sql.Types} code.
 *
 * <p>For {@code Types.ARRAY} the element type is taken from the text between the first
 * {@code '<'} and the last {@code '>'} of {@code hiveType}, so nested declarations such as
 * {@code array<array<int>>} keep their inner type intact; the element inspector is then
 * resolved recursively via {@code hiveTypeToSqlType}.
 *
 * @param sqlType  a {@code java.sql.Types} constant
 * @param hiveType the Hive type string; only parsed for arrays, otherwise used in the
 *                 error message
 * @return the matching inspector
 * @throws SerDeException if the SQL type is not supported, or if an array type string is
 *         missing or has no {@code <...>} element section
 */
public static ObjectInspector getObjectInspector(int sqlType, String hiveType)
        throws SerDeException {
    switch(sqlType) {
        case Types.VARCHAR:
            return PrimitiveObjectInspectorFactory.javaStringObjectInspector;
        case Types.FLOAT:
            return PrimitiveObjectInspectorFactory.javaFloatObjectInspector;
        case Types.DOUBLE:
            return PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
        case Types.BOOLEAN:
            return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
        case Types.TINYINT:
            return PrimitiveObjectInspectorFactory.javaByteObjectInspector;
        case Types.SMALLINT:
            return PrimitiveObjectInspectorFactory.javaShortObjectInspector;
        case Types.INTEGER:
            return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
        case Types.BIGINT:
            return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
        case Types.TIMESTAMP:
            return PrimitiveObjectInspectorFactory.javaTimestampObjectInspector;
        case Types.BINARY:
            return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
        case Types.ARRAY:
            if (hiveType == null) {
                throw new SerDeException("Array type requires a Hive type string, but it was null");
            }
            // Use the LAST '>' so that a nested element type is not cut at its own bracket.
            int open = hiveType.indexOf('<');
            int close = hiveType.lastIndexOf('>');
            if (open < 0 || close <= open) {
                throw new SerDeException("Malformed array type: " + hiveType);
            }
            String hiveElemType = hiveType.substring(open + 1, close).trim();
            int sqlElemType = hiveTypeToSqlType(hiveElemType);
            ObjectInspector listElementOI = getObjectInspector(sqlElemType, hiveElemType);
            return ObjectInspectorFactory.getStandardListObjectInspector(listElementOI);
        default:
            throw new SerDeException("Cannot find getObjectInspector for: " + hiveType);
    }
}
 
Example 25
Source Project: incubator-hivemall   Source File: ToStringArrayUDF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Accepts a single list argument and prepares the reusable result buffer.
 *
 * @param argOIs inspectors of the call arguments; exactly one is accepted
 * @return a standard list inspector over Java strings
 * @throws UDFArgumentException if the argument count is not 1 or the argument is rejected
 *         by {@code HiveUtils.asListOI}
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int argc = argOIs.length;
    if (argc != 1) {
        throw new UDFArgumentException("to_string_array expects exactly one argument: " + argc);
    }

    this.listOI = HiveUtils.asListOI(argOIs[0]);
    this.result = new ArrayList<>();

    final ObjectInspector stringElemOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    return ObjectInspectorFactory.getStandardListObjectInspector(stringElemOI);
}
 
Example 26
Source Project: flink   Source File: HiveInspectors.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Converts a Hive {@code TypeInfo} into an object inspector, descending into the nested
 * types of lists, maps and structs.
 *
 * @param type the type to convert
 * @return the primitive Java inspector for primitive types, otherwise the standard
 *         list/map/struct inspector composed from the nested inspectors
 * @throws CatalogException for any category other than PRIMITIVE, LIST, MAP or STRUCT
 */
private static ObjectInspector getObjectInspector(TypeInfo type) {
	switch (type.getCategory()) {
		case PRIMITIVE: {
			PrimitiveTypeInfo primitiveInfo = (PrimitiveTypeInfo) type;
			return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(primitiveInfo);
		}
		case LIST: {
			ListTypeInfo listInfo = (ListTypeInfo) type;
			ObjectInspector elementInspector = getObjectInspector(listInfo.getListElementTypeInfo());
			return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
		}
		case MAP: {
			MapTypeInfo mapInfo = (MapTypeInfo) type;
			// Key inspector first, then value inspector.
			ObjectInspector keyInspector = getObjectInspector(mapInfo.getMapKeyTypeInfo());
			ObjectInspector valueInspector = getObjectInspector(mapInfo.getMapValueTypeInfo());
			return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
		}
		case STRUCT: {
			StructTypeInfo structInfo = (StructTypeInfo) type;
			List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>();
			for (TypeInfo memberType : structInfo.getAllStructFieldTypeInfos()) {
				memberInspectors.add(getObjectInspector(memberType));
			}
			return ObjectInspectorFactory.getStandardStructObjectInspector(
					structInfo.getAllStructFieldNames(), memberInspectors);
		}
		default:
			throw new CatalogException("Unsupported Hive type category " + type.getCategory());
	}
}
 
Example 27
Source Project: incubator-hivemall   Source File: ChangeFinder2DTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Streams the one-dimensional series in {@code cf1d.csv.gz} through {@code ChangeFinder2D}
 * one value at a time and checks that the number of outlier scores and change-point scores
 * above 10 falls into the expected ranges.
 */
@Test
public void testCf1d() throws IOException, HiveException {
    Parameters params = new Parameters();
    params.set(LossFunction.logloss);
    PrimitiveObjectInspector oi = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    ListObjectInspector listOI = ObjectInspectorFactory.getStandardListObjectInspector(oi);
    ChangeFinder2D cf = new ChangeFinder2D(params, listOI);
    // outScores[0] is counted as the outlier score, outScores[1] as the change-point score.
    double[] outScores = new double[2];
    List<Double> x = new ArrayList<Double>(1);

    int numOutliers = 0, numChangepoints = 0;
    BufferedReader reader = readFile("cf1d.csv.gz");
    try {
        println("x outlier change");
        String line;
        int i = 1;
        while ((line = reader.readLine()) != null) {
            double d = Double.parseDouble(line);
            x.add(Double.valueOf(d));

            cf.update(x, outScores);
            printf("%d %f %f %f%n", i, d, outScores[0], outScores[1]);
            if (outScores[0] > 10.d) {
                numOutliers++;
            }
            if (outScores[1] > 10.d) {
                numChangepoints++;
            }
            // The single-element buffer is reused for the next observation.
            x.clear();
            i++;
        }
    } finally {
        // Release the file handle even when parsing or an update fails.
        reader.close();
    }
    Assert.assertTrue("#outliers SHOULD be greater than 10: " + numOutliers, numOutliers > 10);
    Assert.assertTrue("#outliers SHOULD be less than 20: " + numOutliers, numOutliers < 20);
    Assert.assertTrue("#changepoints SHOULD be greater than 0: " + numChangepoints,
        numChangepoints > 0);
    Assert.assertTrue("#changepoints SHOULD be less than 5: " + numChangepoints,
        numChangepoints < 5);
}
 
Example 28
Source Project: incubator-hivemall   Source File: ArrayUnionUDFTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Unions the writable double lists {@code [0,1,2]}, {@code [2,3,4]} and {@code [4,5]} and
 * expects the six values {@code [0,1,2,3,4,5]}.
 */
@Test
public void testUnion3() throws HiveException, IOException {
    final ArrayUnionUDF udf = new ArrayUnionUDF();

    // Three arguments, each a list of writable doubles.
    final ObjectInspector[] argOIs = new ObjectInspector[3];
    for (int i = 0; i < argOIs.length; i++) {
        argOIs[i] = ObjectInspectorFactory.getStandardListObjectInspector(
            PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    }
    udf.initialize(argOIs);

    final double[][] inputs = {{0, 1, 2}, {2, 3, 4}, {4, 5}};
    final DeferredObject[] args = new DeferredObject[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        args[i] = new GenericUDF.DeferredJavaObject(WritableUtils.toWritableList(inputs[i]));
    }

    final List<Object> union = udf.evaluate(args);

    Assert.assertEquals(6, union.size());
    Assert.assertEquals(WritableUtils.toWritableList(new double[] {0, 1, 2, 3, 4, 5}), union);

    udf.close();
}
 
Example 29
Source Project: incubator-hivemall   Source File: ToBitsUDF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Accepts a single list argument with int-compatible elements and allocates the bit set
 * used by this instance.
 *
 * @param argOIs inspectors of the call arguments; exactly one is accepted
 * @return a standard list inspector whose elements use the writable long inspector
 * @throws UDFArgumentException if the argument count is not 1 or a {@code HiveUtils}
 *         conversion rejects the argument
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int argc = argOIs.length;
    if (argc != 1) {
        throw new UDFArgumentLengthException(
            "Expected 1 argument for _FUNC_(int[] indexes) but got " + argc
                    + " arguments");
    }

    this.listOI = HiveUtils.asListOI(argOIs[0]);
    final ObjectInspector rawElemOI = listOI.getListElementObjectInspector();
    this.listElemOI = HiveUtils.asIntCompatibleOI(rawElemOI);
    this.bitset = new BitSet();

    final ObjectInspector longElemOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector;
    return ObjectInspectorFactory.getStandardListObjectInspector(longElemOI);
}
 
Example 30
/**
 * Stores the argument inspectors and declares the output row: a struct with the two string
 * fields {@code name} and {@code surname}.
 *
 * @param objectInspectors inspectors of the call arguments, kept as-is in {@code argumentOIs}
 * @return struct inspector describing the two-field output row
 */
@Override
public StructObjectInspector initialize(ObjectInspector[] objectInspectors)
    throws UDFArgumentException {
    argumentOIs = objectInspectors;

    // Both output columns are plain Java strings.
    final String[] columns = {"name", "surname"};
    final List<String> names = new ArrayList<String>(2);
    final List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(2);
    for (String column : columns) {
        names.add(column);
    }
    for (int i = 0; i < columns.length; i++) {
        inspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}