org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CollectAllUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Override
public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
    super.init(m, parameters);
    if (m == Mode.PARTIAL1) {
        inputOI = parameters[0];
        return ObjectInspectorFactory.getStandardListObjectInspector(
            ObjectInspectorUtils.getStandardObjectInspector(inputOI));
    } else {
        if (!(parameters[0] instanceof StandardListObjectInspector)) {
            inputOI = ObjectInspectorUtils.getStandardObjectInspector(parameters[0]);
            return (StandardListObjectInspector) ObjectInspectorFactory.getStandardListObjectInspector(
                inputOI);
        } else {
            internalMergeOI = (StandardListObjectInspector) parameters[0];
            inputOI = internalMergeOI.getListElementObjectInspector();
            loi = (StandardListObjectInspector) ObjectInspectorUtils.getStandardObjectInspector(
                internalMergeOI);
            return loi;
        }
    }
}
 
Example #2
Source File: OnehotEncodingUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Nonnull
private static StructObjectInspector internalMergeOutputOI(
        @CheckForNull PrimitiveObjectInspector[] inputOIs) throws UDFArgumentException {
    Preconditions.checkNotNull(inputOIs);

    final int numOIs = inputOIs.length;
    final List<String> fieldNames = new ArrayList<String>(numOIs);
    final List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numOIs);
    for (int i = 0; i < numOIs; i++) {
        fieldNames.add("f" + String.valueOf(i));
        ObjectInspector elemOI = ObjectInspectorUtils.getStandardObjectInspector(
            inputOIs[i], ObjectInspectorCopyOption.WRITABLE);
        ListObjectInspector listOI =
                ObjectInspectorFactory.getStandardListObjectInspector(elemOI);
        fieldOIs.add(listOI);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #3
Source File: OnehotEncodingUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Nonnull
private static StructObjectInspector terminalOutputOI(
        @CheckForNull PrimitiveObjectInspector[] inputOIs) {
    Preconditions.checkNotNull(inputOIs);
    Preconditions.checkArgument(inputOIs.length >= 1, inputOIs.length);

    final List<String> fieldNames = new ArrayList<>(inputOIs.length);
    final List<ObjectInspector> fieldOIs = new ArrayList<>(inputOIs.length);
    for (int i = 0; i < inputOIs.length; i++) {
        fieldNames.add("f" + String.valueOf(i + 1));
        ObjectInspector keyOI = ObjectInspectorUtils.getStandardObjectInspector(inputOIs[i],
            ObjectInspectorCopyOption.WRITABLE);
        MapObjectInspector mapOI = ObjectInspectorFactory.getStandardMapObjectInspector(
            keyOI, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        fieldOIs.add(mapOI);
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #4
Source File: FalloutTest.java    From hive-funnel-udf with Apache License 2.0 6 votes vote down vote up
@Test
public void testConvertToFalloutWithZeros() throws HiveException {
    Fallout udf = new Fallout();

    ObjectInspector[] inputOiList = new ObjectInspector[]{
            ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector)
    };

    udf.initialize(inputOiList);

    List<Long> inputList = Arrays.asList(10L, 5L, 0L, 0L, 0L);

    DeferredObject obj1 = mock(DeferredObject.class);
    DeferredObject[] objs = new DeferredObject[] { obj1 };
    when(obj1.get()).thenReturn(inputList);

    Assert.assertEquals(Arrays.asList(0.0, 0.5, 0.0, 0.0, 0.0), udf.evaluate(objs));
}
 
Example #5
Source File: HiveUtils.java    From elasticsearch-hadoop with Apache License 2.0 6 votes vote down vote up
static StandardStructObjectInspector structObjectInspector(Properties tableProperties) {
    // extract column info - don't use Hive constants as they were renamed in 0.9 breaking compatibility
    // the column names are saved as the given inspector to #serialize doesn't preserves them (maybe because it's an external table)
    // use the class since StructType requires it ...
    List<String> columnNames = StringUtils.tokenize(tableProperties.getProperty(HiveConstants.COLUMNS), ",");
    List<TypeInfo> colTypes = TypeInfoUtils.getTypeInfosFromTypeString(tableProperties.getProperty(HiveConstants.COLUMNS_TYPES));

    // create a standard writable Object Inspector - used later on by serialization/deserialization
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>();

    for (TypeInfo typeInfo : colTypes) {
        inspectors.add(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo));
    }

    return ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
 
Example #6
Source File: ArgrankUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 1) {
        throw new UDFArgumentLengthException(
            "argrank(array<ANY> a) takes exactly 1 argument: " + argOIs.length);
    }
    ObjectInspector argOI0 = argOIs[0];
    if (argOI0.getCategory() != Category.LIST) {
        throw new UDFArgumentException(
            "argrank(array<ANY> a) expects array<ANY> for the first argument: "
                    + argOI0.getTypeName());
    }

    this.listOI = HiveUtils.asListOI(argOI0);
    this.elemOI = listOI.getListElementObjectInspector();

    return ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableIntObjectInspector);
}
 
Example #7
Source File: ArraySliceUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2 && argOIs.length != 3) {
        throw new UDFArgumentLengthException(
            "Expected 2 or 3 arguments, but got " + argOIs.length);
    }

    this.valuesOI = HiveUtils.asListOI(argOIs[0]);
    this.offsetOI = HiveUtils.asIntegerOI(argOIs[1]);
    if (argOIs.length == 3) {
        this.lengthOI = HiveUtils.asIntegerOI(argOIs[2]);
    }

    ObjectInspector elemOI = valuesOI.getListElementObjectInspector();
    return ObjectInspectorFactory.getStandardListObjectInspector(elemOI);
}
 
Example #8
Source File: UDFMapConcatTest.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
@Test
public void testMapConcat() throws Exception {
    UDFMapConcat udf = new UDFMapConcat();
    ObjectInspector leftMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector rightMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector[] arguments = {leftMapOI, rightMapOI};
    udf.initialize(arguments);

    LinkedHashMap<String, String> leftMap = Maps.newLinkedHashMap();
    leftMap.putAll(ImmutableMap.<String, String>of("key1", "11", "key2", "12", "key3", "13"));
    LinkedHashMap<String, String> rightMap = Maps.newLinkedHashMap();
    rightMap.putAll(ImmutableMap.<String, String>of("key3", "21", "key4", "22", "key5", "23"));

    DeferredObject leftMapObj = new DeferredJavaObject(leftMap);
    DeferredObject rightMapObj = new DeferredJavaObject(rightMap);
    DeferredObject[] args = {leftMapObj, rightMapObj};
    LinkedHashMap<String, String> output = (LinkedHashMap<String, String>) udf.evaluate(args);
    LinkedHashMap<String, String> expect = Maps.newLinkedHashMap();
    expect.putAll(ImmutableMap.<String, String>of("key1", "11", "key2", "12", "key3", "21", "key4", "22", "key5", "23"));

    Assert.assertEquals("map_concat() test", true, MapUtils.mapEquals(output, expect));
}
 
Example #9
Source File: ConditionalEmitUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Override
public StructObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 2) {
        throw new UDFArgumentException(
            "conditional_emit takes 2 arguments: array<boolean>, array<primitive>");
    }

    this.conditionsOI = HiveUtils.asListOI(argOIs[0]);
    this.condElemOI = HiveUtils.asBooleanOI(conditionsOI.getListElementObjectInspector());

    this.featuresOI = HiveUtils.asListOI(argOIs[1]);
    this.featureElemOI =
            HiveUtils.asPrimitiveObjectInspector(featuresOI.getListElementObjectInspector());

    List<String> fieldNames = Arrays.asList("feature");
    List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(featureElemOI);

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #10
Source File: CobolSerDe.java    From Cobol-to-Hive with Apache License 2.0 6 votes vote down vote up
@Override
public void initialize(final Configuration conf, final Properties tbl)
		throws SerDeException {

	// final int fixedRecordlLength =
	// Integer.parseInt(tbl.getProperty("fb.length"));
	// conf.setInt(FixedLengthInputFormat.FIXED_RECORD_LENGTH,
	// fixedRecordlLength);
	try {
		this.ccb = new CobolCopybook(
				CobolSerdeUtils.determineLayoutOrThrowException(conf, tbl));
	} catch (IOException e) {
		e.printStackTrace();
		return;
	}

	numCols = ccb.getFieldNames().size();
	this.inspector = ObjectInspectorFactory
			.getStandardStructObjectInspector(ccb.getFieldNames(),
					ccb.getFieldOIs());
	this.columnTypes = ccb.getFieldTypeInfos();
	this.columnProperties = ccb.getFieldProperties();

}
 
Example #11
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Test
public void testNoOptions() throws Exception {
    List<String> x = Arrays.asList("1:-2", "2:-1");
    int y = 0;

    GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();
    ObjectInspector intOI = PrimitiveObjectInspectorFactory.javaIntObjectInspector;
    ObjectInspector stringOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ListObjectInspector stringListOI =
            ObjectInspectorFactory.getStandardListObjectInspector(stringOI);

    udtf.initialize(new ObjectInspector[] {stringListOI, intOI});

    udtf.process(new Object[] {x, y});

    udtf.finalizeTraining();

    float score = udtf.predict(udtf.parseFeatures(x));
    int predicted = score > 0.f ? 1 : 0;
    Assert.assertTrue(y == predicted);
}
 
Example #12
Source File: MapTailNUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException(
            "map_tail_n only takes 2 arguments: map<object, object>, int");
    }
    if (!(arguments[0] instanceof MapObjectInspector)) {
        throw new UDFArgumentException("The first argument must be a map");
    }
    this.mapObjectInspector = (MapObjectInspector) arguments[0];
    if (!(arguments[1] instanceof IntObjectInspector)) {
        throw new UDFArgumentException("The second argument must be an int");
    }
    this.intObjectInspector = (IntObjectInspector) arguments[1];

    ObjectInspector keyOI = ObjectInspectorUtils.getStandardObjectInspector(
        mapObjectInspector.getMapKeyObjectInspector());
    ObjectInspector valueOI = mapObjectInspector.getMapValueObjectInspector();

    return ObjectInspectorFactory.getStandardMapObjectInspector(keyOI, valueOI);
}
 
Example #13
Source File: UDFStringSplitToMultimap.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Check if two arguments were passed
    if (arguments.length != ARG_COUNT) {
        throw new UDFArgumentLengthException(
                "The function split_to_multimap(string, string, string) takes exactly " + ARG_COUNT + " arguments.");
    }

    // Check if two argument is of string
    for (int i = 0; i < 3; i++) {
        if (!ObjectInspectorUtils.compareTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector, arguments[i])) {
            throw new UDFArgumentTypeException(i,
                    "\"" + PrimitiveObjectInspectorFactory.javaStringObjectInspector.getTypeName() + "\" "
                            + "expected at function split_to_multimap, but "
                            + "\"" + arguments[i].getTypeName() + "\" "
                            + "is found");
        }
    }

    ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector mapValueOI = ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);

    return ObjectInspectorFactory.getStandardMapObjectInspector(mapKeyOI, mapValueOI);
}
 
Example #14
Source File: TestFileDump.java    From hive-dwrf with Apache License 2.0 6 votes vote down vote up
@Test
public void testEntropyThreshold() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector
        (MyRecord.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, 1);
  OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 11);
  // Make sure having too few distinct values won't turn off dictionary encoding
  OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_STRING_KEY_SIZE_THRESHOLD, 1);

  ReaderWriterProfiler.setProfilerOptions(conf);
  Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
      100000, CompressionKind.SNAPPY, 10000, 10000, new MemoryManager(conf));
  Random r1 = new Random(1);
  for(int i=0; i < 21000; ++i) {
    writer.addRow(new MyRecord(r1.nextInt(), r1.nextLong(),
        Integer.toString(r1.nextInt())));
  }
  writer.close();
  checkOutput("orc-file-dump-entropy-threshold.out");
}
 
Example #15
Source File: HdfsHelper.java    From DataLink with Apache License 2.0 6 votes vote down vote up
OrcWriterProxy(Configuration config, String fileName) throws IOException{
	// initial columns
       columns = config.getListConfiguration(Key.COLUMN);

       // initial inspector
       List<String> columnNames = getColumnNames(columns);
       List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
       inspector = (StructObjectInspector)ObjectInspectorFactory
               .getStandardStructObjectInspector(columnNames, columnTypeInspectors);

       // initial writer
       String compress = config.getString(Key.COMPRESS, null);
       FileOutputFormat outFormat = new OrcOutputFormat();
       if(!"NONE".equalsIgnoreCase(compress) && null != compress ) {
           Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
           if (null != codecClass) {
               outFormat.setOutputCompressorClass(conf, codecClass);
           }
       }
       writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);

       //initial orcSerde
       orcSerde = new OrcSerde();
}
 
Example #16
Source File: XGBoostBatchPredictUDTF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/** Return (string rowid, array<double> predicted) as a result */
@Nonnull
protected StructObjectInspector getReturnOI(@Nonnull PrimitiveObjectInspector rowIdOI) {
    List<String> fieldNames = new ArrayList<>(2);
    List<ObjectInspector> fieldOIs = new ArrayList<>(2);
    fieldNames.add("rowid");
    fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
        rowIdOI.getPrimitiveCategory()));
    fieldNames.add("predicted");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableFloatObjectInspector));
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #17
Source File: HiveWriterFactory.java    From flink with Apache License 2.0 5 votes vote down vote up
private void checkInitialize() throws Exception {
	if (initialized) {
		return;
	}

	JobConf jobConf = confWrapper.conf();
	Object serdeLib = Class.forName(serDeInfo.getSerializationLib()).newInstance();
	Preconditions.checkArgument(serdeLib instanceof Serializer && serdeLib instanceof Deserializer,
			"Expect a SerDe lib implementing both Serializer and Deserializer, but actually got "
					+ serdeLib.getClass().getName());
	this.recordSerDe = (Serializer) serdeLib;
	ReflectionUtils.setConf(recordSerDe, jobConf);

	// TODO: support partition properties, for now assume they're same as table properties
	SerDeUtils.initializeSerDe((Deserializer) recordSerDe, jobConf, tableProperties, null);

	this.formatFields = allColumns.length - partitionColumns.length;
	this.hiveConversions = new HiveObjectConversion[formatFields];
	this.converters = new DataFormatConverter[formatFields];
	List<ObjectInspector> objectInspectors = new ArrayList<>(hiveConversions.length);
	for (int i = 0; i < formatFields; i++) {
		DataType type = allTypes[i];
		ObjectInspector objectInspector = HiveInspectors.getObjectInspector(type);
		objectInspectors.add(objectInspector);
		hiveConversions[i] = HiveInspectors.getConversion(
				objectInspector, type.getLogicalType(), hiveShim);
		converters[i] = DataFormatConverters.getConverterForDataType(type);
	}

	this.formatInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
			Arrays.asList(allColumns).subList(0, formatFields),
			objectInspectors);
	this.initialized = true;
}
 
Example #18
Source File: GetFrequentItemsFromStringsSketchUDTF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@SuppressWarnings("deprecation")
@Override
public StructObjectInspector initialize(final ObjectInspector[] inspectors) throws UDFArgumentException {
  if (inspectors.length != 1 && inspectors.length != 2) {
    throw new UDFArgumentException("One or two arguments expected");
  }

  if (inspectors[0].getCategory() != ObjectInspector.Category.PRIMITIVE) {
    throw new UDFArgumentTypeException(0, "Primitive argument expected, but "
        + inspectors[0].getCategory().name() + " was recieved");
  }
  inputObjectInspector = (PrimitiveObjectInspector) inspectors[0];
  if (inputObjectInspector.getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.BINARY) {
    throw new UDFArgumentTypeException(0, "Binary value expected as the first argument, but "
        + inputObjectInspector.getPrimitiveCategory().name() + " was recieved");
  }

  if (inspectors.length > 1) {
    if (inspectors[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
      throw new UDFArgumentTypeException(1, "Primitive argument expected, but "
          + inspectors[1].getCategory().name() + " was recieved");
    }
    errorTypeObjectInspector = (PrimitiveObjectInspector) inspectors[1];
    if (errorTypeObjectInspector.getPrimitiveCategory()
        != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
      throw new UDFArgumentTypeException(1, "String value expected as the first argument, but "
          + errorTypeObjectInspector.getPrimitiveCategory().name() + " was recieved");
    }
  }

  return ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("item", "estimate", "lower_bound", "upper_bound"),
    Arrays.asList(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector,
        PrimitiveObjectInspectorFactory.javaLongObjectInspector,
        PrimitiveObjectInspectorFactory.javaLongObjectInspector,
        PrimitiveObjectInspectorFactory.javaLongObjectInspector
    )
  );
}
 
Example #19
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Test
public void testSerialization() throws HiveException {
    TestUtils.testGenericUDTFSerialization(GeneralClassifierUDTF.class,
        new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector),
                PrimitiveObjectInspectorFactory.javaIntObjectInspector},
        new Object[][] {{Arrays.asList("1:-2", "2:-1"), 0}});
}
 
Example #20
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Receives the passed in argument object inspectors and returns the desired
 * return type's object inspector to inform hive of return type of UDAF.
 *
 * @param mode
 *          Mode (i.e. PARTIAL 1, COMPLETE...) for determining input and output
 *          object inspector type.
 * @param parameters
 *          List of object inspectors for input arguments.
 * @return The object inspector type indicates the UDAF return type (i.e.
 *         returned type of terminate(...)).
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);

  if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
    inputInspector_ = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // mode = partial2 || final
    intermediateInspector_ = (StandardStructObjectInspector) parameters[0];
  }

  if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
    // intermediate results need to include the lgK and the target HLL type
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(LG_K_FIELD, HLL_TYPE_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.STRING),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
 
Example #21
Source File: StoptagsExcludeUDF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length != 1 && argOIs.length != 2) {
        throw new UDFArgumentException(
            "stoptags_exclude(array<string> tags, [, const string lang='ja']) takes one or two arguments: "
                    + argOIs.length);
    }

    if (!HiveUtils.isStringListOI(argOIs[0])) {
        throw new UDFArgumentException(
            "stoptags_exclude(array<string> tags, [, const string lang='ja']) expects array<string> for the first argument : "
                    + argOIs[0].getTypeName());
    }
    this.tagsOI = HiveUtils.asListOI(argOIs[0]);

    if (argOIs.length == 2) {
        if (!HiveUtils.isConstString(argOIs[1])) {
            throw new UDFArgumentException(
                "stoptags_exclude(array<string> tags, [, const string lang='ja']) expects const string for the second argument: "
                        + argOIs[1].getTypeName());
        }
        String lang = HiveUtils.getConstString(argOIs[1]);
        if (!"ja".equalsIgnoreCase(lang)) {
            throw new UDFArgumentException("Unsupported lang: " + lang);
        }
    }
    this.stopTags = STOPTAGS_JA;

    if (ObjectInspectorUtils.isConstantObjectInspector(tagsOI)) {
        String[] excludeTags = HiveUtils.getConstStringArray(tagsOI);
        this.result = getStoptags(stopTags, excludeTags);
    }

    return ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
}
 
Example #22
Source File: DataToSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);
  mode_ = mode;
  if ((mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE)) {
    // input is original data
    inputInspector_ = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      seedInspector_ = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // input for PARTIAL2 and FINAL is the output from PARTIAL1
    intermediateInspector_ = (StructObjectInspector) parameters[0];
  }

  if ((mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2)) {
    // intermediate results need to include the lgK and the target HLL type
    return ObjectInspectorFactory.getStandardStructObjectInspector(
      Arrays.asList(LG_K_FIELD, SEED_FIELD, SKETCH_FIELD),
      Arrays.asList(
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
        PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
      )
    );
  }
  // final results include just the sketch
  return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
}
 
Example #23
Source File: UDFMapBuild.java    From hive-third-functions with Apache License 2.0 5 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Check if two arguments were passed
    if (arguments.length != ARG_COUNT) {
        throw new UDFArgumentLengthException(
                "The function map_build(array, array) takes exactly " + ARG_COUNT + " arguments.");
    }

    // Check if two argument is of category LIST
    for (int i = 0; i < 2; i++) {
        if (!arguments[i].getCategory().equals(ObjectInspector.Category.LIST)) {
            throw new UDFArgumentTypeException(i,
                    "\"" + org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME + "\" "
                            + "expected at function map_build, but "
                            + "\"" + arguments[i].getTypeName() + "\" "
                            + "is found");
        }
    }

    keyArrayOI = (ListObjectInspector) arguments[0];
    valueArrayOI = (ListObjectInspector) arguments[1];

    ObjectInspector mapKeyOI = keyArrayOI.getListElementObjectInspector();
    ObjectInspector mapValueOI = valueArrayOI.getListElementObjectInspector();

    return ObjectInspectorFactory.getStandardMapObjectInspector(mapKeyOI, mapValueOI);
}
 
Example #24
Source File: RandomForestClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Test
public void testSerialization() throws HiveException, IOException, ParseException {
    URL url = new URL(
        "https://gist.githubusercontent.com/myui/143fa9d05bd6e7db0114/raw/500f178316b802f1cade6e3bf8dc814a96e84b1e/iris.arff");
    InputStream is = new BufferedInputStream(url.openStream());

    ArffParser arffParser = new ArffParser();
    arffParser.setResponseIndex(4);

    AttributeDataset iris = arffParser.parse(is);
    int size = iris.size();
    double[][] x = iris.toArray(new double[size][]);
    int[] y = iris.toArray(new int[size]);

    final Object[][] rows = new Object[size][2];
    for (int i = 0; i < size; i++) {
        double[] row = x[i];
        final List<String> xi = new ArrayList<String>(x[0].length);
        for (int j = 0; j < row.length; j++) {
            xi.add(j + ":" + row[j]);
        }
        rows[i][0] = xi;
        rows[i][1] = y[i];
    }

    TestUtils.testGenericUDTFSerialization(RandomForestClassifierUDTF.class,
        new ObjectInspector[] {
                ObjectInspectorFactory.getStandardListObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector),
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                ObjectInspectorUtils.getConstantObjectInspector(
                    PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-trees 49")},
        rows);
}
 
Example #25
Source File: BprSamplingUDTF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Override
public StructObjectInspector initialize(@Nonnull ObjectInspector[] argOIs)
        throws UDFArgumentException {
    if (argOIs.length != 2 && argOIs.length != 3) {
        throw new UDFArgumentException(
            "_FUNC_(int userid, array<int> itemid, [, const string options])"
                    + " takes at least two arguments");
    }
    this.userOI = HiveUtils.asIntegerOI(argOIs[0]);
    this.itemListOI = HiveUtils.asListOI(argOIs[1]);
    this.itemElemOI = HiveUtils.asIntegerOI(itemListOI.getListElementObjectInspector());

    processOptions(argOIs);

    this.userId = new IntWritable();
    this.posItemId = new IntWritable();
    this.negItemId = new IntWritable();
    this.forwardObjs = new Object[] {userId, posItemId, negItemId};

    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    fieldNames.add("user");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    fieldNames.add("pos_item");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);
    fieldNames.add("neg_item");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableIntObjectInspector);

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #26
Source File: FMPredictGenericUDAF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
private static StructObjectInspector internalMergeOI() {
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();

    fieldNames.add("ret");
    fieldOIs.add(PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
    fieldNames.add("sumVjXj");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));
    fieldNames.add("sumV2X2");
    fieldOIs.add(ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector));

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #27
Source File: UDFMathCosineSimilarityTest.java    From hive-third-functions with Apache License 2.0 5 votes vote down vote up
public Double getResult(Map<String, Double> leftMap, Map<String, Double> rightMap) throws HiveException {
    UDFMathCosineSimilarity udf = new UDFMathCosineSimilarity();

    ObjectInspector leftMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    ObjectInspector rightMapOI = ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    ObjectInspector[] arguments = {leftMapOI, rightMapOI};
    udf.initialize(arguments);

    GenericUDF.DeferredObject leftMapObj = new GenericUDF.DeferredJavaObject(leftMap);
    GenericUDF.DeferredObject rightMapObj = new GenericUDF.DeferredJavaObject(rightMap);
    GenericUDF.DeferredObject[] args = {leftMapObj, rightMapObj};
    DoubleWritable output = (DoubleWritable) udf.evaluate(args);
    return output == null ? null : output.get();
}
 
Example #28
Source File: UDFRe2JRegexpExtractAll.java    From hive-third-functions with Apache License 2.0 5 votes vote down vote up
@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Check if two arguments were passed
    if (arguments.length != 2 && arguments.length != 3) {
        throw new UDFArgumentLengthException(
                "The function regexp_extract_all takes exactly 2 or 3 arguments.");
    }

    for (int i = 0; i < 2; i++) {
        if (!ObjectInspectorUtils.compareTypes(PrimitiveObjectInspectorFactory.javaStringObjectInspector, arguments[i])) {
            throw new UDFArgumentTypeException(i,
                    "\"" + PrimitiveObjectInspectorFactory.javaStringObjectInspector.getTypeName() + "\" "
                            + "expected at function regexp_extract_all, but "
                            + "\"" + arguments[i].getTypeName() + "\" "
                            + "is found");
        }
    }

    if (arguments.length == 3) {
        if (!ObjectInspectorUtils.compareTypes(PrimitiveObjectInspectorFactory.javaLongObjectInspector, arguments[2])) {
            throw new UDFArgumentTypeException(2,
                    "\"" + PrimitiveObjectInspectorFactory.javaLongObjectInspector.getTypeName() + "\" "
                            + "expected at function regexp_extract_all, but "
                            + "\"" + arguments[2].getTypeName() + "\" "
                            + "is found");
        }
    }

    ObjectInspector expectOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;

    return ObjectInspectorFactory.getStandardListObjectInspector(expectOI);
}
 
Example #29
Source File: AUCUDAF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
private static StructObjectInspector internalMergeOI() {
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();

    fieldNames.add("sum");
    fieldOIs.add(writableDoubleObjectInspector);
    fieldNames.add("count");
    fieldOIs.add(writableLongObjectInspector);

    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
 
Example #30
Source File: LastElementUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
@Test
public void test() throws IOException, HiveException {
    LastElementUDF udf = new LastElementUDF();

    udf.initialize(new ObjectInspector[] {ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableDoubleObjectInspector)});

    DeferredObject[] args = new DeferredObject[] {new GenericUDF.DeferredJavaObject(
        WritableUtils.toWritableList(new double[] {0, 1, 2}))};

    Assert.assertEquals(WritableUtils.val(2.d), udf.evaluate(args));

    udf.close();
}