org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector Java Examples

The following examples show how to use org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: GenerateSeriesUDTFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds the UDTF the row {@code (5, 1, -2L)} and expects it to forward the
 * values 5, 3 and 1, in that order, as the first column of each emitted row.
 */
@Test
public void testNegativeStepInt() throws HiveException {
    final GenerateSeriesUDTF series = new GenerateSeriesUDTF();

    final ObjectInspector[] argOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaIntObjectInspector,
            PrimitiveObjectInspectorFactory.writableIntObjectInspector,
            PrimitiveObjectInspectorFactory.javaLongObjectInspector};
    series.initialize(argOIs);

    final List<IntWritable> collected = new ArrayList<>();

    series.setCollector(new Collector() {
        @Override
        public void collect(Object args) throws HiveException {
            final IntWritable first = (IntWritable) ((Object[]) args)[0];
            // Store a copy, in case the UDTF reuses the forwarded writable.
            collected.add(new IntWritable(first.get()));
        }
    });

    series.process(new Object[] {5, new IntWritable(1), -2L});

    final List<IntWritable> expected =
            Arrays.asList(new IntWritable(5), new IntWritable(3), new IntWritable(1));
    Assert.assertEquals(expected, collected);
}
 
Example #2
Source File: MulticlassOnlineClassifierUDTF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Converts a list of raw features into a {@link FeatureValue} array of the same length.
 *
 * <p>When {@code parseFeature} is set each element goes through
 * {@code FeatureValue.parse}; otherwise the element is copied to a standard object
 * and wrapped with the value {@code 1.f}. A {@code null} element leaves a
 * {@code null} slot at the same index.
 *
 * @param features the raw features
 * @return the converted features, or {@code null} when {@code features} is empty
 */
@Nullable
protected final FeatureValue[] parseFeatures(@Nonnull final List<?> features) {
    final int numFeatures = features.size();
    if (numFeatures == 0) {
        return null;
    }

    final ObjectInspector elemOI = featureListOI.getListElementObjectInspector();
    final FeatureValue[] parsed = new FeatureValue[numFeatures];
    for (int i = 0; i < numFeatures; i++) {
        final Object raw = features.get(i);
        if (raw != null) {
            if (parseFeature) {
                parsed[i] = FeatureValue.parse(raw);
            } else {
                final Object key = ObjectInspectorUtils.copyToStandardObject(raw, elemOI);
                parsed[i] = new FeatureValue(key, 1.f);
            }
        }
    }
    return parsed;
}
 
Example #3
Source File: BitsCollectUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Captures the input inspector(s) for the given mode and returns the output inspector.
 *
 * @param mode the aggregation mode
 * @param argOIs the argument inspectors; exactly one is expected
 * @return a standard list inspector over writable longs, for every mode
 * @throws HiveException if the superclass initialization or an inspector conversion fails
 */
@Override
public ObjectInspector init(Mode mode, ObjectInspector[] argOIs) throws HiveException {
    assert (argOIs.length == 1);
    super.init(mode, argOIs);

    // initialize input
    if (mode == Mode.PARTIAL1 || mode == Mode.COMPLETE) {// from original data
        this.inputOI = HiveUtils.asLongCompatibleOI(argOIs[0]);
    } else {// from partial aggregation
        this.mergeOI = HiveUtils.asListOI(argOIs[0]);
        this.mergeListElemOI = HiveUtils.asPrimitiveObjectInspector(
            mergeOI.getListElementObjectInspector());
    }

    // initialize output: terminatePartial and terminate share the same output type,
    // so no branching on the mode is needed here.
    return ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.writableLongObjectInspector);
}
 
Example #4
Source File: VectorizeFeaturesUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the UDF with the constant feature names {@code ["a", "b"]} plus a
 * double and a string argument, then expects {@code ["a:0.1", "b:1.1"]}.
 */
@Test
public void testTwoArguments() throws HiveException, IOException {
    VectorizeFeaturesUDF udf = new VectorizeFeaturesUDF();
    ObjectInspector[] argOIs = new ObjectInspector[3];
    List<String> featureNames = Arrays.asList("a", "b");
    argOIs[0] = ObjectInspectorFactory.getStandardConstantListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames);
    argOIs[1] = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    argOIs[2] = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    udf.initialize(argOIs);

    // arguments[0] is left null on purpose: the feature names come from the
    // constant inspector passed at initialization.
    DeferredObject[] arguments = new DeferredObject[3];
    // Double.valueOf replaces the deprecated Double(double) constructor.
    arguments[1] = new DeferredJavaObject(Double.valueOf(0.1));
    arguments[2] = new DeferredJavaObject("1.1");

    List<Text> actuals = udf.evaluate(arguments);
    List<Text> expected = WritableUtils.val("a:0.1", "b:1.1");
    Assert.assertEquals(expected, actuals);

    udf.close();
}
 
Example #5
Source File: VectorizeFeaturesUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Initializes the UDF with the single constant feature name {@code "a"} plus a
 * double argument, then expects {@code ["a:0.1"]}.
 */
@Test
public void testOneArgument() throws HiveException, IOException {
    VectorizeFeaturesUDF udf = new VectorizeFeaturesUDF();
    ObjectInspector[] argOIs = new ObjectInspector[2];
    List<String> featureNames = Arrays.asList("a");
    argOIs[0] = ObjectInspectorFactory.getStandardConstantListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, featureNames);
    argOIs[1] = PrimitiveObjectInspectorFactory.javaDoubleObjectInspector;
    udf.initialize(argOIs);

    // arguments[0] is left null on purpose: the feature names come from the
    // constant inspector passed at initialization.
    DeferredObject[] arguments = new DeferredObject[2];
    // Double.valueOf replaces the deprecated Double(double) constructor.
    arguments[1] = new DeferredJavaObject(Double.valueOf(0.1));

    List<Text> actuals = udf.evaluate(arguments);
    List<Text> expected = WritableUtils.val(new String[] {"a:0.1"});
    Assert.assertEquals(expected, actuals);

    udf.close();
}
 
Example #6
Source File: StrContainsUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the arguments of {@code str_contains} and records their inspectors.
 *
 * @param argOIs two or three inspectors: the query, an {@code array<string>} of
 *        search terms, and an optional boolean flag
 * @return the Java boolean object inspector
 * @throws UDFArgumentException on a wrong argument count or a non string-list second argument
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final int numArgs = argOIs.length;
    if (numArgs < 2 || numArgs > 3) {
        throw new UDFArgumentLengthException("str_contains expects two or three arguments");
    }

    this.queryOI = HiveUtils.asStringOI(argOIs, 0);

    final ObjectInspector termsOI = argOIs[1];
    if (!HiveUtils.isStringListOI(termsOI)) {
        final String message = "Expected array<string> for the second argument but got "
                + termsOI.getTypeName();
        throw new UDFArgumentTypeException(1, message);
    }
    this.searchTermsOI = HiveUtils.asListOI(argOIs, 1);

    // The third argument is optional.
    if (numArgs == 3) {
        this.orQueryOI = HiveUtils.asBooleanOI(argOIs, 2);
    }

    return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
}
 
Example #7
Source File: IntersectSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the parameter inspectors and creates the evaluator.
 *
 * @param info parameter info; one or two parameters are accepted, the first must be
 *        BINARY and the optional second must pass the integral-parameter validation
 * @return a new {@code IntersectSketchUDAFEvaluator}
 * @throws SemanticException if the parameter count or types are not accepted
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info)
    throws SemanticException {
  final ObjectInspector[] paramOIs = info.getParameterObjectInspectors();
  final int numParams = paramOIs.length;

  if (numParams < 1) {
    throw new UDFArgumentException("Please specify at least 1 argument");
  }
  if (numParams > 2) {
    throw new UDFArgumentTypeException(numParams - 1,
        "Please specify no more than 2 arguments");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(paramOIs[0], 0,
      PrimitiveCategory.BINARY);
  // At this point numParams is either 1 or 2.
  if (numParams == 2) {
    ObjectInspectorValidator.validateIntegralParameter(paramOIs[1], 1);
  }
  return new IntersectSketchUDAFEvaluator();
}
 
Example #8
Source File: UnionDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * PARTIAL1 mode with an explicit k of 256: iterates over two single-value sketches
 * and checks k, retained items, min and max of the partial result.
 */
@Test
public void partia1ModelGivenK() throws Exception {
  final ObjectInspector[] argInspectors = { binaryInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionDoublesSketchUDAF().getEvaluator(paramInfo)) {
    final ObjectInspector outputInspector = evaluator.init(Mode.PARTIAL1, argInspectors);
    DataToDoublesSketchUDAFTest.checkResultInspector(outputInspector);

    final DoublesUnionState buffer = (DoublesUnionState) evaluator.getNewAggregationBuffer();

    final UpdateDoublesSketch first = DoublesSketch.builder().setK(256).build();
    first.update(1.0);
    final Object[] firstRow = { new BytesWritable(first.toByteArray()), new IntWritable(256) };
    evaluator.iterate(buffer, firstRow);

    final UpdateDoublesSketch second = DoublesSketch.builder().setK(256).build();
    second.update(2.0);
    final Object[] secondRow = { new BytesWritable(second.toByteArray()), new IntWritable(256) };
    evaluator.iterate(buffer, secondRow);

    final BytesWritable partial = (BytesWritable) evaluator.terminatePartial(buffer);
    final DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(partial));
    Assert.assertEquals(merged.getK(), 256);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #9
Source File: UDAFToOrderedListTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * With the options {@code "-k 2 -reverse"} and the inputs banana, apple, candy,
 * the evaluator is expected to return {@code [apple, banana]}.
 */
@Test
public void testReverseTopK() throws Exception {
    // = tail-k
    final ObjectInspector valueOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    final ObjectInspector optionsOI = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, "-k 2 -reverse");
    final ObjectInspector[] inputOIs = new ObjectInspector[] {valueOI, optionsOI};

    final String[] values = new String[] {"banana", "apple", "candy"};

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    for (final String value : values) {
        evaluator.iterate(agg, new Object[] {value});
    }

    @SuppressWarnings("unchecked")
    final List<Object> topK = (List<Object>) evaluator.terminate(agg);

    Assert.assertEquals(2, topK.size());
    Assert.assertEquals("apple", topK.get(0));
    Assert.assertEquals("banana", topK.get(1));
}
 
Example #10
Source File: HiveInspectors.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Get an array of ObjectInspector from the given array of args and their types.
 *
 * <p>A {@code null} argument yields the standard Java inspector for its type; a
 * non-null argument yields a primitive Java constant inspector carrying that value.
 *
 * @param args the argument values, {@code null} where no constant is available
 * @param argTypes the argument types, one per entry of {@code args}
 * @return one inspector per argument, in the same order
 */
public static ObjectInspector[] toInspectors(Object[] args, DataType[] argTypes) {
	assert args.length == argTypes.length;

	final int count = argTypes.length;
	final ObjectInspector[] inspectors = new ObjectInspector[count];

	for (int i = 0; i < count; i++) {
		final Object constant = args[i];

		if (constant != null) {
			inspectors[i] = HiveInspectors.getPrimitiveJavaConstantObjectInspector(
				(PrimitiveTypeInfo) HiveTypeUtil.toHiveTypeInfo(argTypes[i]),
				constant);
		} else {
			inspectors[i] = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(
				HiveTypeUtil.toHiveTypeInfo(argTypes[i]));
		}
	}

	return inspectors;
}
 
Example #11
Source File: GuessAttributesUDF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a comma-separated attribute string with one letter per argument:
 * {@code Q} for arguments that {@code HiveUtils.isNumberOI} accepts, {@code C} otherwise.
 *
 * @param argOIs the argument inspectors
 * @return a constant string inspector holding the attribute string
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    final StringBuilder attrs = new StringBuilder(128);
    for (int i = 0; i < argOIs.length; i++) {
        if (i > 0) {
            attrs.append(',');
        }
        // 'Q' = quantitative, 'C' = categorical
        attrs.append(HiveUtils.isNumberOI(argOIs[i]) ? 'Q' : 'C');
    }
    return ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, attrs.toString());
}
 
Example #12
Source File: UDFMapBuildTest.java    From hive-third-functions with Apache License 2.0 6 votes vote down vote up
/**
 * Passes three keys and three values to {@code UDFMapBuild} and compares the
 * resulting map with the expected key/value pairs.
 */
@Test
public void testMapBuild() throws Exception {
    final UDFMapBuild udf = new UDFMapBuild();
    final ObjectInspector keysOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    final ObjectInspector valuesOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    udf.initialize(new ObjectInspector[] {keysOI, valuesOI});

    final List<String> keys = ImmutableList.of("key1", "key2", "key3");
    final List<String> values = ImmutableList.of("value1", "value2", "value3");
    final DeferredObject[] args = {new DeferredJavaObject(keys), new DeferredJavaObject(values)};
    final LinkedHashMap<String, String> output =
        (LinkedHashMap<String, String>) udf.evaluate(args);

    final LinkedHashMap<String, String> expect = Maps.newLinkedHashMap();
    expect.putAll(ImmutableMap.<String, String>of(
        "key1", "value1", "key2", "value2", "key3", "value3"));

    Assert.assertEquals("map_build() test", true, MapUtils.mapEquals(output, expect));
}
 
Example #13
Source File: TreePredictUDFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes the tree, Base91-encodes it, and evaluates {@code TreePredictUDF}
 * on the feature vector {@code x}.
 *
 * @param tree the regression tree to serialize as the model
 * @param x the feature values
 * @return the double value produced by the UDF
 */
private static double evalPredict(RegressionTree tree, double[] x)
        throws HiveException, IOException {
    final Text model = new Text(Base91.encode(tree.serialize(true)));

    final TreePredictUDF udf = new TreePredictUDF();
    final ObjectInspector[] argOIs = new ObjectInspector[] {
            PrimitiveObjectInspectorFactory.javaStringObjectInspector,
            PrimitiveObjectInspectorFactory.writableStringObjectInspector,
            ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaDoubleObjectInspector),
            ObjectInspectorUtils.getConstantObjectInspector(
                PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, false)};
    udf.initialize(argOIs);

    final DeferredObject[] arguments = new DeferredObject[] {
            new DeferredJavaObject("model_id#1"),
            new DeferredJavaObject(model),
            new DeferredJavaObject(ArrayUtils.toList(x)),
            new DeferredJavaObject(false)};

    final DoubleWritable prediction = (DoubleWritable) udf.evaluate(arguments);
    udf.close();
    return prediction.get();
}
 
Example #14
Source File: DynamoDBSerDeTest.java    From emr-dynamodb-connector with Apache License 2.0 6 votes vote down vote up
/**
 * Serializes a two-column row whose second value is {@code null}, once with null
 * serialization disabled (the attribute is expected to be absent) and once enabled
 * (the attribute is expected to be an explicit NULL).
 */
@Test
public void testNull() throws SerDeException {
  final List<String> names = PRIMITIVE_FIELDS.subList(0, 2);
  final List<ObjectInspector> inspectors = PRIMITIVE_OIS.subList(0, 2);

  final List<String> values = Lists.newArrayList(PRIMITIVE_STRING_DATA.subList(0, 2));
  values.set(1, null);

  final List<Object> row = Lists.newArrayList();
  row.addAll(values);

  final Map<String, AttributeValue> expected = Maps.newHashMap();
  expected.put(names.get(0), new AttributeValue(values.get(0)));

  // no null serialization
  Map<String, AttributeValue> actual = getSerializedItem(names, inspectors, row, false);
  assertEquals(expected, actual);

  // with null serialization
  expected.put(names.get(1), new AttributeValue().withNULL(true));
  actual = getSerializedItem(names, inspectors, row, true);
  assertEquals(expected, actual);
}
 
Example #15
Source File: TestParquetDecimalScaling.java    From presto with Apache License 2.0 5 votes vote down vote up
/**
 * Derives the Parquet schema, column names, inspectors and value iterators from
 * the given inserts and hands them to {@code createParquetFile}.
 *
 * @param output the path passed to {@code createParquetFile}
 * @param inserts one entry per column
 */
private static void writeParquetDecimalsRecord(Path output, List<ParquetDecimalInsert> inserts)
{
    List<String> fieldDeclarations = inserts.stream()
            .map(ParquetDecimalInsert::schemaFieldDeclaration)
            .collect(toImmutableList());
    String schemaText = format("message hive_record { %s; }", Joiner.on("; ").join(fieldDeclarations));
    MessageType schema = parseMessageType(schemaText);

    List<ObjectInspector> columnInspectors = inserts.stream()
            .map(ParquetDecimalInsert::getParquetObjectInspector)
            .collect(toImmutableList());
    List<String> columnNames = inserts.stream()
            .map(ParquetDecimalInsert::getColumnName)
            .collect(toImmutableList());
    Iterator<?>[] columnValues = inserts.stream()
            .map(ParquetDecimalInsert::getValues)
            .map(Iterable::iterator)
            .toArray(Iterator[]::new);

    createParquetFile(
            output,
            getStandardStructObjectInspector(columnNames, columnInspectors),
            columnValues,
            schema,
            Collections.singletonList("hive_record"));
}
 
Example #16
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * PARTIAL2 mode: merges two intermediate results, each carrying a one-item sketch,
 * and checks that the partial output is a three-element list whose sketch
 * estimates 2.0.
 */
@Test
public void partial2Mode() throws Exception {
  final ObjectInspector[] argInspectors = new ObjectInspector[] { binaryInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    final ObjectInspector outputInspector =
        evaluator.init(Mode.PARTIAL2, new ObjectInspector[] {structInspector});
    DataToSketchUDAFTest.checkIntermediateResultInspector(outputInspector);

    final UnionState buffer = (UnionState) evaluator.getNewAggregationBuffer();

    final UpdateSketch first = UpdateSketch.builder().build();
    first.update(1);
    evaluator.merge(buffer, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(first.compact().toByteArray()))
    );

    final UpdateSketch second = UpdateSketch.builder().build();
    second.update(2);
    evaluator.merge(buffer, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(second.compact().toByteArray()))
    );

    final Object partial = evaluator.terminatePartial(buffer);
    Assert.assertNotNull(partial);
    Assert.assertTrue(partial instanceof List);
    final List<?> fields = (List<?>) partial;
    Assert.assertEquals(fields.size(), 3);
    Assert.assertEquals(((IntWritable) fields.get(0)).get(), DEFAULT_NOMINAL_ENTRIES);
    Assert.assertEquals(((LongWritable) fields.get(1)).get(), DEFAULT_UPDATE_SEED);
    final Sketch merged =
        Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) fields.get(2)));
    Assert.assertEquals(merged.getEstimate(), 2.0);
  }
}
 
Example #17
Source File: VectorDotUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the generic UDF serialization check for {@code VectorDotUDF} with a
 * double list and a float list as arguments.
 */
@Test
public void testSerialization() throws HiveException, IOException {
    final ObjectInspector doubleListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    final ObjectInspector floatListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaFloatObjectInspector);

    TestUtils.testGenericUDFSerialization(VectorDotUDF.class,
        new ObjectInspector[] {doubleListOI, floatListOI},
        new Object[] {Arrays.asList(1.d, 2.d, 3.d), Arrays.asList(2.f, 3.f, 4.f)});
}
 
Example #18
Source File: EthereumUDFTest.java    From hadoopcryptoledger with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the UDF with a reflection inspector for {@code EthereumTransaction}
 * and checks that evaluating a {@code null} argument yields {@code null}.
 */
@Test
public void EthereumGetChainIdUDFNull() throws HiveException {
    EthereumGetChainIdUDF udf = new EthereumGetChainIdUDF();
    ObjectInspector[] argOIs = new ObjectInspector[] {
        ObjectInspectorFactory.getReflectionObjectInspector(
            EthereumTransaction.class,
            ObjectInspectorFactory.ObjectInspectorOptions.JAVA)};
    udf.initialize(argOIs);
    assertNull(udf.evaluate(null),"Null argument to UDF returns null");
}
 
Example #19
Source File: XmlObjectInspectorFactory.java    From Hive-XML-SerDe with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the standard java object inspector for the given type, recursing into
 * list elements, map keys/values and struct fields.
 * 
 * @param typeInfo
 *            the type info
 * @param xmlProcessor
 *            the XML processor handed to the list, map and struct inspectors
 * @return the standard java object inspector
 * @throws IllegalStateException
 *             if the category is none of PRIMITIVE, LIST, MAP or STRUCT
 */
public static ObjectInspector getStandardJavaObjectInspectorFromTypeInfo(TypeInfo typeInfo, XmlProcessor xmlProcessor) {
    switch (typeInfo.getCategory()) {
        case PRIMITIVE: {
            return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory());
        }
        case LIST: {
            ObjectInspector listElementObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(((ListTypeInfo) typeInfo).getListElementTypeInfo(),
                xmlProcessor);
            return new XmlListObjectInspector(listElementObjectInspector, xmlProcessor);
        }
        case MAP: {
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            ObjectInspector mapKeyObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo(),
                xmlProcessor);
            ObjectInspector mapValueObjectInspector = getStandardJavaObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo(),
                xmlProcessor);
            return new XmlMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector, xmlProcessor);
        }
        case STRUCT: {
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<String> structFieldNames = structTypeInfo.getAllStructFieldNames();
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>(fieldTypeInfos.size());
            for (TypeInfo fieldTypeInfo : fieldTypeInfos) {
                structFieldObjectInspectors.add(getStandardJavaObjectInspectorFromTypeInfo(fieldTypeInfo, xmlProcessor));
            }
            return getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors, xmlProcessor);
        }
        default: {
            // Name the offending category so the failure can be diagnosed from the stack trace alone.
            throw new IllegalStateException("Unsupported type category: " + typeInfo.getCategory());
        }
    }
}
 
Example #20
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * FINAL mode: merges two intermediate results, each carrying a one-item sketch,
 * and checks that the terminated result is a binary sketch estimating 2.0.
 */
@Test
public void finalMode() throws Exception {
  final ObjectInspector[] argInspectors = new ObjectInspector[] { intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    final ObjectInspector outputInspector =
        evaluator.init(Mode.FINAL, new ObjectInspector[] {structInspector});
    checkFinalResultInspector(outputInspector);

    final UnionState buffer = (UnionState) evaluator.getNewAggregationBuffer();

    final UpdateSketch first = UpdateSketch.builder().build();
    first.update(1);
    evaluator.merge(buffer, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(first.compact().toByteArray()))
    );

    final UpdateSketch second = UpdateSketch.builder().build();
    second.update(2);
    evaluator.merge(buffer, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new LongWritable(DEFAULT_UPDATE_SEED),
      new BytesWritable(second.compact().toByteArray()))
    );

    final Object terminated = evaluator.terminate(buffer);
    Assert.assertNotNull(terminated);
    Assert.assertTrue(terminated instanceof BytesWritable);
    final Sketch merged =
        Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) terminated));
    Assert.assertEquals(merged.getEstimate(), 2.0);
  }
}
 
Example #21
Source File: GetFrequentItemsFromStringsSketchUDTFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Initializing the UDTF with three inspectors is expected to raise
 * {@code UDFArgumentException}.
 */
@SuppressWarnings("deprecation")
@Test(expectedExceptions = UDFArgumentException.class)
public void initializeTooManyInspectors() throws Exception {
  final GenericUDTF udtf = new GetFrequentItemsFromStringsSketchUDTF();
  final ObjectInspector[] tooMany = { binaryInspector, stringInspector, stringInspector };
  udtf.initialize(tooMany);
}
 
Example #22
Source File: GeneralClassifierUDTFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Trains {@code GeneralClassifierUDTF} with Adam and inverse-scaling eta on the
 * rows of {@code adam_test_10000.tsv.gz}, then checks that the cumulative loss
 * stays below 900.
 *
 * <p>Each line holds a comma-separated feature list and an integer label,
 * separated by a single space.
 */
@Test
public void testAdamInvScaleEta() throws IOException, HiveException {
    String filePath = "adam_test_10000.tsv.gz";
    String options =
            "-eta inv -eta0 0.1 -loss logloss -opt Adam -reg l1 -lambda 0.0001 -iter 10 -mini_batch 1 -cv_rate 0.00005";

    GeneralClassifierUDTF udtf = new GeneralClassifierUDTF();

    ListObjectInspector stringListOI = ObjectInspectorFactory.getStandardListObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    ObjectInspector params = ObjectInspectorUtils.getConstantObjectInspector(
        PrimitiveObjectInspectorFactory.javaStringObjectInspector, options);

    udtf.initialize(new ObjectInspector[] {stringListOI,
            PrimitiveObjectInspectorFactory.javaIntObjectInspector, params});

    // try-with-resources closes the reader even when process() throws.
    try (BufferedReader reader = readFile(filePath)) {
        for (String line = reader.readLine(); line != null; line = reader.readLine()) {
            StringTokenizer tokenizer = new StringTokenizer(line, " ");

            String featureLine = tokenizer.nextToken();
            List<String> X = Arrays.asList(featureLine.split(","));

            String labelLine = tokenizer.nextToken();
            Integer y = Integer.valueOf(labelLine);

            udtf.process(new Object[] {X, y});
        }
    }

    udtf.finalizeTraining();

    Assert.assertTrue(
        "CumulativeLoss is expected to be less than 900: " + udtf.getCumulativeLoss(),
        udtf.getCumulativeLoss() < 900);
}
 
Example #23
Source File: ST_GeomFromGeoJson.java    From spatial-framework-for-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the single argument and remembers its inspector.
 *
 * <p>The argument must be either a string primitive or a struct. The previous
 * condition was inverted: struct arguments were rejected and every other
 * non-primitive category was silently accepted.
 *
 * @param arguments the argument inspectors; exactly one is expected
 * @return the geometry transport object inspector
 * @throws UDFArgumentException if the argument count or the argument category is wrong
 */
@Override
public ObjectInspector initialize(ObjectInspector[] arguments)
		throws UDFArgumentException {

	if (arguments.length != 1) {
		throw new UDFArgumentLengthException("ST_GeomFromJson takes only one argument");
	}

	ObjectInspector argJsonOI = arguments[0];
	Category category = argJsonOI.getCategory();

	boolean isStringPrimitive = category == Category.PRIMITIVE
			&& ((PrimitiveObjectInspector) argJsonOI).getPrimitiveCategory() == PrimitiveCategory.STRING;

	if (!isStringPrimitive && category != Category.STRUCT) {
		throw new UDFArgumentTypeException(0, "ST_GeomFromJson argument category must be either a string primitive or struct");
	}

	jsonOI = argJsonOI;

	return GeometryUtils.geometryTransportObjectInspector;
}
 
Example #24
Source File: IndexedFeatures.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Requires at least one argument, clears {@code list}, and declares the return
 * type as a standard list of Java strings.
 *
 * @param argOIs the argument inspectors
 * @return a standard list inspector over Java strings
 * @throws UDFArgumentException if no argument is given
 */
@Override
public ObjectInspector initialize(ObjectInspector[] argOIs) throws UDFArgumentException {
    if (argOIs.length < 1) {
        final String message =
                "features(v1, ..) requires at least 1 arguments, got " + argOIs.length;
        throw new UDFArgumentLengthException(message);
    }

    this.list = null;

    final ObjectInspector elementOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    return ObjectInspectorFactory.getStandardListObjectInspector(elementOI);
}
 
Example #25
Source File: DataToSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Records the mode and the input inspectors, and returns the output inspector.
 *
 * <p>PARTIAL1 and COMPLETE read original data (with optional second and third
 * parameters); PARTIAL2 and FINAL read the intermediate struct. PARTIAL1 and
 * PARTIAL2 return a struct of (int, string, binary); the other modes return binary.
 *
 * @param mode the aggregation mode
 * @param parameters the input inspectors for that mode
 * @return the inspector describing this mode's output
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);
  mode_ = mode;

  final boolean readsOriginalData = (mode == Mode.PARTIAL1) || (mode == Mode.COMPLETE);
  if (!readsOriginalData) {
    // input for PARTIAL2 and FINAL is the output from PARTIAL1
    intermediateInspector_ = (StructObjectInspector) parameters[0];
  } else {
    inputInspector_ = (PrimitiveObjectInspector) parameters[0];
    if (parameters.length > 1) {
      lgKInspector_ = (PrimitiveObjectInspector) parameters[1];
    }
    if (parameters.length > 2) {
      hllTypeInspector_ = (PrimitiveObjectInspector) parameters[2];
    }
  }

  final boolean emitsIntermediate = (mode == Mode.PARTIAL1) || (mode == Mode.PARTIAL2);
  if (!emitsIntermediate) {
    // final results include just the sketch
    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
  }

  // intermediate results need to include the lgK and the target HLL type
  return ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList(LG_K_FIELD, HLL_TYPE_FIELD, SKETCH_FIELD),
    Arrays.asList(
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.STRING),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
    )
  );
}
 
Example #26
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * PARTIAL2 mode starting from a fresh buffer: merges two single-item sketches and
 * checks stream length, active item count and per-item estimates of the result.
 */
@Test
public void mergeTerminateEmptyState() throws Exception {
  final ObjectInspector[] argInspectors = { stringInspector, intInspector };
  final GenericUDAFParameterInfo paramInfo =
      new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator evaluator = new DataToStringsSketchUDAF().getEvaluator(paramInfo)) {
    final ObjectInspector outputInspector =
        evaluator.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector });
    checkResultInspector(outputInspector);

    @SuppressWarnings("unchecked")
    final ItemsState<String> buffer = (ItemsState<String>) evaluator.getNewAggregationBuffer();

    final ItemsSketch<String> first = new ItemsSketch<>(256);
    first.update("a");
    evaluator.merge(buffer, new BytesWritable(first.toByteArray(serDe)));

    final ItemsSketch<String> second = new ItemsSketch<>(256);
    second.update("b");
    evaluator.merge(buffer, new BytesWritable(second.toByteArray(serDe)));

    final BytesWritable terminated = (BytesWritable) evaluator.terminate(buffer);
    final ItemsSketch<String> merged =
        ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(terminated), serDe);
    Assert.assertEquals(merged.getStreamLength(), 2);
    Assert.assertEquals(merged.getNumActiveItems(), 2);
    Assert.assertEquals(merged.getEstimate("a"), 1);
    Assert.assertEquals(merged.getEstimate("b"), 1);
  }
}
 
Example #27
Source File: SigmoidGenericUDF.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Accepts exactly one double-compatible argument and declares a writable float result.
 *
 * @param argOIs the argument inspectors
 * @return the writable float object inspector
 * @throws UDFArgumentException if the argument count is not one
 */
@Override
public ObjectInspector initialize(@Nonnull ObjectInspector[] argOIs)
        throws UDFArgumentException {
    final boolean singleArgument = (argOIs.length == 1);
    if (!singleArgument) {
        throw new UDFArgumentException("_FUNC_ takes 1 argument");
    }

    this.argOI = HiveUtils.asDoubleCompatibleOI(argOIs[0]);

    return PrimitiveObjectInspectorFactory.writableFloatObjectInspector;
}
 
Example #28
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Stores the object inspectors of the incoming arguments for the given mode and
 * tells Hive what this evaluator will produce in that mode.
 *
 * @param mode
 *          Aggregation mode. PARTIAL1 and COMPLETE consume original data;
 *          the other modes consume the intermediate struct.
 * @param parameters
 *          Object inspectors of the input arguments.
 * @return A struct inspector of (int, long, binary) for PARTIAL1 and PARTIAL2,
 *         otherwise a writable binary inspector.
 */
@Override
public ObjectInspector init(final Mode mode, final ObjectInspector[] parameters) throws HiveException {
  super.init(mode, parameters);

  final boolean readsOriginalData = mode == Mode.PARTIAL1 || mode == Mode.COMPLETE;
  if (readsOriginalData) {
    final int numParameters = parameters.length;
    inputObjectInspector = (PrimitiveObjectInspector) parameters[0];
    if (numParameters > 1) {
      nominalEntriesObjectInspector = (PrimitiveObjectInspector) parameters[1];
    }
    if (numParameters > 2) {
      seedObjectInspector = (PrimitiveObjectInspector) parameters[2];
    }
  } else {
    // mode = partial2 || final
    intermediateObjectInspector = (StandardStructObjectInspector) parameters[0];
  }

  final boolean emitsIntermediate = mode == Mode.PARTIAL1 || mode == Mode.PARTIAL2;
  if (!emitsIntermediate) {
    // final results include just the sketch
    return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY);
  }

  // intermediate results need to include the nominal number of entries and the seed
  return ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList(NOMINAL_ENTRIES_FIELD, SEED_FIELD, SKETCH_FIELD),
    Arrays.asList(
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.LONG),
      PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.BINARY)
    )
  );
}
 
Example #29
Source File: SmartcnUDFTest.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that the UDF can be initialized and closed when the second argument is a
 * constant string-list inspector whose value is {@code null}.
 */
@Test
public void testTwoArgument() throws UDFArgumentException, IOException {
    final GenericUDF udf = new SmartcnUDF();

    // line
    final ObjectInspector lineOI = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    // stopWords
    final ObjectInspector stopWordsOI =
            ObjectInspectorFactory.getStandardConstantListObjectInspector(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector, null);

    udf.initialize(new ObjectInspector[] {lineOI, stopWordsOI});
    udf.close();
}
 
Example #30
Source File: EsSerDe.java    From elasticsearch-hadoop with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes a Hive object into the shared {@code result} writable.
 *
 * <p>The method mutates the instance fields {@code scratchPad}, {@code hiveType}
 * and {@code result} and returns {@code result} itself, not a copy.
 * NOTE(review): callers presumably must consume the returned value before the
 * next call, since the same instance is returned every time; confirm against callers.
 *
 * @param data the Hive object to serialize
 * @param objInspector the inspector describing {@code data}
 * @return the {@code result} field, with its content set to the scratch pad
 * @throws SerDeException declared by the SerDe contract
 */
@Override
public Writable serialize(Object data, ObjectInspector objInspector) throws SerDeException {
    // Write-side setup is deferred to the first call (see lazyInitializeWrite()).
    lazyInitializeWrite();

    // serialize the type directly to json (to avoid converting to Writable and then serializing)
    // The scratch pad is reset first so output from an earlier call does not leak into this one.
    scratchPad.reset();
    hiveType.setObjectInspector(objInspector);
    hiveType.setObject(data);

    // We use the command directly instead of the bulk entry writer since there is no close() method on SerDes.
    // See FileSinkOperator#process() for more info of how this is used with the output format.
    command.write(hiveType).copyTo(scratchPad);
    result.setContent(scratchPad);
    return result;
}