org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: FMeasureUDAFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds eight binary (actual, predicted) label pairs to the evaluator in PARTIAL1 mode,
 * configured with beta = 1 and average = "micro", and checks that the aggregation
 * buffer reports 0.5 within a tolerance of 1e-4.
 */
@Test
public void testBinaryMultiSamples() throws Exception {
    final int[] truth = {0, 1, 0, 0, 0, 1, 0, 0};
    final int[] guess = {1, 0, 0, 1, 0, 1, 0, 1};
    final double beta = 1.;
    final String average = "micro";
    // the first sample pair is handed to the fixture set-up
    binarySetUp(truth[0], guess[0], beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    int row = 0;
    while (row < truth.length) {
        evaluator.iterate(agg, new Object[] {truth[row], guess[row]});
        row++;
    }

    Assert.assertEquals(0.5d, agg.get(), 1e-4);
}
 
Example #2
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in COMPLETE mode over the double values 1 and 2, each accompanied by
 * the explicit extra arguments 16, 0.99f and a seed of 2, then inspects the serialized
 * result sketch.
 */
@Test
public void completeModeDoubleValuesExplicitParameters() throws Exception {
  final long seed = 2;
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intConstantInspector, floatConstantInspector, longConstantInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    checkFinalResultInspector(eval.init(Mode.COMPLETE, inspectors));

    UnionState state = (UnionState) eval.getNewAggregationBuffer();
    for (int value = 1; value <= 2; value++) {
      eval.iterate(state, new Object[] {new DoubleWritable(value), new IntWritable(16), new FloatWritable(0.99f), new LongWritable(seed)});
    }

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    BytesWritable serialized = (BytesWritable) result;
    Sketch resultSketch = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serialized), seed);
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
  }
}
 
Example #3
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises FINAL mode: two single-value sketches built with K = 256 are merged as
 * serialized bytes, and the terminated result is checked for K, retained item count,
 * minimum and maximum.
 */
@Test
public void finalMode() throws Exception {
  ObjectInspector[] rawInputs = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(rawInputs, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.FINAL, new ObjectInspector[] {binaryInspector}));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // merge one serialized single-item sketch per value
    for (double value : new double[] {1.0, 2.0}) {
      UpdateDoublesSketch partial = DoublesSketch.builder().setK(256).build();
      partial.update(value);
      eval.merge(state, new BytesWritable(partial.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getK(), 256);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #4
Source File: FMeasureUDAFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Feeds seven multi-label (actual, predicted) samples to the evaluator in PARTIAL1 mode,
 * configured with beta = 1.0 and average = "micro", and checks the aggregated value.
 */
@Test
public void testMultiLabelF1MultiSamples() throws Exception {
    final String[][] truth =
            {{"0", "2"}, {"0", "1"}, {"0"}, {"2"}, {"2", "0"}, {"0", "1"}, {"1", "2"}};
    final String[][] guess =
            {{"0", "1"}, {"0", "2"}, {}, {"2"}, {"2", "0"}, {"0", "1", "2"}, {"1"}};

    final double beta = 1.0;
    final String average = "micro";
    setUpWithArguments(beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final int sampleCount = truth.length;
    for (int row = 0; row < sampleCount; row++) {
        Object[] sample = new Object[] {truth[row], guess[row]};
        evaluator.iterate(agg, sample);
    }

    // expected to match Spark's micro F1 measure for the same data, see
    // https://spark.apache.org/docs/latest/mllib-evaluation-metrics.html#multilabel-classification
    final double aggregated = agg.get();
    Assert.assertEquals(0.6956d, aggregated, 1e-4);
}
 
Example #5
Source File: FMeasureUDAFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Uses average = "binary" together with multi-label samples; the test is declared to
 * pass only if a {@code HiveException} is thrown somewhere along the way.
 */
@Test(expected = HiveException.class)
public void testMultiLabelFmeasureBinary() throws Exception {
    final String[][] truth =
            {{"0", "2"}, {"0", "1"}, {"0"}, {"2"}, {"2", "0"}, {"0", "1"}, {"1", "2"}};
    final String[][] guess =
            {{"0", "1"}, {"0", "2"}, {}, {"2"}, {"2", "0"}, {"0", "1", "2"}, {"1"}};

    final double beta = 1.0;
    final String average = "binary";

    setUpWithArguments(beta, average);
    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    int row = 0;
    while (row < truth.length) {
        evaluator.iterate(agg, new Object[] {truth[row], guess[row]});
        row++;
    }

    agg.get();
}
 
Example #6
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument inspectors and creates the evaluator.
 *
 * <p>The first argument must be of primitive category BINARY; an optional second
 * argument must be integral. Any further arguments are delegated to
 * {@code checkExtraArguments}.
 *
 * @param info parameter info supplying the argument object inspectors
 * @return the evaluator produced by {@code createEvaluator()}
 * @throws SemanticException if no argument is supplied or validation fails
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();
  final int argCount = inspectors.length;

  if (argCount < 1) {
    throw new UDFArgumentException("Expected at least 1 argument");
  }
  ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[0], 0, PrimitiveCategory.BINARY);

  // nominal number of entries
  final boolean hasNominalEntries = argCount > 1;
  if (hasNominalEntries) {
    ObjectInspectorValidator.validateIntegralParameter(inspectors[1], 1);
  }

  checkExtraArguments(inspectors);

  return createEvaluator();
}
 
Example #7
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises PARTIAL2 mode: two single-value sketches built with the builder defaults are
 * merged as serialized bytes, and the result of {@code terminate} is checked for
 * retained item count, minimum and maximum.
 */
@Test
public void partial2Mode() throws Exception {
  ObjectInspector[] rawInputs = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(rawInputs, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.PARTIAL2, new ObjectInspector[] {binaryInspector}));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // merge one serialized single-item sketch per value
    for (double value : new double[] {1.0, 2.0}) {
      UpdateDoublesSketch partial = DoublesSketch.builder().build();
      partial.update(value);
      eval.merge(state, new BytesWritable(partial.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #8
Source File: HiveGenericUDAF.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the two Hive evaluators, the per-argument conversions and the
 * identity-conversion flag, then marks this instance as initialized.
 *
 * @throws HiveException if initialization of an evaluator fails
 */
private void init() throws HiveException {
	ObjectInspector[] inputInspectors = HiveInspectors.toInspectors(constantArguments, argTypes);

	// Flink UDAF only supports Hive UDAF's PARTIAL_1 and FINAL mode

	// PARTIAL1 turns original data into partial aggregation data;
	// 		iterate() and terminatePartial() will be called.
	this.partialEvaluator = createEvaluator(inputInspectors);
	this.partialResultObjectInspector =
		partialEvaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputInspectors);

	// FINAL turns partial aggregation data into the full aggregation;
	// 		merge() and terminate() will be called.
	this.finalEvaluator = createEvaluator(inputInspectors);
	ObjectInspector[] partialInspectors = new ObjectInspector[]{ partialResultObjectInspector };
	this.finalResultObjectInspector =
		finalEvaluator.init(GenericUDAFEvaluator.Mode.FINAL, partialInspectors);

	// one conversion per input argument; track whether all of them are identity conversions
	conversions = new HiveObjectConversion[inputInspectors.length];
	boolean identityOnly = true;
	for (int i = 0; i < inputInspectors.length; i++) {
		conversions[i] = HiveInspectors.getConversion(inputInspectors[i], argTypes[i].getLogicalType());
		identityOnly = identityOnly && conversions[i] instanceof IdentityConversion;
	}
	allIdentityConverter = identityOnly;

	initialized = true;
}
 
Example #9
Source File: UnionSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the number and types of the arguments and returns the evaluator for the union.
 * The first argument must be a BINARY sketch; the optional sketch size and seed
 * parameters, when passed in, must be integral.
 *
 * @see org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver
 * #getEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo)
 *
 * @param info The parameter info to validate
 * @return The GenericUDAFEvaluator to use to compute the function.
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] parameters = info.getParameterObjectInspectors();
  final int count = parameters.length;

  if (count < 1) {
    throw new UDFArgumentException("Please specify at least 1 argument");
  }

  if (count > 3) {
    throw new UDFArgumentTypeException(count - 1, "Please specify no more than 3 arguments");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(parameters[0], 0, PrimitiveCategory.BINARY);

  // at most two optional arguments remain at this point (indices 1 and 2)
  for (int position = 1; position < count; position++) {
    ObjectInspectorValidator.validateIntegralParameter(parameters[position], position);
  }
  return new UnionSketchUDAFEvaluator();
}
 
Example #10
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in COMPLETE mode over the int keys 1 and 2 with no extra arguments,
 * checks the estimate of the serialized result, then verifies that {@code terminate}
 * returns null after the state has been reset.
 */
@Test
public void completeModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    checkFinalResultInspector(eval.init(Mode.COMPLETE, inspectors));

    State state = (State) eval.getNewAggregationBuffer();
    for (int key = 1; key <= 2; key++) {
      eval.iterate(state, new Object[] {new IntWritable(key)});
    }

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    BytesWritable serialized = (BytesWritable) result;
    CpcSketch resultSketch = CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);

    // a reset state must terminate to null
    eval.reset(state);
    Assert.assertNull(eval.terminate(state));
  }
}
 
Example #11
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in PARTIAL1 mode over the int keys 1 and 2, each with a double
 * value of 1, and checks the two-element intermediate result: the nominal entries
 * setting followed by the serialized sketch.
 */
@Test
public void partial1ModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector, doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummarySketchUDAF().getEvaluator(info)) {
    checkIntermediateResultInspector(eval.init(Mode.PARTIAL1, inspectors));

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    for (int key = 1; key <= 2; key++) {
      eval.iterate(state, new Object[] {new IntWritable(key), new DoubleWritable(1)});
    }

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> fields = (List<?>) result;
    Assert.assertEquals(fields.size(), 2);
    IntWritable nominalEntries = (IntWritable) fields.get(0);
    Assert.assertEquals(nominalEntries.get(), DEFAULT_NOMINAL_ENTRIES);
    BytesWritable serialized = (BytesWritable) fields.get(1);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory(serialized), new DoubleSummaryDeserializer());
    Assert.assertFalse(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);
  }
}
 
Example #12
Source File: HiveGenericUDAFTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates 2, 3 and 1 into a {@code GenericUDAFMin}-backed function with a BIGINT
 * argument, merges an empty list of other accumulators, and expects the value 1.
 */
@Test
public void testUDAFMin() throws Exception {
	HiveGenericUDAF udf = init(
		GenericUDAFMin.class,
		new Object[] {null},
		new DataType[] {DataTypes.BIGINT()});

	GenericUDAFEvaluator.AggregationBuffer acc = udf.createAccumulator();

	for (long input : new long[] {2L, 3L, 1L}) {
		udf.accumulate(acc, input);
	}

	// merging nothing must leave the accumulator untouched
	udf.merge(acc, Arrays.asList());

	assertEquals(1L, udf.getValue(acc));
}
 
Example #13
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * In PARTIAL2 mode, seeds the state directly with item "a", merges a serialized sketch
 * containing item "b", and checks stream length, active item count and per-item
 * estimates of the terminated result.
 */
@Test
public void mergeTerminate() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector }));

    @SuppressWarnings("unchecked")
    ItemsState<String> state = (ItemsState<String>) eval.getNewAggregationBuffer();
    state.init(256);
    state.update("a");

    ItemsSketch<String> incoming = new ItemsSketch<>(256);
    incoming.update("b");
    BytesWritable incomingBytes = new BytesWritable(incoming.toByteArray(serDe));

    eval.merge(state, incomingBytes);

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> merged = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(serialized), serDe);
    Assert.assertEquals(merged.getStreamLength(), 2);
    Assert.assertEquals(merged.getNumActiveItems(), 2);
    Assert.assertEquals(merged.getEstimate("a"), 1);
    Assert.assertEquals(merged.getEstimate("b"), 1);
  }
}
 
Example #14
Source File: HiveGenericUDAFTest.java    From flink with Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates three double values into a {@code GenericUDAFCount}-backed function with a
 * DOUBLE argument, merges an empty list of other accumulators, and expects the value 3.
 */
@Test
public void testUDAFCount() throws Exception {
	HiveGenericUDAF udf = init(
		GenericUDAFCount.class,
		new Object[] {null},
		new DataType[] {DataTypes.DOUBLE()});

	GenericUDAFEvaluator.AggregationBuffer acc = udf.createAccumulator();

	for (double input : new double[] {0.5d, 0.3d, 5.3d}) {
		udf.accumulate(acc, input);
	}

	// merging nothing must leave the accumulator untouched
	udf.merge(acc, Arrays.asList());

	assertEquals(3L, udf.getValue(acc));
}
 
Example #15
Source File: MRRUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument types and returns a new {@code Evaluator}.
 *
 * <p>Two or three arguments are accepted; the first two must be lists whose element
 * type is primitive.
 *
 * @param typeInfo type information of the arguments passed to the function
 * @return a new evaluator instance
 * @throws SemanticException if the argument count or either list argument is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int argc = typeInfo.length;
    if (argc < 2 || argc > 3) {
        throw new UDFArgumentTypeException(argc - 1,
            "_FUNC_ takes two or three arguments");
    }

    final ListTypeInfo rankItemsType = HiveUtils.asListTypeInfo(typeInfo[0]);
    final boolean rankItemsPrimitive =
            HiveUtils.isPrimitiveTypeInfo(rankItemsType.getListElementTypeInfo());
    if (!rankItemsPrimitive) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final ListTypeInfo correctItemsType = HiveUtils.asListTypeInfo(typeInfo[1]);
    final boolean correctItemsPrimitive =
            HiveUtils.isPrimitiveTypeInfo(correctItemsType.getListElementTypeInfo());
    if (!correctItemsPrimitive) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new Evaluator();
}
 
Example #16
Source File: AUCUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument types and picks the matching evaluator.
 *
 * <p>Two or three arguments are accepted. A number type followed by an integer type
 * selects the {@code ClassificationEvaluator}; otherwise the first two arguments must be
 * lists with primitive element types and the {@code RankingEvaluator} is returned.
 *
 * @param typeInfo type information of the arguments passed to the function
 * @return a classification or ranking evaluator
 * @throws SemanticException if the argument count or the argument types are invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int argc = typeInfo.length;
    if (argc < 2 || argc > 3) {
        throw new UDFArgumentTypeException(argc - 1,
            "_FUNC_ takes two or three arguments");
    }

    final boolean classification =
            HiveUtils.isNumberTypeInfo(typeInfo[0]) && HiveUtils.isIntegerTypeInfo(typeInfo[1]);
    if (classification) {
        return new ClassificationEvaluator();
    }

    // ranking: both leading arguments have to be lists of primitives
    final ListTypeInfo rankItemsType = HiveUtils.asListTypeInfo(typeInfo[0]);
    if (!HiveUtils.isPrimitiveTypeInfo(rankItemsType.getListElementTypeInfo())) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final ListTypeInfo correctItemsType = HiveUtils.asListTypeInfo(typeInfo[1]);
    if (!HiveUtils.isPrimitiveTypeInfo(correctItemsType.getListElementTypeInfo())) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new RankingEvaluator();
}
 
Example #17
Source File: FMeasureUDAFTest.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Configures the evaluator with beta = -1.0 and average = "micro", feeds one multi-label
 * sample, and reads the aggregated value; the test is declared to pass only if a
 * {@code HiveException} is thrown.
 */
@Test(expected = HiveException.class)
public void testMultiLabelNegativeBeta() throws Exception {
    final List<Integer> truth = Arrays.asList(1, 3, 2, 6);
    final List<Integer> guess = Arrays.asList(1, 2, 4);
    final double beta = -1.0d;
    final String average = "micro";
    setUpWithArguments(beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final Object[] sample = new Object[] {truth, guess};
    evaluator.iterate(agg, sample);

    // the F-measure is not defined for a negative beta
    agg.get();
}
 
Example #18
Source File: RecallUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Checks the argument types and hands back a fresh {@code Evaluator}.
 *
 * <p>The function takes two or three arguments, and each of the first two has to be a
 * list with a primitive element type.
 *
 * @param typeInfo type information of the arguments passed to the function
 * @return a new evaluator instance
 * @throws SemanticException if the argument count or either list argument is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int numArgs = typeInfo.length;
    final boolean acceptableCount = numArgs == 2 || numArgs == 3;
    if (!acceptableCount) {
        throw new UDFArgumentTypeException(numArgs - 1,
            "_FUNC_ takes two or three arguments");
    }

    final TypeInfo rankElement = HiveUtils.asListTypeInfo(typeInfo[0]).getListElementTypeInfo();
    if (!HiveUtils.isPrimitiveTypeInfo(rankElement)) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final TypeInfo correctElement = HiveUtils.asListTypeInfo(typeInfo[1]).getListElementTypeInfo();
    if (!HiveUtils.isPrimitiveTypeInfo(correctElement)) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new Evaluator();
}
 
Example #19
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises PARTIAL2 mode of the union: two single-value KLL sketches created with
 * parameter 400 are merged as serialized bytes, and the partial result is checked for
 * normalized rank error, retained count, minimum and maximum.
 */
@Test
public void partial2Mode() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
    DataToSketchUDAFTest.checkResultInspector(eval.init(Mode.PARTIAL2, inspectors));

    SketchState state = (SketchState) eval.getNewAggregationBuffer();

    // merge one serialized single-item sketch per value
    for (int value = 1; value <= 2; value++) {
      KllFloatsSketch partial = new KllFloatsSketch(400);
      partial.update(value);
      eval.merge(state, new BytesWritable(partial.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    KllFloatsSketch merged = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getNormalizedRankError(false), KllFloatsSketch.getNormalizedRankError(400, false));
    Assert.assertEquals(merged.getNumRetained(), 2);
    Assert.assertEquals(merged.getMinValue(), 1f);
    Assert.assertEquals(merged.getMaxValue(), 2f);
  }
}
 
Example #20
Source File: UnionDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Exercises PARTIAL1 mode of the doubles-sketch union with an explicit K of 256: two
 * serialized single-value sketches are iterated in, and the partial result is checked
 * for K, retained item count, minimum and maximum.
 */
@Test
public void partia1ModelGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionDoublesSketchUDAF().getEvaluator(info)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(eval.init(Mode.PARTIAL1, inspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // feed one serialized single-item sketch per value, each alongside K = 256
    for (double value : new double[] {1.0, 2.0}) {
      UpdateDoublesSketch input = DoublesSketch.builder().setK(256).build();
      input.update(value);
      eval.iterate(state, new Object[] { new BytesWritable(input.toByteArray()), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch merged = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(merged.getK(), 256);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), 1.0);
    Assert.assertEquals(merged.getMaxValue(), 2.0);
  }
}
 
Example #21
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the strings-sketch evaluator in COMPLETE mode over the items "a" and "b" without
 * an explicit K, then checks K (expected 128), retained item count, minimum and maximum
 * of the result.
 */
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(eval.init(Mode.COMPLETE, inspectors));

    @SuppressWarnings("unchecked")
    ItemsUnionState<String> state = (ItemsUnionState<String>) eval.getNewAggregationBuffer();
    for (String item : new String[] {"a", "b"}) {
      eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text(item) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> merged = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(serialized), comparator, serDe);
    Assert.assertEquals(merged.getK(), 128);
    Assert.assertEquals(merged.getRetainedItems(), 2);
    Assert.assertEquals(merged.getMinValue(), "a");
    Assert.assertEquals(merged.getMaxValue(), "b");
  }
}
 
Example #22
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in PARTIAL1 mode over the values 1.0 and 2.0 with an explicit K of
 * 256, then checks K, retained item count, minimum and maximum of the partial result.
 */
@Test
public void partial1ModeGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.PARTIAL1, inspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] {1.0, 2.0}) {
      eval.iterate(state, new Object[] { new DoubleWritable(value), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch partial = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(partial.getK(), 256);
    Assert.assertEquals(partial.getRetainedItems(), 2);
    Assert.assertEquals(partial.getMinValue(), 1.0);
    Assert.assertEquals(partial.getMaxValue(), 2.0);
  }
}
 
Example #23
Source File: IntersectSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument inspectors and returns the intersection evaluator.
 *
 * <p>One or two arguments are accepted: the first must be of primitive category BINARY,
 * the optional second must be integral.
 *
 * @param info parameter info supplying the argument object inspectors
 * @return a new {@code IntersectSketchUDAFEvaluator}
 * @throws SemanticException if the argument count or an argument type is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info)
    throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();
  final int argCount = inspectors.length;
  if (argCount < 1) {
    throw new UDFArgumentException("Please specify at least 1 argument");
  }
  if (argCount > 2) {
    throw new UDFArgumentTypeException(argCount - 1, "Please specify no more than 2 arguments");
  }
  ObjectInspectorValidator.validateGivenPrimitiveCategory(
      inspectors[0], 0, PrimitiveCategory.BINARY);
  final boolean hasSecondArgument = argCount > 1;
  if (hasSecondArgument) {
    ObjectInspectorValidator.validateIntegralParameter(inspectors[1], 1);
  }
  return new IntersectSketchUDAFEvaluator();
}
 
Example #24
Source File: OnehotEncodingUDAF.java    From incubator-hivemall with Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument types and returns the one-hot encoding evaluator.
 *
 * <p>At least one argument is required, and every argument must be a non-null
 * primitive type.
 *
 * @param argTypes type information of the arguments passed to the function
 * @return a new {@code GenericUDAFOnehotEncodingEvaluator}
 * @throws SemanticException if no argument is given, or if an argument type is null or
 *         not primitive
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] argTypes)
        throws SemanticException {
    final int numFeatures = argTypes.length;
    if (numFeatures == 0) {
        throw new UDFArgumentException("_FUNC_ requires at least 1 argument");
    }
    for (int i = 0; i < numFeatures; i++) {
        if (argTypes[i] == null) {
            throw new UDFArgumentTypeException(i,
                "Null type is found. Only primitive type arguments are accepted.");
        }
        if (argTypes[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            // report the 1-based position of the offending argument rather than a
            // hard-coded "1", which was wrong for every argument after the first
            throw new UDFArgumentTypeException(i,
                "Only primitive type arguments are accepted but " + argTypes[i].getTypeName()
                        + " was passed as parameter " + (i + 1) + ".");
        }
    }

    return new GenericUDAFOnehotEncodingEvaluator();
}
 
Example #25
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in COMPLETE mode over the values 1.0 and 2.0 with an explicit K of
 * 256, then checks K, retained item count, minimum and maximum of the final result.
 */
@Test
public void completeModeGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.COMPLETE, inspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] {1.0, 2.0}) {
      eval.iterate(state, new Object[] { new DoubleWritable(value), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch finalSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(finalSketch.getK(), 256);
    Assert.assertEquals(finalSketch.getRetainedItems(), 2);
    Assert.assertEquals(finalSketch.getMinValue(), 1.0);
    Assert.assertEquals(finalSketch.getMaxValue(), 2.0);
  }
}
 
Example #26
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the evaluator in COMPLETE mode over the values 1.0 and 2.0 without an explicit K,
 * then checks K (expected 128), retained item count, minimum and maximum of the result.
 */
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    checkResultInspector(eval.init(Mode.COMPLETE, inspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] {1.0, 2.0}) {
      eval.iterate(state, new Object[] { new DoubleWritable(value) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch finalSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(finalSketch.getK(), 128);
    Assert.assertEquals(finalSketch.getRetainedItems(), 2);
    Assert.assertEquals(finalSketch.getMinValue(), 1.0);
    Assert.assertEquals(finalSketch.getMaxValue(), 2.0);
  }
}
 
Example #27
Source File: DataToDoubleSummaryWithModeSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
/**
 * Feeds 10000 distinct int keys (each with a double value of 1) through the evaluator in
 * COMPLETE mode, checks that the estimate is within 3% of 10000 and that no more than
 * 4096 entries are retained, then verifies that a reset state terminates to null.
 */
@Test
public void completeModeCheckTrimmingToNominal() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector, doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummaryWithModeSketchUDAF().getEvaluator(info)) {
    checkFinalResultInspector(eval.init(Mode.COMPLETE, inspectors));

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    int key = 0;
    while (key < 10000) {
      eval.iterate(state, new Object[] {new IntWritable(key), new DoubleWritable(1)});
      key++;
    }

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    BytesWritable serialized = (BytesWritable) result;
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory(serialized), new DoubleSummaryDeserializer());
    Assert.assertEquals(resultSketch.getEstimate(), 10000.0, 10000 * 0.03);
    final int retained = resultSketch.getRetainedEntries();
    Assert.assertTrue(retained <= 4096, "retained entries: " + retained);

    // a reset state must terminate to null
    eval.reset(state);
    Assert.assertNull(eval.terminate(state));
  }
}
 
Example #28
Source File: MergeTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Requests an evaluator from {@code Merge} for a single argument that is a list of lists
 * of longs; the test is declared to pass only if a {@code UDFArgumentTypeException} is
 * thrown.
 */
@Test(expected = UDFArgumentTypeException.class)
public void testArrayOfNonPrimitives() throws HiveException {
    Merge udaf = new Merge();
    ObjectInspector listOfLongs =
            ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    ObjectInspector listOfLists = ObjectInspectorFactory.getStandardListObjectInspector(listOfLongs);
    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{listOfLists};

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    // only the thrown exception matters, so the returned evaluator is not kept
    udaf.getEvaluator(paramInfo);
}
 
Example #29
Source File: UDAFToOrderedList.java    From incubator-hivemall with Apache License 2.0 5 votes vote down vote up
/**
 * Validates the argument list and returns the ordered-list evaluator.
 *
 * <p>Accepted forms are a value alone, a value plus a key, and either of those followed
 * by a constant string of options. The argument that is sorted on (the value when no key
 * is given, otherwise the key) must be a primitive type.
 *
 * @param info parameter info supplying the argument types and object inspectors
 * @return a new {@code UDAFToOrderedListEvaluator}
 * @throws SemanticException if the argument count or the sort argument's type is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
        throws SemanticException {
    @SuppressWarnings("deprecation")
    TypeInfo[] typeInfo = info.getParameters();
    ObjectInspector[] argOIs = info.getParameterObjectInspectors();
    final int argc = typeInfo.length;

    // value only, optionally followed by the constant options string
    final boolean valueOnly = argc == 1 || (argc == 2 && HiveUtils.isConstString(argOIs[1]));
    // value and key, optionally followed by the constant options string
    final boolean keyed = !valueOnly
            && (argc == 2 || (argc == 3 && HiveUtils.isConstString(argOIs[2])));

    if (!valueOnly && !keyed) {
        throw new UDFArgumentTypeException(argc - 1,
            "Number of arguments must be in [1, 3] including constant string for options: "
                    + argc);
    }

    if (valueOnly) {
        // sort values by value itself w/o key
        final boolean primitiveValue =
                typeInfo[0].getCategory() == ObjectInspector.Category.PRIMITIVE;
        if (!primitiveValue) {
            throw new UDFArgumentTypeException(0,
                "Only primitive type arguments are accepted for value but "
                        + typeInfo[0].getTypeName() + " was passed as the first parameter.");
        }
    } else {
        // sort values by key
        final boolean primitiveKey =
                typeInfo[1].getCategory() == ObjectInspector.Category.PRIMITIVE;
        if (!primitiveKey) {
            throw new UDFArgumentTypeException(1,
                "Only primitive type arguments are accepted for key but "
                        + typeInfo[1].getTypeName() + " was passed as the second parameter.");
        }
    }
    return new UDAFToOrderedListEvaluator();
}
 
Example #30
Source File: FunnelTest.java    From hive-funnel-udf with Apache License 2.0 5 votes vote down vote up
/**
 * Requests an evaluator from {@code Funnel} with a list-of-longs inspector in the second
 * argument position, between two string inspectors; the test is declared to pass only
 * if a {@code UDFArgumentTypeException} is thrown.
 */
@Test(expected = UDFArgumentTypeException.class)
public void testComplexParamPosition2() throws HiveException {
    Funnel udaf = new Funnel();
    ObjectInspector stringInspector = PrimitiveObjectInspectorFactory.javaStringObjectInspector;
    ObjectInspector listOfLongs =
        ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    ObjectInspector[] inputObjectInspectorList = new ObjectInspector[]{
        stringInspector,
        listOfLongs,
        stringInspector
    };

    GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(inputObjectInspectorList, false, false);
    // only the thrown exception matters, so the returned evaluator is not kept
    udaf.getEvaluator(paramInfo);
}