Java Code Examples for org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator

The following examples show how to use org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flink   Source File: HiveGenericUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Prepares the two Hive evaluators and the per-argument object conversions,
 * then marks this function as initialized.
 *
 * <p>Flink UDAF only supports Hive UDAF's PARTIAL_1 and FINAL mode.
 *
 * @throws HiveException if creating or initializing an evaluator fails
 */
private void init() throws HiveException {
	final ObjectInspector[] argInspectors = HiveInspectors.toInspectors(constantArguments, argTypes);

	// PARTIAL1 turns original data into partial aggregation data:
	// iterate() and terminatePartial() will be called on this evaluator.
	partialEvaluator = createEvaluator(argInspectors);
	partialResultObjectInspector =
		partialEvaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, argInspectors);

	// FINAL turns partial aggregation data into the full aggregation:
	// merge() and terminate() will be called on this evaluator.
	finalEvaluator = createEvaluator(argInspectors);
	final ObjectInspector[] partialInspectors = new ObjectInspector[]{ partialResultObjectInspector };
	finalResultObjectInspector =
		finalEvaluator.init(GenericUDAFEvaluator.Mode.FINAL, partialInspectors);

	// Build one conversion per argument and remember whether all of them are identities.
	final int argCount = argInspectors.length;
	conversions = new HiveObjectConversion[argCount];
	boolean identityOnly = true;
	for (int pos = 0; pos < argCount; pos++) {
		final HiveObjectConversion conversion =
			HiveInspectors.getConversion(argInspectors[pos], argTypes[pos].getLogicalType());
		conversions[pos] = conversion;
		identityOnly = identityOnly && conversion instanceof IdentityConversion;
	}
	allIdentityConverter = identityOnly;

	initialized = true;
}
 
Example 2
Source Project: flink   Source File: HiveGenericUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates 2, 3 and 1 into a GenericUDAFMin-backed function, merges an
 * empty list of buffers, and expects the reported value to be 1.
 */
@Test
public void testUDAFMin() throws Exception {
	HiveGenericUDAF udf = init(
		GenericUDAFMin.class,
		new Object[] { null },
		new DataType[] { DataTypes.BIGINT() });

	GenericUDAFEvaluator.AggregationBuffer buffer = udf.createAccumulator();
	for (long input : new long[] { 2L, 3L, 1L }) {
		udf.accumulate(buffer, input);
	}

	// Merging nothing must leave the accumulated state untouched.
	udf.merge(buffer, Arrays.asList());

	assertEquals(1L, udf.getValue(buffer));
}
 
Example 3
/**
 * Validates the argument inspectors and returns the evaluator for this function.
 *
 * <p>The first argument must be of primitive category BINARY; an optional second
 * argument (nominal number of entries) must be integral. Any further arguments
 * are handed to {@code checkExtraArguments}.
 *
 * @param info the parameter info whose object inspectors are validated
 * @return the evaluator produced by {@code createEvaluator()}
 * @throws SemanticException if validation fails (including when no argument is given)
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] argInspectors = info.getParameterObjectInspectors();
  final int argCount = argInspectors.length;

  if (argCount == 0) {
    throw new UDFArgumentException("Expected at least 1 argument");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(argInspectors[0], 0, PrimitiveCategory.BINARY);

  final boolean hasNominalEntries = argCount > 1;
  if (hasNominalEntries) {
    // nominal number of entries
    ObjectInspectorValidator.validateIntegralParameter(argInspectors[1], 1);
  }

  checkExtraArguments(argInspectors);

  return createEvaluator();
}
 
Example 4
/**
 * COMPLETE mode with double values and explicit extra parameters (16, 0.99f and a seed):
 * feeds two rows and checks that the serialized result is a sketch in estimation mode
 * whose estimate is about 2.
 */
@Test
public void completeModeDoubleValuesExplicitParameters() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] {
      doubleInspector, intConstantInspector, floatConstantInspector, longConstantInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    final long seed = 2;
    UnionState state = (UnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] {1, 2}) {
      Object[] row = new Object[] {
          new DoubleWritable(value), new IntWritable(16), new FloatWritable(0.99f), new LongWritable(seed)};
      eval.iterate(state, row);
    }

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    BytesWritable serialized = (BytesWritable) result;
    Sketch resultSketch = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory(serialized), seed);
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
  }
}
 
Example 5
/**
 * COMPLETE mode with only the value argument: two doubles are fed in and the
 * resulting sketch is expected to report k = 128, two retained items, min 1.0 and max 2.0.
 */
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] { 1.0, 2.0 }) {
      eval.iterate(state, new Object[] { new DoubleWritable(value) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getK(), 128);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 6
Source Project: flink   Source File: HiveGenericUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Accumulates three doubles into a GenericUDAFCount-backed function, merges an
 * empty list of buffers, and expects the reported count to be 3.
 */
@Test
public void testUDAFCount() throws Exception {
	HiveGenericUDAF udf = init(
		GenericUDAFCount.class,
		new Object[] { null },
		new DataType[] { DataTypes.DOUBLE() });

	GenericUDAFEvaluator.AggregationBuffer buffer = udf.createAccumulator();
	for (double input : new double[] { 0.5d, 0.3d, 5.3d }) {
		udf.accumulate(buffer, input);
	}

	// Merging nothing must leave the accumulated state untouched.
	udf.merge(buffer, Arrays.asList());

	assertEquals(3L, udf.getValue(buffer));
}
 
Example 7
Source Project: incubator-hivemall   Source File: MRRUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks the argument types and returns a new {@code Evaluator}.
 *
 * <p>Two or three arguments are accepted; the first two must be lists whose
 * element type is primitive.
 *
 * @param typeInfo the types of the arguments passed to the function
 * @return a new {@code Evaluator}
 * @throws SemanticException if the argument count or either list argument is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int numArgs = typeInfo.length;
    if (numArgs < 2 || numArgs > 3) {
        throw new UDFArgumentTypeException(numArgs - 1,
            "_FUNC_ takes two or three arguments");
    }

    final ListTypeInfo rankItemsType = HiveUtils.asListTypeInfo(typeInfo[0]);
    final boolean rankItemsOk =
            HiveUtils.isPrimitiveTypeInfo(rankItemsType.getListElementTypeInfo());
    if (!rankItemsOk) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final ListTypeInfo correctItemsType = HiveUtils.asListTypeInfo(typeInfo[1]);
    final boolean correctItemsOk =
            HiveUtils.isPrimitiveTypeInfo(correctItemsType.getListElementTypeInfo());
    if (!correctItemsOk) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new Evaluator();
}
 
Example 8
Source Project: incubator-hivemall   Source File: AUCUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Picks the evaluator matching the argument types.
 *
 * <p>Two or three arguments are accepted. When the first argument is a number type
 * and the second an integer type, a {@code ClassificationEvaluator} is returned.
 * Otherwise both of the first two arguments must be lists with primitive elements
 * and a {@code RankingEvaluator} is returned.
 *
 * @param typeInfo the types of the arguments passed to the function
 * @return the evaluator for the detected input form
 * @throws SemanticException if the argument count or the list arguments are invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int numArgs = typeInfo.length;
    if (numArgs < 2 || numArgs > 3) {
        throw new UDFArgumentTypeException(numArgs - 1,
            "_FUNC_ takes two or three arguments");
    }

    // Scalar (number, integer) input selects the classification form.
    if (HiveUtils.isNumberTypeInfo(typeInfo[0]) && HiveUtils.isIntegerTypeInfo(typeInfo[1])) {
        return new ClassificationEvaluator();
    }

    // Everything else has to be a pair of lists with primitive elements.
    final ListTypeInfo rankItemsType = HiveUtils.asListTypeInfo(typeInfo[0]);
    final boolean rankItemsOk =
            HiveUtils.isPrimitiveTypeInfo(rankItemsType.getListElementTypeInfo());
    if (!rankItemsOk) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final ListTypeInfo correctItemsType = HiveUtils.asListTypeInfo(typeInfo[1]);
    final boolean correctItemsOk =
            HiveUtils.isPrimitiveTypeInfo(correctItemsType.getListElementTypeInfo());
    if (!correctItemsOk) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new RankingEvaluator();
}
 
Example 9
Source Project: incubator-hivemall   Source File: RecallUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the argument types and hands back a new {@code Evaluator}.
 *
 * <p>The function takes two or three arguments, and each of the first two
 * must be a list with a primitive element type.
 *
 * @param typeInfo the types of the arguments passed to the function
 * @return a new {@code Evaluator}
 * @throws SemanticException if the argument count or either list argument is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] typeInfo)
        throws SemanticException {
    final int argc = typeInfo.length;
    final boolean arityOk = (argc == 2) || (argc == 3);
    if (!arityOk) {
        throw new UDFArgumentTypeException(argc - 1,
            "_FUNC_ takes two or three arguments");
    }

    final TypeInfo rankItemsElement =
            HiveUtils.asListTypeInfo(typeInfo[0]).getListElementTypeInfo();
    if (!HiveUtils.isPrimitiveTypeInfo(rankItemsElement)) {
        throw new UDFArgumentTypeException(0,
            "The first argument `array rankItems` is invalid form: " + typeInfo[0]);
    }

    final TypeInfo correctItemsElement =
            HiveUtils.asListTypeInfo(typeInfo[1]).getListElementTypeInfo();
    if (!HiveUtils.isPrimitiveTypeInfo(correctItemsElement)) {
        throw new UDFArgumentTypeException(1,
            "The second argument `array correctItems` is invalid form: " + typeInfo[1]);
    }

    return new Evaluator();
}
 
Example 10
/**
 * PARTIAL1 mode with an explicit k of 256: two serialized single-value sketches are
 * iterated in, and the partial result must be a sketch with k = 256, two retained
 * items, min 1.0 and max 2.0.
 */
@Test
public void partia1ModelGivenK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { binaryInspector, intInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionDoublesSketchUDAF().getEvaluator(paramInfo)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(eval.init(Mode.PARTIAL1, argInspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // One fresh k=256 sketch per input value, each passed in serialized form.
    for (double value : new double[] { 1.0, 2.0 }) {
      UpdateDoublesSketch inputSketch = DoublesSketch.builder().setK(256).build();
      inputSketch.update(value);
      eval.iterate(state, new Object[] { new BytesWritable(inputSketch.toByteArray()), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 11
Source Project: incubator-hivemall   Source File: OnehotEncodingUDAF.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates that at least one argument is given and that every argument is a
 * non-null primitive type, then returns the one-hot encoding evaluator.
 *
 * @param argTypes the types of the arguments passed to the function
 * @return a new {@code GenericUDAFOnehotEncodingEvaluator}
 * @throws SemanticException if no argument is given, or an argument type is
 *         null or not primitive
 */
@Override
public GenericUDAFEvaluator getEvaluator(@Nonnull TypeInfo[] argTypes)
        throws SemanticException {
    final int numFeatures = argTypes.length;
    if (numFeatures == 0) {
        throw new UDFArgumentException("_FUNC_ requires at least 1 argument");
    }
    for (int i = 0; i < numFeatures; i++) {
        final TypeInfo argType = argTypes[i];
        if (argType == null) {
            throw new UDFArgumentTypeException(i,
                "Null type is found. Only primitive type arguments are accepted.");
        }
        if (argType.getCategory() != ObjectInspector.Category.PRIMITIVE) {
            // Report the actual 1-based position of the offending argument rather
            // than a fixed "parameter 1", which was wrong for every index but the first.
            throw new UDFArgumentTypeException(i,
                "Only primitive type arguments are accepted but " + argType.getTypeName()
                        + " was passed as parameter " + (i + 1) + ".");
        }
    }

    return new GenericUDAFOnehotEncodingEvaluator();
}
 
Example 12
/**
 * FINAL mode: two serialized k=256 sketches holding one value each are merged, and the
 * terminated result must be a sketch with k = 256, two retained items, min 1.0 and max 2.0.
 */
@Test
public void finalMode() throws Exception {
  ObjectInspector[] originalInspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(originalInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.FINAL, new ObjectInspector[] {binaryInspector}));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // One fresh k=256 sketch per input value, each merged in serialized form.
    for (double value : new double[] { 1.0, 2.0 }) {
      UpdateDoublesSketch partialSketch = DoublesSketch.builder().setK(256).build();
      partialSketch.update(value);
      eval.merge(state, new BytesWritable(partialSketch.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 13
/**
 * Checks the number and types of the arguments and returns the evaluator for
 * this function. Between one and three arguments are accepted: the first must
 * be a BINARY sketch, and the optional second and third arguments (sketch size
 * and seed), when passed in, must be integral.
 *
 * @see org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver
 * #getEvaluator(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo)
 *
 * @param info The parameter info to validate
 * @return The GenericUDAFEvaluator to use to compute the function.
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] parameters = info.getParameterObjectInspectors();
  final int count = parameters.length;

  if (count == 0) {
    throw new UDFArgumentException("Please specify at least 1 argument");
  }
  if (count > 3) {
    throw new UDFArgumentTypeException(count - 1, "Please specify no more than 3 arguments");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(parameters[0], 0, PrimitiveCategory.BINARY);

  // At most two optional arguments remain at this point; each has to be integral.
  for (int pos = 1; pos < count; pos++) {
    ObjectInspectorValidator.validateIntegralParameter(parameters[pos], pos);
  }

  return new UnionSketchUDAFEvaluator();
}
 
Example 14
/**
 * PARTIAL2 mode: a state already holding "a" is merged with a serialized sketch
 * holding "b"; the terminated result must report a stream length of 2, two active
 * items, and an estimate of 1 for each item.
 */
@Test
public void mergeTerminate() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { stringInspector, intInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector }));

    @SuppressWarnings("unchecked")
    ItemsState<String> state = (ItemsState<String>) eval.getNewAggregationBuffer();
    state.init(256);
    state.update("a");

    ItemsSketch<String> incoming = new ItemsSketch<>(256);
    incoming.update("b");
    BytesWritable incomingBytes = new BytesWritable(incoming.toByteArray(serDe));

    eval.merge(state, incomingBytes);

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> resultSketch = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(serialized), serDe);
    Assert.assertEquals(resultSketch.getStreamLength(), 2);
    Assert.assertEquals(resultSketch.getNumActiveItems(), 2);
    Assert.assertEquals(resultSketch.getEstimate("a"), 1);
    Assert.assertEquals(resultSketch.getEstimate("b"), 1);
  }
}
 
Example 15
/**
 * PARTIAL1 mode with an explicit k of 256: two doubles are iterated in and the partial
 * result must be a sketch with k = 256, two retained items, min 1.0 and max 2.0.
 */
@Test
public void partial1ModeGivenK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.PARTIAL1, argInspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] { 1.0, 2.0 }) {
      eval.iterate(state, new Object[] { new DoubleWritable(value), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 16
Source Project: incubator-hivemall   Source File: FMeasureUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds eight binary (actual, predicted) pairs through the evaluator in PARTIAL1 mode
 * with beta = 1 and "micro" averaging, and expects the aggregate value to be 0.5.
 */
@Test
public void testBinaryMultiSamples() throws Exception {
    final int[] actual = {0, 1, 0, 0, 0, 1, 0, 0};
    final int[] predicted = {1, 0, 0, 1, 0, 1, 0, 1};
    final double beta = 1.0d;
    final String average = "micro";
    binarySetUp(actual[0], predicted[0], beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final int numSamples = actual.length;
    for (int row = 0; row < numSamples; row++) {
        final Object[] pair = {actual[row], predicted[row]};
        evaluator.iterate(agg, pair);
    }

    Assert.assertEquals(0.5d, agg.get(), 1e-4);
}
 
Example 17
/**
 * COMPLETE mode with an explicit k of 256: two doubles are iterated in and the final
 * result must be a sketch with k = 256, two retained items, min 1.0 and max 2.0.
 */
@Test
public void completeModeGivenK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    for (double value : new double[] { 1.0, 2.0 }) {
      eval.iterate(state, new Object[] { new DoubleWritable(value), new IntWritable(256) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 18
/**
 * Validates the arguments and returns the intersection evaluator.
 *
 * <p>One or two arguments are accepted: the first must be of primitive category
 * BINARY, and the optional second one must be integral.
 *
 * @param info the parameter info whose object inspectors are validated
 * @return a new {@code IntersectSketchUDAFEvaluator}
 * @throws SemanticException if the argument count or an argument type is invalid
 */
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info)
    throws SemanticException {
  final ObjectInspector[] argInspectors = info.getParameterObjectInspectors();
  final int count = argInspectors.length;

  if (count == 0) {
    throw new UDFArgumentException("Please specify at least 1 argument");
  }
  if (count > 2) {
    throw new UDFArgumentTypeException(count - 1, "Please specify no more than 2 arguments");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(
      argInspectors[0], 0, PrimitiveCategory.BINARY);

  final boolean hasSecondArgument = count == 2;
  if (hasSecondArgument) {
    ObjectInspectorValidator.validateIntegralParameter(argInspectors[1], 1);
  }

  return new IntersectSketchUDAFEvaluator();
}
 
Example 19
/**
 * PARTIAL2 mode: two serialized KLL sketches built with k = 400, holding one value each,
 * are merged; the partial result must match the k = 400 normalized rank error and
 * report two retained items, min 1 and max 2.
 */
@Test
public void partial2Mode() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    DataToSketchUDAFTest.checkResultInspector(eval.init(Mode.PARTIAL2, argInspectors));

    SketchState state = (SketchState) eval.getNewAggregationBuffer();

    // One fresh k=400 sketch per input value, each merged in serialized form.
    for (float value : new float[] { 1, 2 }) {
      KllFloatsSketch partialSketch = new KllFloatsSketch(400);
      partialSketch.update(value);
      eval.merge(state, new BytesWritable(partialSketch.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    KllFloatsSketch resultSketch = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getNormalizedRankError(false), KllFloatsSketch.getNormalizedRankError(400, false));
    Assert.assertEquals(resultSketch.getNumRetained(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1f);
    Assert.assertEquals(resultSketch.getMaxValue(), 2f);
  }
}
 
Example 20
Source Project: incubator-hivemall   Source File: FMeasureUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * With a negative beta, the test is expected to end in a {@code HiveException}.
 */
@Test(expected = HiveException.class)
public void testMultiLabelNegativeBeta() throws Exception {
    final List<Integer> actualLabels = Arrays.asList(1, 3, 2, 6);
    final List<Integer> predictedLabels = Arrays.asList(1, 2, 4);
    final double beta = -1.0d;
    final String average = "micro";
    setUpWithArguments(beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final Object[] sample = {actualLabels, predictedLabels};
    evaluator.iterate(agg, sample);

    // FMeasure is not defined when beta is negative
    agg.get();
}
 
Example 21
/**
 * COMPLETE mode with only the value argument: the strings "a" and "b" are iterated in and
 * the resulting sketch must report k = 128, two retained items, min "a" and max "b".
 */
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { stringInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(paramInfo)) {
    DataToDoublesSketchUDAFTest.checkResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    @SuppressWarnings("unchecked")
    ItemsUnionState<String> state = (ItemsUnionState<String>) eval.getNewAggregationBuffer();
    for (String item : new String[] { "a", "b" }) {
      eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text(item) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> resultSketch =
        ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(serialized), comparator, serDe);
    Assert.assertEquals(resultSketch.getK(), 128);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), "a");
    Assert.assertEquals(resultSketch.getMaxValue(), "b");
  }
}
 
Example 22
Source Project: incubator-hivemall   Source File: FMeasureUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds seven multi-label samples through the evaluator in PARTIAL1 mode with beta = 1
 * and "micro" averaging, and expects the aggregate value to be 0.6956.
 */
@Test
public void testMultiLabelF1MultiSamples() throws Exception {
    final String[][] actual =
            {{"0", "2"}, {"0", "1"}, {"0"}, {"2"}, {"2", "0"}, {"0", "1"}, {"1", "2"}};
    final String[][] predicted =
            {{"0", "1"}, {"0", "2"}, {}, {"2"}, {"2", "0"}, {"0", "1", "2"}, {"1"}};

    final double beta = 1.0;
    final String average = "micro";
    setUpWithArguments(beta, average);

    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final int numSamples = actual.length;
    for (int row = 0; row < numSamples; row++) {
        final Object[] sample = {actual[row], predicted[row]};
        evaluator.iterate(agg, sample);
    }

    // should equal to spark's micro f1 measure result
    // https://spark.apache.org/docs/latest/mllib-evaluation-metrics.html#multilabel-classification
    Assert.assertEquals(0.6956d, agg.get(), 1e-4);
}
 
Example 23
Source Project: incubator-hivemall   Source File: FMeasureUDAFTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Feeds multi-label samples while "binary" averaging is configured; the test is
 * expected to end in a {@code HiveException}.
 */
@Test(expected = HiveException.class)
public void testMultiLabelFmeasureBinary() throws Exception {
    final String[][] actual =
            {{"0", "2"}, {"0", "1"}, {"0"}, {"2"}, {"2", "0"}, {"0", "1"}, {"1", "2"}};
    final String[][] predicted =
            {{"0", "1"}, {"0", "2"}, {}, {"2"}, {"2", "0"}, {"0", "1", "2"}, {"1"}};

    final double beta = 1.0;
    final String average = "binary";

    setUpWithArguments(beta, average);
    evaluator.init(GenericUDAFEvaluator.Mode.PARTIAL1, inputOIs);
    evaluator.reset(agg);

    final int numSamples = actual.length;
    for (int row = 0; row < numSamples; row++) {
        final Object[] sample = {actual[row], predicted[row]};
        evaluator.iterate(agg, sample);
    }

    agg.get();
}
 
Example 24
/**
 * PARTIAL2 mode: two serialized builder-default sketches holding one value each are merged,
 * and the result must report two retained items, min 1.0 and max 2.0.
 */
@Test
public void partial2Mode() throws Exception {
  ObjectInspector[] originalInspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(originalInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(paramInfo)) {
    checkResultInspector(eval.init(Mode.PARTIAL2, new ObjectInspector[] {binaryInspector}));

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();

    // One fresh builder-default sketch per input value, each merged in serialized form.
    for (double value : new double[] { 1.0, 2.0 }) {
      UpdateDoublesSketch partialSketch = DoublesSketch.builder().build();
      partialSketch.update(value);
      eval.merge(state, new BytesWritable(partialSketch.toByteArray()));
    }

    BytesWritable serialized = (BytesWritable) eval.terminate(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example 25
/**
 * COMPLETE mode with int keys and no extra parameters: two distinct keys must give an
 * estimate of about 2, and terminating after a reset must return null.
 */
@Test
public void completeModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { intInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(paramInfo)) {
    checkFinalResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    State state = (State) eval.getNewAggregationBuffer();
    for (int key : new int[] {1, 2}) {
      eval.iterate(state, new Object[] {new IntWritable(key)});
    }

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    BytesWritable serialized = (BytesWritable) result;
    CpcSketch resultSketch = CpcSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);

    // A reset state has nothing to report.
    eval.reset(state);
    Object afterReset = eval.terminate(state);
    Assert.assertNull(afterReset);
  }
}
 
Example 26
/**
 * PARTIAL1 mode with int keys and double values: the partial result must be a two-element
 * list holding the default nominal entries and a serialized sketch that is not in
 * estimation mode and estimates 2.
 */
@Test
public void partial1ModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { intInspector, doubleInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummarySketchUDAF().getEvaluator(paramInfo)) {
    checkIntermediateResultInspector(eval.init(Mode.PARTIAL1, argInspectors));

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    for (int key : new int[] {1, 2}) {
      eval.iterate(state, new Object[] {new IntWritable(key), new DoubleWritable(1)});
    }

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> fields = (List<?>) result;
    Assert.assertEquals(fields.size(), 2);

    // Field 0 carries the nominal entries, field 1 the serialized sketch.
    IntWritable nominalEntries = (IntWritable) fields.get(0);
    Assert.assertEquals(nominalEntries.get(), DEFAULT_NOMINAL_ENTRIES);
    BytesWritable sketchBytes = (BytesWritable) fields.get(1);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory(sketchBytes), new DoubleSummaryDeserializer());
    Assert.assertFalse(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);
  }
}
 
Example 27
Source Project: flink   Source File: HiveGenericUDAF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Creates a new aggregation buffer from the partial evaluator.
 *
 * <p>Blink invokes this without calling open() first, so init() is run here on demand
 * before getNewAggregationBuffer() is used.
 * TODO: re-evaluate how this will fit into Flink's new type inference and udf system
 *
 * @return a new aggregation buffer obtained from the partial evaluator
 * @throws FlinkHiveUDFException if initialization or buffer creation fails
 */
@Override
public GenericUDAFEvaluator.AggregationBuffer createAccumulator() {
	try {
		final boolean needsInit = !initialized;
		if (needsInit) {
			init();
		}
		return partialEvaluator.getNewAggregationBuffer();
	} catch (Exception e) {
		final String message =
			String.format("Failed to create accumulator for %s", hiveFunctionWrapper.getClassName());
		throw new FlinkHiveUDFException(message, e);
	}
}
 
Example 28
Source Project: flink   Source File: HiveGenericUDAF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Passes one row of inputs to the partial evaluator.
 *
 * <p>Unless every conversion is an identity conversion, each input is first replaced
 * in place (inside the given array) by its Hive representation.
 *
 * @param acc the aggregation buffer to update
 * @param inputs the argument values of the row
 * @throws HiveException if the evaluator fails to process the row
 */
public void accumulate(GenericUDAFEvaluator.AggregationBuffer acc, Object... inputs) throws HiveException {
	if (allIdentityConverter) {
		// Nothing to convert: pass the inputs through untouched.
		partialEvaluator.iterate(acc, inputs);
		return;
	}

	int pos = 0;
	while (pos < inputs.length) {
		inputs[pos] = conversions[pos].toHiveObject(inputs[pos]);
		pos++;
	}
	partialEvaluator.iterate(acc, inputs);
}
 
Example 29
Source Project: flink   Source File: HiveGenericUDAF.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Merges every given buffer into {@code accumulator}.
 *
 * <p>Each buffer is first turned into a partial result by the partial evaluator, and
 * that partial result is then merged by the final evaluator.
 *
 * @param accumulator the buffer that receives the merged state
 * @param its the buffers to merge into {@code accumulator}
 * @throws HiveException if terminating a buffer or merging its partial result fails
 */
public void merge(
		GenericUDAFEvaluator.AggregationBuffer accumulator,
		Iterable<GenericUDAFEvaluator.AggregationBuffer> its) throws HiveException {

	for (GenericUDAFEvaluator.AggregationBuffer other : its) {
		final Object partialResult = partialEvaluator.terminatePartial(other);
		finalEvaluator.merge(accumulator, partialResult);
	}
}
 
Example 30
/**
 * COMPLETE mode with default parameters: two serialized default-constructed KLL sketches
 * holding one value each are iterated in; the result must match the k = 200 normalized
 * rank error and report two retained items, min 1 and max 2. Terminating after a reset
 * must return null.
 */
@Test
public void completeModelDefaultK() throws Exception {
  ObjectInspector[] argInspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo paramInfo = new SimpleGenericUDAFParameterInfo(argInspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(paramInfo)) {
    DataToSketchUDAFTest.checkResultInspector(eval.init(Mode.COMPLETE, argInspectors));

    SketchState state = (SketchState) eval.getNewAggregationBuffer();

    // One fresh default sketch per input value, each passed in serialized form.
    for (float value : new float[] { 1, 2 }) {
      KllFloatsSketch inputSketch = new KllFloatsSketch();
      inputSketch.update(value);
      eval.iterate(state, new Object[] { new BytesWritable(inputSketch.toByteArray()) });
    }

    BytesWritable serialized = (BytesWritable) eval.terminatePartial(state);
    KllFloatsSketch resultSketch = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(serialized));
    Assert.assertEquals(resultSketch.getNormalizedRankError(false), KllFloatsSketch.getNormalizedRankError(200, false));
    Assert.assertEquals(resultSketch.getNumRetained(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1f);
    Assert.assertEquals(resultSketch.getMaxValue(), 2f);

    // A reset state has nothing to report.
    eval.reset(state);
    Object afterReset = eval.terminate(state);
    Assert.assertNull(afterReset);
  }
}