org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo Java Examples

The following examples show how to use org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void partial1ModeGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkResultInspector(resultInspector);

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new DoubleWritable(1.0), new IntWritable(256) });
    eval.iterate(state, new Object[] { new DoubleWritable(2.0), new IntWritable(256) });

    BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example #2
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void partial1ModeGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { floatInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkResultInspector(resultInspector);

    SketchState state = (SketchState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new FloatWritable(1), new IntWritable(400) });
    eval.iterate(state, new Object[] { new FloatWritable(2), new IntWritable(400) });

    BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
    KllFloatsSketch resultSketch = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getNormalizedRankError(false), KllFloatsSketch.getNormalizedRankError(400, false));
    Assert.assertEquals(resultSketch.getNumRetained(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1f);
    Assert.assertEquals(resultSketch.getMaxValue(), 2f);
  }
}
 
Example #3
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void partial1ModeStringKeysExplicitParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, doubleInspector, intInspector, floatInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummarySketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkIntermediateResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new Text("a"), new DoubleWritable(1), new IntWritable(32), new FloatWritable(0.99f)});
    eval.iterate(state, new Object[] {new Text("b"), new DoubleWritable(1), new IntWritable(32), new FloatWritable(0.99f)});

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> r = (List<?>) result;
    Assert.assertEquals(r.size(), 2);
    Assert.assertEquals(((IntWritable) r.get(0)).get(), 32);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) r.get(1)), new DoubleSummaryDeserializer());
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
  }
}
 
Example #4
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void partial2Mode() throws Exception {
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(new ObjectInspector[] { floatInspector }, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL2, new ObjectInspector[] {binaryInspector});
    checkResultInspector(resultInspector);

    SketchState state = (SketchState) eval.getNewAggregationBuffer();

    KllFloatsSketch sketch1 = new KllFloatsSketch();
    sketch1.update(1);
    eval.merge(state, new BytesWritable(sketch1.toByteArray()));

    KllFloatsSketch sketch2 = new KllFloatsSketch();
    sketch2.update(2);
    eval.merge(state, new BytesWritable(sketch2.toByteArray()));

    BytesWritable bytes = (BytesWritable) eval.terminate(state);
    KllFloatsSketch resultSketch = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getNumRetained(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1f);
    Assert.assertEquals(resultSketch.getMaxValue(), 2f);
  }
}
 
Example #5
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void completeModeIntValuesDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkFinalResultInspector(resultInspector);

    UnionState state = (UnionState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new IntWritable(1)});
    eval.iterate(state, new Object[] {new IntWritable(2)});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch resultSketch = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) result));
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #6
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void completeModeGivenK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkResultInspector(resultInspector);

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new DoubleWritable(1.0), new IntWritable(256) });
    eval.iterate(state, new Object[] { new DoubleWritable(2.0), new IntWritable(256) });

    BytesWritable bytes = (BytesWritable) eval.terminate(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getK(), 256);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example #7
Source File: UnionArrayOfDoublesSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();

  if (inspectors.length < 1) {
    throw new UDFArgumentException("Expected at least 1 argument");
  }
  if (inspectors.length > 3) {
    throw new UDFArgumentTypeException(inspectors.length - 1, "Expected no more than 3 arguments");
  }

  ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[0], 0, PrimitiveCategory.BINARY);

  // nominal number of entries
  if (inspectors.length > 1) {
    ObjectInspectorValidator.validateIntegralParameter(inspectors[1], 1);
  }

  // number of double values per key
  if (inspectors.length > 2) {
    ObjectInspectorValidator.validateIntegralParameter(inspectors[2], 2);
  }

  return new UnionArrayOfDoublesSketchEvaluator();
}
 
Example #8
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void mergeTerminate() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector });
    checkResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    ItemsState<String> state = (ItemsState<String>) eval.getNewAggregationBuffer();
    state.init(256);
    state.update("a");

    ItemsSketch<String> sketch = new ItemsSketch<>(256);
    sketch.update("b");

    eval.merge(state, new BytesWritable(sketch.toByteArray(serDe)));

    BytesWritable bytes = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> resultSketch = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(bytes), serDe);
    Assert.assertEquals(resultSketch.getStreamLength(), 2);
    Assert.assertEquals(resultSketch.getNumActiveItems(), 2);
    Assert.assertEquals(resultSketch.getEstimate("a"), 1);
    Assert.assertEquals(resultSketch.getEstimate("b"), 1);
  }
}
 
Example #9
Source File: DataToDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void completeModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector, doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummarySketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkFinalResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new IntWritable(1), new DoubleWritable(1)});
    eval.iterate(state, new Object[] {new IntWritable(2), new DoubleWritable(1)});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) result), new DoubleSummaryDeserializer());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #10
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void iterateTerminatePartial() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    ItemsState<String> state = (ItemsState<String>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text("a"), new IntWritable(256) });
    eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text("b"), new IntWritable(256) });

    BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
    ItemsSketch<String> resultSketch = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(bytes), serDe);
    Assert.assertEquals(resultSketch.getStreamLength(), 2);
    Assert.assertEquals(resultSketch.getNumActiveItems(), 2);
    Assert.assertEquals(resultSketch.getEstimate("a"), 1);
    Assert.assertEquals(resultSketch.getEstimate("b"), 1);
  }
}
 
Example #11
Source File: DataToDoublesSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void partial1ModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoublesSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    checkResultInspector(resultInspector);

    DoublesUnionState state = (DoublesUnionState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new DoubleWritable(1.0) });
    eval.iterate(state, new Object[] { new DoubleWritable(2.0) });

    BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
    DoublesSketch resultSketch = DoublesSketch.wrap(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getK(), 128);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1.0);
    Assert.assertEquals(resultSketch.getMaxValue(), 2.0);
  }
}
 
Example #12
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void completeModeDoubleValuesExplicitParameters() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, intConstantInspector, floatConstantInspector, longConstantInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkFinalResultInspector(resultInspector);

    final long seed = 2;
    UnionState state = (UnionState) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new DoubleWritable(1), new IntWritable(16), new FloatWritable(0.99f), new LongWritable(seed)});
    eval.iterate(state, new Object[] {new DoubleWritable(2), new IntWritable(16), new FloatWritable(0.99f), new LongWritable(seed)});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch resultSketch = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) result), seed);
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
  }
}
 
Example #13
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 6 votes vote down vote up
@Test
public void completeModeDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    DataToDoublesSketchUDAFTest.checkResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    ItemsUnionState<String> state = (ItemsUnionState<String>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text("a") });
    eval.iterate(state, new Object[] { new org.apache.hadoop.io.Text("b") });

    BytesWritable bytes = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> resultSketch = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(bytes), comparator, serDe);
    Assert.assertEquals(resultSketch.getK(), 128);
    Assert.assertEquals(resultSketch.getRetainedItems(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), "a");
    Assert.assertEquals(resultSketch.getMaxValue(), "b");
  }
}
 
Example #14
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test(expectedExceptions = { UDFArgumentTypeException.class })
public void initIvalidCategoryArg3() throws SemanticException {
  UnionSketchUDAF udf = new UnionSketchUDAF();
  GenericUDAFParameterInfo params = new SimpleGenericUDAFParameterInfo(
      new ObjectInspector[] { binaryInspector, intInspector, structInspector }, false, false, false);
  udf.getEvaluator(params);
}
 
Example #15
Source File: UnionDoubleSummaryWithModeSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModeDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionDoubleSummaryWithModeSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    DataToDoubleSummarySketchUDAFTest.checkFinalResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();

    UpdatableSketch<Double, DoubleSummary> sketch1 =
        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(DoubleSummary.Mode.Sum)).build();
    sketch1.update(1, 1.0);
    sketch1.update(2, 2.0);
    eval.iterate(state, new Object[] {new BytesWritable(sketch1.compact().toByteArray())});

    UpdatableSketch<Double, DoubleSummary> sketch2 =
        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(DoubleSummary.Mode.Sum)).build();
    sketch2.update(1, 2.0);
    sketch2.update(2, 1.0);
    eval.iterate(state, new Object[] {new BytesWritable(sketch2.compact().toByteArray())});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) result), new DoubleSummaryDeserializer());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);
    SketchIterator<DoubleSummary> it = resultSketch.iterator();
    while (it.next()) {
      Assert.assertEquals(it.getSummary().getValue(), 3.0);
    }

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #16
Source File: DataToStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void mergeTerminateEmptyState() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { stringInspector, intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToStringsSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL2, new ObjectInspector[] { binaryInspector });
    checkResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    ItemsState<String> state = (ItemsState<String>) eval.getNewAggregationBuffer();

    ItemsSketch<String> sketch1 = new ItemsSketch<>(256);
    sketch1.update("a");
    eval.merge(state, new BytesWritable(sketch1.toByteArray(serDe)));

    ItemsSketch<String> sketch2 = new ItemsSketch<>(256);
    sketch2.update("b");
    eval.merge(state, new BytesWritable(sketch2.toByteArray(serDe)));

    BytesWritable bytes = (BytesWritable) eval.terminate(state);
    ItemsSketch<String> resultSketch = ItemsSketch.getInstance(BytesWritableHelper.wrapAsMemory(bytes), serDe);
    Assert.assertEquals(resultSketch.getStreamLength(), 2);
    Assert.assertEquals(resultSketch.getNumActiveItems(), 2);
    Assert.assertEquals(resultSketch.getEstimate("a"), 1);
    Assert.assertEquals(resultSketch.getEstimate("b"), 1);
  }
}
 
Example #17
Source File: DataToDoubleSummaryWithModeSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModeDoubleKeysExplicitParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { doubleInspector, doubleInspector, intInspector, floatInspector, stringInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummaryWithModeSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkFinalResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new DoubleWritable(1), new DoubleWritable(1), new IntWritable(32), new FloatWritable(0.99f), new Text("Max")});
    eval.iterate(state, new Object[] {new DoubleWritable(2), new DoubleWritable(2), new IntWritable(32), new FloatWritable(0.99f), new Text("Max")});
    eval.iterate(state, new Object[] {new DoubleWritable(1), new DoubleWritable(2), new IntWritable(32), new FloatWritable(0.99f), new Text("Max")});
    eval.iterate(state, new Object[] {new DoubleWritable(2), new DoubleWritable(1), new IntWritable(32), new FloatWritable(0.99f), new Text("Max")});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) result), new DoubleSummaryDeserializer());
    // because of sampling probability < 1
    Assert.assertTrue(resultSketch.isEstimationMode());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.05);
    SketchIterator<DoubleSummary> it = resultSketch.iterator();
    while (it.next()) {
      Assert.assertEquals(it.getSummary().getValue(), 2.0);
    }

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #18
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void partial1ModeDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    DataToSketchUDAFTest.checkIntermediateResultInspector(resultInspector);

    State state = (State) eval.getNewAggregationBuffer();

    HllSketch sketch1 = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
    sketch1.update(1);
    eval.iterate(state, new Object[] {new BytesWritable(sketch1.toCompactByteArray())});

    HllSketch sketch2 = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
    sketch2.update(2);
    eval.iterate(state, new Object[] {new BytesWritable(sketch2.toCompactByteArray())});

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> r = (List<?>) result;
    Assert.assertEquals(r.size(), 3);
    Assert.assertEquals(((IntWritable) r.get(0)).get(), SketchEvaluator.DEFAULT_LG_K);
    Assert.assertEquals(((Text) r.get(1)).toString(), SketchEvaluator.DEFAULT_HLL_TYPE.toString());
    HllSketch resultSketch = HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) r.get(2)));
    Assert.assertEquals(resultSketch.getLgConfigK(), SketchEvaluator.DEFAULT_LG_K);
    Assert.assertEquals(resultSketch.getTgtHllType(), SketchEvaluator.DEFAULT_HLL_TYPE);
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);
  }
}
 
Example #19
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModeDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    DataToSketchUDAFTest.checkFinalResultInspector(resultInspector);

    State state = (State) eval.getNewAggregationBuffer();

    HllSketch sketch1 = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
    sketch1.update(1);
    eval.iterate(state, new Object[] {new BytesWritable(sketch1.toCompactByteArray())});

    HllSketch sketch2 = new HllSketch(SketchEvaluator.DEFAULT_LG_K);
    sketch2.update(2);
    eval.iterate(state, new Object[] {new BytesWritable(sketch2.toCompactByteArray())});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    HllSketch resultSketch = HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) result));
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #20
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void partial1ModeExplicitParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector, intConstantInspector, stringConstantInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.PARTIAL1, inspectors);
    DataToSketchUDAFTest.checkIntermediateResultInspector(resultInspector);

    final int lgK = 10;
    final TgtHllType hllType = TgtHllType.HLL_6;
    State state = (State) eval.getNewAggregationBuffer();

    HllSketch sketch1 = new HllSketch(lgK, hllType);
    sketch1.update(1);
    eval.iterate(state, new Object[] {new BytesWritable(sketch1.toCompactByteArray()),
        new IntWritable(lgK), new Text(hllType.toString())});

    HllSketch sketch2 = new HllSketch(lgK, hllType);
    sketch2.update(2);
    eval.iterate(state, new Object[] {new BytesWritable(sketch2.toCompactByteArray()),
        new IntWritable(lgK), new Text(hllType.toString())});

    Object result = eval.terminatePartial(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof List);
    List<?> r = (List<?>) result;
    Assert.assertEquals(r.size(), 3);
    Assert.assertEquals(((IntWritable) r.get(0)).get(), lgK);
    Assert.assertEquals(((Text) r.get(1)).toString(), hllType.toString());
    HllSketch resultSketch = HllSketch.heapify(BytesWritableHelper.wrapAsMemory((BytesWritable) r.get(2)));
    Assert.assertEquals(resultSketch.getLgConfigK(), lgK);
    Assert.assertEquals(resultSketch.getTgtHllType(), hllType);
    Assert.assertEquals(resultSketch.getEstimate(), 2.0, 0.01);
  }
}
 
Example #21
Source File: UnionStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test(expectedExceptions = UDFArgumentTypeException.class)
public void getEvaluatorWrongType() throws Exception {
  ObjectInspector intInspector =
    PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(PrimitiveCategory.INT);

  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  new UnionStringsSketchUDAF().getEvaluator(info);
}
 
Example #22
Source File: UnionStringsSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test(expectedExceptions = UDFArgumentTypeException.class)
public void getEvaluatorWrongCategory() throws Exception {
  ObjectInspector structInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
    Arrays.asList("a"),
    Arrays.asList(binaryInspector)
  );
  ObjectInspector[] inspectors = new ObjectInspector[] { structInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  new UnionStringsSketchUDAF().getEvaluator(info);
}
 
Example #23
Source File: DataToSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test(expectedExceptions = { UDFArgumentTypeException.class })
public void initInvalidTypeArg3() throws SemanticException {
  DataToSketchUDAF udf = new DataToSketchUDAF();
  GenericUDAFParameterInfo params = new SimpleGenericUDAFParameterInfo(
      new ObjectInspector[] { stringInspector, intConstantInspector, intConstantInspector }, false, false, false);
  udf.getEvaluator(params);
}
 
Example #24
Source File: UnionDoubleSummarySketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void finalMode() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionDoubleSummarySketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.FINAL, new ObjectInspector[] {structInspector});
    DataToDoubleSummarySketchUDAFTest.checkFinalResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();

    UpdatableSketch<Double, DoubleSummary> sketch1 =
        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(DoubleSummary.Mode.Sum)).build();
    sketch1.update(1, 1.0);
    eval.merge(state, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new BytesWritable(sketch1.compact().toByteArray()))
    );

    UpdatableSketch<Double, DoubleSummary> sketch2 =
        new UpdatableSketchBuilder<>(new DoubleSummaryFactory(DoubleSummary.Mode.Sum)).build();
    sketch2.update(2, 1.0);
    eval.merge(state, Arrays.asList(
      new IntWritable(DEFAULT_NOMINAL_ENTRIES),
      new BytesWritable(sketch2.compact().toByteArray()))
    );

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) result), new DoubleSummaryDeserializer());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);
  }
}
 
Example #25
Source File: DataToDoubleSummaryWithModeSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModeIntKeysDefaultParams() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { intInspector, doubleInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new DataToDoubleSummaryWithModeSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    checkFinalResultInspector(resultInspector);

    @SuppressWarnings("unchecked")
    State<DoubleSummary> state = (State<DoubleSummary>) eval.getNewAggregationBuffer();
    eval.iterate(state, new Object[] {new IntWritable(1), new DoubleWritable(1)});
    eval.iterate(state, new Object[] {new IntWritable(2), new DoubleWritable(2)});
    eval.iterate(state, new Object[] {new IntWritable(1), new DoubleWritable(2)});
    eval.iterate(state, new Object[] {new IntWritable(2), new DoubleWritable(1)});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch<DoubleSummary> resultSketch = Sketches.heapifySketch(
        BytesWritableHelper.wrapAsMemory((BytesWritable) result), new DoubleSummaryDeserializer());
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);
    SketchIterator<DoubleSummary> it = resultSketch.iterator();
    while (it.next()) {
      Assert.assertEquals(it.getSummary().getValue(), 3.0);
    }

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #26
Source File: UnionSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModelDefaultK() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new UnionSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    DataToSketchUDAFTest.checkResultInspector(resultInspector);

    SketchState state = (SketchState) eval.getNewAggregationBuffer();

    KllFloatsSketch sketch1 = new KllFloatsSketch();
    sketch1.update(1);
    eval.iterate(state, new Object[] { new BytesWritable(sketch1.toByteArray()) });

    KllFloatsSketch sketch2 = new KllFloatsSketch();
    sketch2.update(2);
    eval.iterate(state, new Object[] { new BytesWritable(sketch2.toByteArray()) });

    BytesWritable bytes = (BytesWritable) eval.terminatePartial(state);
    KllFloatsSketch resultSketch = KllFloatsSketch.heapify(BytesWritableHelper.wrapAsMemory(bytes));
    Assert.assertEquals(resultSketch.getNormalizedRankError(false), KllFloatsSketch.getNormalizedRankError(200, false));
    Assert.assertEquals(resultSketch.getNumRetained(), 2);
    Assert.assertEquals(resultSketch.getMinValue(), 1f);
    Assert.assertEquals(resultSketch.getMaxValue(), 2f);

    eval.reset(state);
    Assert.assertNull(eval.terminate(state));
  }
}
 
Example #27
Source File: IntersectSketchUDAFTest.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Test
public void completeModeDefaultSeed() throws Exception {
  ObjectInspector[] inspectors = new ObjectInspector[] { binaryInspector };
  GenericUDAFParameterInfo info = new SimpleGenericUDAFParameterInfo(inspectors, false, false, false);
  try (GenericUDAFEvaluator eval = new IntersectSketchUDAF().getEvaluator(info)) {
    ObjectInspector resultInspector = eval.init(Mode.COMPLETE, inspectors);
    DataToSketchUDAFTest.checkFinalResultInspector(resultInspector);

    IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState state =
        (IntersectSketchUDAF.IntersectSketchUDAFEvaluator.IntersectionState) eval.getNewAggregationBuffer();

    UpdateSketch sketch1 = UpdateSketch.builder().build();
    sketch1.update(1);
    sketch1.update(2);
    sketch1.update(3);
    eval.iterate(state, new Object[] {new BytesWritable(sketch1.compact().toByteArray())});

    UpdateSketch sketch2 = UpdateSketch.builder().build();
    sketch2.update(2);
    sketch2.update(3);
    sketch2.update(4);
    eval.iterate(state, new Object[] {new BytesWritable(sketch2.compact().toByteArray())});

    Object result = eval.terminate(state);
    Assert.assertNotNull(result);
    Assert.assertTrue(result instanceof BytesWritable);
    Sketch resultSketch = Sketches.wrapSketch(BytesWritableHelper.wrapAsMemory((BytesWritable) result));
    Assert.assertEquals(resultSketch.getEstimate(), 2.0);

    eval.reset(state);
    result = eval.terminate(state);
    Assert.assertNull(result);
  }
}
 
Example #28
Source File: DataToArrayOfDoublesSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();

  if (inspectors.length < 2) {
    throw new UDFArgumentException("Expected at least 2 arguments");
  }
  ObjectInspectorValidator.validateCategoryPrimitive(inspectors[0], 0);

  int numValues = 0;
  while ((numValues + 1) < inspectors.length) {
    ObjectInspectorValidator.validateCategoryPrimitive(inspectors[numValues + 1], numValues + 1);
    final PrimitiveObjectInspector primitiveInspector =
        (PrimitiveObjectInspector) inspectors[numValues + 1];
    if (primitiveInspector.getPrimitiveCategory() != PrimitiveCategory.DOUBLE) { break; }
    numValues++;
  }
  if (numValues == 0) {
    throw new UDFArgumentException("Expected at least 1 double value");
  }

  // nominal number of entries
  if (inspectors.length > (numValues + 1)) {
    ObjectInspectorValidator.validateIntegralParameter(inspectors[numValues + 1], numValues + 1);
  }

  // sampling probability
  if (inspectors.length > (numValues + 2)) {
    ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[numValues + 2],
        numValues + 2, PrimitiveCategory.FLOAT);
  }

  // there must be nothing after sampling probability
  if (inspectors.length > (numValues + 3)) {
    throw new UDFArgumentException("Unexpected argument " + (numValues + 4));
  }

  return new DataToArrayOfDoublesSketchEvaluator();
}
 
Example #29
Source File: UnionDoublesSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info) throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();
  if ((inspectors.length != 1) && (inspectors.length != 2)) {
    throw new UDFArgumentException("One or two arguments expected");
  }
  ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[0], 0, PrimitiveCategory.BINARY);
  if (inspectors.length == 2) {
    ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[1], 1, PrimitiveCategory.INT);
  }
  return new UnionEvaluator();
}
 
Example #30
Source File: DataToDoublesSketchUDAF.java    From incubator-datasketches-hive with Apache License 2.0 5 votes vote down vote up
@Override
public GenericUDAFEvaluator getEvaluator(final GenericUDAFParameterInfo info)
    throws SemanticException {
  final ObjectInspector[] inspectors = info.getParameterObjectInspectors();
  if ((inspectors.length != 1) && (inspectors.length != 2)) {
    throw new UDFArgumentException("One or two arguments expected");
  }
  ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[0], 0,
      PrimitiveCategory.DOUBLE);
  if (inspectors.length == 2) {
    ObjectInspectorValidator.validateGivenPrimitiveCategory(inspectors[1], 1,
        PrimitiveCategory.INT);
  }
  return new DataToSketchEvaluator();
}