com.tdunning.math.stats.AVLTreeDigest Java Examples

The following examples show how to use com.tdunning.math.stats.AVLTreeDigest. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: OnlineStatisticsProvider.java    From metron with Apache License 2.0 6 votes vote down vote up
/**
 * Populates this object's fields from a Kryo input.
 *
 * <p>The input is consumed in a fixed order: an int length, that many bytes holding a serialized
 * digest, the sample count, and then ten double-valued aggregates.
 *
 * @param kryo the Kryo instance (not used by this method)
 * @param input the input to read from
 */
@Override
public void read(Kryo kryo, Input input) {
  // The digest is length-prefixed: read the length, then exactly that many bytes.
  final int serializedLength = input.readInt();
  final ByteBuffer serialized = ByteBuffer.wrap(input.readBytes(serializedLength));
  digest = AVLTreeDigest.fromBytes(serialized);

  // Sample count.
  n = input.readLong();

  // Running sums.
  sum = input.readDouble();
  sumOfSquares = input.readDouble();
  sumOfLogs = input.readDouble();

  // Extremes.
  min = input.readDouble();
  max = input.readDouble();

  // M1..M4 accumulators.
  M1 = input.readDouble();
  M2 = input.readDouble();
  M3 = input.readDouble();
  M4 = input.readDouble();
}
 
Example #2
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Turns a digest into the result for the configured percentiles.
 *
 * <p>Each configured percentile is multiplied by 0.01 before being passed to
 * {@code quantile()}.
 *
 * @param digest the digest to query; may be {@code null}
 * @return {@code null} if {@code digest} is {@code null}; a single quantile value when exactly
 *     one percentile is configured; otherwise a list holding one quantile per percentile, in
 *     iteration order
 */
protected Object getValueFromDigest(AVLTreeDigest digest) {
  if (digest == null) {
    return null;
  }

  final int requested = percentiles.size();
  if (requested == 1) {
    final double onlyFraction = percentiles.get(0) * 0.01;
    return digest.quantile(onlyFraction);
  }

  final List<Double> quantiles = new ArrayList<>(requested);
  for (Double pct : percentiles) {
    quantiles.add(digest.quantile(pct * 0.01));
  }
  return quantiles;
}
 
Example #3
Source File: TestJsonFacets.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds two small digests and a third that merges them, then prints the quantile summary of
 * each via {@code str}.
 */
public void XtestPercentiles() {
  AVLTreeDigest catA = new AVLTreeDigest(100);
  for (double sample : new double[] {4, 2}) {
    catA.add(sample);
  }

  AVLTreeDigest catB = new AVLTreeDigest(100);
  for (double sample : new double[] {-9, 11, -5}) {
    catB.add(sample);
  }

  // Merge both categories into one digest.
  AVLTreeDigest all = new AVLTreeDigest(100);
  all.add(catA);
  all.add(catB);

  for (AVLTreeDigest each : new AVLTreeDigest[] {catA, catB, all}) {
    System.out.println(str(each));
  }

  // Values noted alongside the prints (catA, catB, all):
  // 2.0 2.2 3.0 3.8 4.0
  // -9.0 -8.2 -5.0 7.800000000000001 11.0
  // -9.0 -7.3999999999999995 2.0 8.200000000000001 11.0
}
 
Example #4
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every ordinal-backed value currently positioned on {@code values} to the digest of the
 * given slot, creating that digest on first use.
 *
 * @param doc the document id (not used directly in this method)
 * @param slot index into {@code digests}
 * @throws IOException if reading ordinals or terms fails
 */
@Override
protected void collectValues(int doc, int slot) throws IOException {
  AVLTreeDigest slotDigest = digests[slot];
  if (slotDigest == null) {
    slotDigest = new AVLTreeDigest(100);
    digests[slot] = slotDigest;
  }

  for (long ord = values.nextOrd();
      ord != SortedSetDocValues.NO_MORE_ORDS;
      ord = values.nextOrd()) {
    BytesRef term = values.lookupOrd(ord);
    Object decoded = sf.getType().toObject(sf, term);

    // Dates contribute their epoch-millisecond time; everything else is treated as a Number.
    double numeric;
    if (decoded instanceof Date) {
      numeric = ((Date) decoded).getTime();
    } else {
      numeric = ((Number) decoded).doubleValue();
    }
    slotDigest.add(numeric);
  }
}
 
Example #5
Source File: PercentileCounter.java    From kylin with Apache License 2.0 5 votes vote down vote up
/**
 * Reports how many bytes {@code AVLTreeDigest.fromBytes} consumes from the buffer's current
 * position, then puts the position back where it started.
 *
 * @param in buffer positioned at the start of a serialized digest
 * @return the number of bytes the position advanced while decoding
 */
public int peekLength(ByteBuffer in) {
    final int start = in.position();
    // Decode only to advance the position; the decoded digest itself is discarded.
    AVLTreeDigest.fromBytes(in);
    final int end = in.position();
    in.position(start);
    return end - start;
}
 
Example #6
Source File: TestJsonFacets.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Formats the quantiles of a digest at 0, 0.1, 0.5, 0.9 and 1 as a string in which every value
 * is preceded by a single space.
 *
 * @param digest the digest to summarize
 * @return the space-prefixed quantile values, in the order listed above
 */
private static String str(AVLTreeDigest digest) {
  final double[] probes = {0, .1, .5, .9, 1};
  final StringBuilder out = new StringBuilder();
  for (int i = 0; i < probes.length; i++) {
    out.append(" ");
    out.append(digest.quantile(probes[i]));
  }
  return out.toString();
}
 
Example #7
Source File: StatsValuesFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Folds the statistics found in {@code stv} into this instance, limited to the statistics that
 * are enabled by the {@code compute*} flags.
 *
 * @param stv named list read under the keys {@code "sum"}, {@code "sumOfSquares"} and
 *     {@code "percentiles"} (the last as a serialized digest in a {@code byte[]})
 */
@Override
public void updateTypeSpecificStats(@SuppressWarnings({"rawtypes"})NamedList stv) {
  if (computeSum) {
    final Number otherSum = (Number) stv.get("sum");
    sum += otherSum.doubleValue();
  }

  if (computeSumOfSquares) {
    final Number otherSumOfSquares = (Number) stv.get("sumOfSquares");
    sumOfSquares += otherSumOfSquares.doubleValue();
  }

  if (computePercentiles) {
    // Decode the serialized digest and merge it into the local one.
    final byte[] serialized = (byte[]) stv.get("percentiles");
    final AVLTreeDigest other = AVLTreeDigest.fromBytes(ByteBuffer.wrap(serialized));
    tdigest.add(other);
  }
}
 
Example #8
Source File: StatsValuesFactory.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the stats holder for a field and records which statistics have to be computed.
 *
 * <p>A digest is allocated only when percentiles are requested.
 *
 * @param statsField the field configuration, queried for the requested stats and for the
 *     digest compression
 */
public NumericStatsValues(StatsField statsField) {
  super(statsField);

  computeSum = statsField.calculateStats(Stat.sum);
  computeSumOfSquares = statsField.calculateStats(Stat.sumOfSquares);
  computePercentiles = statsField.calculateStats(Stat.percentiles);

  if (this.computePercentiles) {
    this.tdigest = new AVLTreeDigest(statsField.getTdigestCompression());
  }
}
 
Example #9
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Merges one serialized digest into the accumulated {@code digest}.
 *
 * @param facetResult a {@code byte[]} holding a serialized digest, or {@code null}
 * @param mcontext the merge context (not used by this method)
 */
@Override
public void merge(Object facetResult, Context mcontext) {
  // an explicit null can mean no values in the field
  if (facetResult == null) {
    return;
  }

  final byte[] serialized = (byte[]) facetResult;
  final AVLTreeDigest incoming = AVLTreeDigest.fromBytes(ByteBuffer.wrap(serialized));

  if (digest != null) {
    digest.add(incoming);
    return;
  }
  // First result seen: adopt it directly instead of merging into an empty digest.
  digest = incoming;
}
 
Example #10
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the value behind one ordinal to the digest of {@code currentSlot}, creating that digest
 * on first use.
 *
 * @param ord the ordinal to look up through {@code docToTerm}
 * @throws UncheckedIOException wrapping any {@link IOException} raised during the lookup
 */
@Override
public void call(int ord) {
  AVLTreeDigest slotDigest = digests[currentSlot];
  if (slotDigest == null) {
    slotDigest = new AVLTreeDigest(100);
    digests[currentSlot] = slotDigest;
  }

  try {
    BytesRef term = docToTerm.lookupOrd(ord);
    Object decoded = sf.getType().toObject(sf, term);

    // Dates contribute their epoch-millisecond time; everything else is treated as a Number.
    double numeric;
    if (decoded instanceof Date) {
      numeric = ((Date) decoded).getTime();
    } else {
      numeric = ((Number) decoded).doubleValue();
    }
    slotDigest.add(numeric);
  } catch (IOException e) {
    throw new UncheckedIOException(e);
  }
}
 
Example #11
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Serializes the digest of a slot into a fresh byte array.
 *
 * <p>The shared {@code buf} is reused when it is large enough; otherwise a new buffer is
 * allocated at 1.5 times the size reported by {@code byteSize()}.
 *
 * @param slot index into {@code digests}
 * @return the serialized digest, or {@code null} when the slot has no digest
 * @throws IOException declared by the signature
 */
public Object getShardValue(int slot) throws IOException {
  final AVLTreeDigest slotDigest = digests[slot];
  if (slotDigest == null) {
    return null;
  }

  slotDigest.compress();
  final int needed = slotDigest.byteSize();

  if (buf != null && buf.capacity() >= needed) {
    buf.clear();
  } else {
    buf = ByteBuffer.allocate(needed + (needed >> 1));  // oversize by 50%
  }

  slotDigest.asSmallBytes(buf);
  // Copy out only the bytes actually written.
  return Arrays.copyOf(buf.array(), buf.position());
}
 
Example #12
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds {@code sortvals} with one entry per slot: the quantile of the first configured
 * percentile (multiplied by 0.01), or negative infinity for slots without a digest.
 */
private void fillSortVals() {
  final int slotCount = digests.length;
  sortvals = new double[slotCount];
  final double fraction = percentiles.get(0) * 0.01;

  for (int slot = 0; slot < slotCount; slot++) {
    final AVLTreeDigest slotDigest = digests[slot];
    sortvals[slot] =
        (slotDigest == null) ? Double.NEGATIVE_INFINITY : slotDigest.quantile(fraction);
  }
}
 
Example #13
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds every value currently positioned on {@code values} to the digest of the given slot,
 * creating that digest on first use.
 *
 * @param doc the document id (not used directly in this method)
 * @param slot index into {@code digests}
 * @throws IOException if reading the values fails
 */
@Override
protected void collectValues(int doc, int slot) throws IOException {
  AVLTreeDigest slotDigest = digests[slot];
  if (slotDigest == null) {
    slotDigest = new AVLTreeDigest(100);
    digests[slot] = slotDigest;
  }

  // The count is read once up front; each nextValue() call then yields one value.
  int remaining = values.docValueCount();
  while (remaining > 0) {
    slotDigest.add(getDouble(values.nextValue()));
    remaining--;
  }
}
 
Example #14
Source File: Trails.java    From log-synth with Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the sketch: starts a producer feeding a bounded queue, creates the speed digest
 * and random source, builds the three strip charts and sets the frame rate.
 */
@Override
public void setup() {
    // One worker thread runs the producer; the queue holds at most 2000 states.
    ExecutorService producerPool = Executors.newFixedThreadPool(1);
    BlockingQueue<State> states = new ArrayBlockingQueue<>(2000);
    input = states;
    producerPool.submit(new Producer(states));

    speedDistribution = new AVLTreeDigest(300);
    noise = new Random();

    // Three charts built with the same first, third, fourth and fifth arguments.
    speed = new Stripchart(10, 430, 460, 80, 1, 0, 0, 90);
    rpm = new Stripchart(10, 520, 460, 80, 1, 0, 0, 2200);
    throttle = new Stripchart(10, 610, 460, 80, 1, 0, 0, 100);

    frameRate(15);
}
 
Example #15
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Produces the serialized form of a slot's digest.
 *
 * <p>The digest is compressed first. The shared {@code buf} is cleared and reused when its
 * capacity suffices; otherwise it is replaced by a buffer 50% larger than {@code byteSize()}.
 *
 * @param slot index into {@code digests}
 * @return a byte array with the serialized digest, or {@code null} if the slot is empty
 * @throws IOException declared by the signature
 */
public Object getShardValue(int slot) throws IOException {
  final AVLTreeDigest current = digests[slot];
  if (current == null) {
    return null;  // no values for this slot
  }

  current.compress();
  final int size = current.byteSize();

  final boolean reusable = buf != null && buf.capacity() >= size;
  if (reusable) {
    buf.clear();
  } else {
    buf = ByteBuffer.allocate(size + (size >> 1));  // oversize by 50%
  }

  current.asSmallBytes(buf);
  final int written = buf.position();
  return Arrays.copyOf(buf.array(), written);
}
 
Example #16
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Computes the per-slot sort keys into a newly allocated {@code sortvals} array.
 *
 * <p>The key is the digest's quantile for the first configured percentile (times 0.01); slots
 * with no digest get {@link Double#NEGATIVE_INFINITY}.
 */
private void fillSortVals() {
  final int total = digests.length;
  sortvals = new double[total];
  final double q = percentiles.get(0) * 0.01;

  int idx = 0;
  while (idx < total) {
    final AVLTreeDigest d = digests[idx];
    if (d != null) {
      sortvals[idx] = d.quantile(q);
    } else {
      sortvals[idx] = Double.NEGATIVE_INFINITY;
    }
    idx++;
  }
}
 
Example #17
Source File: ScalableStatistics.java    From kite with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the digest implementation used by default: an {@code AVLTreeDigest} configured with
 * {@code COMPRESSION}.
 *
 * @return a new, empty digest
 */
private static TDigest getDefaultTDigest() {
  // Alternatives kept for reference, not active:
  //   TDigest.createDigest(COMPRESSION)
  //   new TreeDigest(COMPRESSION)
  //   new ArrayDigest(4, COMPRESSION)
  //   new MergingDigest(COMPRESSION)
  final TDigest chosen = new AVLTreeDigest(COMPRESSION);
  return chosen;
}
 
Example #18
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the compact byte encoding of the digest stored for {@code slot}.
 *
 * <p>The shared {@code buf} is grown (to 1.5x the size from {@code byteSize()}) only when it is
 * missing or too small; otherwise it is cleared and reused.
 *
 * @param slot index into {@code digests}
 * @return the encoded digest as a byte array, or {@code null} when no digest exists for the slot
 * @throws IOException declared by the signature
 */
public Object getShardValue(int slot) throws IOException {
  final AVLTreeDigest td = digests[slot];
  if (td == null) {
    // no values for this slot
    return null;
  }

  td.compress();
  final int byteCount = td.byteSize();

  if (buf == null) {
    buf = ByteBuffer.allocate(byteCount + (byteCount >> 1));  // oversize by 50%
  } else if (buf.capacity() < byteCount) {
    buf = ByteBuffer.allocate(byteCount + (byteCount >> 1));  // oversize by 50%
  } else {
    buf.clear();
  }

  td.asSmallBytes(buf);
  final byte[] encoded = Arrays.copyOf(buf.array(), buf.position());
  return encoded;
}
 
Example #19
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Allocates {@code sortvals} and fills it slot by slot with the quantile that corresponds to
 * the first configured percentile (scaled by 0.01). Slots lacking a digest are assigned
 * {@link Double#NEGATIVE_INFINITY}.
 */
private void fillSortVals() {
  final int n = digests.length;
  sortvals = new double[n];
  final double sortFraction = percentiles.get(0) * 0.01;

  for (int i = 0; i != n; ++i) {
    final AVLTreeDigest candidate = digests[i];
    if (candidate == null) {
      sortvals[i] = Double.NEGATIVE_INFINITY;
      continue;
    }
    sortvals[i] = candidate.quantile(sortFraction);
  }
}
 
Example #20
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the document's value to the digest of {@code slotNum}, creating the digest on first use.
 * Documents for which {@code values.exists(doc)} is false are ignored.
 *
 * @param doc the document id
 * @param slotNum index into {@code digests}
 * @param slotContext slot context supplier (not used by this method)
 * @throws IOException if reading the value fails
 */
public void collect(int doc, int slotNum, IntFunction<SlotContext> slotContext) throws IOException {
  if (values.exists(doc)) {
    final double docValue = values.doubleVal(doc);

    AVLTreeDigest slotDigest = digests[slotNum];
    if (slotDigest == null) {
      // TODO: make compression configurable
      slotDigest = new AVLTreeDigest(100);
      digests[slotNum] = slotDigest;
    }

    slotDigest.add(docValue);
  }
}
 
Example #21
Source File: PercentileCounter.java    From kylin-on-parquet-v2 with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the serialized length of the digest at the buffer's current position by decoding
 * it once and measuring how far the position moved. The position is reset before returning.
 *
 * @param in buffer positioned at the start of a serialized digest
 * @return the number of bytes consumed by decoding
 */
public int peekLength(ByteBuffer in) {
    final int origin = in.position();
    // Only the position change matters; the decoded digest is thrown away.
    AVLTreeDigest.fromBytes(in);
    final int consumed = in.position() - origin;
    in.position(origin);
    return consumed;
}
 
Example #22
Source File: ScalableStatisticsTest.java    From kite with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an {@code AVLTreeDigest} with a compression of 400.
 *
 * @return a new, empty digest
 */
private TDigest createCustomTDigest() {
  final TDigest custom = new AVLTreeDigest(400);
  return custom;
}
 
Example #23
Source File: Util.java    From t-digest with Apache License 2.0 4 votes vote down vote up
/**
 * Creates an {@code AVLTreeDigest} with the requested compression.
 *
 * @param compression compression value passed to the digest constructor
 * @return a new, empty digest
 */
TDigest create(double compression) {
    final TDigest created = new AVLTreeDigest(compression);
    return created;
}
 
Example #24
Source File: TDigestBench.java    From t-digest with Apache License 2.0 4 votes vote down vote up
/**
 * Supplies an {@code AVLTreeDigest} built with the given compression.
 *
 * @param compression compression value passed to the digest constructor
 * @return a new, empty digest
 */
@Override
TDigest create(double compression) {
    final TDigest avl = new AVLTreeDigest(compression);
    return avl;
}
 
Example #25
Source File: RandomWalkSamplerTest.java    From log-synth with Apache License 2.0 4 votes vote down vote up
/**
 * Draws one million samples from the schema and checks two things: each walk's reported value
 * equals the running sum of its steps, and the observed gamma/step distributions agree with
 * reference distributions at a set of quantiles.
 *
 * @throws IOException if the schema resource cannot be read
 */
@Test
public void testBasics() throws IOException {
    // fields read from each sample:
    // g1 is gamma distributed with alpha = 0.2, beta = 0.2
    // g2 is gamma distributed; it is checked against GammaDistribution(1, 1) below
    // v1 is unit normal (also exposed without step/value structure as "v1-bare")
    // v2 is normal with mean = 0, sd = 2
    // v3 is gamma-normal with dof=2, mean = 0.
    SchemaSampler s = new SchemaSampler(Resources.asCharSource(Resources.getResource("schema015.json"), Charsets.UTF_8).read());

    TDigest tdG1 = new AVLTreeDigest(500);
    TDigest tdG2 = new AVLTreeDigest(500);
    TDigest td1 = new AVLTreeDigest(500);
    TDigest td2 = new AVLTreeDigest(500);
    TDigest td3 = new AVLTreeDigest(500);

    // running sums of the steps seen so far for v1, v2 and v3
    double x1 = 0;
    double x2 = 0;
    double x3 = 0;

    for (int i = 0; i < 1000000; i++) {
        JsonNode r = s.sample();
        tdG1.add(r.get("g1").asDouble());
        tdG2.add(r.get("g2").asDouble());

        // each walk's value must equal the exact (delta 0) sum of its steps
        double step1 = r.get("v1").get("step").asDouble();
        td1.add(step1);
        x1 += step1;
        assertEquals(x1, r.get("v1").get("value").asDouble(), 0);
        assertEquals(x1, r.get("v1-bare").asDouble(), 0);

        double step2 = r.get("v2").get("step").asDouble();
        td2.add(step2);
        x2 += step2;
        assertEquals(x2, r.get("v2").get("value").asDouble(), 0);

        double step3 = r.get("v3").get("step").asDouble();
        td3.add(step3);
        x3 += step3;
        assertEquals(x3, r.get("v3").get("value").asDouble(), 0);
    }

    // now compare against reference distributions to test accuracy of the observed step distributions
    NormalDistribution normalDistribution = new NormalDistribution();
    GammaDistribution gd1 = new GammaDistribution(0.2, 5);
    GammaDistribution gd2 = new GammaDistribution(1, 1);
    TDistribution tDistribution = new TDistribution(2);

    // NOTE(review): this list used to contain 0.99 twice, which only repeated identical
    // assertions. 0.999 may have been intended (mirroring 0.001) -- confirm before adding it,
    // since it would make the test stricter.
    for (double q : new double[]{0.001, 0.01, 0.1, 0.2, 0.5, 0.8, 0.9, 0.99}) {
        // allowed error shrinks towards the tails: 10% of q * (1 - q)
        double tolerance = (1 - q) * q * 0.1;

        double uG1 = gd1.cumulativeProbability(tdG1.quantile(q));
        assertEquals(q, uG1, tolerance);

        double uG2 = gd2.cumulativeProbability(tdG2.quantile(q));
        assertEquals(q, uG2, tolerance);

        double u1 = normalDistribution.cumulativeProbability(td1.quantile(q));
        assertEquals(q, u1, tolerance);

        // v2 steps have sd = 2, so rescale before comparing with the unit normal
        double u2 = normalDistribution.cumulativeProbability(td2.quantile(q) / 2);
        assertEquals(q, u2, tolerance);

        double u3 = tDistribution.cumulativeProbability(td3.quantile(q));
        assertEquals(q, u3, tolerance);
    }
}
 
Example #26
Source File: PercentileCounter.java    From kylin-on-parquet-v2 with Apache License 2.0 4 votes vote down vote up
/**
 * Replaces {@code registers} with the digest decoded from the buffer and copies that digest's
 * compression into {@code compression}.
 *
 * @param in buffer positioned at the start of a serialized digest
 */
public void readRegisters(ByteBuffer in) {
    final AVLTreeDigest decoded = AVLTreeDigest.fromBytes(in);
    registers = decoded;
    compression = decoded.compression();
}
 
Example #27
Source File: PercentileCounter.java    From kylin with Apache License 2.0 4 votes vote down vote up
/**
 * Loads a serialized digest from the buffer into {@code registers} and updates
 * {@code compression} from the loaded digest.
 *
 * @param in buffer positioned at the start of a serialized digest
 */
public void readRegisters(ByteBuffer in) {
    final AVLTreeDigest loaded = AVLTreeDigest.fromBytes(in);
    this.registers = loaded;
    this.compression = loaded.compression();
}
 
Example #28
Source File: OnlineStatisticsProvider.java    From metron with Apache License 2.0 4 votes vote down vote up
/** Creates a provider whose digest is an empty {@code AVLTreeDigest} built with {@code COMPRESSION}. */
public OnlineStatisticsProvider() {
  this.digest = new AVLTreeDigest(COMPRESSION);
}
 
Example #29
Source File: TestJsonFacets.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/** Test code to ensure TDigest is working as we expect, including a serialize/merge round trip. */

  public void XtestTDigest() throws Exception {
    // Quantiles printed after every stage.
    final double[] probes = {0.1, 0.5, 0.9};

    AVLTreeDigest t1 = new AVLTreeDigest(100);
    t1.add(10, 1);
    t1.add(90, 1);
    t1.add(50, 1);

    for (double q : probes) {
      System.out.println(t1.quantile(q));
    }

    // Expected value first, then the actual one, so a failure message reads correctly.
    assertEquals(50.0, t1.quantile(0.5), 0.01);

    AVLTreeDigest t2 = new AVLTreeDigest(100);
    t2.add(130, 1);
    t2.add(170, 1);
    t2.add(90, 1);

    for (double q : probes) {
      System.out.println(t2.quantile(q));
    }

    AVLTreeDigest top = new AVLTreeDigest(100);

    // Round-trip t1 through its compact byte form and merge the decoded copy into top.
    t1.compress();
    ByteBuffer buf = ByteBuffer.allocate(t1.byteSize()); // upper bound
    t1.asSmallBytes(buf);
    byte[] arr1 = Arrays.copyOf(buf.array(), buf.position());

    ByteBuffer rbuf = ByteBuffer.wrap(arr1);
    top.add(AVLTreeDigest.fromBytes(rbuf));

    for (double q : probes) {
      System.out.println(top.quantile(q));
    }

    // Same round trip for t2, merged into the same top digest.
    t2.compress();
    ByteBuffer buf2 = ByteBuffer.allocate(t2.byteSize()); // upper bound
    t2.asSmallBytes(buf2);
    byte[] arr2 = Arrays.copyOf(buf2.array(), buf2.position());

    ByteBuffer rbuf2 = ByteBuffer.wrap(arr2);
    top.add(AVLTreeDigest.fromBytes(rbuf2));

    for (double q : probes) {
      System.out.println(top.quantile(q));
    }
  }
 
Example #30
Source File: PercentileAgg.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Discards all collected state: {@code digests} becomes a new array of the same length with
 * every entry {@code null}, and the cached {@code sortvals} is cleared.
 */
@Override
public void reset() {
  final int slotCount = digests.length;
  digests = new AVLTreeDigest[slotCount];
  sortvals = null;
}