Java Code Examples for org.apache.solr.client.solrj.io.Tuple#put()

The following examples show how to use org.apache.solr.client.solrj.io.Tuple#put(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You can also find related API usage in the sidebar.
Example 1
Source File: SetValueEvaluator.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sets one field on a clone of the supplied tuple and returns the clone.
 *
 * @param values {@code values[0]} must be a Tuple; {@code values[1]} is cast to String and used
 *               as the field name; {@code values[2]} is the value to store
 * @return the cloned tuple, with {@code put} applied to the clone rather than to {@code values[0]}
 * @throws IOException if {@code values[0]} is not a Tuple
 */
@Override
public Object doWork(Object... values) throws IOException {
  if (!(values[0] instanceof Tuple)) {
    throw new IOException("The setValue function expects a Tuple as the first parameter");
  }

  Tuple source = (Tuple) values[0];
  String rawKey = (String) values[1];
  Object rawValue = values[2];

  // Double-quote characters are removed from String values and from the field name.
  Object cleanValue = rawValue instanceof String ? ((String) rawValue).replace("\"", "") : rawValue;
  String cleanKey = rawKey.replace("\"", "");

  Tuple copy = source.clone();
  copy.put(cleanKey, cleanValue);
  return copy;
}
 
Example 2
Source File: CloudSolrStream.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Polls the first wrapper from {@code tuples} and returns its tuple, or returns an EOF tuple
 * when the poll yields nothing. When {@code trace} is set, the returned tuple is tagged with
 * the collection under the {@code _COLLECTION_} key.
 *
 * @return the next tuple, or {@code Tuple.EOF()} when {@code tuples} has no wrapper left
 * @throws IOException declared by the signature; presumably raised by {@code TupleWrapper.next()}
 */
protected Tuple _read() throws IOException {
  TupleWrapper head = tuples.pollFirst();

  if (head == null) {
    Tuple eof = Tuple.EOF();
    if (trace) {
      eof.put("_COLLECTION_", this.collection);
    }
    return eof;
  }

  Tuple current = head.getTuple();
  if (trace) {
    current.put("_COLLECTION_", this.collection);
  }

  // The wrapper goes back into the collection only when next() reports true.
  if (head.next()) {
    tuples.add(head);
  }
  return current;
}
 
Example 3
Source File: DeepRandomStream.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Removes the first wrapper from {@code tuples} and returns its tuple; the wrapper is appended
 * to the end again if {@code next()} reports true. With nothing queued, an EOF tuple is returned.
 * When {@code trace} is set, the returned tuple carries the collection under {@code _COLLECTION_}.
 *
 * @return the next tuple, or {@code Tuple.EOF()} when {@code tuples} is empty
 * @throws IOException declared by the signature; presumably raised by {@code TupleWrapper.next()}
 */
protected Tuple _read() throws IOException {
  Tuple result;

  if (tuples.size() == 0) {
    result = Tuple.EOF();
    tagWithCollectionIfTracing:
    {
      if (trace) {
        result.put("_COLLECTION_", this.collection);
      }
    }
    return result;
  }

  TupleWrapper wrapper = tuples.removeFirst();
  result = wrapper.getTuple();
  if (trace) {
    result.put("_COLLECTION_", this.collection);
  }

  // Rotate the wrapper to the back of the queue while it still reports more data.
  boolean hasMore = wrapper.next();
  if (hasMore) {
    tuples.addLast(wrapper);
  }
  return result;
}
 
Example 4
Source File: JDBCStream.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Reads the next row of {@code resultSet} into a tuple.
 *
 * @return a tuple holding one entry per value selector (keyed by column name), or a tuple with
 *         {@code StreamParams.EOF} set to {@code true} when the result set has no further row
 * @throws IOException wrapping any {@link SQLException} raised while reading
 */
public Tuple read() throws IOException {
  try {
    Tuple row = new Tuple();

    if (!resultSet.next()) {
      // No record available: signal end of stream.
      row.put(StreamParams.EOF, true);
      return row;
    }

    // A record is available: copy every selected column into the tuple.
    for (ResultSetValueSelector column : valueSelectors) {
      row.put(column.getColumnName(), column.selectValue(resultSet));
    }
    return row;
  } catch (SQLException e) {
    throw new IOException(String.format(Locale.ROOT, "Failed to read next record with error '%s'", e.getMessage()), e);
  }
}
 
Example 5
Source File: ReplaceWithValueOperation.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Overwrites {@code fieldName} on the tuple with {@code replacement}; when the replacement is
 * {@code null} the field is removed instead.
 *
 * @param tuple the tuple to modify in place
 */
private void replace(Tuple tuple){
  if (replacement != null) {
    tuple.put(fieldName, replacement);
    return;
  }
  tuple.remove(fieldName);
}
 
Example 6
Source File: StatsStream.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Copies metric values from a response list into the tuple.
 *
 * <p>Metrics whose identifier starts with {@code count(} are written under {@code count(*)}
 * from the {@code count} entry. Every other metric is read from {@code facet_<n>}, where
 * {@code n} advances once per non-count metric whether or not a value was present.
 *
 * @param t        the tuple to populate
 * @param nl       the response list; nothing is written when it is {@code null}
 * @param _metrics the metrics to look up, in order
 */
private void fillTuple(Tuple t,
                       @SuppressWarnings({"rawtypes"})NamedList nl,
                       Metric[] _metrics) {
  if (nl == null) {
    return;
  }

  int facetIndex = 0;
  for (Metric metric : _metrics) {
    String identifier = metric.getIdentifier();

    if (identifier.startsWith("count(")) {
      long total = ((Number) nl.get("count")).longValue();
      t.put("count(*)", total);
      continue;
    }

    Object facetValue = nl.get("facet_" + facetIndex);
    facetIndex++;
    if (facetValue == null) {
      // Missing entry: leave the field unset but still consume the facet slot.
      continue;
    }

    if (!(facetValue instanceof Number)) {
      t.put(identifier, facetValue);
    } else if (metric.outputLong) {
      t.put(identifier, Math.round(((Number) facetValue).doubleValue()));
    } else {
      t.put(identifier, ((Number) facetValue).doubleValue());
    }
  }
}
 
Example 7
Source File: FrequencyTableEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a frequency table for a single list (or single scalar) of numbers.
 *
 * <p>Each input is converted with {@code Number.longValue()} before being counted. One tuple is
 * produced per value returned by {@code frequency.valuesIterator()}, carrying the keys
 * {@code value}, {@code count}, {@code cumFreq}, {@code cumPct} and {@code pct}.
 *
 * @param values exactly one argument: a List of Numbers, or a single value that is wrapped
 *               in a one-element list
 * @return the list of tuples, or {@code null} if any argument is {@code null}
 * @throws IOException if the number of arguments is not exactly one
 */
@Override
public Object doWork(Object... values) throws IOException {
  if(Arrays.stream(values).anyMatch(item -> null == item)){
    return null;
  }

  // Only values[0] is ever consumed, so any other arity is rejected. The message now states
  // "exactly one" to match this check (it previously claimed "at least one").
  if(values.length != 1){
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting exactly one value but found %d",toExpression(constructingFactory),containedEvaluators.size()));
  }

  List<?> sourceValues = values[0] instanceof List<?> ? (List<?>)values[0] : Arrays.asList(values[0]);

  Frequency frequency = new Frequency();

  for(Object o : sourceValues) {
    Number number = (Number)o;
    frequency.addValue(number.longValue());
  }

  List<Tuple> histogramBins = new ArrayList<>();

  @SuppressWarnings({"rawtypes"})
  Iterator iterator = frequency.valuesIterator();

  while(iterator.hasNext()){
    // Only longs are added above, so the iterated values are cast back to Long.
    Long value = (Long)iterator.next();
    Tuple tuple = new Tuple();
    tuple.put("value", value.longValue());
    tuple.put("count", frequency.getCount(value));
    tuple.put("cumFreq", frequency.getCumFreq(value));
    tuple.put("cumPct", frequency.getCumPct(value));
    tuple.put("pct", frequency.getPct(value));
    histogramBins.add(tuple);
  }
  return histogramBins;
}
 
Example 8
Source File: ChiSquareDataSetEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a chi-square data-set comparison on two numeric lists.
 *
 * <p>Both lists are converted element-wise with {@code Number.longValue()} before being handed
 * to {@code ChiSquareTest}.
 *
 * @param value1 first sample, cast to {@code List<Number>}
 * @param value2 second sample, cast to {@code List<Number>}
 * @return a tuple with {@code chisquare-statistic} and {@code StreamParams.P_VALUE}
 * @throws IOException declared by the signature; not thrown directly in this method
 */
@Override
public Object doWork(Object value1, Object value2) throws IOException {
  @SuppressWarnings({"unchecked"})
  List<Number> first = (List<Number>) value1;
  @SuppressWarnings({"unchecked"})
  List<Number> second = (List<Number>) value2;

  long[] observedA = first.stream().mapToLong(Number::longValue).toArray();
  long[] observedB = second.stream().mapToLong(Number::longValue).toArray();

  ChiSquareTest test = new ChiSquareTest();
  double statistic = test.chiSquareDataSetsComparison(observedA, observedB);
  double pValue = test.chiSquareTestDataSetsComparison(observedA, observedB);

  Tuple result = new Tuple();
  result.put("chisquare-statistic", statistic);
  result.put(StreamParams.P_VALUE, pValue);
  return result;
}
 
Example 9
Source File: GTestDataSetEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a G-test data-set comparison on two numeric lists.
 *
 * <p>Both lists are converted element-wise with {@code Number.longValue()} before being handed
 * to {@code GTest}.
 *
 * @param value1 first sample, cast to {@code List<Number>}
 * @param value2 second sample, cast to {@code List<Number>}
 * @return a tuple with {@code G-statistic} and {@code StreamParams.P_VALUE}
 * @throws IOException declared by the signature; not thrown directly in this method
 */
@Override
public Object doWork(Object value1, Object value2) throws IOException {
  @SuppressWarnings({"unchecked"})
  List<Number> numbersA = (List<Number>) value1;
  @SuppressWarnings({"unchecked"})
  List<Number> numbersB = (List<Number>) value2;

  long[] countsA = new long[numbersA.size()];
  long[] countsB = new long[numbersB.size()];

  int cursor = 0;
  for (Number n : numbersA) {
    countsA[cursor++] = n.longValue();
  }

  cursor = 0;
  for (Number n : numbersB) {
    countsB[cursor++] = n.longValue();
  }

  GTest test = new GTest();
  double gStatistic = test.gDataSetsComparison(countsA, countsB);
  double pValue = test.gTestDataSetsComparison(countsA, countsB);

  Tuple result = new Tuple();
  result.put("G-statistic", gStatistic);
  result.put(StreamParams.P_VALUE, pValue);
  return result;
}
 
Example 10
Source File: TopicStream.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the next tuple, records its version as the checkpoint for its slice, and strips the
 * {@code _SLICE_} and {@code _CORE_} fields before returning it.
 *
 * <p>An EOF tuple is returned with a {@code sleepMillis} entry: {@code 0} when at least one
 * tuple was read in this run ({@code runCount > 0}), otherwise {@code 1000}.
 *
 * <p>Checkpoints are persisted every {@code checkpointEvery} tuples. The guard requires a
 * strictly positive interval: the previous {@code > -1} test let {@code 0} through to
 * {@code count % checkpointEvery}, which is a division by zero for integral types. A value of
 * {@code 0} is now treated like a negative one, i.e. no periodic persistence.
 *
 * @return the next tuple, or the EOF tuple annotated with {@code sleepMillis}
 * @throws IOException propagated from {@code _read()} or {@code persistCheckpoints()}
 */
public Tuple read() throws IOException {
  Tuple tuple = _read();

  if(tuple.EOF) {
    if(runCount > 0) {
      tuple.put("sleepMillis", 0);
    } else {
      tuple.put("sleepMillis", 1000);
    }

    return tuple;
  }

  ++count;
  ++runCount;
  // Require a positive interval so the modulo below can never divide by zero.
  if(checkpointEvery > 0 && (count % checkpointEvery) == 0) {
    persistCheckpoints();
  }

  long version = tuple.getLong(VERSION_FIELD);
  String slice = tuple.getString("_SLICE_");
  checkpoints.put(slice, version);

  tuple.remove("_SLICE_");
  tuple.remove("_CORE_");

  return tuple;
}
 
Example 11
Source File: SearchStream.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the next document from {@code documentIterator} into a tuple by copying every
 * field entry, or returns an EOF tuple when the iterator is exhausted.
 *
 * @return the next document as a tuple, or {@code Tuple.EOF()}
 * @throws IOException declared by the signature; not thrown directly in this method
 */
public Tuple read() throws IOException {
  if (!documentIterator.hasNext()) {
    return Tuple.EOF();
  }

  Tuple result = new Tuple();
  SolrDocument document = documentIterator.next();
  for (Entry<String, Object> field : document.entrySet()) {
    result.put(field.getKey(), field.getValue());
  }
  return result;
}
 
Example 12
Source File: DescribeEvaluator.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Computes descriptive statistics over a list of numbers.
 *
 * <p>The returned tuple carries the keys {@code max}, {@code mean}, {@code min}, {@code stdev},
 * {@code sum}, {@code N}, {@code var}, {@code kurtosis}, {@code skewness}, {@code popVar},
 * {@code geometricMean} and {@code sumsq}.
 *
 * @param value the input; must be a List whose elements are Numbers
 * @return a tuple holding the statistics
 * @throws IOException if {@code value} is not a List; a {@code null} input is also reported
 *                     this way rather than failing with a NullPointerException while the
 *                     error message is being built
 */
@Override
public Object doWork(Object value) throws IOException {
  
  if(!(value instanceof List<?>)){
    // null fails the instanceof test too, so describe it without dereferencing it.
    String found = (value == null) ? "null" : value.getClass().getSimpleName();
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a numeric list but found %s", toExpression(constructingFactory), found));
  }
  
  // Each element is cast to Number and fed to the accumulator as a double.
  DescriptiveStatistics descriptiveStatistics = new DescriptiveStatistics();
  ((List<?>)value).stream().mapToDouble(innerValue -> ((Number)innerValue).doubleValue()).forEach(descriptiveStatistics::addValue);

  Tuple tuple = new Tuple();
  tuple.put("max", descriptiveStatistics.getMax());
  tuple.put("mean", descriptiveStatistics.getMean());
  tuple.put("min", descriptiveStatistics.getMin());
  tuple.put("stdev", descriptiveStatistics.getStandardDeviation());
  tuple.put("sum", descriptiveStatistics.getSum());
  tuple.put("N", descriptiveStatistics.getN());
  tuple.put("var", descriptiveStatistics.getVariance());
  tuple.put("kurtosis", descriptiveStatistics.getKurtosis());
  tuple.put("skewness", descriptiveStatistics.getSkewness());
  tuple.put("popVar", descriptiveStatistics.getPopulationVariance());
  tuple.put("geometricMean", descriptiveStatistics.getGeometricMean());
  tuple.put("sumsq", descriptiveStatistics.getSumsq());

  return tuple;
}
 
Example 13
Source File: Node.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Renders this node as a tuple.
 *
 * <p>The tuple always contains {@code node}, {@code collection}, {@code field} and
 * {@code level}. When {@code ancestors} is set, each entry is split on {@code ^}: the second
 * part is emitted as-is, or prefixed with the collection looked up by the first part (parsed
 * as an index into {@code traversal.getCollections()}) when the traversal is multi-collection.
 * When {@code metrics} is set, each metric is added under its identifier.
 *
 * @param collection value stored under {@code collection}
 * @param field      value stored under {@code field}
 * @param level      value stored under {@code level}
 * @param traversal  supplies the multi-collection flag and the collection list
 * @return the populated tuple
 */
public Tuple toTuple(String collection, String field, int level, Traversal traversal) {
  Tuple result = new Tuple();
  result.put("node", id);
  result.put("collection", collection);
  result.put("field", field);
  result.put("level", level);

  boolean multiCollection = traversal.isMultiCollection();
  List<String> collectionNames = traversal.getCollections();

  if (ancestors != null) {
    List<String> ancestorIds = new ArrayList<>();
    for (String encoded : ancestors) {
      String[] parts = encoded.split("\\^");
      String rendered = multiCollection
          ? collectionNames.get(Integer.parseInt(parts[0])) + "/" + parts[1]  // collection/id
          : parts[1];                                                          // id only
      ancestorIds.add(rendered);
    }
    result.put("ancestors", ancestorIds);
  }

  if (metrics != null) {
    for (Metric m : metrics) {
      result.put(m.getIdentifier(), m.getValue());
    }
  }

  return result;
}
 
Example 14
Source File: SelectStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Reads one tuple from the wrapped stream and projects it.
 *
 * <p>Two working tuples are built: one with every original field (the input for evaluators) and
 * one with only the selected fields under their mapped names (the value returned). Operations
 * are applied to both; each non-null evaluator result is stored in both under the evaluator's
 * output name.
 *
 * @return the projected tuple, or the original tuple unchanged when it is EOF
 * @throws IOException propagated from the wrapped stream or from an evaluator
 */
public Tuple read() throws IOException {
  Tuple source = stream.read();
  if (source.EOF) {
    return source;
  }

  Tuple projected = new Tuple();
  Tuple evaluationScope = new Tuple();

  // Reset the TupleContext before any evaluator runs. It lets evaluators cache values for the
  // duration of a single tuple (e.g. a LocalDateTime parsed once and reused by several
  // evaluators), so it must not carry state over from the previous tuple.
  streamContext.getTupleContext().clear();

  for (Object name : source.getFields().keySet()) {
    Object fieldValue = source.get(name);
    evaluationScope.put(name, fieldValue);
    if (selectedFields.containsKey(name)) {
      projected.put(selectedFields.get(name), fieldValue);
    }
  }

  // Operations run on both working tuples, returned one first.
  for (StreamOperation op : operations) {
    op.operate(projected);
    op.operate(evaluationScope);
  }

  // Evaluators see the full field set; earlier results are visible to later evaluators.
  for (Map.Entry<StreamEvaluator, String> entry : selectedEvaluators.entrySet()) {
    Object evaluated = entry.getKey().evaluate(evaluationScope);
    if (evaluated == null) {
      continue;
    }
    String outputName = entry.getValue();
    evaluationScope.put(outputName, evaluated);
    projected.put(outputName, evaluated);
  }

  return projected;
}
 
Example 15
Source File: FacetStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively flattens nested facet buckets into tuples.
 *
 * <p>For the bucket dimension at {@code level}, every entry under {@code buckets} produces a
 * clone of {@code currentTuple} with the bucket value set. If more dimensions remain, the
 * method recurses into that bucket; otherwise the metrics are written and the tuple is added
 * to {@code tuples}.
 *
 * @param level        index into {@code _buckets} of the dimension being processed
 * @param tuples       output list receiving completed tuples
 * @param currentTuple tuple carrying the values of the enclosing dimensions
 * @param facets       response node containing the facet named after the current dimension;
 *                     nothing happens when that facet is absent
 * @param _buckets     the bucket dimensions, outermost first
 * @param _metrics     metrics to read from each leaf bucket
 */
private void fillTuples(int level,
                        List<Tuple> tuples,
                        Tuple currentTuple,
                        @SuppressWarnings({"rawtypes"}) NamedList facets,
                        Bucket[] _buckets,
                        Metric[] _metrics) {

  String bucketName = _buckets[level].toString();
  @SuppressWarnings({"rawtypes"})
  NamedList facet = (NamedList) facets.get(bucketName);
  if (facet == null) {
    return;
  }

  int nextLevel = level + 1;
  @SuppressWarnings({"rawtypes"})
  List bucketList = (List) facet.get("buckets");
  for (Object entry : bucketList) {
    @SuppressWarnings({"rawtypes"})
    NamedList bucket = (NamedList) entry;

    Object val = bucket.get("val");
    if (val instanceof Integer) {
      // calcite currently expects Long values here
      val = ((Integer) val).longValue();
    }

    Tuple t = currentTuple.clone();
    t.put(bucketName, val);

    if (nextLevel < _buckets.length) {
      fillTuples(nextLevel, tuples, t.clone(), bucket, _buckets, _metrics);
      continue;
    }

    // Leaf dimension: attach metrics. Non-count metrics are read from facet_<n> in order.
    int metricIndex = 0;
    for (Metric metric : _metrics) {
      String identifier = metric.getIdentifier();
      if (identifier.startsWith("count(")) {
        long total = ((Number) bucket.get("count")).longValue();
        t.put("count(*)", total);
        continue;
      }

      Number stat = (Number) bucket.get("facet_" + metricIndex);
      if (!metric.outputLong) {
        t.put(identifier, stat.doubleValue());
      } else if (stat instanceof Long || stat instanceof Integer) {
        t.put(identifier, stat.longValue());
      } else {
        t.put(identifier, Math.round(stat.doubleValue()));
      }
      metricIndex++;
    }
    tuples.add(t);
  }
}
 
Example 16
Source File: HistogramEvaluator.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a histogram of the supplied numbers.
 *
 * <p>One tuple is produced per bin of an {@code EmpiricalDistribution}, carrying {@code max},
 * {@code mean}, {@code min}, {@code stdev}, {@code sum}, {@code N}, {@code var},
 * {@code cumProb} and {@code prob}.
 *
 * @param values {@code values[0]} is a List of Numbers (or a single value, wrapped in a list);
 *               the optional {@code values[1]} is the bin count, defaulting to 10
 * @return the list of bin tuples, or {@code null} if any argument is {@code null}
 * @throws IOException if no argument is given or the second argument is not a Number
 */
@Override
public Object doWork(Object... values) throws IOException {
  for (Object item : values) {
    if (item == null) {
      return null;
    }
  }

  if (values.length < 1) {
    throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting at least one value but found %d",toExpression(constructingFactory),containedEvaluators.size()));
  }

  List<?> sourceValues = values[0] instanceof List<?> ? (List<?>) values[0] : Arrays.asList(values[0]);

  int bins = 10;
  if (values.length >= 2) {
    if (!(values[1] instanceof Number)) {
      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - if second parameter is provided then it must be a valid number but found %s instead",toExpression(constructingFactory), values[1].getClass().getSimpleName()));
    }
    bins = ((Number) values[1]).intValue();
  }

  double[] samples = sourceValues.stream().mapToDouble(value -> ((Number) value).doubleValue()).toArray();
  EmpiricalDistribution distribution = new EmpiricalDistribution(bins);
  distribution.load(samples);

  List<Tuple> histogramBins = new ArrayList<>();
  for (SummaryStatistics bin : distribution.getBinStats()) {
    Tuple row = new Tuple();
    row.put("max", bin.getMax());
    row.put("mean", bin.getMean());
    row.put("min", bin.getMin());
    row.put("stdev", bin.getStandardDeviation());
    row.put("sum", bin.getSum());
    row.put("N", bin.getN());
    row.put("var", bin.getVariance());
    row.put("cumProb", distribution.cumulativeProbability(bin.getMean()));
    row.put("prob", distribution.probability(bin.getMin(), bin.getMax()));
    histogramBins.add(row);
  }

  return histogramBins;
}
 
Example 17
Source File: TextLogitStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Sends one non-distributed {@code {!tlogit}} request to {@code baseUrl} and returns the
 * {@code logit} section of the response as a tuple.
 *
 * <p>The request carries the feature, terms, idfs, the entries of {@code paramsMap}, the
 * current weights (when present), and the iteration, outcome, positive label, threshold and
 * learning rate (sent as {@code alpha}). The request is issued with POST.
 *
 * @return a tuple with {@code error}, {@code weights} and {@code evaluation} taken from the
 *         response's {@code logit} entry
 * @throws Exception propagated from the Solr request
 */
public Tuple call() throws Exception {
  ModifiableSolrParams requestParams = new ModifiableSolrParams();
  HttpSolrClient client = cache.getHttpSolrClient(baseUrl);

  requestParams.add(DISTRIB, "false");
  requestParams.add("fq","{!tlogit}");
  requestParams.add("feature", feature);
  requestParams.add("terms", TextLogitStream.toString(terms));
  requestParams.add("idfs", TextLogitStream.toString(idfs));

  // Caller-supplied parameters are appended after the fixed ones above.
  for (Entry<String, String> extra : paramsMap.entrySet()) {
    requestParams.add(extra.getKey(), extra.getValue());
  }

  if (weights != null) {
    requestParams.add("weights", TextLogitStream.toString(weights));
  }

  requestParams.add("iteration", Integer.toString(iteration));
  requestParams.add("outcome", outcome);
  requestParams.add("positiveLabel", Integer.toString(positiveLabel));
  requestParams.add("threshold", Double.toString(threshold));
  requestParams.add("alpha", Double.toString(learningRate));

  QueryRequest query = new QueryRequest(requestParams, SolrRequest.METHOD.POST);
  QueryResponse queryResponse = query.process(client);

  @SuppressWarnings({"rawtypes"})
  NamedList body = queryResponse.getResponse();
  @SuppressWarnings({"rawtypes"})
  NamedList logit = (NamedList) body.get("logit");

  @SuppressWarnings({"unchecked"})
  List<Double> shardWeights = (List<Double>) logit.get("weights");
  double shardError = (double) logit.get("error");

  Tuple result = new Tuple();
  result.put("error", shardError);
  result.put("weights", shardWeights);
  result.put("evaluation", logit.get("evaluation"));
  return result;
}
 
Example 18
Source File: ClassifyStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Override
public Tuple read() throws IOException {
  if (modelTuple == null) {

    modelTuple = modelStream.read();
    if (modelTuple == null || modelTuple.EOF) {
      throw new IOException("Model tuple not found for classify stream!");
    }

    termToIndex = new HashMap<>();

    List<String> terms = modelTuple.getStrings("terms_ss");

    for (int i = 0; i < terms.size(); i++) {
      termToIndex.put(terms.get(i), i);
    }

    idfs = modelTuple.getDoubles("idfs_ds");
    modelWeights = modelTuple.getDoubles("weights_ds");
  }

  Tuple docTuple = docStream.read();
  if (docTuple.EOF) return docTuple;

  String text = docTuple.getString(field);

  double tfs[] = new double[termToIndex.size()];

  TokenStream tokenStream = analyzer.tokenStream(analyzerField, text);
  CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
  tokenStream.reset();

  int termCount = 0;
  while (tokenStream.incrementToken()) {
    termCount++;
    if (termToIndex.containsKey(termAtt.toString())) {
      tfs[termToIndex.get(termAtt.toString())]++;
    }
  }

  tokenStream.end();
  tokenStream.close();

  List<Double> tfidfs = new ArrayList<>(termToIndex.size());
  tfidfs.add(1.0);
  for (int i = 0; i < tfs.length; i++) {
    if (tfs[i] != 0) {
      tfs[i] = 1 + Math.log(tfs[i]);
    }
    tfidfs.add(this.idfs.get(i) * tfs[i]);
  }

  double total = 0.0;
  for (int i = 0; i < tfidfs.size(); i++) {
    total += tfidfs.get(i) * modelWeights.get(i);
  }

  double score = total * ((float) (1.0 / Math.sqrt(termCount)));
  double positiveProb = sigmoid(total);

  docTuple.put("probability_d", positiveProb);
  docTuple.put("score_d",  score);

  return docTuple;
}
 
Example 19
Source File: ReplaceWithFieldOperation.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Copies the value of {@code replacementFieldName} into {@code originalFieldName} on the
 * given tuple.
 *
 * @param tuple the tuple to modify in place
 */
private void replace(Tuple tuple){
  Object replacementValue = tuple.get(replacementFieldName);
  tuple.put(originalFieldName, replacementValue);
}
 
Example 20
Source File: Facet2DStream.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Flattens a two-dimensional facet response into one tuple per (x, y) bucket pair.
 *
 * <p>Each bucket under {@code x.buckets} supplies the x value; each bucket under its nested
 * {@code y.buckets} supplies the y value and the metric. A metric whose identifier starts with
 * {@code count(} is written under {@code count(*)} from the y bucket's {@code count}; any other
 * metric is read from the y bucket's {@code agg} entry.
 *
 * <p>The x bucket list is walked exactly once. The previous version wrapped this in a loop over
 * every entry of the {@code x} NamedList without using the loop index, so all tuples were
 * emitted once per entry and duplicated whenever {@code x} held more than the {@code buckets}
 * key.
 *
 * @param level        unused by this method; kept for signature compatibility
 * @param tuples       output list receiving one tuple per (x, y) pair
 * @param currentTuple template tuple cloned for every x bucket
 * @param facets       response node containing the {@code x} facet; nothing is emitted when
 *                     {@code x} or its {@code buckets} list is absent
 * @param x            bucket whose {@code toString()} names the x field
 * @param y            bucket whose {@code toString()} names the y field
 * @param metric       the single metric to attach to each tuple
 */
private void fillTuples(int level, List<Tuple> tuples, Tuple currentTuple,
                        @SuppressWarnings({"rawtypes"})NamedList facets, Bucket x, Bucket y, Metric metric) {
  String bucketXName = x.toString();
  String bucketYName = y.toString();

  @SuppressWarnings({"rawtypes"})
  NamedList allXBuckets = (NamedList) facets.get("x");
  if (allXBuckets == null) {
    return;
  }

  @SuppressWarnings({"rawtypes"})
  List buckets = (List) allXBuckets.get("buckets");
  if (buckets == null) {
    return;
  }

  for(int s=0; s<buckets.size(); s++) {

    @SuppressWarnings({"rawtypes"})
    NamedList bucket = (NamedList)buckets.get(s);
    Object val = bucket.get("val");
    if (val instanceof Integer) {
      // Integer bucket values are widened to Long.
      val = ((Integer) val).longValue();
    }
    Tuple tx = currentTuple.clone();
    tx.put(bucketXName, val);

    @SuppressWarnings({"rawtypes"})
    NamedList allYBuckets = (NamedList) bucket.get("y");
    @SuppressWarnings({"rawtypes"})
    List ybuckets = (List)allYBuckets.get("buckets");

    for (int d = 0; d < ybuckets.size(); d++) {
      @SuppressWarnings({"rawtypes"})
      NamedList bucketY = (NamedList) ybuckets.get(d);
      Object valY = bucketY.get("val");
      if (valY instanceof Integer) {
        valY = ((Integer) valY).longValue();
      }
      Tuple yt = tx.clone();
      yt.put(bucketYName, valY);

      String identifier = metric.getIdentifier();
      if (!identifier.startsWith("count(")) {
        Number d1 = (Number) bucketY.get("agg");
        if (metric.outputLong) {
          if (d1 instanceof Long || d1 instanceof Integer) {
            yt.put(identifier, d1.longValue());
          } else {
            yt.put(identifier, Math.round(d1.doubleValue()));
          }
        } else {
          yt.put(identifier, d1.doubleValue());
        }
      } else {
        long l = ((Number)bucketY.get("count")).longValue();
        yt.put("count(*)", l);
      }
      tuples.add(yt);
    }
  }
}