org.apache.lucene.index.TermContext Java Examples
The following examples show how to use
org.apache.lucene.index.TermContext.
Example #1
Source File: BlendedTermQuery.java From Elasticsearch with Apache License 2.0 | 6 votes |
@Override
public Query rewrite(IndexReader reader) throws IOException {
    IndexReaderContext context = reader.getContext();
    TermContext[] ctx = new TermContext[terms.length];
    int[] docFreqs = new int[ctx.length];
    for (int i = 0; i < terms.length; i++) {
        ctx[i] = TermContext.build(context, terms[i]);
        docFreqs[i] = ctx[i].docFreq();
    }
    final int maxDoc = reader.maxDoc();
    blend(ctx, maxDoc, reader);
    Query query = topLevelQuery(terms, ctx, docFreqs, maxDoc);
    query.setBoost(getBoost());
    return query;
}
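For orientation: a TermContext caches the per-segment TermState of a single term, built once against a top-level IndexReaderContext, so later statistics and postings lookups do not have to re-seek the term dictionary. Below is a minimal standalone sketch of that pattern; it is not taken from the project above, and the index path, field name, and term text are placeholders. (In Lucene 8.0 the class was renamed to TermStates.)

// Minimal sketch: build a TermContext against the top-level reader context
// and read the statistics it aggregated across all segments.
try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
     DirectoryReader reader = DirectoryReader.open(dir)) {
    IndexReaderContext topContext = reader.getContext();
    Term term = new Term("body", "lucene");              // placeholder field and term
    TermContext termContext = TermContext.build(topContext, term);
    int docFreq = termContext.docFreq();                 // documents containing the term
    long totalTermFreq = termContext.totalTermFreq();    // total occurrences, or -1 if not tracked
    TermState firstLeafState = termContext.get(0);       // per-segment state; null if absent in that leaf
}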
Example #2
Source File: BlendedTermQuery.java From Elasticsearch with Apache License 2.0 | 6 votes |
private TermContext adjustTTF(TermContext termContext, long sumTTF) {
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    TermContext newTermContext = new TermContext(termContext.topReaderContext);
    List<LeafReaderContext> leaves = termContext.topReaderContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    int df = termContext.docFreq();
    long ttf = sumTTF;
    for (int i = 0; i < len; i++) {
        TermState termState = termContext.get(i);
        if (termState == null) {
            continue;
        }
        // register() accumulates statistics, so record the full df/ttf on the
        // first leaf that has a TermState and zero on the rest; the aggregate
        // then equals the overridden values.
        newTermContext.register(termState, i, df, ttf);
        df = 0;
        ttf = 0;
    }
    return newTermContext;
}
Example #3
Source File: MtasSpanPrecededByQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanPrecededByQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanPrecededByQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #4
Source File: MtasSpanNotQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanNotQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanNotQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #5
Source File: MtasSpanFullyAlignedWithQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanFullyAlignedWithQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanFullyAlignedWithQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #6
Source File: MtasSpanIntersectingQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanIntersectingQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanIntersectingQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #7
Source File: MtasSpanMatchAllQuery.java From mtas with Apache License 2.0 | 5 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  Term term = new Term(field);
  if (!contexts.containsKey(term)) {
    IndexReaderContext topContext = searcher.getTopReaderContext();
    try {
      contexts.put(term, TermContext.build(topContext, term));
    } catch (IOException e) {
      log.debug(e);
      // fail
    }
  }
}
Example #8
Source File: MtasSpanSequenceQuery.java From mtas with Apache License 2.0 | 5 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  for (MtasSpanSequenceQueryWeight w : subWeights) {
    w.spanWeight.extractTermContexts(contexts);
  }
  if (ignoreWeight != null) {
    ignoreWeight.extractTermContexts(contexts);
  }
}
Example #9
Source File: MtasExtendedSpanTermQuery.java From mtas with Apache License 2.0 | 5 votes |
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores,
    float boost) throws IOException {
  final TermContext context;
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  if (termContext == null) {
    context = TermContext.build(topContext, localTerm);
  } else {
    context = termContext;
  }
  return new SpanTermWeight(context, searcher,
      needsScores ? Collections.singletonMap(localTerm, context) : null, boost);
}
Example #10
Source File: MtasSpanSequenceQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanSequenceQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanSequenceQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #11
Source File: MtasSpanFollowedByQuery.java From mtas with Apache License 2.0 | 5 votes |
/**
 * Gets the term contexts.
 *
 * @param items the items
 * @return the term contexts
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanFollowedByQueryWeight> items) {
  List<SpanWeight> weights = new ArrayList<>();
  for (MtasSpanFollowedByQueryWeight item : items) {
    weights.add(item.spanWeight);
  }
  return getTermContexts(weights);
}
Example #12
Source File: CustomSpanWeight.java From pyramid with Apache License 2.0 | 5 votes |
private Similarity.SimWeight buildSimWeight(CustomSpanQuery query,
    IndexSearcher searcher, Map<Term, TermContext> termContexts) throws IOException {
  if (termContexts == null || termContexts.size() == 0 || query.getField() == null)
    return null;
  TermStatistics[] termStats = new TermStatistics[termContexts.size()];
  int i = 0;
  for (Term term : termContexts.keySet()) {
    termStats[i] = searcher.termStatistics(term, termContexts.get(term));
    i++;
  }
  CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  return similarity.computeWeight(collectionStats, termStats);
}
Example #13
Source File: CustomSpanQuery.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Build a map of terms to termcontexts, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermContext> getTermContexts(CustomSpanWeight... weights) {
  Map<Term, TermContext> terms = new TreeMap<>();
  for (CustomSpanWeight w : weights) {
    w.extractTermContexts(terms);
  }
  return terms;
}
Example #14
Source File: CustomSpanQuery.java From pyramid with Apache License 2.0 | 5 votes |
/**
 * Build a map of terms to termcontexts, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermContext> getTermContexts(Collection<CustomSpanWeight> weights) {
  Map<Term, TermContext> terms = new TreeMap<>();
  for (CustomSpanWeight w : weights) {
    w.extractTermContexts(terms);
  }
  return terms;
}
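The Term-to-TermContext maps these helpers build are typically consumed once per weight to look up statistics, as Example #12 shows. The sketch below illustrates that consumer side; it is not part of the pyramid source, and weightA, weightB, and searcher are hypothetical variables.

// Minimal sketch: collect the contexts gathered by two hypothetical weights
// and reuse them to fetch per-term statistics from the searcher.
Map<Term, TermContext> contexts = CustomSpanQuery.getTermContexts(weightA, weightB);
for (Map.Entry<Term, TermContext> entry : contexts.entrySet()) {
  TermStatistics stats = searcher.termStatistics(entry.getKey(), entry.getValue());
  // stats.docFreq() and stats.totalTermFreq() can then feed Similarity#computeWeight,
  // exactly as buildSimWeight does in Example #12.
}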
Example #15
Source File: AbstractAuthorityQueryWeight.java From SearchServices with GNU Lesser General Public License v3.0 | 5 votes |
public AbstractAuthorityQueryWeight(SolrIndexSearcher searcher, boolean needsScores,
    Query query, String authTermName, String authTermText) throws IOException {
  super(query);
  this.searcher = searcher;
  searcher.collectionStatistics(authTermName);
  final IndexReaderContext context = searcher.getTopReaderContext();
  final Term term = new Term(authTermName, authTermText);
  final TermContext termContext = TermContext.build(context, term);
  searcher.termStatistics(term, termContext);
  this.needsScores = needsScores;
}
Example #16
Source File: ExtendedCommonTermsQuery.java From Elasticsearch with Apache License 2.0 | 5 votes |
@Override
protected Query newTermQuery(Term term, TermContext context) {
  if (fieldType == null) {
    return super.newTermQuery(term, context);
  }
  final Query query = fieldType.queryStringTermQuery(term);
  if (query == null) {
    return super.newTermQuery(term, context);
  } else {
    return query;
  }
}
Example #17
Source File: BlendedTermQuery.java From Elasticsearch with Apache License 2.0 | 5 votes |
private static TermContext adjustDF(TermContext ctx, int newDocFreq) {
  // Use a value of ttf that is consistent with the doc freq (ie. gte)
  long newTTF;
  if (ctx.totalTermFreq() < 0) {
    newTTF = -1;
  } else {
    newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
  }
  List<LeafReaderContext> leaves = ctx.topReaderContext.leaves();
  final int len;
  if (leaves == null) {
    len = 1;
  } else {
    len = leaves.size();
  }
  TermContext newCtx = new TermContext(ctx.topReaderContext);
  for (int i = 0; i < len; ++i) {
    TermState termState = ctx.get(i);
    if (termState == null) {
      continue;
    }
    // As in adjustTTF: register the overridden statistics once, then zeros,
    // so the accumulated totals match the override.
    newCtx.register(termState, i, newDocFreq, newTTF);
    newDocFreq = 0;
    newTTF = 0;
  }
  return newCtx;
}
Example #18
Source File: MtasSpanStartQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  spanWeight.extractTermContexts(contexts);
}
Example #19
Source File: MtasDisabledTwoPhaseIteratorSpanQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
Example #20
Source File: MtasExtendedSpanTermQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  contexts.put(localTerm, termContext);
}
Example #21
Source File: MtasSpanUniquePositionQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
Example #22
Source File: MtasMaximumExpandSpanQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
Example #23
Source File: MtasExpandSpanQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
Example #24
Source File: MtasSpanFullyAlignedWithQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
Example #25
Source File: MtasSpanPrecededByQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
Example #26
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
public SpanPayloadCheckWeight(IndexSearcher searcher, Map<Term, TermContext> termContexts,
    SpanWeight matchWeight, float boost) throws IOException {
  super(CustomSpanPayloadCheckQuery.this, searcher, termContexts, boost);
  this.matchWeight = matchWeight;
}
Example #27
Source File: CustomSpanPayloadCheckQuery.java From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  matchWeight.extractTermContexts(contexts);
}
Example #28
Source File: MtasSpanNotQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
Example #29
Source File: BlendedTermQuery.java From Elasticsearch with Apache License 2.0 | 4 votes |
protected void blend(final TermContext[] contexts, int maxDoc, IndexReader reader) throws IOException {
  if (contexts.length <= 1) {
    return;
  }
  int max = 0;
  long minSumTTF = Long.MAX_VALUE;
  for (int i = 0; i < contexts.length; i++) {
    TermContext ctx = contexts[i];
    int df = ctx.docFreq();
    // we use the max here since it's the only "true" estimation we can make here
    // at least max(df) documents have that term. Sum or Averages don't seem
    // to have a significant meaning here.
    // TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have:
    //   df = df1 + df2 - (df1 * df2 / maxDoc)?
    max = Math.max(df, max);
    if (minSumTTF != -1 && ctx.totalTermFreq() != -1) {
      // we need to find out the minimum sumTTF to adjust the statistics
      // otherwise the statistics don't match
      minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
    } else {
      minSumTTF = -1;
    }
  }
  if (minSumTTF != -1 && maxDoc > minSumTTF) {
    maxDoc = (int) minSumTTF;
  }
  if (max == 0) {
    return; // we are done, that term doesn't exist at all
  }
  long sumTTF = minSumTTF == -1 ? -1 : 0;
  final int[] tieBreak = new int[contexts.length];
  for (int i = 0; i < tieBreak.length; ++i) {
    tieBreak[i] = i;
  }
  new InPlaceMergeSorter() {
    @Override
    protected void swap(int i, int j) {
      final int tmp = tieBreak[i];
      tieBreak[i] = tieBreak[j];
      tieBreak[j] = tmp;
    }
    @Override
    protected int compare(int i, int j) {
      return Ints.compare(contexts[tieBreak[j]].docFreq(), contexts[tieBreak[i]].docFreq());
    }
  }.sort(0, tieBreak.length);
  int prev = contexts[tieBreak[0]].docFreq();
  int actualDf = Math.min(maxDoc, max);
  assert actualDf >= 0 : "DF must be >= 0";
  // here we try to add a little bias towards
  // the more popular (more frequent) fields
  // that acts as a tie breaker
  for (int i : tieBreak) {
    TermContext ctx = contexts[i];
    if (ctx.docFreq() == 0) {
      break;
    }
    final int current = ctx.docFreq();
    if (prev > current) {
      actualDf++;
    }
    contexts[i] = ctx = adjustDF(ctx, Math.min(maxDoc, actualDf));
    prev = current;
    if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
      sumTTF += ctx.totalTermFreq();
    } else {
      sumTTF = -1; // omit once TF is omitted anywhere!
    }
  }
  sumTTF = Math.min(sumTTF, minSumTTF);
  for (int i = 0; i < contexts.length; i++) {
    int df = contexts[i].docFreq();
    if (df == 0) {
      continue;
    }
    // the blended sumTTF can't be greater than the sumTTF on the field
    final long fixedTTF = sumTTF == -1 ? -1 : sumTTF;
    contexts[i] = adjustTTF(contexts[i], fixedTTF);
  }
}
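Read together with adjustDF and adjustTTF (Examples #17 and #2), the intent of blend is to make the statistics of the same term comparable across fields: each context's document frequency is lifted toward the maximum observed across the given contexts (with a small tie-breaking adjustment), and the summed total term frequency is capped by the smallest per-field sum, so that the per-field term queries built by topLevelQuery in Example #1 score against blended rather than raw statistics.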
Example #30
Source File: MtasSpanRecurrenceQuery.java From mtas with Apache License 2.0 | 4 votes |
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}