org.apache.lucene.index.TermContext Java Examples

The following examples show how to use org.apache.lucene.index.TermContext. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: BlendedTermQuery.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
@Override
public Query rewrite(IndexReader reader) throws IOException {
    // Build a TermContext for every blended term against the current
    // top-level reader, capturing the *original* doc freqs before blending.
    IndexReaderContext context = reader.getContext();
    TermContext[] ctx = new TermContext[terms.length];
    int[] docFreqs = new int[ctx.length];
    for (int i = 0; i < terms.length; i++) {
        ctx[i] = TermContext.build(context, terms[i]);
        docFreqs[i] = ctx[i].docFreq();
    }

    // blend() rewrites the contexts in place; topLevelQuery then receives both
    // the blended contexts and the pre-blend doc freqs.
    final int maxDoc = reader.maxDoc();
    blend(ctx, maxDoc, reader);
    Query query = topLevelQuery(terms, ctx, docFreqs, maxDoc);
    query.setBoost(getBoost());
    return query;
}
 
Example #2
Source File: BlendedTermQuery.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a copy of {@code termContext} whose total term frequency is replaced
 * by {@code sumTTF}, keeping the original doc freq. If both the requested and
 * the existing TTF are -1 (TTF not tracked) the context is returned unchanged.
 */
private TermContext adjustTTF(TermContext termContext, long sumTTF) {
    if (sumTTF == -1 && termContext.totalTermFreq() == -1) {
        return termContext;
    }
    TermContext newTermContext = new TermContext(termContext.topReaderContext);
    List<LeafReaderContext> leaves = termContext.topReaderContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    int df = termContext.docFreq();
    long ttf = sumTTF;
    for (int i = 0; i < len; i++) {
        TermState termState = termContext.get(i);
        if (termState == null) {
            continue;
        }
        // Register the full df/ttf on the first leaf that has a state; later
        // leaves add zero so the aggregated totals equal exactly df / sumTTF.
        newTermContext.register(termState, i, df, ttf);
        df = 0;
        ttf = 0;
    }
    return newTermContext;
}
 
Example #3
Source File: MtasSpanPrecededByQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanPrecededByQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #4
Source File: MtasSpanNotQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanNotQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #5
Source File: MtasSpanFullyAlignedWithQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanFullyAlignedWithQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #6
Source File: MtasSpanIntersectingQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanIntersectingQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #7
Source File: MtasSpanMatchAllQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  // Register a context for the bare field term, unless one is already present.
  Term term = new Term(field);
  if (!contexts.containsKey(term)) {
    IndexReaderContext topContext = searcher.getTopReaderContext();
    try {
      contexts.put(term, TermContext.build(topContext, term));
    } catch (IOException e) {
      // NOTE(review): the IOException is only debug-logged, so on failure the
      // term is silently absent from the map — confirm callers tolerate that.
      log.debug(e);
      // fail
    }
  }
}
 
Example #8
Source File: MtasSpanSequenceQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  // Every element of the sequence contributes its term contexts.
  for (MtasSpanSequenceQueryWeight sequenceItem : subWeights) {
    sequenceItem.spanWeight.extractTermContexts(contexts);
  }
  // The optional ignore clause contributes as well, when present.
  if (ignoreWeight != null) {
    ignoreWeight.extractTermContexts(contexts);
  }
}
 
Example #9
Source File: MtasExtendedSpanTermQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
@Override
public SpanWeight createWeight(IndexSearcher searcher, boolean needsScores, float boost)
    throws IOException {
  final IndexReaderContext topContext = searcher.getTopReaderContext();
  // Reuse the pre-built context when available, otherwise build one now.
  final TermContext context =
      (termContext != null) ? termContext : TermContext.build(topContext, localTerm);
  // Term contexts are only supplied to the weight when scores are required.
  Map<Term, TermContext> contextMap = null;
  if (needsScores) {
    contextMap = Collections.singletonMap(localTerm, context);
  }
  return new SpanTermWeight(context, searcher, contextMap, boost);
}
 
Example #10
Source File: MtasSpanSequenceQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanSequenceQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #11
Source File: MtasSpanFollowedByQuery.java    From mtas with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the term contexts contributed by the given query weights.
 *
 * @param items the wrapped span weights
 * @return map from each term to its term context
 */
protected Map<Term, TermContext> getTermContexts(
    List<MtasSpanFollowedByQueryWeight> items) {
  final List<SpanWeight> spanWeights = new ArrayList<>(items.size());
  for (int i = 0; i < items.size(); i++) {
    spanWeights.add(items.get(i).spanWeight);
  }
  return getTermContexts(spanWeights);
}
 
Example #12
Source File: CustomSpanWeight.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the similarity weight for this span query from per-term statistics.
 *
 * @param query the span query whose field determines the collection statistics
 * @param searcher searcher used to resolve term and collection statistics
 * @param termContexts contexts of the terms to score; may be null or empty
 * @return the computed similarity weight, or null when there is nothing to
 *         score (no contexts, or the query has no field)
 * @throws IOException if statistics cannot be read from the index
 */
private Similarity.SimWeight buildSimWeight(CustomSpanQuery query, IndexSearcher searcher, Map<Term, TermContext> termContexts) throws IOException {
  if (termContexts == null || termContexts.isEmpty() || query.getField() == null)
    return null;
  TermStatistics[] termStats = new TermStatistics[termContexts.size()];
  int i = 0;
  // Iterate entries to avoid a second map lookup per term.
  for (Map.Entry<Term, TermContext> entry : termContexts.entrySet()) {
    termStats[i] = searcher.termStatistics(entry.getKey(), entry.getValue());
    i++;
  }
  CollectionStatistics collectionStats = searcher.collectionStatistics(query.getField());
  return similarity.computeWeight(collectionStats, termStats);
}
 
Example #13
Source File: CustomSpanQuery.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Build a map of terms to termcontexts, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermContext> getTermContexts(CustomSpanWeight... weights) {
  // TreeMap keeps the terms in their natural (sorted) order.
  final Map<Term, TermContext> termMap = new TreeMap<>();
  for (int i = 0; i < weights.length; i++) {
    weights[i].extractTermContexts(termMap);
  }
  return termMap;
}
 
Example #14
Source File: CustomSpanQuery.java    From pyramid with Apache License 2.0 5 votes vote down vote up
/**
 * Build a map of terms to termcontexts, for use in constructing SpanWeights
 * @lucene.internal
 */
public static Map<Term, TermContext> getTermContexts(Collection<CustomSpanWeight> weights) {
  // TreeMap keeps the terms in their natural (sorted) order.
  final Map<Term, TermContext> termMap = new TreeMap<>();
  for (final CustomSpanWeight weight : weights) {
    weight.extractTermContexts(termMap);
  }
  return termMap;
}
 
Example #15
Source File: AbstractAuthorityQueryWeight.java    From SearchServices with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Builds the authority-query weight, resolving collection and term statistics
 * for the authority term up front.
 *
 * <p>NOTE(review): the results of {@code collectionStatistics} and
 * {@code termStatistics} are discarded here — presumably the calls are made
 * for their side effects (early validation / statistics warm-up). Confirm.
 */
public AbstractAuthorityQueryWeight(SolrIndexSearcher searcher, boolean needsScores, Query query, String authTermName, String authTermText) throws IOException
{
	super(query);
    this.searcher = searcher;
    searcher.collectionStatistics(authTermName);
    final IndexReaderContext context = searcher.getTopReaderContext();
    final Term term = new Term(authTermName, authTermText);
    final TermContext termContext = TermContext.build(context, term);
    searcher.termStatistics(term, termContext);
    this.needsScores = needsScores;
}
 
Example #16
Source File: ExtendedCommonTermsQuery.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
@Override
protected Query newTermQuery(Term term, TermContext context) {
    // Prefer a field-type specific query when one can be produced;
    // otherwise fall back to the default term query in every case.
    if (fieldType != null) {
        final Query typedQuery = fieldType.queryStringTermQuery(term);
        if (typedQuery != null) {
            return typedQuery;
        }
    }
    return super.newTermQuery(term, context);
}
 
Example #17
Source File: BlendedTermQuery.java    From Elasticsearch with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a copy of {@code ctx} whose document frequency is replaced by
 * {@code newDocFreq}, with a total term frequency kept consistent with it
 * (ttf >= df, or -1 when TTF is not tracked).
 */
private static TermContext adjustDF(TermContext ctx, int newDocFreq) {
    // Use a value of ttf that is consistent with the doc freq (ie. gte)
    long newTTF;
    if (ctx.totalTermFreq() < 0) {
        newTTF = -1;
    } else {
        newTTF = Math.max(ctx.totalTermFreq(), newDocFreq);
    }
    List<LeafReaderContext> leaves = ctx.topReaderContext.leaves();
    final int len;
    if (leaves == null) {
        len = 1;
    } else {
        len = leaves.size();
    }
    TermContext newCtx = new TermContext(ctx.topReaderContext);
    for (int i = 0; i < len; ++i) {
        TermState termState = ctx.get(i);
        if (termState == null) {
            continue;
        }
        // Register the full stats on the first leaf that has a state; later
        // leaves add zero so the aggregate equals newDocFreq / newTTF.
        newCtx.register(termState, i, newDocFreq, newTTF);
        newDocFreq = 0;
        newTTF = 0;
    }
    return newCtx;
}
 
Example #18
Source File: MtasSpanStartQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  // Pure delegation: this query adds no terms beyond the wrapped weight's.
  spanWeight.extractTermContexts(contexts);
}
 
Example #19
Source File: MtasDisabledTwoPhaseIteratorSpanQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
 
Example #20
Source File: MtasExtendedSpanTermQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  contexts.put(localTerm, termContext);
}
 
Example #21
Source File: MtasSpanUniquePositionQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
 
Example #22
Source File: MtasMaximumExpandSpanQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
 
Example #23
Source File: MtasExpandSpanQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}
 
Example #24
Source File: MtasSpanFullyAlignedWithQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
 
Example #25
Source File: MtasSpanPrecededByQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
 
Example #26
Source File: CustomSpanPayloadCheckQuery.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
public SpanPayloadCheckWeight(IndexSearcher searcher, Map<Term, TermContext> termContexts,
                              SpanWeight matchWeight, float boost) throws IOException {
    super(CustomSpanPayloadCheckQuery.this, searcher, termContexts, boost);
    this.matchWeight = matchWeight;
}
 
Example #27
Source File: CustomSpanPayloadCheckQuery.java    From elasticsearch-plugin-bundle with GNU Affero General Public License v3.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
    matchWeight.extractTermContexts(contexts);
}
 
Example #28
Source File: MtasSpanNotQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  w1.spanWeight.extractTermContexts(contexts);
  w2.spanWeight.extractTermContexts(contexts);
}
 
Example #29
Source File: BlendedTermQuery.java    From Elasticsearch with Apache License 2.0 4 votes vote down vote up
/**
 * Blends the per-term statistics of {@code contexts} in place so that all
 * blended terms score as if they shared comparable doc frequencies, with a
 * small tie-breaking bias toward the more frequent terms.
 */
protected void blend(final TermContext[] contexts, int maxDoc, IndexReader reader) throws IOException {
    if (contexts.length <= 1) {
        return;
    }
    int max = 0;
    long minSumTTF = Long.MAX_VALUE;
    for (int i = 0; i < contexts.length; i++) {
        TermContext ctx = contexts[i];
        int df = ctx.docFreq();
        // we use the max here since it's the only "true" estimation we can make here
        // at least max(df) documents have that term. Sum or Averages don't seem
        // to have a significant meaning here.
        // TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have:
        //   df = df1 + df2 - (df1 * df2 / maxDoc)?
        max = Math.max(df, max);
        if (minSumTTF != -1 && ctx.totalTermFreq() != -1) {
            // we need to find out the minimum sumTTF to adjust the statistics
            // otherwise the statistics don't match
            minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field()));
        } else {
            // -1 marks "TTF not tracked" and is sticky once seen on any term.
            minSumTTF = -1;
        }

    }
    // Cap maxDoc by the smallest per-field sumTTF so the adjusted statistics
    // stay mutually consistent.
    if (minSumTTF != -1 && maxDoc > minSumTTF) {
        maxDoc = (int)minSumTTF;
    }

    if (max == 0) {
        return; // we are done that term doesn't exist at all
    }
    long sumTTF = minSumTTF == -1 ? -1 : 0;
    final int[] tieBreak = new int[contexts.length];
    for (int i = 0; i < tieBreak.length; ++i) {
        tieBreak[i] = i;
    }
    // Sort the index array by descending docFreq without moving the contexts.
    new InPlaceMergeSorter() {
        @Override
        protected void swap(int i, int j) {
            final int tmp = tieBreak[i];
            tieBreak[i] = tieBreak[j];
            tieBreak[j] = tmp;
        }
        @Override
        protected int compare(int i, int j) {
            return Ints.compare(contexts[tieBreak[j]].docFreq(), contexts[tieBreak[i]].docFreq());
        }
    }.sort(0, tieBreak.length);
    int prev = contexts[tieBreak[0]].docFreq();
    int actualDf = Math.min(maxDoc, max);
    assert actualDf >=0 : "DF must be >= 0";


    // here we try to add a little bias towards
    // the more popular (more frequent) fields
    // that acts as a tie breaker
    for (int i : tieBreak) {
        TermContext ctx = contexts[i];
        if (ctx.docFreq() == 0) {
            // contexts are visited in descending docFreq order, so the rest are 0 too
            break;
        }
        final int current = ctx.docFreq();
        if (prev > current) {
            actualDf++;
        }
        contexts[i] = ctx = adjustDF(ctx, Math.min(maxDoc, actualDf));
        prev = current;
        if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) {
            sumTTF += ctx.totalTermFreq();
        } else {
            sumTTF = -1;  // omit once TF is omitted anywhere!
        }
    }
    sumTTF = Math.min(sumTTF, minSumTTF);
    for (int i = 0; i < contexts.length; i++) {
        int df = contexts[i].docFreq();
        if (df == 0) {
            continue;
        }
        // the blended sumTTF can't be greater than the sumTTTF on the field
        // NOTE(review): this ternary is an identity — both branches yield sumTTF.
        final long fixedTTF = sumTTF == -1 ? -1 : sumTTF;
        contexts[i] = adjustTTF(contexts[i], fixedTTF);
    }
}
 
Example #30
Source File: MtasSpanRecurrenceQuery.java    From mtas with Apache License 2.0 4 votes vote down vote up
@Override
public void extractTermContexts(Map<Term, TermContext> contexts) {
  subWeight.extractTermContexts(contexts);
}