Java Code Examples for org.apache.lucene.index.Term#bytes()

The following examples show how to use org.apache.lucene.index.Term#bytes(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: PhraseHelper.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Records the offsets of the current posting under the term's bytes.
 *
 * <p>Terms whose field is rejected by {@code fieldMatcher} are skipped. When no
 * offsets enum exists yet for the term, one is created and registered, unless the
 * term is listed in {@code positionInsensitiveTerms}, in which case nothing is recorded.
 *
 * @param postings source of the frequency and the start/end offsets to record
 * @param position not used by this implementation
 * @param term the term being collected; its field is checked and its bytes are the map key
 * @throws IOException declared by the signature (the postings accessors are called here)
 */
@Override
public void collectLeaf(PostingsEnum postings, int position, Term term) throws IOException {
  final boolean fieldAccepted = fieldMatcher.test(term.field());
  if (fieldAccepted) {
    SpanCollectedOffsetsEnum collected = termToOffsetsEnums.get(term.bytes());
    if (collected == null) {
      // If it's pos insensitive we handle it outside of PhraseHelper.  term.field() is from the Query.
      final boolean handledElsewhere = positionInsensitiveTerms.contains(term.bytes());
      if (handledElsewhere) {
        return;
      }
      // First sighting of this term: create its enum and register it for later postings.
      collected = new SpanCollectedOffsetsEnum(term.bytes(), postings.freq());
      termToOffsetsEnums.put(term.bytes(), collected);
    }
    collected.add(postings.startOffset(), postings.endOffset());
  }
}
 
Example 2
Source File: TestSpanSearchEquivalence.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * SpanNearQuery([A, B], 0, true) = "A B".
 *
 * <p>When both terms are equal only the matching document sets are compared;
 * otherwise the scores are compared as well.
 */
public void testSpanNearVersusPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  // Wrap the clauses in the same order as the terms were drawn.
  final SpanQuery firstClause = spanQuery(new SpanTermQuery(first));
  final SpanQuery secondClause = spanQuery(new SpanTermQuery(second));
  final SpanQuery[] clauses = { firstClause, secondClause };
  final SpanQuery orderedNear = spanQuery(new SpanNearQuery(clauses, 0, true));
  final PhraseQuery exactPhrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());
  if (!first.equals(second)) {
    assertSameScores(orderedNear, exactPhrase);
  } else {
    assertSameSet(orderedNear, exactPhrase);
  }
}
 
Example 3
Source File: TestRTGBase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the first live document containing the given term.
 *
 * <p>If a document is found, asserts that it is the only live match for the term.
 *
 * @param r the reader to search
 * @param t the term to look up (field and bytes are both used)
 * @return the doc id of the match, or -1 when the field has no terms, the term is
 *         absent, or no live document contains it
 * @throws IOException declared by the signature (terms/postings access happens here)
 */
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  final Terms fieldTerms = MultiTerms.getTerms(r, t.field());
  if (fieldTerms == null) {
    return -1;
  }

  final BytesRef wanted = t.bytes();
  final TermsEnum cursor = fieldTerms.iterator();
  if (cursor.seekExact(wanted) == false) {
    return -1;
  }

  // Postings are fetched first, then filtered through the reader's live docs.
  final PostingsEnum liveDocsOnly =
      BitsFilteredPostingsEnum.wrap(cursor.postings(null, PostingsEnum.NONE), MultiBits.getLiveDocs(r));

  final int firstDoc = liveDocsOnly.nextDoc();
  if (firstDoc == DocIdSetIterator.NO_MORE_DOCS) {
    return -1;
  }
  // A match must be unique: the iterator has to be exhausted right after it.
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, liveDocsOnly.nextDoc());
  return firstDoc;
}
 
Example 4
Source File: TestSloppyPhraseQuery2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B"~N ⊆ "A B"~N+1, checked for N = 0..9 with two terms drawn from
 * {@code randomTerm()}.
 */
public void testIncreasingSloppiness() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  int slop = 0;
  while (slop < 10) {
    final PhraseQuery tighter = new PhraseQuery(slop, first.field(), first.bytes(), second.bytes());
    final PhraseQuery looser = new PhraseQuery(slop + 1, first.field(), first.bytes(), second.bytes());
    assertSubsetOf(tighter, looser);
    slop++;
  }
}
 
Example 5
Source File: TestSloppyPhraseQuery2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B C"~N ⊆ "A B C"~N+1, checked for N = 0..9 with three terms drawn from
 * {@code randomTerm()}.
 */
public void testIncreasingSloppiness3() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final Term third = randomTerm();
  int slop = 0;
  while (slop < 10) {
    final PhraseQuery tighter =
        new PhraseQuery(slop, first.field(), first.bytes(), second.bytes(), third.bytes());
    final PhraseQuery looser =
        new PhraseQuery(slop + 1, first.field(), first.bytes(), second.bytes(), third.bytes());
    // NOTE(review): the subset check runs twice per slop value; kept as-is —
    // confirm whether the repetition is intentional.
    assertSubsetOf(tighter, looser);
    assertSubsetOf(tighter, looser);
    slop++;
  }
}
 
Example 6
Source File: TestSloppyPhraseQuery2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A A"~N ⊆ "A A"~N+1, checked for N = 0..9 with a single term repeated twice.
 */
public void testRepetitiveIncreasingSloppiness() throws Exception {
  final Term repeated = randomTerm();
  int slop = 0;
  while (slop < 10) {
    final PhraseQuery tighter =
        new PhraseQuery(slop, repeated.field(), repeated.bytes(), repeated.bytes());
    final PhraseQuery looser =
        new PhraseQuery(slop + 1, repeated.field(), repeated.bytes(), repeated.bytes());
    assertSubsetOf(tighter, looser);
    slop++;
  }
}
 
Example 7
Source File: TestSloppyPhraseQuery2.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A A A"~N ⊆ "A A A"~N+1, checked for N = 0..9 with a single term repeated
 * three times.
 */
public void testRepetitiveIncreasingSloppiness3() throws Exception {
  final Term repeated = randomTerm();
  int slop = 0;
  while (slop < 10) {
    final PhraseQuery tighter =
        new PhraseQuery(slop, repeated.field(), repeated.bytes(), repeated.bytes(), repeated.bytes());
    final PhraseQuery looser =
        new PhraseQuery(slop + 1, repeated.field(), repeated.bytes(), repeated.bytes(), repeated.bytes());
    // NOTE(review): the subset check runs twice per slop value; kept as-is —
    // confirm whether the repetition is intentional.
    assertSubsetOf(tighter, looser);
    assertSubsetOf(tighter, looser);
    slop++;
  }
}
 
Example 8
Source File: UnInvertedField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Called for each term in the field being uninverted.
 * Collects {@link #maxTermCounts} for all bigTerms as well as storing them in {@link #bigTerms}.
 *
 * <p>A term is treated as "big" when its docFreq exceeds {@code maxTermDocFreq}; for such a term
 * a {@code TopTerm} is registered under its term number and the size of its doc set is stored in
 * {@code maxTermCounts[termNum]}.
 *
 * @param te positioned at the current term.
 * @param termNum the ID/pointer/ordinal of the current term. Monotonically increasing between calls.
 * @throws IOException declared by the signature (term and doc-set lookups happen here)
 */
@Override
protected void visitTerm(TermsEnum te, int termNum) throws IOException {

  if (termNum >= maxTermCounts.length) {
    // resize by doubling - for very large number of unique terms, expanding
    // by 4K and resultant GC will dominate uninvert times.  Resize at end if material
    //
    // The doubling is done in long arithmetic: as an int, length*2 wraps to a negative
    // value once length exceeds 2^30, and Math.min would then pick that negative value
    // as the new array size.
    final int grownLength = (int) Math.min(Integer.MAX_VALUE - 16L, maxTermCounts.length * 2L);
    int[] newMaxTermCounts = new int[grownLength];
    System.arraycopy(maxTermCounts, 0, newMaxTermCounts, 0, termNum);
    maxTermCounts = newMaxTermCounts;
  }

  final BytesRef term = te.term();

  if (te.docFreq() > maxTermDocFreq) {
    Term t = new Term(field, term);  // this makes a deep copy of the term bytes
    TopTerm topTerm = new TopTerm();
    topTerm.term = t.bytes();
    topTerm.termNum = termNum;
    topTerm.termQuery = new TermQuery(t);

    bigTerms.put(topTerm.termNum, topTerm);

    // The enumeration state is created once, on the first big term, and reused afterwards.
    if (deState == null) {
      deState = new SolrIndexSearcher.DocsEnumState();
      deState.fieldName = field;
      deState.liveDocs = searcher.getLiveDocsBits();
      deState.termsEnum = te;  // TODO: check for MultiTermsEnum in SolrIndexSearcher could now fail?
      deState.postingsEnum = postingsEnum;
      deState.minSetSizeCached = maxTermDocFreq;
    }

    postingsEnum = deState.postingsEnum;
    DocSet set = searcher.getDocSet(deState);
    maxTermCounts[termNum] = set.size();
  }
}
 
Example 9
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B" ⊆ (+A +B): every match of the exact phrase must also match the
 * conjunction of its two terms.
 */
public void testExactPhraseVersusBooleanAnd() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();

  final BooleanQuery.Builder bothRequired = new BooleanQuery.Builder();
  bothRequired.add(new TermQuery(first), Occur.MUST);
  bothRequired.add(new TermQuery(second), Occur.MUST);
  final BooleanQuery conjunction = bothRequired.build();

  final PhraseQuery exactPhrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());
  assertSubsetOf(exactPhrase, conjunction);
}
 
Example 10
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B" ⊆ "A B"~1: the exact phrase must match a subset of what the same
 * phrase matches with a slop of 1.
 */
public void testPhraseVersusSloppyPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final String field = first.field();
  final PhraseQuery exact = new PhraseQuery(field, first.bytes(), second.bytes());
  final PhraseQuery sloppy = new PhraseQuery(1, field, first.bytes(), second.bytes());
  assertSubsetOf(exact, sloppy);
}
 
Example 11
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B" ⊆ "A (B C)": the exact phrase must match a subset of the multi-phrase
 * whose second position accepts either B or C.
 */
public void testExactPhraseVersusMultiPhrase() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery exactPhrase = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  final Term alternative = randomTerm();
  final MultiPhraseQuery.Builder multiBuilder = new MultiPhraseQuery.Builder();
  multiBuilder.add(first);
  final Term[] secondPosition = { second, alternative };
  multiBuilder.add(secondPosition);

  assertSubsetOf(exactPhrase, multiBuilder.build());
}
 
Example 12
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * "A B"~∞ = +A +B if A != B: with unlimited slop, the phrase matches the same
 * documents as the conjunction of its two distinct terms.
 */
public void testSloppyPhraseVersusBooleanAnd() throws Exception {
  final Term first = randomTerm();
  // semantics differ from SpanNear: SloppyPhrase handles repeats,
  // so we must ensure the second term differs from the first; redraw until it does.
  Term second = randomTerm();
  while (first.equals(second)) {
    second = randomTerm();
  }

  final PhraseQuery unboundedSlop =
      new PhraseQuery(Integer.MAX_VALUE, first.field(), first.bytes(), second.bytes());

  final BooleanQuery.Builder bothRequired = new BooleanQuery.Builder();
  bothRequired.add(new TermQuery(first), Occur.MUST);
  bothRequired.add(new TermQuery(second), Occur.MUST);

  assertSameSet(unboundedSlop, bothRequired.build());
}
 
Example 13
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Phrase positions are relative: a phrase built from the two terms directly must
 * score the same as one built with explicit positions 10000 and 10001.
 */
public void testPhraseRelativePositions() throws Exception {
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery implicitPositions = new PhraseQuery(first.field(), first.bytes(), second.bytes());

  final PhraseQuery.Builder shifted = new PhraseQuery.Builder();
  shifted.add(first, 10000);
  shifted.add(second, 10001);
  final PhraseQuery explicitPositions = shifted.build();

  assertSameScores(implicitPositions, explicitPositions);
}
 
Example 14
Source File: TestSimpleSearchEquivalence.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Sloppy-phrase positions are relative: with a slop of 2, a phrase built from the
 * two terms directly must score the same as one built with explicit positions
 * 10000 and 10001.
 */
public void testSloppyPhraseRelativePositions() throws Exception {
  final int slop = 2;
  final Term first = randomTerm();
  final Term second = randomTerm();
  final PhraseQuery implicitPositions =
      new PhraseQuery(slop, first.field(), first.bytes(), second.bytes());

  final PhraseQuery.Builder shifted = new PhraseQuery.Builder();
  shifted.add(first, 10000);
  shifted.add(second, 10001);
  shifted.setSlop(slop);
  final PhraseQuery explicitPositions = shifted.build();

  assertSameScores(implicitPositions, explicitPositions);
}
 
Example 15
Source File: VertexiumMapperQueryParser.java    From vertexium with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a query for {@code term} by delegating to the field-name based
 * {@code createQuery} overload.
 *
 * <p>The callback handed to that overload rebuilds a {@link Term} from whatever
 * field name it receives together with the original term's bytes, then applies
 * {@code fn} to it.
 *
 * @param term the term whose field and bytes are used
 * @param fn   produces the query for a (possibly re-targeted) term
 * @return the result of the delegated {@code createQuery} call
 */
private Query createQuery(Term term, Function<Term, Query> fn) {
    // Both parts are read up front; the lambda only captures these locals.
    final String termField = term.field();
    final BytesRef termBytes = term.bytes();
    return createQuery(termField, (mappedField) -> fn.apply(new Term(mappedField, termBytes)));
}
 
Example 16
Source File: TestPrefixRandom.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates the query from a term: the term's field is passed to the superclass
 * constructor and the term's bytes are kept as {@code prefix}.
 *
 * @param term supplies the field (via {@code term.field()}) and the prefix bytes
 *             (via {@code term.bytes()})
 */
DumbPrefixQuery(Term term) {
  super(term.field());
  // The BytesRef returned by term.bytes() is stored as-is (no copy is made here).
  prefix = term.bytes();
}
 
Example 17
Source File: TestLRUQueryCache.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Recursively builds a random query tree.
 *
 * <p>One of six shapes is picked at random: a term query, a boolean query with 1-3
 * random sub-clauses, a two-term phrase query with slop 0 or 1, a match-all query,
 * a constant-score wrapper around a random sub-query, or a disjunction-max over 1-3
 * random sub-queries. Recursion stops with a match-all query once {@code level}
 * reaches 10.
 *
 * @param level current nesting depth; sub-queries are built with {@code level + 1}
 * @return the randomly generated query
 */
private static Query buildRandomQuery(int level) {
  final boolean depthExhausted = (level == 10);
  if (depthExhausted) {
    // at most 10 levels
    return new MatchAllDocsQuery();
  }

  final int childLevel = level + 1;
  final int shape = random().nextInt(6);
  switch (shape) {
    case 0: {
      return new TermQuery(randomTerm());
    }
    case 1: {
      final BooleanQuery.Builder booleanBuilder = new BooleanQuery.Builder();
      final int clauseCount = TestUtil.nextInt(random(), 1, 3);
      int optionalClauses = 0;
      for (int remaining = clauseCount; remaining > 0; remaining--) {
        // The occur is drawn before the sub-query is built, keeping the random sequence stable.
        final Occur occur = RandomPicks.randomFrom(random(), Occur.values());
        booleanBuilder.add(buildRandomQuery(childLevel), occur);
        if (occur == Occur.SHOULD) {
          optionalClauses++;
        }
      }
      // The minimum-should-match can never exceed the number of SHOULD clauses added.
      booleanBuilder.setMinimumNumberShouldMatch(TestUtil.nextInt(random(), 0, optionalClauses));
      return booleanBuilder.build();
    }
    case 2: {
      final Term first = randomTerm();
      final Term second = randomTerm();
      final int slop = random().nextInt(2);
      return new PhraseQuery(slop, first.field(), first.bytes(), second.bytes());
    }
    case 3: {
      return new MatchAllDocsQuery();
    }
    case 4: {
      return new ConstantScoreQuery(buildRandomQuery(childLevel));
    }
    case 5: {
      final List<Query> alternatives = new ArrayList<>();
      final int alternativeCount = TestUtil.nextInt(random(), 1, 3);
      for (int remaining = alternativeCount; remaining > 0; remaining--) {
        alternatives.add(buildRandomQuery(childLevel));
      }
      return new DisjunctionMaxQuery(alternatives, random().nextFloat());
    }
    default: {
      throw new AssertionError();
    }
  }
}
 
Example 18
Source File: IndexSearcher.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Returns {@link TermStatistics} for a term.
 * 
 * This can be overridden for example, to return a term's statistics
 * across a distributed collection.
 *
 * @param term The term the statistics are for; its bytes are passed on to the returned {@link TermStatistics}.
 * @param docFreq The document frequency of the term. It must be greater or equal to 1.
 * @param totalTermFreq The total term frequency.
 * @return A {@link TermStatistics} (never null).
 * @throws IOException declared by the signature; nothing in this body visibly throws it
 *         (presumably kept so overriding implementations may perform I/O — confirm).
 *
 * @lucene.experimental
 */
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq) throws IOException {
  // This constructor will throw an exception if docFreq <= 0.
  return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}