org.apache.lucene.index.TermsEnum.SeekStatus Java Examples

The following examples show how to use org.apache.lucene.index.TermsEnum.SeekStatus. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TestCompressingTermVectorsFormat.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that the {@link TermsEnum} obtained from a document's term vector rejects
 * ordinal-based access: both {@code ord()} and {@code seekExact(long)} are expected to
 * throw {@link UnsupportedOperationException}.
 */
public void testNoOrds() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  // Index one document whose single field stores term vectors.
  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);
  Document document = new Document();
  document.add(new Field("foo", "this is a test", vectorType));
  writer.addDocument(document);

  LeafReader reader = getOnlyLeafReader(writer.getReader());
  Terms vector = reader.getTermVector(0, "foo");
  assertNotNull(vector);

  // Position the enum on an existing term before probing the ordinal API.
  TermsEnum vectorEnum = vector.iterator();
  SeekStatus status = vectorEnum.seekCeil(new BytesRef("this"));
  assertEquals(SeekStatus.FOUND, status);

  expectThrows(UnsupportedOperationException.class, vectorEnum::ord);
  expectThrows(UnsupportedOperationException.class, () -> vectorEnum.seekExact(0));

  reader.close();
  writer.close();
  directory.close();
}
 
Example #2
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Positions this enum on the smallest term that is greater than or equal to {@code text}.
 *
 * @param text the term to seek to
 * @return {@link SeekStatus#FOUND} on an exact match, {@link SeekStatus#NOT_FOUND} when
 *     positioned on a larger term, or {@link SeekStatus#END} when {@code next()} runs out
 *     of terms before reaching {@code text}
 */
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
  final long blockIndex = seekBlock(text);
  if (blockIndex == -1) {
    // Target sorts before the first term: land on ordinal 0.
    seekExact(0L);
    return SeekStatus.NOT_FOUND;
  }

  // Jump to the start of the selected block and decode the term stored there.
  final long address = blockAddresses.get(blockIndex);
  this.ord = blockIndex << entry.termsDictBlockShift;
  bytes.seek(address);
  term.length = bytes.readVInt();
  bytes.readBytes(term.bytes, 0, term.length);

  // Walk forward term by term until we reach or pass the target.
  for (;;) {
    final int order = term.compareTo(text);
    if (order == 0) {
      return SeekStatus.FOUND;
    }
    if (order > 0) {
      return SeekStatus.NOT_FOUND;
    }
    if (next() == null) {
      return SeekStatus.END;
    }
  }
}
 
Example #3
Source File: TestTermsEnum2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * For each iteration, builds a random automaton and seeks (exactly or via ceil) to every
 * indexed term that the automaton accepts, visiting the terms in shuffled order.
 */
public void testSeeking() throws Exception {
  for (int iter = 0; iter < numIterations; iter++) {
    final String regexp = AutomatonTestUtil.randomRegexp(random());
    final Automaton parsed = new RegExp(regexp, RegExp.NONE).toAutomaton();
    final Automaton dfa = Operations.determinize(parsed, DEFAULT_MAX_DETERMINIZED_STATES);
    final TermsEnum termsEnum = MultiTerms.getTerms(reader, "field").iterator();

    final ArrayList<BytesRef> shuffled = new ArrayList<>(terms);
    Collections.shuffle(shuffled, random());

    for (BytesRef candidate : shuffled) {
      if (!Operations.run(dfa, candidate.utf8ToString())) {
        // rejected by the automaton: nothing to seek to
        continue;
      }
      if (random().nextBoolean()) {
        // exact seek must succeed for an indexed term
        assertTrue(termsEnum.seekExact(candidate));
      } else {
        // ceil seek must land exactly on the term
        assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(candidate));
        assertEquals(candidate, termsEnum.term());
      }
    }
  }
}
 
Example #4
Source File: TestTermsEnum2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Walks all terms in order, randomly choosing for each one between {@code next()},
 * {@code seekCeil} and {@code seekExact}, and checks the enum ends up on that term.
 */
public void testSeekingAndNexting() throws Exception {
  for (int iter = 0; iter < numIterations; iter++) {
    final TermsEnum termsEnum = MultiTerms.getTerms(reader, "field").iterator();

    for (BytesRef expected : terms) {
      switch (random().nextInt(3)) {
        case 0:
          // advance sequentially
          assertEquals(expected, termsEnum.next());
          break;
        case 1:
          // ceil seek must hit the term exactly
          assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(expected));
          assertEquals(expected, termsEnum.term());
          break;
        default:
          // exact seek
          assertTrue(termsEnum.seekExact(expected));
          break;
      }
    }
  }
}
 
Example #5
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up {@code key} through the terms enum.
 *
 * @param key the term to look up
 * @return the enum's ordinal when {@code seekCeil} reports an exact match; otherwise
 *     {@code -1 - ord}, where {@code ord} is the enum's ordinal after the seek
 * @throws ArithmeticException if the result does not fit in an {@code int}
 */
@Override
public int lookupTerm(BytesRef key) throws IOException {
  final boolean exactMatch = termsEnum.seekCeil(key) == SeekStatus.FOUND;
  final long position = termsEnum.ord();
  return Math.toIntExact(exactMatch ? position : -1L - position);
}
 
Example #6
Source File: Lucene80DocValuesProducer.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up {@code key} through the terms enum.
 *
 * @param key the term to look up
 * @return the enum's ordinal when {@code seekCeil} reports an exact match; otherwise
 *     {@code -1 - ord}, where {@code ord} is the enum's ordinal after the seek
 */
@Override
public long lookupTerm(BytesRef key) throws IOException {
  if (termsEnum.seekCeil(key) == SeekStatus.FOUND) {
    return termsEnum.ord();
  }
  // Not an exact match: encode the enum's ordinal as a negative value.
  return -1L - termsEnum.ord();
}
 
Example #7
Source File: SecureAtomicReaderTestBase.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the terms of {@code field} by seeking to the empty term and then iterating to
 * the end. The first term reached by the seek is printed to standard output.
 *
 * @param fields the fields to read from
 * @param field the name of the field whose terms are counted
 * @return the number of terms visited, or 0 when the initial seek reports the end
 */
private int getTermWithSeekCount(Fields fields, String field) throws IOException {
  TermsEnum cursor = fields.terms(field).iterator(null);
  if (cursor.seekCeil(new BytesRef("")) == SeekStatus.END) {
    return 0;
  }
  System.out.println(cursor.term().utf8ToString());

  // The seek already landed on one term; count it, then every following one.
  int seen = 1;
  for (BytesRef following = cursor.next(); following != null; following = cursor.next()) {
    seen++;
  }
  return seen;
}
 
Example #8
Source File: OrdsSegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the current block for {@code target}, dispatching on {@code isLeafBlock} to the
 * leaf or non-leaf scanning routine.
 *
 * @param target the term to scan for
 * @param exactOnly forwarded unchanged to the selected routine
 * @return the status reported by the selected routine
 */
public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
  if (isLeafBlock) {
    return scanToTermLeaf(target, exactOnly);
  }
  return scanToTermNonLeaf(target, exactOnly);
}
 
Example #9
Source File: IDVersionSegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the current block for {@code target}: uses the leaf routine when
 * {@code isLeafBlock} is set and the non-leaf routine otherwise.
 *
 * @param target the term to scan for
 * @param exactOnly forwarded unchanged to the selected routine
 * @return the status reported by the selected routine
 */
public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
  final SeekStatus status;
  if (isLeafBlock) {
    status = scanToTermLeaf(target, exactOnly);
  } else {
    status = scanToTermNonLeaf(target, exactOnly);
  }
  return status;
}
 
Example #10
Source File: BaseDocValuesFormatTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes one document with three sorted-set doc values ("hello", "world", "beer") and
 * exercises the {@link TermsEnum} exposed for that field: sequential iteration, ceil and
 * exact seeks by term, seeks by ordinal, and automaton intersection. The assertions expect
 * the values in sorted order with ordinals beer=0, hello=1, world=2.
 */
public void testSortedSetTermsEnum() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);
  
  // A single document carrying all three values for the same field.
  Document doc = new Document();
  doc.add(new SortedSetDocValuesField("field", new BytesRef("hello")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("world")));
  doc.add(new SortedSetDocValuesField("field", new BytesRef("beer")));
  iwriter.addDocument(doc);
  
  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedSetDocValues dv = getOnlyLeafReader(ireader).getSortedSetDocValues("field");
  assertEquals(3, dv.getValueCount());
  
  TermsEnum termsEnum = dv.termsEnum();
  
  // next(): values come back in sorted order with increasing ordinals
  assertEquals("beer", termsEnum.next().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals("world", termsEnum.next().utf8ToString());
  assertEquals(2, termsEnum.ord());
  
  // seekCeil(): a missing target lands on the next larger term ("ha!" -> "hello");
  // a target beyond the last term reports END
  assertEquals(SeekStatus.NOT_FOUND, termsEnum.seekCeil(new BytesRef("ha!")));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertEquals(SeekStatus.END, termsEnum.seekCeil(new BytesRef("zzz")));
  
  // seekExact(): succeeds for each stored value, fails for an absent one
  assertTrue(termsEnum.seekExact(new BytesRef("beer")));
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("hello")));
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertTrue(termsEnum.seekExact(new BytesRef("world")));
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());
  assertFalse(termsEnum.seekExact(new BytesRef("bogus")));

  // seek(ord): positioning by ordinal yields the matching term
  termsEnum.seekExact(0);
  assertEquals("beer", termsEnum.term().utf8ToString());
  assertEquals(0, termsEnum.ord());
  termsEnum.seekExact(1);
  assertEquals("hello", termsEnum.term().utf8ToString());
  assertEquals(1, termsEnum.ord());
  termsEnum.seekExact(2);
  assertEquals("world", termsEnum.term().utf8ToString());
  assertEquals(2, termsEnum.ord());

  // NORMAL automaton: ".*l.*" matches "hello" and "world" but not "beer"
  termsEnum = dv.intersect(new CompiledAutomaton(new RegExp(".*l.*").toAutomaton()));
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertEquals("world", termsEnum.next().utf8ToString());
  assertEquals(2, termsEnum.ord());
  assertNull(termsEnum.next());

  // SINGLE automaton: a literal pattern matches exactly one value
  termsEnum = dv.intersect(new CompiledAutomaton(new RegExp("hello").toAutomaton()));
  assertEquals("hello", termsEnum.next().utf8ToString());
  assertEquals(1, termsEnum.ord());
  assertNull(termsEnum.next());

  ireader.close();
  directory.close();
}
 
Example #11
Source File: LuceneTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that two {@link Terms} instances behave identically under random seeks.
 *
 * <p>Seek targets are sampled while iterating {@code leftTerms}: exact terms, truncated
 * prefixes of terms, terms copied into an array with a non-zero offset, and a few synthetic
 * targets (empty term, {@code 0xFF 0xFF}, random simple string). Each target is then used
 * for either {@code seekExact} or {@code seekCeil} on both enums and the outcomes compared.
 *
 * @param info message prefix passed to the assertions
 * @param leftTerms terms to sample seek targets from and to compare
 * @param rightTerms terms expected to behave the same as {@code leftTerms}
 */
private void assertTermsSeekingEquals(String info, Terms leftTerms, Terms rightTerms) throws IOException {

    // just an upper bound
    int numTests = atLeast(20);
    Random random = random();

    TermsEnum leftEnum = null;

    // collect this number of terms from the left side
    HashSet<BytesRef> tests = new HashSet<>();
    int numPasses = 0;
    while (numPasses < 10 && tests.size() < numTests) {
      leftEnum = leftTerms.iterator();
      BytesRef term = null;
      while ((term = leftEnum.next()) != null) {
        int code = random.nextInt(10);
        if (code == 0) {
          // the term
          tests.add(BytesRef.deepCopyOf(term));
        } else if (code == 1) {
          // truncated subsequence of term
          term = BytesRef.deepCopyOf(term);
          if (term.length > 0) {
            // truncate it
            term.length = random.nextInt(term.length);
          }
          // Register the truncated copy as a seek target; previously it was computed
          // and then discarded, so prefix targets were never exercised.
          tests.add(term);
        } else if (code == 2) {
          // term, but ensure a non-zero offset
          byte newbytes[] = new byte[term.length+5];
          System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
          tests.add(new BytesRef(newbytes, 5, term.length));
        } else if (code == 3) {
          switch (random().nextInt(3)) {
            case 0:
              tests.add(new BytesRef()); // before the first term
              break;
            case 1:
              tests.add(new BytesRef(new byte[] {(byte) 0xFF, (byte) 0xFF})); // past the last term
              break;
            case 2:
              tests.add(new BytesRef(TestUtil.randomSimpleString(random()))); // random term
              break;
            default:
              throw new AssertionError();
          }
        }
      }
      numPasses++;
    }

    TermsEnum rightEnum = rightTerms.iterator();

    ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
    Collections.shuffle(shuffledTests, random);

    for (BytesRef b : shuffledTests) {
      if (rarely()) {
        // make new enums
        leftEnum = leftTerms.iterator();
        rightEnum = rightTerms.iterator();
      }

      final boolean seekExact = random().nextBoolean();

      if (seekExact) {
        assertEquals(info, leftEnum.seekExact(b), rightEnum.seekExact(b));
      } else {
        SeekStatus leftStatus = leftEnum.seekCeil(b);
        SeekStatus rightStatus = rightEnum.seekCeil(b);
        assertEquals(info, leftStatus, rightStatus);
        // term() is only compared when the enums are positioned on a term
        if (leftStatus != SeekStatus.END) {
          assertEquals(info, leftEnum.term(), rightEnum.term());
          assertTermStatsEquals(info, leftEnum, rightEnum);
        }
      }
    }
  }
 
Example #12
Source File: SegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the current block for {@code target}, choosing the leaf or non-leaf scanning
 * routine according to {@code isLeafBlock}.
 *
 * @param target the term to scan for
 * @param exactOnly forwarded unchanged to the selected routine
 * @return the status reported by the selected routine
 */
public SeekStatus scanToTerm(BytesRef target, boolean exactOnly) throws IOException {
  if (!isLeafBlock) {
    return scanToTermNonLeaf(target, exactOnly);
  }
  return scanToTermLeaf(target, exactOnly);
}
 
Example #13
Source File: SegmentTermsEnumFrame.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Scans the remaining entries of the current block, comparing each entry's suffix bytes
 * against the bytes of {@code target} that follow the first {@code prefix} bytes.
 *
 * <p>Marks {@code ste.termExists} as true and resets {@code subCode} to 0 before scanning.
 * NOTE(review): the name suggests this is only used for blocks that contain terms and no
 * sub-blocks; confirm against the caller.
 *
 * @param target the term being sought; asserted to match this block's prefix
 * @param exactOnly when true, {@code fillTerm()} is also called on the paths that return
 *     {@link SeekStatus#END}
 * @return {@link SeekStatus#FOUND} if an entry equals the target,
 *     {@link SeekStatus#NOT_FOUND} if an entry greater than the target is reached first,
 *     or {@link SeekStatus#END} if the block has no more entries
 */
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {

    // if (DEBUG) System.out.println("    scanToTermLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));

    assert nextEnt != -1;

    ste.termExists = true;
    subCode = 0;

    // Every entry has already been consumed: nothing left to scan.
    if (nextEnt == entCount) {
      if (exactOnly) {
        fillTerm();
      }
      return SeekStatus.END;
    }

    assert prefixMatches(target);

    // TODO: binary search when all terms have the same length, which is common for ID fields,
    // which are also the most sensitive to lookup performance?
    // Loop over each entry (term or sub-block) in this block:
    do {
      nextEnt++;

      suffix = suffixLengthsReader.readVInt();

      // if (DEBUG) {
      //   BytesRef suffixBytesRef = new BytesRef();
      //   suffixBytesRef.bytes = suffixBytes;
      //   suffixBytesRef.offset = suffixesReader.getPosition();
      //   suffixBytesRef.length = suffix;
      //   System.out.println("      cycle: term " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
      // }

      // Remember where this entry's suffix starts, then advance the reader past it.
      startBytePos = suffixesReader.getPosition();
      suffixesReader.skipBytes(suffix);

      // Loop over bytes in the suffix, comparing to the target
      final int cmp = Arrays.compareUnsigned(
          suffixBytes, startBytePos, startBytePos + suffix,
          target.bytes, target.offset + prefix, target.offset + target.length);

      if (cmp < 0) {
        // Current entry is still before the target;
        // keep scanning
      } else if (cmp > 0) {
        // Done!  Current entry is after target --
        // return NOT_FOUND:
        fillTerm();

        //if (DEBUG) System.out.println("        not found");
        return SeekStatus.NOT_FOUND;
      } else {
        // Exact match!

        // This cannot be a sub-block because we
        // would have followed the index to this
        // sub-block from the start:

        assert ste.termExists;
        fillTerm();
        //if (DEBUG) System.out.println("        found!");
        return SeekStatus.FOUND;
      }
    } while (nextEnt < entCount);

    // It is possible (and OK) that terms index pointed us
    // at this block, but, we scanned the entire block and
    // did not find the term to position to.  This happens
    // when the target is after the last term in the block
    // (but, before the next term in the index).  EG
    // target could be foozzz, and terms index pointed us
    // to the foo* block, but the last term in this block
    // was fooz (and, eg, first term in the next block will
    // be fop).
    //if (DEBUG) System.out.println("      block end");
    if (exactOnly) {
      fillTerm();
    }

    // TODO: not consistent that in the
    // not-exact case we don't next() into the next
    // frame here
    return SeekStatus.END;
  }
 
Example #14
Source File: TestBlockPostingsFormat3.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Checks that two {@link Terms} instances give the same results for {@code seekExact} and
 * {@code seekCeil}.
 *
 * <p>Seek targets are sampled while iterating {@code leftTerms}: exact terms, truncated
 * prefixes of terms, and terms copied into an array with a non-zero offset. For every
 * target, fresh enums are created on both sides and each seek is issued twice in a row,
 * so the second call runs on an already-positioned enum.
 *
 * @param leftTerms terms to sample seek targets from and to compare
 * @param rightTerms terms expected to behave the same as {@code leftTerms}
 */
private void assertTermsSeeking(Terms leftTerms, Terms rightTerms) throws Exception {
  TermsEnum leftEnum = null;
  TermsEnum rightEnum = null;
  
  // just an upper bound
  int numTests = atLeast(20);
  Random random = random();
  
  // collect this number of terms from the left side
  HashSet<BytesRef> tests = new HashSet<>();
  int numPasses = 0;
  while (numPasses < 10 && tests.size() < numTests) {
    leftEnum = leftTerms.iterator();
    BytesRef term = null;
    while ((term = leftEnum.next()) != null) {
      int code = random.nextInt(10);
      if (code == 0) {
        // the term
        tests.add(BytesRef.deepCopyOf(term));
      } else if (code == 1) {
        // truncated subsequence of term
        term = BytesRef.deepCopyOf(term);
        if (term.length > 0) {
          // truncate it
          term.length = random.nextInt(term.length);
        }
        // Register the truncated copy as a seek target; previously it was computed
        // and then discarded, so prefix targets were never exercised.
        tests.add(term);
      } else if (code == 2) {
        // term, but ensure a non-zero offset
        byte newbytes[] = new byte[term.length+5];
        System.arraycopy(term.bytes, term.offset, newbytes, 5, term.length);
        tests.add(new BytesRef(newbytes, 5, term.length));
      }
    }
    numPasses++;
  }
  
  ArrayList<BytesRef> shuffledTests = new ArrayList<>(tests);
  Collections.shuffle(shuffledTests, random);
  
  for (BytesRef b : shuffledTests) {
    leftEnum = leftTerms.iterator();
    rightEnum = rightTerms.iterator();
    
    // seekExact, issued twice on the same enums
    assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
    assertEquals(leftEnum.seekExact(b), rightEnum.seekExact(b));
    
    SeekStatus leftStatus;
    SeekStatus rightStatus;
    
    // seekCeil, first pass; term() is only compared when positioned on a term
    leftStatus = leftEnum.seekCeil(b);
    rightStatus = rightEnum.seekCeil(b);
    assertEquals(leftStatus, rightStatus);
    if (leftStatus != SeekStatus.END) {
      assertEquals(leftEnum.term(), rightEnum.term());
    }
    
    // seekCeil, second pass on the already-positioned enums
    leftStatus = leftEnum.seekCeil(b);
    rightStatus = rightEnum.seekCeil(b);
    assertEquals(leftStatus, rightStatus);
    if (leftStatus != SeekStatus.END) {
      assertEquals(leftEnum.term(), rightEnum.term());
    }
  }
}