Java Code Examples for org.apache.lucene.util.BytesRef#compareTo()

The following examples show how to use org.apache.lucene.util.BytesRef#compareTo(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You can also check out related API usage in the sidebar.
Example 1
Source File: TopLevelJoinQuery.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Binary-searches the ordinals of {@code docValues} for {@code key}, looking only at
 * ordinals from {@code startOrd} through {@code getValueCount() - 1}.
 *
 * @param docValues doc values whose terms are probed via {@code lookupOrd}
 * @param key       term to locate
 * @param startOrd  lowest ordinal included in the search
 * @return the ordinal whose term equals {@code key}, or {@code -(insertionPoint + 1)}
 *         when no such term is found
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long lo = startOrd;
  long hi = docValues.getValueCount() - 1;

  while (lo <= hi) {
    // Unsigned shift keeps the midpoint correct even if lo + hi overflows.
    final long probe = (lo + hi) >>> 1;
    final int order = docValues.lookupOrd(probe).compareTo(key);

    if (order == 0) {
      return probe; // exact match
    }
    if (order < 0) {
      lo = probe + 1; // probed term sorts before the key: search the upper half
    } else {
      hi = probe - 1; // probed term sorts after the key: search the lower half
    }
  }

  // Not found: encode the insertion point as a negative number.
  return -(lo + 1);
}
 
Example 2
Source File: SortedSetDocValues.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Looks up the ordinal of {@code key} with a binary search over all ordinals
 * ({@code 0} through {@code getValueCount() - 1}).
 *
 * @param key Key to look up
 * @return the ordinal of {@code key} if it exists, otherwise
 *         {@code -insertionPoint-1}, like {@code Arrays.binarySearch}
 */
public long lookupTerm(BytesRef key) throws IOException {
  long first = 0;
  long last = getValueCount() - 1;

  while (first <= last) {
    final long center = (first + last) >>> 1;
    final int order = lookupOrd(center).compareTo(key);

    if (order == 0) {
      return center; // key found
    }
    if (order > 0) {
      last = center - 1;
    } else {
      first = center + 1;
    }
  }

  return -(first + 1); // key not found
}
 
Example 3
Source File: FuzzyTermsEnum.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Hook fired when the max non-competitive boost has changed; this is where a
 * smarter {@code actualEnum} gets swapped in.
 *
 * @param lastTerm the last term encountered, or {@code null}; a {@code null} value always
 *                 triggers a new automaton enum
 */
private void bottomChanged(BytesRef lastTerm) throws IOException {
  final int previousMaxEdits = maxEdits;

  // true if the last term encountered is lexicographically equal or after the bottom term in the PQ
  final boolean termAfter;
  if (bottomTerm == null) {
    termAfter = true;
  } else {
    termAfter = lastTerm != null && lastTerm.compareTo(bottomTerm) >= 0;
  }

  // Keep lowering the max edit distance for as long as the max non-competitive boost
  // is >= the best boost attainable at that edit distance.
  for (; maxEdits > 0; maxEdits--) {
    final float maxBoost = 1.0f - ((float) maxEdits / (float) termLength);
    final boolean stillCompetitive = bottom < maxBoost || (bottom == maxBoost && !termAfter);
    if (stillCompetitive) {
      break;
    }
  }

  final boolean editsChanged = previousMaxEdits != maxEdits;
  if (editsChanged || lastTerm == null) {
    // This is a very powerful optimization: the maximum edit distance has changed.  This happens because we collect only the top scoring
    // N (= 50, by default) terms, and if e.g. maxEdits=2, and the queue is now full of matching terms, and we notice that the worst entry
    // in that queue is ed=1, then we can switch the automata here to ed=1 which is a big speedup.
    actualEnum = getAutomatonEnum(maxEdits, lastTerm);
  }
}
 
Example 4
Source File: TermsQParserPlugin.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Finds the ordinal of {@code key} in {@code docValues} by binary search, starting the
 * search window at {@code startOrd} and ending it at the last ordinal.
 *
 * @param docValues source of the terms, addressed by ordinal
 * @param key       the term being searched for
 * @param startOrd  first ordinal of the search window
 * @return the matching ordinal, or {@code -(insertionPoint + 1)} if {@code key} is absent
 */
private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd) throws IOException {
  long floor = startOrd;
  long ceiling = docValues.getValueCount() - 1;

  while (ceiling >= floor) {
    final long middle = (floor + ceiling) >>> 1;
    final int comparison = docValues.lookupOrd(middle).compareTo(key);

    if (comparison > 0) {
      ceiling = middle - 1;
      continue;
    }
    if (comparison < 0) {
      floor = middle + 1;
      continue;
    }
    return middle; // key found
  }

  return -(floor + 1); // key not found
}
 
Example 5
Source File: BlockReader.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Compares the searched term to the middle term of the block.
 * If the searched term is lexicographically equal or after the middle term
 * then jumps to the second half of the block directly.
 * <p>
 * If the reader is not positioned at the first line of the block, nothing is read or
 * compared and {@code -1} is returned. If the searched term is before the middle term,
 * the read position is reset to the first line of the block.
 *
 * @param searchedTerm The term to compare against the middle term of the block.
 * @return The comparison between the searched term and the middle term.
 * @throws IOException May be raised while skipping to or reading the middle line; the exception
 *                     built by {@code newCorruptIndexException} is thrown when no line is
 *                     found at the middle of the block.
 */
protected int compareToMiddleAndJump(BytesRef searchedTerm) throws IOException {
  if (lineIndexInBlock != 0) {
    // Don't try to compare and jump if we are not positioned at the first line.
    // This can happen if we seek in the same current block and we continue
    // scanning from the current line (see initializeHeader()).
    return -1;
  }
  // Move to the middle line of the block and read it.
  blockReadBuffer.skipBytes(blockHeader.getMiddleLineOffset());
  lineIndexInBlock = blockHeader.getMiddleLineIndex();
  readLineInBlock();
  if (blockLine == null) {
    throw newCorruptIndexException("Illegal absence of line at the middle of the block", null);
  }
  int compare = searchedTerm.compareTo(term());
  if (compare < 0) {
    // The searched term is before the middle term: go back to the first line of the block.
    blockReadBuffer.setPosition(blockFirstLineStart);
    lineIndexInBlock = 0;
  }
  return compare;
}
 
Example 6
Source File: LegacySortedSetDocValues.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Binary-searches every ordinal ({@code 0} through {@code getValueCount() - 1})
 * for the term equal to {@code key}.
 *
 * @param key Key to look up
 * @return the ordinal of {@code key} when present; otherwise
 *         {@code -insertionPoint-1}, like {@code Arrays.binarySearch}
 */
public long lookupTerm(BytesRef key) {
  long lower = 0;
  long upper = getValueCount() - 1;

  while (upper >= lower) {
    final long pivot = (lower + upper) >>> 1;
    final int comparison = lookupOrd(pivot).compareTo(key);

    if (comparison > 0) {
      upper = pivot - 1;
    } else if (comparison < 0) {
      lower = pivot + 1;
    } else {
      return pivot; // key found
    }
  }

  return -(lower + 1); // key not found
}
 
Example 7
Source File: TestMultiTermsEnum.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns a filtered view of the delegate's terms enum that accepts only the
 * term equal to {@code value}.
 */
@Override
public TermsEnum iterator() throws IOException {
  return new FilteredTermsEnum(delegate.iterator()) {

    @Override
    protected AcceptStatus accept(BytesRef term) {
      final int order = term.compareTo(value);
      if (order == 0) {
        return AcceptStatus.YES;
      }
      if (order > 0) {
        // Already past the wanted term: nothing further can match.
        return AcceptStatus.END;
      }
      // I don't think it will actually get here because they are supposed to call nextSeekTerm
      // to get the initial term to seek to.
      return AcceptStatus.NO_AND_SEEK;
    }

    @Override
    protected BytesRef nextSeekTerm(BytesRef currentTerm) {
      // Seek to the wanted term only while the enum has not reached it yet.
      final boolean beforeValue = currentTerm == null || currentTerm.compareTo(value) < 0;
      return beforeValue ? value : null;
    }
  };
}
 
Example 8
Source File: TestSTBlockReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Override that takes the middle line from the {@code lines} list (the element at index
 * {@code lines.size() >> 1}) and compares the searched term with {@code term()}.
 *
 * @param searchedTerm the term to compare against the middle term
 * @return the result of {@code searchedTerm.compareTo(term())}
 */
@Override
protected int compareToMiddleAndJump(BytesRef searchedTerm) {
  // Select the middle line and record the middle line index before comparing.
  blockLine = lines.get(lines.size() >> 1);
  lineIndexInBlock = blockHeader.getMiddleLineIndex();
  int compare = searchedTerm.compareTo(term());
  if (compare < 0) {
    // The searched term is before the middle term: reset the line index to the first line.
    lineIndexInBlock = 0;
  }
  return compare;
}
 
Example 9
Source File: DocValuesStats.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Updates the running {@code max} and {@code min} with the current binary value of
 * {@code sdv}. The {@code count} argument is not used here.
 */
@Override
protected void doAccumulate(int count) throws IOException {
  final BytesRef current = sdv.binaryValue();

  final boolean isNewMax = max == null || current.compareTo(max) > 0;
  if (isNewMax) {
    max = copyFrom(current, max);
  }

  final boolean isNewMin = min == null || current.compareTo(min) < 0;
  if (isNewMin) {
    min = copyFrom(current, min);
  }
}
 
Example 10
Source File: TestAutomaton.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds automata for random binary intervals (random bounds, random inclusiveness) and
 * checks, for 500 random terms per interval, that running the automaton on a term agrees
 * with the membership computed directly from {@code BytesRef#compareTo}.
 */
public void testMakeBinaryIntervalRandom() throws Exception {
  final int rounds = atLeast(100);
  for (int round = 0; round < rounds; round++) {
    final BytesRef minTerm = TestUtil.randomBinaryTerm(random());
    final boolean minInclusive = random().nextBoolean();
    final BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
    final boolean maxInclusive = random().nextBoolean();

    final Automaton a = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);

    for (int probe = 0; probe < 500; probe++) {
      final BytesRef term = TestUtil.randomBinaryTerm(random());
      final int minCmp = minTerm.compareTo(term);
      final int maxCmp = maxTerm.compareTo(term);

      // The term is inside the interval when it is not below min, not above max, and any
      // bound it is exactly equal to is an inclusive one.
      final boolean expected = minCmp <= 0
          && maxCmp >= 0
          && (minCmp < 0 || minInclusive)
          && (maxCmp > 0 || maxInclusive);

      if (VERBOSE) {
        System.out.println("  check term=" + term + " expected=" + expected);
      }
      final IntsRefBuilder intsBuilder = new IntsRefBuilder();
      Util.toIntsRef(term, intsBuilder);
      assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
    }
  }
}
 
Example 11
Source File: SeekingTermSetTermsEnum.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Decides whether {@code term} is accepted, moving {@code upto} forward through the terms
 * addressed by {@code ords} as needed.
 * <ul>
 *   <li>{@code END} when {@code term} is greater than {@code lastTerm}.</li>
 *   <li>{@code YES} when {@code term} equals the term at {@code upto} and {@code upto} is the
 *       last element; {@code YES_AND_SEEK} when it matches and more elements remain, in which
 *       case {@code seekTerm} is set to the next element.</li>
 *   <li>{@code NO} when there is no match and {@code upto} reaches the last element.</li>
 *   <li>{@code NO_AND_SEEK} when advancing stops on an element greater than {@code term};
 *       {@code seekTerm} then holds that element.</li>
 * </ul>
 */
@Override
protected AcceptStatus accept(BytesRef term) throws IOException {
  if (term.compareTo(lastTerm) > 0) {
    return AcceptStatus.END;
  }

  BytesRef currentTerm = terms.get(ords[upto], spare);
  if (term.compareTo(currentTerm) == 0) {
    if (upto == lastElement) {
      return AcceptStatus.YES;
    } else {
      seekTerm = terms.get(ords[++upto], spare);
      return AcceptStatus.YES_AND_SEEK;
    }
  } else {
    if (upto == lastElement) {
      return AcceptStatus.NO;
    } else { // Our current term doesn't match the given term.
      int cmp;
      do { // We may be behind the given term by more than one step. Keep incrementing till we're the same or higher.
        if (upto == lastElement) {
          return AcceptStatus.NO;
        }
        // typically the terms dict is a superset of query's terms so it's unusual that we have to skip many of
        // our terms so we don't do a binary search here
        seekTerm = terms.get(ords[++upto], spare);
      } while ((cmp = seekTerm.compareTo(term)) < 0);
      if (cmp == 0) {
        if (upto == lastElement) {
          return AcceptStatus.YES;
        }
        seekTerm = terms.get(ords[++upto], spare);
        return AcceptStatus.YES_AND_SEEK;
      } else {
        return AcceptStatus.NO_AND_SEEK;
      }
    }
  }
}
 
Example 12
Source File: TestICUCollationDocValuesField.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * For every document, checks that range membership decided by {@code collator} on the
 * stored {@code "field"} string agrees with range membership decided by comparing the
 * {@code "collated"} doc value bytes against {@code startBR} and {@code endBR}.
 */
private void doTestRanges(IndexSearcher is, String startPoint, String endPoint, BytesRef startBR, BytesRef endBR, Collator collator) throws Exception {
  SortedDocValues collatedValues = MultiDocValues.getSortedValues(is.getIndexReader(), "collated");
  int docID = 0;
  while (docID < is.getIndexReader().maxDoc()) {
    String stored = is.doc(docID).getField("field").stringValue();
    boolean collatorAccepts = !(collator.compare(stored, startPoint) < 0 || collator.compare(stored, endPoint) > 0);

    // The doc values iterator is expected to visit documents in step with this loop.
    assertEquals(docID, collatedValues.nextDoc());
    BytesRef key = collatedValues.binaryValue();
    boolean luceneAccepts = !(key.compareTo(startBR) < 0 || key.compareTo(endBR) > 0);

    assertEquals(collatorAccepts, luceneAccepts);
    docID++;
  }
}
 
Example 13
Source File: MultiTerms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the largest term across all sub-{@code Terms} in {@code subs}.
 * <p>
 * A sub whose {@code getMax()} returns {@code null} is skipped, so it cannot cause a
 * {@code NullPointerException} in the comparison.
 *
 * @return the largest term reported by any sub, or {@code null} if there are no subs or every
 *         sub returned {@code null}
 * @throws IOException if a sub fails while computing its maximum term
 */
@Override
public BytesRef getMax() throws IOException {
  BytesRef maxTerm = null;
  for (Terms terms : subs) {
    BytesRef term = terms.getMax();
    if (term == null) {
      // This sub reported no maximum term; it cannot contribute.
      continue;
    }
    if (maxTerm == null || term.compareTo(maxTerm) > 0) {
      maxTerm = term;
    }
  }

  return maxTerm;
}
 
Example 14
Source File: TestTerms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes documents made of random binary tokens while tracking the smallest and largest
 * token (by {@code BytesRef#compareTo}), then checks that {@code Terms#getMin()} and
 * {@code Terms#getMax()} for the field report those same terms.
 */
public void testTermMinMaxRandom() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  // Expected extremes over every token added to the index.
  BytesRef minTerm = null;
  BytesRef maxTerm = null;
  for(int i=0;i<numDocs;i++ ){
    Document doc = new Document();
    Field field = new TextField("field", "", Field.Store.NO);
    doc.add(field);
    //System.out.println("  doc " + i);
    CannedBinaryTokenStream.BinaryToken[] tokens = new CannedBinaryTokenStream.BinaryToken[atLeast(10)];
    for(int j=0;j<tokens.length;j++) {
      // Each token is 1 to 20 random bytes.
      byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 20)];
      random().nextBytes(bytes);
      BytesRef tokenBytes = new BytesRef(bytes);
      //System.out.println("    token " + tokenBytes);
      if (minTerm == null || tokenBytes.compareTo(minTerm) < 0) {
        //System.out.println("      ** new min");
        minTerm = tokenBytes;
      }
      if (maxTerm == null || tokenBytes.compareTo(maxTerm) > 0) {
        //System.out.println("      ** new max");
        maxTerm = tokenBytes;
      }
      tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes);
    }
    // The field's content comes from the canned token stream rather than its (empty) string value.
    field.setTokenStream(new CannedBinaryTokenStream(tokens));
    w.addDocument(doc);
  }

  IndexReader r = w.getReader();
  Terms terms = MultiTerms.getTerms(r, "field");
  assertEquals(minTerm, terms.getMin());
  assertEquals(maxTerm, terms.getMax());
  
  r.close();
  w.close();
  dir.close();
}
 
Example 15
Source File: MergeSortRowIdLookup.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Orders readers by the current term of their underlying terms enum.
 *
 * @param o the reader to compare against
 * @return the result of comparing this reader's current term with that of {@code o}
 * @throws RuntimeException wrapping any {@code IOException} raised while fetching a term
 */
@Override
public int compareTo(TermsEnumReader o) {
  try {
    return _termsEnum.term().compareTo(o._termsEnum.term());
  } catch (IOException cause) {
    throw new RuntimeException(cause);
  }
}
 
Example 16
Source File: MergeSortRowIdLookup.java    From incubator-retired-blur with Apache License 2.0 5 votes vote down vote up
/**
 * Moves every reader whose current term sorts before {@code rowId} forward by calling
 * {@code seekCeil(rowId)} on its terms enum. Readers already at or past {@code rowId}
 * are left untouched.
 *
 * @param termsEnumList readers to advance
 * @param rowId         target row id
 */
private static void advance(List<TermsEnumReader> termsEnumList, BytesRef rowId) throws IOException {
  for (TermsEnumReader candidate : termsEnumList) {
    final boolean alreadyThere = candidate._termsEnum.term().compareTo(rowId) >= 0;
    if (alreadyThere) {
      continue;
    }
    candidate._termsEnum.seekCeil(rowId);
  }
}
 
Example 17
Source File: LegacyNumericRangeQuery.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Accepts {@code term} when it is not above the current upper bound, stepping through the
 * remaining sub-ranges in {@code rangeBounds} otherwise.
 *
 * @return {@code YES} if the term is at or below the current upper bound; {@code END} if the
 *         term is above it and no sub-ranges remain; {@code NO_AND_SEEK} if the term is below
 *         the first remaining bound
 */
@Override
protected final AcceptStatus accept(BytesRef term) {
  for (;;) {
    final boolean withinCurrentRange =
        currentUpperBound != null && term.compareTo(currentUpperBound) <= 0;
    if (withinCurrentRange) {
      return AcceptStatus.YES;
    }
    if (rangeBounds.isEmpty()) {
      return AcceptStatus.END;
    }
    // peek next sub-range, only seek if the current term is smaller than next lower bound
    if (term.compareTo(rangeBounds.getFirst()) < 0) {
      return AcceptStatus.NO_AND_SEEK;
    }
    // step forward to next range without seeking, as next lower range bound is less or equal current term
    nextRange();
  }
}
 
Example 18
Source File: IntersectBlockReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Tells whether the current state of the automaton is best iterated linearly
 * (without seeking).
 *
 * @param term the term to test against {@code linearUpperBound}
 * @return {@code true} only when {@code linear} is set and {@code term} sorts strictly
 *         before {@code linearUpperBound}
 */
protected boolean isLinearState(BytesRef term) {
  if (!linear) {
    return false;
  }
  return term.compareTo(linearUpperBound) < 0;
}
 
Example 19
Source File: TermRangeQueryExpression.java    From incubator-atlas with Apache License 2.0 4 votes vote down vote up
/**
 * Checks whether a value satisfies the upper bound of the range.
 * <p>
 * With no upper term every value passes. Otherwise an inclusive bound accepts values
 * less than or equal to the upper term, and an exclusive bound accepts only values
 * strictly less than it.
 *
 * @param valueBytes the value to test
 * @return {@code true} if {@code valueBytes} is within the upper bound
 */
private boolean compareUpperBound(BytesRef valueBytes) {
    if (m_upperTerm == null) {
        return true;
    }
    final int cmp = valueBytes.compareTo(m_upperTerm);
    // Inclusive bounds must admit the bound itself (cmp == 0); exclusive bounds must not.
    return m_upperInclusive ? cmp <= 0 : cmp < 0;
}
 
Example 20
Source File: BlockReader.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Tells whether the searched term lies beyond the last term of the field.
 *
 * @param searchedTerm The term being searched for.
 * @param blockStartFP The current block start file pointer.
 * @return {@code true} only when {@code blockStartFP} is the start of the field's last block
 *         and {@code searchedTerm} sorts strictly after the field's last term.
 */
protected boolean isBeyondLastTerm(BytesRef searchedTerm, long blockStartFP) {
  if (blockStartFP != fieldMetadata.getLastBlockStartFP()) {
    return false;
  }
  return searchedTerm.compareTo(fieldMetadata.getLastTerm()) > 0;
}