org.apache.lucene.index.FieldInvertState Java Examples

The following examples show how to use org.apache.lucene.index.FieldInvertState. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AssertingSimilarity.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Asserts a set of invariants on {@code state}, forwards the norm computation to
 * {@code delegate}, and asserts that the resulting norm is not {@code 0}.
 *
 * @param state field inversion state to validate and pass on; asserted to be non-null
 * @return the value returned by {@code delegate.computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  assert state != null;
  // Length must be positive; position and offset must be non-negative.
  assert state.getLength() > 0;
  assert state.getPosition() >= 0;
  assert state.getOffset() >= 0;
  // The maximum term frequency is non-negative and never exceeds the length.
  assert state.getMaxTermFrequency() >= 0; // TODO: seems to be 0 for omitTFAP?
  assert state.getMaxTermFrequency() <= state.getLength();
  // The overlap count is non-negative and strictly smaller than the length.
  assert state.getNumOverlap() >= 0;
  assert state.getNumOverlap() < state.getLength();
  // There is at least one unique term, and no more unique terms than the length.
  assert state.getUniqueTermCount() > 0;
  assert state.getUniqueTermCount() <= state.getLength();
  long norm = delegate.computeNorm(state);
  // The delegate is expected to return a non-zero norm.
  assert norm != 0;
  return norm;
}
 
Example #2
Source File: TestClassicSimilarity.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link ClassicSimilarity} and {@link BM25Similarity} (with overlap
 * discounting enabled) compute the same norm for 100 randomly generated field states.
 */
public void testSameNormsAsBM25() {
  ClassicSimilarity sim1 = new ClassicSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 1000);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS, position, length, numOverlaps, 100, maxTermFrequency, uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of widening
    // both sides to float, which the three-argument (delta) overload would do.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
 
Example #3
Source File: TestBooleanSimilarity.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@link BooleanSimilarity} and {@link BM25Similarity} (with overlap
 * discounting enabled) compute the same norm for 100 randomly generated field states.
 */
public void testSameNormsAsBM25() {
  BooleanSimilarity sim1 = new BooleanSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 100);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS, position, length, numOverlaps, 100, maxTermFrequency, uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of widening
    // both sides to float, which the three-argument (delta) overload would do.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
 
Example #4
Source File: TestMemoryIndex.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the norm exposed by a {@link MemoryIndex} reader changes after a
 * different {@link Similarity} is installed on the index.
 *
 * @throws IOException if reading norms or checking the reader fails
 */
@Test
public void testSimilarities() throws IOException {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  // Keep the norm as a long: storing it in a float would round large values
  // and could make two distinct norms compare as equal.
  long n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(new Similarity() {

    @Override
    public long computeNorm(FieldInvertState state) {
      return 74;
    }

    @Override
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      throw new UnsupportedOperationException();
    }

  });
  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  long n2 = norms.longValue();

  assertTrue(n1 != n2);
  TestUtil.checkReader(reader);
}
 
Example #5
Source File: LindenSimilarity.java    From linden with Apache License 2.0 5 votes vote down vote up
/**
 * Returns <code>state.getBoost() * (1 / sqrt(numTerms))</code>.
 *
 * <p><code>numTerms</code> is
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} minus
 * {@link org.apache.lucene.index.FieldInvertState#getNumOverlap()} when overlaps are
 * discounted (see {@link #setDiscountOverlaps}), and plain
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} otherwise.
 *
 * @lucene.experimental
 */
@Override
public float lengthNorm(FieldInvertState state) {
  final int termCount = discountOverlaps
      ? state.getLength() - state.getNumOverlap()
      : state.getLength();
  // The reciprocal square root is computed in double precision, then narrowed to float.
  final float inverseRoot = (float) (1.0 / Math.sqrt(termCount));
  return state.getBoost() * inverseRoot;
}
 
Example #6
Source File: SimilarityBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the document length the same way {@link BM25Similarity} does.
 *
 * <p>For fields indexed with {@link IndexOptions#DOCS} in an index whose created major
 * version is at least 8, the unique term count is encoded. Otherwise the field length
 * is encoded, reduced by the overlap count when overlaps are discounted.
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  int tokenCount = state.getLength();
  if (discountOverlaps) {
    tokenCount -= state.getNumOverlap();
  }
  return SmallFloat.intToByte4(tokenCount);
}
 
Example #7
Source File: BM25Similarity.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Encodes a term count for the field with {@code SmallFloat.intToByte4}.
 *
 * <p>The unique term count is used for {@link IndexOptions#DOCS} fields in indexes
 * whose created major version is at least 8. Otherwise the field length is used,
 * minus the overlap count when {@code discountOverlaps} is set.
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean useUniqueTerms =
      state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;
  final int fieldLength =
      useUniqueTerms
          ? state.getUniqueTermCount()
          : (discountOverlaps ? state.getLength() - state.getNumOverlap() : state.getLength());
  return SmallFloat.intToByte4(fieldLength);
}
 
Example #8
Source File: TFIDFSimilarity.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Picks the term count that represents the field and encodes it with
 * {@code SmallFloat.intToByte4}.
 *
 * @param state inversion state of the field being indexed
 * @return the encoded term count
 */
@Override
public final long computeNorm(FieldInvertState state) {
  // DOCS-only fields in indexes created with major version >= 8 use the unique term count.
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  if (!discountOverlaps) {
    return SmallFloat.intToByte4(state.getLength());
  }
  return SmallFloat.intToByte4(state.getLength() - state.getNumOverlap());
}
 
Example #9
Source File: TestSimilarityBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@link DFRSimilarity} computes the same norm as {@link BM25Similarity}
 * for a fixed field state, first with overlap discounting off and then with it on.
 */
public void testDiscountOverlapsBoost() throws IOException {
  final BM25Similarity reference = new BM25Similarity();
  final SimilarityBase candidate = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());

  final FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS);
  state.setLength(5);
  state.setNumOverlap(2);

  for (boolean discount : new boolean[] {false, true}) {
    reference.setDiscountOverlaps(discount);
    candidate.setDiscountOverlaps(discount);
    assertEquals(reference.computeNorm(state), candidate.computeNorm(state));
  }
}
 
Example #10
Source File: TFSimilarity.java    From lumongo with Apache License 2.0 4 votes vote down vote up
/**
 * Delegates norm computation to {@code BM25_SIM}.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
	final long norm = BM25_SIM.computeNorm(state);
	return norm;
}
 
Example #11
Source File: FairSimilarity.java    From incubator-retired-blur with Apache License 2.0 4 votes vote down vote up
/**
 * Not implemented: this method always throws.
 *
 * @param fieldInvertState unused
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public float lengthNorm(FieldInvertState fieldInvertState) {
  // UnsupportedOperationException is a RuntimeException, so existing callers that
  // catch RuntimeException keep working while the failure reason becomes explicit.
  throw new UnsupportedOperationException("not sure");
}
 
Example #12
Source File: CustomSimilarity.java    From modernmt with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the constant {@code 1} for every field; the given state is not consulted.
 *
 * @param state unused
 * @return always {@code 1}
 */
@Override
public float lengthNorm(FieldInvertState state) {
    final float constantNorm = 1.0f;
    return constantNorm;
}
 
Example #13
Source File: OKAPIBM25Similarity.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Uses the field length, unencoded, as the norm.
 *
 * @param state inversion state of the field
 * @return {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
    final long fieldLength = state.getLength();
    return fieldLength;
}
 
Example #14
Source File: BM25Similarity.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Encodes the field boost together with the number of terms in the field.
 *
 * <p>The number of terms is the field length, reduced by the overlap count when
 * {@code discountOverlaps} is set.
 *
 * @param state inversion state of the field
 * @return the value produced by {@code encodeNormValue}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  int termCount = state.getLength();
  if (discountOverlaps) {
    termCount -= state.getNumOverlap();
  }
  return encodeNormValue(state.getBoost(), termCount);
}
 
Example #15
Source File: SMARTBNNBNNSimilarity.java    From lucene4ir with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the field length itself as the norm, without any encoding.
 *
 * @param state inversion state of the field
 * @return {@code state.getLength()} widened to {@code long}
 */
@Override
public final long computeNorm(FieldInvertState state) {
    return (long) state.getLength();
}
 
Example #16
Source File: TestSubScorerFreqs.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a fixed norm of {@code 1} for every field; {@code state} is not read.
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
 
Example #17
Source File: TestConjunctions.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a fixed norm of {@code 1}; the norm value is irrelevant here.
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long placeholderNorm = 1; // we dont care
  return placeholderNorm;
}
 
Example #18
Source File: JustCompileSearch.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Always throws; this implementation does not compute a norm.
 *
 * @param state unused
 * @return never returns normally
 * @throws UnsupportedOperationException always, constructed with {@code UNSUPPORTED_MSG}
 */
@Override
public long computeNorm(FieldInvertState state) {
  throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
 
Example #19
Source File: TestBooleanQueryVisitSubscorers.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Ignores {@code state} and reports the same norm, {@code 1}, for every field.
 *
 * @param state unused
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long fixedNorm = 1L;
  return fixedNorm;
}
 
Example #20
Source File: TestSimilarityProvider.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a fixed norm of {@code 10} for every field; {@code state} is not read.
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 10L;
}
 
Example #21
Source File: TestSimilarityProvider.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Returns a fixed norm of {@code 1} for every field.
 *
 * @param state unused
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
 
Example #22
Source File: MultiSimilarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Computes the norm with the first entry of {@code sims} only; the remaining
 * entries are not consulted by this method.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code sims[0]}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long firstNorm = sims[0].computeNorm(state);
  return firstNorm;
}
 
Example #23
Source File: PerFieldSimilarityWrapper.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the similarity for the field via {@code get(state.getName())} and lets it
 * compute the norm.
 *
 * @param state inversion state of the field
 * @return the norm computed by the similarity selected for the field
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final long fieldNorm = get(state.getName()).computeNorm(state);
  return fieldNorm;
}
 
Example #24
Source File: BooleanSimilarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Forwards norm computation to {@code BM25_SIM} and returns its result unchanged.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long bm25Norm = BM25_SIM.computeNorm(state);
  return bm25Norm;
}
 
Example #25
Source File: LegacyBM25Similarity.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Forwards norm computation to the wrapped {@code bm25Similarity}.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code bm25Similarity}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long wrappedNorm = bm25Similarity.computeNorm(state);
  return wrappedNorm;
}
 
Example #26
Source File: Similarity.java    From lucene-solr with Apache License 2.0 2 votes vote down vote up
/**
 * Computes the normalization value for a field, given the accumulated
 * state of term processing for this field (see {@link FieldInvertState}).
 *
 * <p>Matches in longer fields are less precise, so implementations of this
 * method usually set smaller values when <code>state.getLength()</code> is large,
 * and larger values when <code>state.getLength()</code> is small.
 *
 * <p>Note that for a given term-document frequency, greater unsigned norms
 * must produce scores that are lower or equal, i.e. for two encoded norms
 * {@code n1} and {@code n2} such that
 * {@code Long.compareUnsigned(n1, n2) > 0},
 * {@code SimScorer.score(freq, n1) <= SimScorer.score(freq, n2)}
 * must hold for any legal {@code freq}.
 *
 * <p>{@code 0} is not a legal norm, so {@code 1} is the norm that produces
 * the highest scores.
 *
 * @lucene.experimental
 *
 * @param state current processing state for this field
 * @return computed norm value
 */
public abstract long computeNorm(FieldInvertState state);