Java Code Examples for org.apache.lucene.index.FieldInvertState

The following examples show how to use org.apache.lucene.index.FieldInvertState. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: lucene-solr   Source File: AssertingSimilarity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Validates the supplied state with assertions, forwards the norm computation to
 * {@code delegate}, and asserts that the delegate's result is non-zero.
 *
 * <p>All checks are plain {@code assert} statements, so they only run when assertions
 * are enabled.
 *
 * @param state field inversion state to validate and pass on
 * @return the norm returned by {@code delegate.computeNorm(state)}
 */
@Override
public long computeNorm(FieldInvertState state) {
  // Basic sanity of the incoming state: present, non-empty, non-negative counters.
  assert state != null;
  assert state.getLength() > 0;
  assert state.getPosition() >= 0;
  assert state.getOffset() >= 0;
  assert state.getMaxTermFrequency() >= 0; // TODO: seems to be 0 for omitTFAP?
  // Per-field statistics must be bounded by the field length.
  assert state.getMaxTermFrequency() <= state.getLength();
  assert state.getNumOverlap() >= 0;
  assert state.getNumOverlap() < state.getLength();
  assert state.getUniqueTermCount() > 0;
  assert state.getUniqueTermCount() <= state.getLength();
  long norm = delegate.computeNorm(state);
  // The wrapped implementation must never produce a zero norm.
  assert norm != 0;
  return norm;
}
 
Example 2
Source Project: lucene-solr   Source File: TestBooleanSimilarity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code BooleanSimilarity} and {@code BM25Similarity} (with overlap
 * discounting enabled) compute the same norm for 100 randomly generated field states
 * with lengths between 1 and 100.
 */
public void testSameNormsAsBM25() {
  BooleanSimilarity sim1 = new BooleanSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 100);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS, position, length, numOverlaps, 100, maxTermFrequency, uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of routing them
    // through the floating-point delta overload, which would round them first.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
 
Example 3
Source Project: lucene-solr   Source File: TestClassicSimilarity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Checks that {@code ClassicSimilarity} and {@code BM25Similarity} (with overlap
 * discounting enabled) compute the same norm for 100 randomly generated field states
 * with lengths between 1 and 1000.
 */
public void testSameNormsAsBM25() {
  ClassicSimilarity sim1 = new ClassicSimilarity();
  BM25Similarity sim2 = new BM25Similarity();
  sim2.setDiscountOverlaps(true);
  for (int iter = 0; iter < 100; ++iter) {
    final int length = TestUtil.nextInt(random(), 1, 1000);
    final int position = random().nextInt(length);
    final int numOverlaps = random().nextInt(length);
    final int maxTermFrequency = 1;
    final int uniqueTermCount = 1;
    FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS, position, length, numOverlaps, 100, maxTermFrequency, uniqueTermCount);
    // computeNorm returns a long: compare the values exactly instead of routing them
    // through the floating-point delta overload, which would round them first.
    assertEquals(sim2.computeNorm(state), sim1.computeNorm(state));
  }
}
 
Example 4
Source Project: linden   Source File: LindenSimilarity.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns <code>state.getBoost() * (1 / sqrt(numTerms))</code>.
 *
 * <p><code>numTerms</code> is
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} minus
 * {@link org.apache.lucene.index.FieldInvertState#getNumOverlap()} when overlap
 * discounting is on (see {@link #setDiscountOverlaps}), and the plain
 * {@link org.apache.lucene.index.FieldInvertState#getLength()} otherwise.
 *
 * @param state inversion state of the field being normalized
 * @return the boost multiplied by the inverse square root of the term count
 *
 * @lucene.experimental
 */
@Override
public float lengthNorm(FieldInvertState state) {
  final int numTerms = discountOverlaps
      ? state.getLength() - state.getNumOverlap()
      : state.getLength();
  // Computed in double precision and narrowed to float before applying the boost.
  final float inverseRoot = (float) (1.0 / Math.sqrt(numTerms));
  return state.getBoost() * inverseRoot;
}
 
Example 5
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Verifies that the norm reported for field {@code f1} changes once a different
 * {@code Similarity} is installed on the {@code MemoryIndex}, and that the reader
 * still passes {@code TestUtil.checkReader}.
 *
 * @throws IOException if reading the norms fails
 */
@Test
public void testSimilarities() throws IOException {

  MemoryIndex mi = new MemoryIndex();
  mi.addField("f1", "a long text field that contains many many terms", analyzer);

  IndexSearcher searcher = mi.createSearcher();
  LeafReader reader = (LeafReader) searcher.getIndexReader();
  NumericDocValues norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  // Keep the norm as a long; a float would round large values and could make two
  // distinct norms look equal.
  long n1 = norms.longValue();

  // Norms are re-computed when we change the Similarity
  mi.setSimilarity(new Similarity() {

    @Override
    public long computeNorm(FieldInvertState state) {
      return 74;
    }

    @Override
    public SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
      // Scoring is never exercised by this test.
      throw new UnsupportedOperationException();
    }

  });
  norms = reader.getNormValues("f1");
  assertEquals(0, norms.nextDoc());
  long n2 = norms.longValue();

  assertTrue(n1 != n2);
  TestUtil.checkReader(reader);
}
 
Example 6
Source Project: lucene-solr   Source File: SimilarityBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the document length in the same way as {@link BM25Similarity}.
 *
 * <p>The term count is the unique term count for {@code IndexOptions.DOCS} fields of
 * indexes created with major version 8 or later; otherwise it is the field length,
 * minus the overlap count when {@code discountOverlaps} is set.
 *
 * @param state inversion state of the field
 * @return the term count encoded by {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean docsOnlySinceV8 =
      state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;
  if (docsOnlySinceV8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  if (discountOverlaps) {
    return SmallFloat.intToByte4(state.getLength() - state.getNumOverlap());
  }
  return SmallFloat.intToByte4(state.getLength());
}
 
Example 7
Source Project: lucene-solr   Source File: BM25Similarity.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the number of terms of the field as the norm.
 *
 * <p>The term count is the unique term count for {@code IndexOptions.DOCS} fields of
 * indexes created with major version 8 or later; otherwise it is the field length,
 * minus the overlap count when {@code discountOverlaps} is set.
 *
 * @param state inversion state of the field
 * @return the term count encoded by {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final boolean useUniqueTerms =
      state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;
  final int termCount = useUniqueTerms
      ? state.getUniqueTermCount()
      : discountOverlaps
          ? state.getLength() - state.getNumOverlap()
          : state.getLength();
  return SmallFloat.intToByte4(termCount);
}
 
Example 8
Source Project: lucene-solr   Source File: TFIDFSimilarity.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Encodes the number of terms of the field as the norm.
 *
 * <p>For {@code IndexOptions.DOCS} fields of indexes created with major version 8 or
 * later the unique term count is encoded. In every other case the field length is
 * encoded, reduced by the overlap count when {@code discountOverlaps} is set.
 *
 * @param state inversion state of the field
 * @return the term count encoded by {@code SmallFloat.intToByte4}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  if (state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8) {
    return SmallFloat.intToByte4(state.getUniqueTermCount());
  }
  final int fieldLength = state.getLength();
  final int termCount = discountOverlaps ? fieldLength - state.getNumOverlap() : fieldLength;
  return SmallFloat.intToByte4(termCount);
}
 
Example 9
Source Project: lucene-solr   Source File: TestSimilarityBase.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that a {@code DFRSimilarity} computes the same norm as {@code BM25Similarity}
 * for a field of length 5 with 2 overlaps, first with overlap discounting disabled and
 * then with it enabled.
 *
 * @throws IOException declared by the test signature
 */
public void testDiscountOverlapsBoost() throws IOException {
  final BM25Similarity reference = new BM25Similarity();
  final SimilarityBase candidate = new DFRSimilarity(new BasicModelIne(), new AfterEffectB(), new NormalizationH2());
  final FieldInvertState state = new FieldInvertState(Version.LATEST.major, "foo", IndexOptions.DOCS_AND_FREQS);
  state.setLength(5);
  state.setNumOverlap(2);
  // Same comparison for both settings of the flag, disabled first.
  for (boolean discount : new boolean[] {false, true}) {
    reference.setDiscountOverlaps(discount);
    candidate.setDiscountOverlaps(discount);
    assertEquals(reference.computeNorm(state), candidate.computeNorm(state));
  }
}
 
Example 10
Source Project: lucene-solr   Source File: LegacyBM25Similarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Delegates norm computation to the wrapped {@code bm25Similarity}.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code bm25Similarity}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long delegatedNorm = bm25Similarity.computeNorm(state);
  return delegatedNorm;
}
 
Example 11
Source Project: lucene-solr   Source File: BooleanSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Delegates norm computation to the shared {@code BM25_SIM} instance.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long bm25Norm = BM25_SIM.computeNorm(state);
  return bm25Norm;
}
 
Example 12
Source Project: lucene-solr   Source File: PerFieldSimilarityWrapper.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Looks up the similarity registered for the field named by {@code state.getName()}
 * via {@code get(...)} and lets it compute the norm.
 *
 * @param state inversion state of the field
 * @return the norm computed by the similarity selected for the field
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final String fieldName = state.getName();
  return get(fieldName).computeNorm(state);
}
 
Example 13
Source Project: lucene-solr   Source File: MultiSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Computes the norm with the first similarity in {@code sims}; the remaining entries
 * are not consulted here.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code sims[0]}
 */
@Override
public long computeNorm(FieldInvertState state) {
  final long firstSimNorm = sims[0].computeNorm(state);
  return firstSimNorm;
}
 
Example 14
Source Project: lucene-solr   Source File: TestSimilarityProvider.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the constant norm {@code 1} regardless of the supplied state.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
 
Example 15
Source Project: lucene-solr   Source File: TestSimilarityProvider.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the constant norm {@code 10} regardless of the supplied state.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 10}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 10L;
}
 
Example 16
/**
 * Returns the constant norm {@code 1} regardless of the supplied state.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
 
Example 17
Source Project: lucene-solr   Source File: JustCompileSearch.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Not supported: always throws {@link UnsupportedOperationException} carrying
 * {@code UNSUPPORTED_MSG}.
 *
 * @param state inversion state of the field (ignored)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public long computeNorm(FieldInvertState state) {
  throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
 
Example 18
Source Project: lucene-solr   Source File: TestConjunctions.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the constant norm {@code 1}; the supplied state is ignored.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  // The norm value is irrelevant here, so any fixed value will do.
  return 1L;
}
 
Example 19
Source Project: lucene-solr   Source File: TestSubScorerFreqs.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns the constant norm {@code 1} regardless of the supplied state.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 1}
 */
@Override
public long computeNorm(FieldInvertState state) {
  return 1L;
}
 
Example 20
Source Project: lucene4ir   Source File: SMARTBNNBNNSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Uses {@code state.getLength()} directly as the norm.
 *
 * @param state inversion state of the field
 * @return the value of {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  return state.getLength();
}
 
Example 21
Source Project: lucene4ir   Source File: BM25Similarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Encodes the field boost together with the number of terms in the field.
 *
 * <p>The term count is {@code state.getLength()}, reduced by
 * {@code state.getNumOverlap()} when {@code discountOverlaps} is set.
 *
 * @param state inversion state of the field
 * @return the result of {@code encodeNormValue(state.getBoost(), numTerms)}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  int termCount = state.getLength();
  if (discountOverlaps) {
    termCount -= state.getNumOverlap();
  }
  return encodeNormValue(state.getBoost(), termCount);
}
 
Example 22
Source Project: lucene4ir   Source File: OKAPIBM25Similarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Uses {@code state.getLength()} directly as the norm.
 *
 * @param state inversion state of the field
 * @return the value of {@code state.getLength()}
 */
@Override
public final long computeNorm(FieldInvertState state) {
  final long fieldLength = state.getLength();
  return fieldLength;
}
 
Example 23
Source Project: modernmt   Source File: CustomSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Returns a constant length norm of {@code 1}, so the supplied state has no influence
 * on the result.
 *
 * @param state inversion state of the field (ignored)
 * @return always {@code 1.0f}
 */
@Override
public float lengthNorm(FieldInvertState state) {
    return 1.0f;
}
 
Example 24
Source Project: incubator-retired-blur   Source File: FairSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Length normalization is not implemented for this similarity.
 *
 * <p>Signals this with {@link UnsupportedOperationException}, which is still a
 * {@link RuntimeException}, so existing handlers for the previous exception keep
 * working.
 *
 * @param fieldInvertState inversion state of the field (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public float lengthNorm(FieldInvertState fieldInvertState) {
  throw new UnsupportedOperationException("not sure");
}
 
Example 25
Source Project: lumongo   Source File: TFSimilarity.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Delegates norm computation to the shared {@code BM25_SIM} instance.
 *
 * @param state inversion state of the field
 * @return the norm computed by {@code BM25_SIM}
 */
@Override
public long computeNorm(FieldInvertState state) {
	final long bm25Norm = BM25_SIM.computeNorm(state);
	return bm25Norm;
}
 
Example 26
Source Project: lucene-solr   Source File: Similarity.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Computes the normalization value for a field, given the accumulated
 * state of term processing for this field (see {@link FieldInvertState}).
 *
 * <p>Matches in longer fields are less precise, so implementations of this
 * method usually set smaller values when <code>state.getLength()</code> is large,
 * and larger values when <code>state.getLength()</code> is small.
 *
 * <p>Note that for a given term-document frequency, greater unsigned norms
 * must produce scores that are lower or equal, i.e. for two encoded norms
 * {@code n1} and {@code n2} such that
 * {@code Long.compareUnsigned(n1, n2) > 0},
 * {@code SimScorer.score(freq, n1) <= SimScorer.score(freq, n2)}
 * must hold for any legal {@code freq}.
 *
 * <p>{@code 0} is not a legal norm, so {@code 1} is the norm that produces
 * the highest scores.
 *
 * @lucene.experimental
 *
 * @param state current processing state for this field
 * @return computed norm value
 */
public abstract long computeNorm(FieldInvertState state);