/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Random; import java.util.Set; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MockRandomMergePolicy; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SerialMergeScheduler; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; public class TestTopFieldCollectorEarlyTermination extends LuceneTestCase { private int numDocs; private List<String> terms; private Directory dir; private final Sort sort = new Sort(new SortField("ndv1", SortField.Type.LONG)); private RandomIndexWriter iw; private IndexReader reader; private static final int FORCE_MERGE_MAX_SEGMENT_COUNT = 5; private Document randomDocument() { final Document doc = new Document(); doc.add(new NumericDocValuesField("ndv1", random().nextInt(10))); doc.add(new NumericDocValuesField("ndv2", random().nextInt(10))); doc.add(new StringField("s", RandomPicks.randomFrom(random(), terms), Store.YES)); return doc; } private void createRandomIndex(boolean singleSortedSegment) throws IOException { dir = newDirectory(); numDocs = atLeast(150); final int numTerms = TestUtil.nextInt(random(), 1, numDocs / 5); Set<String> randomTerms = new HashSet<>(); while (randomTerms.size() < numTerms) { randomTerms.add(TestUtil.randomSimpleString(random())); } terms = new ArrayList<>(randomTerms); final long seed = random().nextLong(); final IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(new Random(seed))); if (iwc.getMergePolicy() instanceof MockRandomMergePolicy) { // MockRandomMP randomly wraps the leaf readers which makes merging angry iwc.setMergePolicy(newTieredMergePolicy()); } iwc.setMergeScheduler(new SerialMergeScheduler()); // for reproducible tests iwc.setIndexSort(sort); iw = new RandomIndexWriter(new Random(seed), dir, iwc); iw.setDoRandomForceMerge(false); // don't do this, it may happen anyway with MockRandomMP for (int i = 0; i < numDocs; ++i) { final Document doc = randomDocument(); iw.addDocument(doc); if (i == numDocs / 2 || (i != numDocs - 1 && random().nextInt(8) == 0)) { iw.commit(); } if (random().nextInt(15) == 0) { final String term = RandomPicks.randomFrom(random(), terms); iw.deleteDocuments(new Term("s", term)); } } if (singleSortedSegment) { iw.forceMerge(1); } else if (random().nextBoolean()) { iw.forceMerge(FORCE_MERGE_MAX_SEGMENT_COUNT); } reader = iw.getReader(); if (reader.numDocs() == 0) { iw.addDocument(new Document()); reader.close(); reader = iw.getReader(); } } private void closeIndex() throws IOException { reader.close(); iw.close(); dir.close(); } public void testEarlyTermination() throws IOException { doTestEarlyTermination(false); } public void testEarlyTerminationWhenPaging() throws IOException { doTestEarlyTermination(true); } private void doTestEarlyTermination(boolean paging) throws IOException { final int iters = atLeast(1); for (int i = 0; i < iters; ++i) { createRandomIndex(false); int maxSegmentSize = 0; for (LeafReaderContext ctx : reader.leaves()) { maxSegmentSize = Math.max(ctx.reader().numDocs(), maxSegmentSize); } for (int j = 0; j < iters; ++j) { final IndexSearcher searcher = newSearcher(reader); final int numHits = TestUtil.nextInt(random(), 1, numDocs); FieldDoc after; if (paging) { assert searcher.getIndexReader().numDocs() > 0; TopFieldDocs td = searcher.search(new MatchAllDocsQuery(), 10, sort); after = (FieldDoc) td.scoreDocs[td.scoreDocs.length - 1]; } else { after = null; } final TopFieldCollector collector1 = TopFieldCollector.create(sort, numHits, after, Integer.MAX_VALUE); final TopFieldCollector collector2 = TopFieldCollector.create(sort, numHits, after, 1); final Query query; if (random().nextBoolean()) { query = new TermQuery(new Term("s", RandomPicks.randomFrom(random(), terms))); } else { query = new MatchAllDocsQuery(); } searcher.search(query, collector1); searcher.search(query, collector2); TopDocs td1 = collector1.topDocs(); TopDocs td2 = collector2.topDocs(); assertFalse(collector1.isEarlyTerminated()); if (paging == false && maxSegmentSize > numHits && query instanceof MatchAllDocsQuery) { // Make sure that we sometimes early terminate assertTrue(collector2.isEarlyTerminated()); } if (collector2.isEarlyTerminated()) { assertTrue(td2.totalHits.value >= td1.scoreDocs.length); assertTrue(td2.totalHits.value <= reader.maxDoc()); } else { assertEquals(td2.totalHits.value, td1.totalHits.value); } CheckHits.checkEqual(query, td1.scoreDocs, td2.scoreDocs); } closeIndex(); } } public void testCanEarlyTerminateOnDocId() { assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(SortField.FIELD_DOC), new Sort(SortField.FIELD_DOC))); assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(SortField.FIELD_DOC), null)); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), null)); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("b", SortField.Type.LONG)))); assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(SortField.FIELD_DOC), new Sort(new SortField("b", SortField.Type.LONG)))); assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(SortField.FIELD_DOC), new Sort(new SortField("b", SortField.Type.LONG), SortField.FIELD_DOC))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), new Sort(SortField.FIELD_DOC))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG), SortField.FIELD_DOC), new Sort(SortField.FIELD_DOC))); } public void testCanEarlyTerminateOnPrefix() { assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("a", SortField.Type.LONG)))); assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); assertTrue(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG, true)), null)); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG, true)), new Sort(new SortField("a", SortField.Type.LONG, false)))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG)))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("a", SortField.Type.LONG), new SortField("c", SortField.Type.STRING)))); assertFalse(TopFieldCollector.canEarlyTerminate( new Sort(new SortField("a", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)), new Sort(new SortField("c", SortField.Type.LONG), new SortField("b", SortField.Type.STRING)))); } }