Java Code Examples for org.apache.lucene.index.IndexWriter#forceMerge()

The following examples show how to use org.apache.lucene.index.IndexWriter#forceMerge() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BaseShapeTestCase.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
protected void indexRandomShapes(IndexWriter w, Object... shapes) throws Exception {
  Set<Integer> deleted = new HashSet<>();
  for (int id = 0; id < shapes.length; ++id) {
    Document doc = new Document();
    doc.add(newStringField("id", "" + id, Field.Store.NO));
    doc.add(new NumericDocValuesField("id", id));
    if (shapes[id] != null) {
      addShapeToDoc(FIELD_NAME, doc, shapes[id]);
    }
    w.addDocument(doc);
    if (id > 0 && random().nextInt(100) == 42) {
      int idToDelete = random().nextInt(id);
      w.deleteDocuments(new Term("id", ""+idToDelete));
      deleted.add(idToDelete);
      if (VERBOSE) {
        System.out.println("   delete id=" + idToDelete);
      }
    }
  }

  if (randomBoolean()) {
    w.forceMerge(1);
  }
}
 
Example 2
Source File: PayloadHelper.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up a RAM-resident Directory, and adds documents (using English.intToEnglish()) with two fields: field and multiField
 * and analyzes them using the PayloadAnalyzer
 * @param similarity The Similarity class to use in the Searcher
 * @param numDocs The num docs to add
 * @return An IndexSearcher
 */
// TODO: randomize
public IndexSearcher setUp(Random random, Similarity similarity, int numDocs) throws IOException {
  Directory directory = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
  PayloadAnalyzer analyzer = new PayloadAnalyzer();

  // TODO randomize this
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
      analyzer).setSimilarity(similarity));
  // writer.infoStream = System.out;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(new TextField(FIELD, English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(MULTI_FIELD, English.intToEnglish(i) + "  " + English.intToEnglish(i), Field.Store.YES));
    doc.add(new TextField(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.forceMerge(1);
  reader = DirectoryReader.open(writer);
  writer.close();

  IndexSearcher searcher = LuceneTestCase.newSearcher(LuceneTestCase.getOnlyLeafReader(reader));
  searcher.setSimilarity(similarity);
  return searcher;
}
 
Example 3
Source File: TestScorerPerf.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception {
  int[] freq = new int[nTerms];
  Term[] terms = new Term[nTerms];
  for (int i=0; i<nTerms; i++) {
    int f = (nTerms+1)-i;  // make first terms less frequent
    freq[i] = (int)Math.ceil(Math.pow(f,power));
    terms[i] = new Term("f",Character.toString((char)('A'+i)));
  }

  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())).setOpenMode(OpenMode.CREATE));
  for (int i=0; i<nDocs; i++) {
    Document d = new Document();
    for (int j=0; j<nTerms; j++) {
      if (random().nextInt(freq[j]) == 0) {
        d.add(newStringField("f", terms[j].text(), Field.Store.NO));
        //System.out.println(d);
      }
    }
    iw.addDocument(d);
  }
  iw.forceMerge(1);
  iw.close();
}
 
Example 4
Source File: TestFieldCacheSort.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** test that we throw exception on multi-valued field, creates corrupt reader, use SORTED_SET instead */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for(int i=0; i<5; i++) {
      Document doc = new Document();
      doc.add(new StringField("string", "a"+i, Field.Store.NO));
      doc.add(new StringField("string", "b"+i, Field.Store.NO));
      writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(
      new SortField("string", SortField.Type.STRING),
      SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
                       Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
 
Example 5
Source File: TestLucene84PostingsFormat.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
  for(int i=0;i<25;i++) {
    Document doc = new Document();
    doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO));
    doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);

  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
  // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
  Stats stats = field.getStats();
  assertEquals(0, stats.floorBlockCount);
  assertEquals(2, stats.nonFloorBlockCount);
  r.close();
  w.close();
  d.close();
}
 
Example 6
Source File: TestMergeSchedulerExternal.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
public void testCustomMergeScheduler() throws Exception {
  // we don't really need to execute anything, just to make sure the custom MS
  // compiles. But ensure that it can be used as well, e.g., no other hidden
  // dependencies or something. Therefore, don't use any random API !
  Directory dir = new ByteBuffersDirectory();
  IndexWriterConfig conf = new IndexWriterConfig(null);
  conf.setMergeScheduler(new ReportingMergeScheduler());
  IndexWriter writer = new IndexWriter(dir, conf);
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.addDocument(new Document());
  writer.commit(); // trigger flush
  writer.forceMerge(1);
  writer.close();
  dir.close();
}
 
Example 7
Source File: TestPointQueries.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Nightly
public void testInversePointRange() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
  final int numDims = TestUtil.nextInt(random(), 1, 3);
  final int numDocs = atLeast(10 * BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE); // we need multiple leaves to enable this optimization
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    int[] values = new int[numDims];
    Arrays.fill(values, i);
    doc.add(new IntPoint("f", values));
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader r = DirectoryReader.open(w);
  w.close();

  IndexSearcher searcher = newSearcher(r);
  int[] low = new int[numDims];
  int[] high = new int[numDims];
  Arrays.fill(high, numDocs - 2);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(low, BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE + 1);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));
  Arrays.fill(high, numDocs - BKDWriter.DEFAULT_MAX_POINTS_IN_LEAF_NODE);
  assertEquals(high[0] - low[0] + 1, searcher.count(IntPoint.newRangeQuery("f", low, high)));

  r.close();
  dir.close();
}
 
Example 8
Source File: MergeCommand.java    From clue with Apache License 2.0 5 votes vote down vote up
@Override
public void execute(Namespace args, PrintStream out) throws Exception {
  int count = args.getInt("num");
  IndexWriter writer = ctx.getIndexWriter();
  if (writer != null) {
    writer.forceMerge(count, true);
    writer.commit();
    ctx.refreshReader();
  }
  else {
    out.println("unable to open index writer, index is in readonly mode");
  }
}
 
Example 9
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private void doTestSparseNumericBlocksOfVariousBitsPerValue(double density) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  Document doc = new Document();
  Field storedField = newStringField("stored", "", Field.Store.YES);
  Field dvField = new NumericDocValuesField("dv", 0);
  doc.add(storedField);
  doc.add(dvField);

  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier longs = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    if (random().nextDouble() > density) {
      writer.addDocument(new Document());
      continue;
    }
    long value = longs.getAsLong();
    storedField.setStringValue(Long.toString(value));
    dvField.setLongValue(value);
    writer.addDocument(doc);
  }

  writer.forceMerge(1);

  writer.close();
  
  // compare
  assertDVIterate(dir);
  assertDVAdvance(dir, 1); // Tests all jump-lengths from 1 to maxDoc (quite slow ~= 1 minute for 200K docs)

  dir.close();
}
 
Example 10
Source File: TestFieldCache.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  NUM_DOCS = atLeast(500);
  NUM_ORDS = atLeast(2);
  directory = newDirectory();
  IndexWriter writer= new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(new LogDocMergePolicy()));
  long theLong = Long.MAX_VALUE;
  double theDouble = Double.MAX_VALUE;
  int theInt = Integer.MAX_VALUE;
  float theFloat = Float.MAX_VALUE;
  unicodeStrings = new String[NUM_DOCS];
  multiValued = new BytesRef[NUM_DOCS][NUM_ORDS];
  if (VERBOSE) {
    System.out.println("TEST: setUp");
  }
  for (int i = 0; i < NUM_DOCS; i++){
    Document doc = new Document();
    doc.add(new LongPoint("theLong", theLong--));
    doc.add(new DoublePoint("theDouble", theDouble--));
    doc.add(new IntPoint("theInt", theInt--));
    doc.add(new FloatPoint("theFloat", theFloat--));
    if (i%2 == 0) {
      doc.add(new IntPoint("sparse", i));
    }

    if (i%2 == 0) {
      doc.add(new IntPoint("numInt", i));
    }

    // sometimes skip the field:
    if (random().nextInt(40) != 17) {
      unicodeStrings[i] = generateString(i);
      doc.add(newStringField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES));
    }

    // sometimes skip the field:
    if (random().nextInt(10) != 8) {
      for (int j = 0; j < NUM_ORDS; j++) {
        String newValue = generateString(i);
        multiValued[i][j] = new BytesRef(newValue);
        doc.add(newStringField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES));
      }
      Arrays.sort(multiValued[i]);
    }
    writer.addDocument(doc);
  }
  writer.forceMerge(1); // this test relies on one segment and docid order
  IndexReader r = DirectoryReader.open(writer);
  assertEquals(1, r.leaves().size());
  reader = r.leaves().get(0).reader();
  TestUtil.checkReader(reader);
  writer.close();
}
 
Example 11
Source File: TestIndexOrDocValuesQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i < 1000) {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j =0; j < 500; j++) {
        doc.add(new LongPoint("f2", 42L));
        doc.add(new SortedNumericDocValuesField("f2", 42L));
      }
    } else if (i == 1001) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new SortedNumericDocValuesField("f2", 42L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j =0; j < 100; j++) {
        doc.add(new LongPoint("f2", 2L));
        doc.add(new SortedNumericDocValuesField("f2", 2L));
      }
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();

  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s1.twoPhaseIterator()); // means we use points

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();

  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q3 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();

  final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
  final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s3.twoPhaseIterator()); // means we use doc values

  reader.close();
  w.close();
  dir.close();
}
 
Example 12
Source File: TestDirectoryTaxonomyReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Test
public void testOpenIfChangedMergedSegment() throws Exception {
  // test openIfChanged() when all index segments were merged - used to be
  // a bug in ParentArray, caught by testOpenIfChangedManySegments - only
  // this test is not random
  Directory dir = newDirectory();
  
  // hold onto IW to forceMerge
  // note how we don't close it, since DTW will close it.
  final IndexWriter iw = new IndexWriter(dir,
      new IndexWriterConfig(new MockAnalyzer(random()))
          .setMergePolicy(new LogByteSizeMergePolicy()));
  DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
    @Override
    protected IndexWriter openIndexWriter(Directory directory,
        IndexWriterConfig config) throws IOException {
      return iw;
    }
  };
  
  TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
  assertEquals(1, reader.getSize());
  assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);

  // add category and call forceMerge -- this should flush IW and merge segments down to 1
  // in ParentArray.initFromReader, this used to fail assuming there are no parents.
  writer.addCategory(new FacetLabel("1"));
  iw.forceMerge(1);
  
  // now calling openIfChanged should trip on the bug
  TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
  assertNotNull(newtr);
  reader.close();
  reader = newtr;
  assertEquals(2, reader.getSize());
  assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 13
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Nightly
public void testSortedSetAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput out = new ByteBuffersDataOutput();
    Document doc = new Document();
    SortedSetDocValuesField field1 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field1);
    SortedSetDocValuesField field2 = new SortedSetDocValuesField("sset", new BytesRef());
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      BytesRef s1 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      BytesRef s2 = new BytesRef(TestUtil.randomSimpleString(random(), 2));
      field1.setBytesValue(s1);
      field2.setBytesValue(s2);
      w.addDocument(doc);
      Set<BytesRef> set = new TreeSet<>(Arrays.asList(s1, s2));
      out.writeVInt(set.size());
      for (BytesRef ref : set) {
        out.writeVInt(ref.length);
        out.writeBytes(ref.bytes, ref.offset, ref.length);
      }
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();
    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedSetDocValues values = sr.getSortedSetDocValues("sset");
    assertNotNull(values);
    ByteBuffersDataInput in = out.toDataInput();
    BytesRefBuilder b = new BytesRefBuilder();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      final int numValues = in.readVInt();

      for (int j = 0; j < numValues; ++j) {
        b.setLength(in.readVInt());
        b.grow(b.length());
        in.readBytes(b.bytes(), 0, b.length());
        assertEquals(b.get(), values.lookupOrd(values.nextOrd()));
      }

      assertEquals(SortedSetDocValues.NO_MORE_ORDS, values.nextOrd());
    }
    r.close();
    dir.close();
  }
}
 
Example 14
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Nightly
public void testSortedNumericAroundBlockSize() throws IOException {
  final int frontier = 1 << Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
  for (int maxDoc = frontier - 1; maxDoc <= frontier + 1; ++maxDoc) {
    final Directory dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
    ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();

    Document doc = new Document();
    SortedNumericDocValuesField field1 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field1);
    SortedNumericDocValuesField field2 = new SortedNumericDocValuesField("snum", 0L);
    doc.add(field2);
    for (int i = 0; i < maxDoc; ++i) {
      long s1 = random().nextInt(100);
      long s2 = random().nextInt(100);
      field1.setLongValue(s1);
      field2.setLongValue(s2);
      w.addDocument(doc);
      buffer.writeVLong(Math.min(s1, s2));
      buffer.writeVLong(Math.max(s1, s2));
    }

    w.forceMerge(1);
    DirectoryReader r = DirectoryReader.open(w);
    w.close();
    LeafReader sr = getOnlyLeafReader(r);
    assertEquals(maxDoc, sr.maxDoc());
    SortedNumericDocValues values = sr.getSortedNumericDocValues("snum");
    assertNotNull(values);
    ByteBuffersDataInput dataInput = buffer.toDataInput();
    for (int i = 0; i < maxDoc; ++i) {
      assertEquals(i, values.nextDoc());
      assertEquals(2, values.docValueCount());
      assertEquals(dataInput.readVLong(), values.nextValue());
      assertEquals(dataInput.readVLong(), values.nextValue());
    }
    r.close();
    dir.close();
  }
}
 
Example 15
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    
    int valueCount = (int) counts.getAsLong();
    long valueArray[] = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);

  writer.close();
  
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String expected[] = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String actual[] = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
 
Example 16
Source File: TestReqOptSumScorer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testMaxBlock() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  ft.setTokenized(true);
  ft.freeze();

  for (int i = 0; i < 1024; i++) {
    // create documents with an increasing number of As and one B
    Document doc = new Document();
    doc.add(new Field("foo", new TermFreqTokenStream("a", i+1), ft));
    if (random().nextFloat() < 0.5f) {
      doc.add(new Field("foo", new TermFreqTokenStream("b", 1), ft));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();
  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setSimilarity(new TestSimilarity.SimpleSimilarity());
  // freq == score
  // searcher.setSimilarity(new TestSimilarity.SimpleSimilarity());
  final Query reqQ = new TermQuery(new Term("foo", "a"));
  final Query optQ = new TermQuery(new Term("foo", "b"));
  final Query boolQ = new BooleanQuery.Builder()
      .add(reqQ, Occur.MUST)
      .add(optQ, Occur.SHOULD)
      .build();
  Scorer actual = reqOptScorer(searcher, reqQ, optQ, true);
  Scorer expected = searcher
      .createWeight(boolQ, ScoreMode.COMPLETE, 1)
      .scorer(searcher.getIndexReader().leaves().get(0));
  actual.setMinCompetitiveScore(Math.nextUp(1));
  // Checks that all blocks are fully visited
  for (int i = 0; i < 1024; i++) {
    assertEquals(i, actual.iterator().nextDoc());
    assertEquals(i, expected.iterator().nextDoc());
    assertEquals(actual.score(),expected.score(), 0);
  }
  reader.close();
  dir.close();
}
 
Example 17
Source File: TestExternalCodecs.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testPerFieldCodec() throws Exception {
  
  final int NUM_DOCS = atLeast(173);
  if (VERBOSE) {
    System.out.println("TEST: NUM_DOCS=" + NUM_DOCS);
  }

  BaseDirectoryWrapper dir = newDirectory();
  dir.setCheckIndexOnClose(false); // we use a custom codec provider
  IndexWriter w = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random())).
      setCodec(new CustomPerFieldCodec()).
          setMergePolicy(newLogMergePolicy(3))
  );
  Document doc = new Document();
  // uses default codec:
  doc.add(newTextField("field1", "this field uses the standard codec as the test", Field.Store.NO));
  // uses memory codec:
  Field field2 = newTextField("field2", "this field uses the memory codec as the test", Field.Store.NO);
  doc.add(field2);
  
  Field idField = newStringField("id", "", Field.Store.NO);

  doc.add(idField);
  for(int i=0;i<NUM_DOCS;i++) {
    idField.setStringValue(""+i);
    w.addDocument(doc);
    if ((i+1)%10 == 0) {
      w.commit();
    }
  }
  if (VERBOSE) {
    System.out.println("TEST: now delete id=77");
  }
  w.deleteDocuments(new Term("id", "77"));

  IndexReader r = DirectoryReader.open(w);
  
  assertEquals(NUM_DOCS-1, r.numDocs());
  IndexSearcher s = newSearcher(r);
  assertEquals(NUM_DOCS-1, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS-1, s.count(new TermQuery(new Term("field2", "memory"))));
  r.close();

  if (VERBOSE) {
    System.out.println("\nTEST: now delete 2nd doc");
  }
  w.deleteDocuments(new Term("id", "44"));

  if (VERBOSE) {
    System.out.println("\nTEST: now force merge");
  }
  w.forceMerge(1);
  if (VERBOSE) {
    System.out.println("\nTEST: now open reader");
  }
  r = DirectoryReader.open(w);
  assertEquals(NUM_DOCS-2, r.maxDoc());
  assertEquals(NUM_DOCS-2, r.numDocs());
  s = newSearcher(r);
  assertEquals(NUM_DOCS-2, s.count(new TermQuery(new Term("field1", "standard"))));
  assertEquals(NUM_DOCS-2, s.count(new TermQuery(new Term("field2", "memory"))));
  assertEquals(1, s.count(new TermQuery(new Term("id", "76"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "77"))));
  assertEquals(0, s.count(new TermQuery(new Term("id", "44"))));

  if (VERBOSE) {
    System.out.println("\nTEST: now close NRT reader");
  }
  r.close();

  w.close();

  dir.close();
}
 
Example 18
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
  
  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir.toPath());
  IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(new WhitespaceAnalyzer())
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size()==0);

    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}
 
Example 19
Source File: TestReqOptSumScorer.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testMaxScoreSegment() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  for (String[] values : Arrays.asList(
      new String[]{ "A" },            // 0
      new String[]{ "A" },            // 1
      new String[]{ },                // 2
      new String[]{ "A", "B" },       // 3
      new String[]{ "A" },            // 4
      new String[]{ "B" },            // 5
      new String[]{ "A", "B" },       // 6
      new String[]{ "B" }             // 7
  )) {
    Document doc = new Document();
    for (String value : values) {
      doc.add(new StringField("foo", value, Store.NO));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
  final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
  Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextDown(1f));
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextUp(1f));
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, true);
  scorer.setMinCompetitiveScore(Math.nextUp(2f));
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  reader.close();
  dir.close();
}
 
Example 20
Source File: TestIndexOrDocValuesQuery.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testUseIndexForSelectiveQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i == 42) {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 42L));
      doc.add(new NumericDocValuesField("f2", 42L));
    } else if (i == 100) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new NumericDocValuesField("f2", 2L));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), NumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();

  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s1.twoPhaseIterator()); // means we use doc values

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), NumericDocValuesField.newSlowRangeQuery("f2", 42L, 42L)), Occur.MUST)
      .build();

  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  reader.close();
  w.close();
  dir.close();
}