Java Code Examples for org.apache.lucene.index.IndexWriterConfig#setMergePolicy()

The following examples show how to use org.apache.lucene.index.IndexWriterConfig#setMergePolicy() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Index.java    From dacapobench with Apache License 2.0 6 votes vote down vote up
/**
 * Index all text files under a directory.
 */
public void main(final File INDEX_DIR, final String[] args) throws IOException {
  IndexWriterConfig IWConfig = new IndexWriterConfig();
  IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
  IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
  IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
  for (int arg = 0; arg < args.length; arg++) {
    final File docDir = new File(args[arg]);
    if (!docDir.exists() || !docDir.canRead()) {
      System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path");
      throw new IOException("Cannot read from document directory");
    }

    indexDocs(writer, docDir);
    System.out.println("Optimizing...");
    writer.forceMerge(1);
  }
  writer.close();
}
 
Example 2
Source File: TestSuggestField.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String> suggestFields) {
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  Codec filterCodec = new Lucene86Codec() {
    CompletionPostingsFormat.FSTLoadMode fstLoadMode =
        RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
    PostingsFormat postingsFormat = new Completion84PostingsFormat(fstLoadMode);

    @Override
    public PostingsFormat getPostingsFormatForField(String field) {
      if (suggestFields.contains(field)) {
        return postingsFormat;
      }
      return super.getPostingsFormatForField(field);
    }
  };
  iwc.setCodec(filterCodec);
  return iwc;
}
 
Example 3
Source File: DocumentValueSourceDictionaryTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Test
public void testLongValuesSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
 
Example 4
Source File: DocumentValueSourceDictionaryTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Test
public void testValueSourceEmptyReader() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  // Make sure the index is created?
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  writer.commit();
  writer.close();
  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentValueSourceDictionary(ir, FIELD_NAME, LongValuesSource.constant(10), PAYLOAD_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();

  assertNull(inputIterator.next());
  assertEquals(inputIterator.weight(), 0);
  assertNull(inputIterator.payload());

  IOUtils.close(ir, analyzer, dir);
}
 
Example 5
Source File: TestConjunctions.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
@Override
public void setUp() throws Exception {
  super.setUp();
  analyzer = new MockAnalyzer(random());
  dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy()); // we will use docids to validate
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  writer.addDocument(doc("lucene", "lucene is a very popular search engine library"));
  writer.addDocument(doc("solr", "solr is a very popular search server and is using lucene"));
  writer.addDocument(doc("nutch", "nutch is an internet search engine with web crawler and is using lucene and hadoop"));
  reader = writer.getReader();
  writer.close();
  searcher = newSearcher(reader);
  searcher.setSimilarity(new TFSimilarity());
}
 
Example 6
Source File: TestValueSources.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwConfig = newIndexWriterConfig(analyzer);
  iwConfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
  for (String [] doc : documents) {
    Document document = new Document();
    document.add(new StringField("id", doc[0], Field.Store.NO));
    document.add(new SortedDocValuesField("id", new BytesRef(doc[0])));
    document.add(new NumericDocValuesField("double", Double.doubleToRawLongBits(Double.parseDouble(doc[1]))));
    document.add(new NumericDocValuesField("float", Float.floatToRawIntBits(Float.parseFloat(doc[2]))));
    document.add(new NumericDocValuesField("int", Integer.parseInt(doc[3])));
    document.add(new NumericDocValuesField("long", Long.parseLong(doc[4])));
    document.add(new StringField("string", doc[5], Field.Store.NO));
    document.add(new SortedDocValuesField("string", new BytesRef(doc[5])));
    document.add(new TextField("text", doc[6], Field.Store.NO));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[7]))));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[8]))));
    document.add(new SortedNumericDocValuesField("floatMv", NumericUtils.floatToSortableInt(Float.parseFloat(doc[9]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[7]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[8]))));
    document.add(new SortedNumericDocValuesField("doubleMv", NumericUtils.doubleToSortableLong(Double.parseDouble(doc[9]))));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[10])));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[11])));
    document.add(new SortedNumericDocValuesField("intMv", Long.parseLong(doc[12])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[10])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[11])));
    document.add(new SortedNumericDocValuesField("longMv", Long.parseLong(doc[12])));
    iw.addDocument(document);
  }
  
  reader = iw.getReader();
  searcher = newSearcher(reader);
  iw.close();
}
 
Example 7
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private IndexWriter createFastIndexWriter(Directory dir, int maxBufferedDocs) throws IOException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(maxBufferedDocs);
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  return new IndexWriter(dir, conf);
}
 
Example 8
Source File: DocumentDictionaryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
@Test
public void testMultiValuedField() throws IOException {
  Directory dir = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = newIndexWriterConfig(random(), analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

  List<Suggestion> suggestions = indexMultiValuedDocuments(atLeast(1000), writer);
  writer.commit();
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  Dictionary dictionary = new DocumentDictionary(ir, FIELD_NAME, WEIGHT_FIELD_NAME, PAYLOAD_FIELD_NAME, CONTEXT_FIELD_NAME);
  InputIterator inputIterator = dictionary.getEntryIterator();
  BytesRef f;
  Iterator<Suggestion> suggestionsIter = suggestions.iterator();
  while((f = inputIterator.next())!=null) {
    Suggestion nextSuggestion = suggestionsIter.next();
    assertTrue(f.equals(nextSuggestion.term));
    long weight = nextSuggestion.weight;
    assertEquals(inputIterator.weight(), (weight != -1) ? weight : 0);
    assertEquals(inputIterator.payload(), nextSuggestion.payload);
    assertTrue(inputIterator.contexts().equals(nextSuggestion.contexts));
  }
  assertFalse(suggestionsIter.hasNext());
  IOUtils.close(ir, analyzer, dir);
}
 
Example 9
Source File: OverviewTestBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private Path createIndex() throws IOException {
  Path indexDir = createTempDir();

  Directory dir = newFSDirectory(indexDir);
  IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(NoMergePolicy.INSTANCE);  // see LUCENE-8998
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  Document doc1 = new Document();
  doc1.add(newStringField("f1", "1", Field.Store.NO));
  doc1.add(newTextField("f2", "a b c d e", Field.Store.NO));
  writer.addDocument(doc1);

  Document doc2 = new Document();
  doc2.add(newStringField("f1", "2", Field.Store.NO));
  doc2.add(new TextField("f2", "a c", Field.Store.NO));
  writer.addDocument(doc2);

  Document doc3 = new Document();
  doc3.add(newStringField("f1", "3", Field.Store.NO));
  doc3.add(newTextField("f2", "a f", Field.Store.NO));
  writer.addDocument(doc3);

  Map<String, String> userData = new HashMap<>();
  userData.put("data", "val");
  writer.w.setLiveCommitData(userData.entrySet());

  writer.commit();

  writer.close();
  dir.close();

  return indexDir;
}
 
Example 10
Source File: TestTaxonomyFacetCounts.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testSegmentsWithoutCategoriesOrResults() throws Exception {
  // tests the accumulator when there are segments with no results
  Directory indexDir = newDirectory();
  Directory taxoDir = newDirectory();
  
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setMergePolicy(NoMergePolicy.INSTANCE); // prevent merges
  IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
  FacetsConfig config = new FacetsConfig();
  indexTwoDocs(taxoWriter, indexWriter, config, false); // 1st segment, no content, with categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);         // 2nd segment, with content, no categories
  indexTwoDocs(taxoWriter, indexWriter, config, true);  // 3rd segment ok
  indexTwoDocs(taxoWriter, indexWriter, null, false);        // 4th segment, no content, or categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);         // 5th segment, with content, no categories
  indexTwoDocs(taxoWriter, indexWriter, config, true);  // 6th segment, with content, with categories
  indexTwoDocs(taxoWriter, indexWriter, null, true);         // 7th segment, with content, no categories
  indexWriter.close();
  IOUtils.close(taxoWriter);

  DirectoryReader indexReader = DirectoryReader.open(indexDir);
  TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
  IndexSearcher indexSearcher = newSearcher(indexReader);
  
  // search for "f:a", only segments 1 and 3 should match results
  Query q = new TermQuery(new Term("f", "a"));
  FacetsCollector sfc = new FacetsCollector();
  indexSearcher.search(q, sfc);
  Facets facets = getTaxonomyFacetCounts(taxoReader, config, sfc);
  FacetResult result = facets.getTopChildren(10, "A");
  assertEquals("wrong number of children", 2, result.labelValues.length);
  for (LabelAndValue labelValue : result.labelValues) {
    assertEquals("wrong weight for child " + labelValue.label, 2, labelValue.value.intValue());
  }

  IOUtils.close(indexReader, taxoReader, indexDir, taxoDir);
}
 
Example 11
Source File: RecoverySourcePruneMergePolicyTests.java    From crate with Apache License 2.0 5 votes vote down vote up
public void testPruneNone() throws IOException {
    try (Directory dir = newDirectory()) {
        IndexWriterConfig iwc = newIndexWriterConfig();
        iwc.setMergePolicy(new RecoverySourcePruneMergePolicy("extra_source",
                                                              () -> new MatchAllDocsQuery(), iwc.getMergePolicy()));
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (int i = 0; i < 20; i++) {
                if (i > 0 && randomBoolean()) {
                    writer.flush();
                }
                Document doc = new Document();
                doc.add(new StoredField("source", "hello world"));
                doc.add(new StoredField("extra_source", "hello world"));
                doc.add(new NumericDocValuesField("extra_source", 1));
                writer.addDocument(doc);
            }
            writer.forceMerge(1);
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(writer)) {
                assertEquals(1, reader.leaves().size());
                NumericDocValues extra_source = reader.leaves().get(0).reader().getNumericDocValues("extra_source");
                assertNotNull(extra_source);
                for (int i = 0; i < reader.maxDoc(); i++) {
                    Document document = reader.document(i);
                    Set<String> collect = document.getFields().stream().map(IndexableField::name).collect(Collectors.toSet());
                    assertTrue(collect.contains("source"));
                    assertTrue(collect.contains("extra_source"));
                    assertEquals(i, extra_source.nextDoc());
                }
                assertEquals(DocIdSetIterator.NO_MORE_DOCS, extra_source.nextDoc());
            }
        }
    }
}
 
Example 12
Source File: TestCheckJoinIndex.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
public void testInconsistentDeletes() throws IOException {
  final Directory dir = newDirectory();
  final IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(NoMergePolicy.INSTANCE); // so that deletions don't trigger merges
  final RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);

  List<Document> block = new ArrayList<>();
  final int numChildren = TestUtil.nextInt(random(), 1, 3);
  for (int i = 0; i < numChildren; ++i) {
    Document doc = new Document();
    doc.add(new StringField("child", Integer.toString(i), Store.NO));
    block.add(doc);
  }
  Document parent = new Document();
  parent.add(new StringField("parent", "true", Store.NO));
  block.add(parent);
  w.addDocuments(block);

  if (random().nextBoolean()) {
    w.deleteDocuments(new Term("parent", "true"));
  } else {
    // delete any of the children
    w.deleteDocuments(new Term("child", Integer.toString(random().nextInt(numChildren))));
  }

  final IndexReader reader = w.getReader();
  w.close();

  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("parent", "true")));
  try {
    expectThrows(IllegalStateException.class, () -> CheckJoinIndex.check(reader, parentsFilter));
  } finally {
    reader.close();
    dir.close();
  }
}
 
Example 13
Source File: TestMultiPhraseEnum.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Tests union on one document  */
public void testOneDocument() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig();
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter writer = new IndexWriter(dir, iwc);
  
  Document doc = new Document();
  doc.add(new TextField("field", "foo bar", Field.Store.NO));
  writer.addDocument(doc);
  
  DirectoryReader ir = DirectoryReader.open(writer);
  writer.close();

  PostingsEnum p1 = getOnlyLeafReader(ir).postings(new Term("field", "foo"), PostingsEnum.POSITIONS);
  PostingsEnum p2 = getOnlyLeafReader(ir).postings(new Term("field", "bar"), PostingsEnum.POSITIONS);
  PostingsEnum union = new MultiPhraseQuery.UnionPostingsEnum(Arrays.asList(p1, p2));
  
  assertEquals(-1, union.docID());
  
  assertEquals(0, union.nextDoc());
  assertEquals(2, union.freq());
  assertEquals(0, union.nextPosition());
  assertEquals(1, union.nextPosition());
  
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, union.nextDoc());
  
  ir.close();
  dir.close();
}
 
Example 14
Source File: SolrSnapshotManager.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * This method deletes index files of the {@linkplain IndexCommit} for the specified generation number.
 *
 * @param core The Solr core
 * @param dir The index directory storing the snapshot.
 * @throws IOException in case of I/O errors.
 */

@SuppressWarnings({"try", "unused"})
private static void deleteSnapshotIndexFiles(SolrCore core, Directory dir, IndexDeletionPolicy delPolicy) throws IOException {
  IndexWriterConfig conf = core.getSolrConfig().indexConfig.toIndexWriterConfig(core);
  conf.setOpenMode(OpenMode.APPEND);
  conf.setMergePolicy(NoMergePolicy.INSTANCE);//Don't want to merge any commits here!
  conf.setIndexDeletionPolicy(delPolicy);
  conf.setCodec(core.getCodec());
  try (SolrIndexWriter iw = new SolrIndexWriter("SolrSnapshotCleaner", dir, conf)) {
    // Do nothing. The only purpose of opening index writer is to invoke the Lucene IndexDeletionPolicy#onInit
    // method so that we can cleanup the files associated with specified index commit.
    // Note the index writer creates a new commit during the close() operation (which is harmless).
  }
}
 
Example 15
Source File: TestFieldCache.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testIntFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  IntPoint field = new IntPoint("f", 0);
  doc.add(field);
  final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final int v;
    switch (random().nextInt(10)) {
      case 0:
        v = Integer.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Integer.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextInt(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setIntValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.INT_POINT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, ints.nextDoc());
      assertEquals(values[i], ints.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, ints.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}
 
Example 16
Source File: TestControlledRealTimeReopenThread.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testThreadStarvationNoDeleteNRTReader() throws IOException, InterruptedException {
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMergePolicy(NoMergePolicy.INSTANCE);
  Directory d = newDirectory();
  final CountDownLatch latch = new CountDownLatch(1);
  final CountDownLatch signal = new CountDownLatch(1);

  LatchedIndexWriter writer = new LatchedIndexWriter(d, conf, latch, signal);
  final SearcherManager manager = new SearcherManager(writer, false, false, null);
  Document doc = new Document();
  doc.add(newTextField("test", "test", Field.Store.YES));
  writer.addDocument(doc);
  manager.maybeRefresh();
  Thread t = new Thread() {
    @Override
    public void run() {
      try {
        signal.await();
        manager.maybeRefresh();
        writer.deleteDocuments(new TermQuery(new Term("foo", "barista")));
        manager.maybeRefresh(); // kick off another reopen so we inc. the internal gen
      } catch (Exception e) {
        e.printStackTrace();
      } finally {
        latch.countDown(); // let the add below finish
      }
    }
  };
  t.start();
  writer.waitAfterUpdate = true; // wait in addDocument to let some reopens go through

  final long lastGen = writer.updateDocument(new Term("foo", "bar"), doc); // once this returns the doc is already reflected in the last reopen

  // We now eagerly resolve deletes so the manager should see it after update:
  assertTrue(manager.isSearcherCurrent());
  
  IndexSearcher searcher = manager.acquire();
  try {
    assertEquals(2, searcher.getIndexReader().numDocs());
  } finally {
    manager.release(searcher);
  }
  final ControlledRealTimeReopenThread<IndexSearcher> thread = new ControlledRealTimeReopenThread<>(writer, manager, 0.01, 0.01);
  thread.start(); // start reopening
  if (VERBOSE) {
    System.out.println("waiting now for generation " + lastGen);
  }
  
  final AtomicBoolean finished = new AtomicBoolean(false);
  Thread waiter = new Thread() {
    @Override
    public void run() {
      try {
        thread.waitForGeneration(lastGen);
      } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
        throw new RuntimeException(ie);
      }
      finished.set(true);
    }
  };
  waiter.start();
  manager.maybeRefresh();
  waiter.join(1000);
  if (!finished.get()) {
    waiter.interrupt();
    fail("thread deadlocked on waitForGeneration");
  }
  thread.close();
  thread.join();
  writer.close();
  IOUtils.close(manager, d);
}
 
Example 17
Source File: TestLucene80DocValuesFormat.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
private void doTestSortedNumericBlocksOfVariousBitsPerValue(LongSupplier counts) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  conf.setMaxBufferedDocs(atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE));
  conf.setRAMBufferSizeMB(-1);
  conf.setMergePolicy(newLogMergePolicy(random().nextBoolean()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  final int numDocs = atLeast(Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE*3);
  final LongSupplier values = blocksOfVariousBPV();
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    
    int valueCount = (int) counts.getAsLong();
    long valueArray[] = new long[valueCount];
    for (int j = 0; j < valueCount; j++) {
      long value = values.getAsLong();
      valueArray[j] = value;
      doc.add(new SortedNumericDocValuesField("dv", value));
    }
    Arrays.sort(valueArray);
    for (int j = 0; j < valueCount; j++) {
      doc.add(new StoredField("stored", Long.toString(valueArray[j])));
    }
    writer.addDocument(doc);
    if (random().nextInt(31) == 0) {
      writer.commit();
    }
  }
  writer.forceMerge(1);

  writer.close();
  
  // compare
  DirectoryReader ir = DirectoryReader.open(dir);
  TestUtil.checkReader(ir);
  for (LeafReaderContext context : ir.leaves()) {
    LeafReader r = context.reader();
    SortedNumericDocValues docValues = DocValues.getSortedNumeric(r, "dv");
    for (int i = 0; i < r.maxDoc(); i++) {
      if (i > docValues.docID()) {
        docValues.nextDoc();
      }
      String expected[] = r.document(i).getValues("stored");
      if (i < docValues.docID()) {
        assertEquals(0, expected.length);
      } else {
        String actual[] = new String[docValues.docValueCount()];
        for (int j = 0; j < actual.length; j++) {
          actual[j] = Long.toString(docValues.nextValue());
        }
        assertArrayEquals(expected, actual);
      }
    }
  }
  ir.close();
  dir.close();
}
 
Example 18
Source File: TestSearcherTaxonomyManager.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testNRT() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  // Don't allow tiny maxBufferedDocs; it can make this
  // test too slow:
  iwc.setMaxBufferedDocs(Math.max(500, iwc.getMaxBufferedDocs()));

  // MockRandom/AlcololicMergePolicy are too slow:
  TieredMergePolicy tmp = new TieredMergePolicy();
  tmp.setFloorSegmentMB(.001);
  iwc.setMergePolicy(tmp);
  final IndexWriter w = new IndexWriter(dir, iwc);
  final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
  final FacetsConfig config = new FacetsConfig();
  config.setMultiValued("field", true);
  final AtomicBoolean stop = new AtomicBoolean();

  // How many unique facets to index before stopping:
  final int ordLimit = TEST_NIGHTLY ? 100000 : 6000;

  Thread indexer = new IndexerThread(w, config, tw, null, ordLimit, stop);

  final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);

  Thread reopener = new Thread() {
      @Override
      public void run() {
        while(!stop.get()) {
          try {
            // Sleep for up to 20 msec:
            Thread.sleep(random().nextInt(20));

            if (VERBOSE) {
              System.out.println("TEST: reopen");
            }

            mgr.maybeRefresh();

            if (VERBOSE) {
              System.out.println("TEST: reopen done");
            }
          } catch (Exception ioe) {
            throw new RuntimeException(ioe);
          }
        }
      }
    };

  reopener.setName("reopener");
  reopener.start();

  indexer.setName("indexer");
  indexer.start();

  try {
    while (!stop.get()) {
      SearcherAndTaxonomy pair = mgr.acquire();
      try {
        //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
        FacetsCollector sfc = new FacetsCollector();
        pair.searcher.search(new MatchAllDocsQuery(), sfc);
        Facets facets = getTaxonomyFacetCounts(pair.taxonomyReader, config, sfc);
        FacetResult result = facets.getTopChildren(10, "field");
        if (pair.searcher.getIndexReader().numDocs() > 0) { 
          //System.out.println(pair.taxonomyReader.getSize());
          assertTrue(result.childCount > 0);
          assertTrue(result.labelValues.length > 0);
        }

        //if (VERBOSE) {
        //System.out.println("TEST: facets=" + FacetTestUtils.toString(results.get(0)));
        //}
      } finally {
        mgr.release(pair);
      }
    }
  } finally {
    indexer.join();
    reopener.join();
  }

  if (VERBOSE) {
    System.out.println("TEST: now stop");
  }

  w.close();
  IOUtils.close(mgr, tw, taxoDir, dir);
}
 
Example 19
Source File: TestBlockJoin.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testEmptyChildFilter() throws Exception {
  final Directory dir = newDirectory();
  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  config.setMergePolicy(NoMergePolicy.INSTANCE);
  // we don't want to merge - since we rely on certain segment setup
  final IndexWriter w = new IndexWriter(dir, config);

  final List<Document> docs = new ArrayList<>();

  docs.add(makeJob("java", 2007));
  docs.add(makeJob("python", 2010));
  docs.add(makeResume("Lisa", "United Kingdom"));
  w.addDocuments(docs);

  docs.clear();
  docs.add(makeJob("ruby", 2005));
  docs.add(makeJob("java", 2006));
  docs.add(makeResume("Frank", "United States"));
  w.addDocuments(docs);
  w.commit();

  IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher s = newSearcher(r);
  BitSetProducer parentsFilter = new QueryBitSetProducer(new TermQuery(new Term("docType", "resume")));
  CheckJoinIndex.check(r, parentsFilter);

  BooleanQuery.Builder childQuery = new BooleanQuery.Builder();
  childQuery.add(new BooleanClause(new TermQuery(new Term("skill", "java")), Occur.MUST));
  childQuery.add(new BooleanClause(IntPoint.newRangeQuery("year", 2006, 2011), Occur.MUST));

  ToParentBlockJoinQuery childJoinQuery = new ToParentBlockJoinQuery(childQuery.build(), parentsFilter, ScoreMode.Avg);

  BooleanQuery.Builder fullQuery = new BooleanQuery.Builder();
  fullQuery.add(new BooleanClause(childJoinQuery, Occur.MUST));
  fullQuery.add(new BooleanClause(new MatchAllDocsQuery(), Occur.MUST));
  TopDocs topDocs = s.search(fullQuery.build(), 2);
  assertEquals(2, topDocs.totalHits.value);
  assertEquals(asSet("Lisa", "Frank"),
      asSet(s.doc(topDocs.scoreDocs[0].doc).get("name"), s.doc(topDocs.scoreDocs[1].doc).get("name")));

  ParentChildrenBlockJoinQuery childrenQuery =
      new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[0].doc);
  TopDocs matchingChildren = s.search(childrenQuery, 1);
  assertEquals(1, matchingChildren.totalHits.value);
  assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));

  childrenQuery = new ParentChildrenBlockJoinQuery(parentsFilter, childQuery.build(), topDocs.scoreDocs[1].doc);
  matchingChildren = s.search(childrenQuery, 1);
  assertEquals(1, matchingChildren.totalHits.value);
  assertEquals("java", s.doc(matchingChildren.scoreDocs[0].doc).get("skill"));

  r.close();
  dir.close();
}
 
Example 20
Source File: TestFieldCache.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
public void testLongFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LongPoint field = new LongPoint("f", 0L);
  StoredField field2 = new StoredField("f", 0L);
  doc.add(field);
  doc.add(field2);
  final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final long v;
    switch (random().nextInt(10)) {
      case 0:
        v = Long.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Long.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextLong(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setLongValue(v);
      field2.setLongValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LONG_POINT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, longs.nextDoc());
      assertEquals(values[i], longs.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, longs.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}