org.apache.lucene.document.Document Java Examples

The following examples show how to use org.apache.lucene.document.Document. Each example is drawn from an open-source project; the source file and project it was taken from are noted above each snippet.
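Before diving in, here is a minimal, self-contained sketch of the Document lifecycle: build a document from fields, hand it to an IndexWriter, then read it back through an IndexSearcher. It is not taken from any of the projects below, and it assumes a recent Lucene release (8.x or later, where ByteBuffersDirectory is available):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class DocumentQuickStart {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));               // exact-match key, stored
            doc.add(new TextField("content", "hello lucene", Field.Store.YES)); // analyzed full text
            writer.addDocument(doc);
        }
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs hits = searcher.search(new TermQuery(new Term("content", "hello")), 10);
            System.out.println(searcher.doc(hits.scoreDocs[0].doc).get("id")); // prints "1"
        }
    }
}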
Example #1
Source File: MovieLuceneConfigurer.java    From jstarcraft-example with Apache License 2.0
@Bean("movieEngine")
LuceneEngine getMovieEngine(List<MovieItem> movieItems) throws Exception {
    LuceneContext context = new LuceneContext(CodecDefinition.instanceOf(MovieItem.class));
    LuceneMetadata codec = new LuceneMetadata(MovieItem.class, context);

    IndexWriterConfig config = new IndexWriterConfig();
    Path path = Paths.get("./lucene/movie");
    File file = path.toFile();
    FileUtils.deleteDirectory(file);
    LuceneEngine searcher = new LuceneEngine(config, path);

    for (MovieItem movie : movieItems) {
        Document document = codec.encodeDocument(movie);
        searcher.createDocument(String.valueOf(movie.getId()), document);
    }

    return searcher;
}
 
Example #2
Source File: TestBlockJoinValidation.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();
  final IndexWriterConfig config = new IndexWriterConfig(new MockAnalyzer(random()));
  final IndexWriter indexWriter = new IndexWriter(directory, config);
  for (int i = 0; i < AMOUNT_OF_SEGMENTS; i++) {
    List<Document> segmentDocs = createDocsForSegment(i);
    indexWriter.addDocuments(segmentDocs);
    indexWriter.commit();
  }
  indexReader = DirectoryReader.open(indexWriter);
  indexWriter.close();
  indexSearcher = new IndexSearcher(indexReader);
  parentsFilter = new QueryBitSetProducer(new WildcardQuery(new Term("parent", "*")));
}
 
Example #3
Source File: TestStressIndexing2.java    From lucene-solr with Apache License 2.0
public static void indexSerial(Random random, Map<String,Document> docs, Directory dir) throws IOException {
  IndexWriter w = new IndexWriter(dir, LuceneTestCase.newIndexWriterConfig(random, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));

  // index all docs in a single thread
  Iterator<Document> iter = docs.values().iterator();
  while (iter.hasNext()) {
    Document d = iter.next();
    ArrayList<IndexableField> fields = new ArrayList<>();
    fields.addAll(d.getFields());
    // put fields in same order each time
    Collections.sort(fields, fieldNameComparator);
    
    Document d1 = new Document();
    for (int i=0; i<fields.size(); i++) {
      d1.add(fields.get(i));
    }
    w.addDocument(d1);
    // System.out.println("indexing "+d1);
  }
  
  w.close();
}
 
Example #4
Source File: TestLegacyTerms.java    From lucene-solr with Apache License 2.0
public void testDoubleFieldMinMax() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), dir);
  int numDocs = atLeast(100);
  double minValue = Double.POSITIVE_INFINITY;
  double maxValue = Double.NEGATIVE_INFINITY;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    double num = random().nextDouble();
    minValue = Math.min(num, minValue);
    maxValue = Math.max(num, maxValue);
    doc.add(new LegacyDoubleField("field", num, Field.Store.NO));
    w.addDocument(doc);
  }
  
  IndexReader r = w.getReader();

  Terms terms = MultiTerms.getTerms(r, "field");

  assertEquals(minValue, NumericUtils.sortableLongToDouble(LegacyNumericUtils.getMinLong(terms)), 0.0);
  assertEquals(maxValue, NumericUtils.sortableLongToDouble(LegacyNumericUtils.getMaxLong(terms)), 0.0);

  r.close();
  w.close();
  dir.close();
}
 
Example #5
Source File: ParentNodeFacetTreeBuilder.java    From BioSolr with Apache License 2.0
private Map<String, Set<String>> findParentIdsForNodes(SolrIndexSearcher searcher, Collection<String> nodeIds) throws IOException {
	Map<String, Set<String>> parentIds = new HashMap<>();
	
	LOGGER.debug("Looking up parents for {} nodes", nodeIds.size());
	Query filter = buildFilterQuery(getNodeField(), nodeIds);
	LOGGER.trace("Filter query: {}", filter);
	
	DocSet docs = searcher.getDocSet(filter);
	
	for (DocIterator it = docs.iterator(); it.hasNext(); ) {
		Document doc = searcher.doc(it.nextDoc(), docFields);
		String nodeId = doc.get(getNodeField());
		
		Set<String> parentIdValues = new HashSet<>(Arrays.asList(doc.getValues(parentField)));
		parentIds.put(nodeId, parentIdValues);
		
		// Record the label, if required
		if (isLabelRequired(nodeId)) {
			recordLabel(nodeId, doc.getValues(getLabelField()));
		}
	}
	
	return parentIds;
}
 
Example #6
Source File: InMemoryIndex.java    From SnowGraph with Apache License 2.0
public InMemoryIndex(Map<String,String> id2Text){
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, iwc);
        for (String id:id2Text.keySet()) {
            Document doc=new Document();
            doc.add(new StringField("id", id, Field.Store.YES));
            doc.add(new TextField("content", id2Text.get(id), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
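The constructor above writes to a `directory` that is evidently a class field (a Lucene Directory). The matching lookup method is not shown in the excerpt; a hedged sketch of what it might look like, reusing the same analyzer and field names:

// Hypothetical companion to the constructor above (not part of SnowGraph's
// InMemoryIndex as shown); assumes the same `directory` field and field names.
public List<String> search(String query, int n) throws IOException, ParseException {
    List<String> ids = new ArrayList<>();
    QueryParser parser = new QueryParser("content", new EnglishAnalyzer());
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(parser.parse(query), n);
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            ids.add(searcher.doc(scoreDoc.doc).get("id"));
        }
    }
    return ids;
}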
 
Example #7
Source File: TestOmitTf.java    From lucene-solr with Apache License 2.0
/** test that when freqs are omitted, totalTermFreq equals docFreq and sumTotalTermFreq equals sumDocFreq */
public void testStats() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir,
      newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS);
  ft.freeze();
  Field f = newField("foo", "bar", ft);
  doc.add(f);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  assertEquals(ir.docFreq(new Term("foo", new BytesRef("bar"))), ir.totalTermFreq(new Term("foo", new BytesRef("bar"))));
  assertEquals(ir.getSumDocFreq("foo"), ir.getSumTotalTermFreq("foo"));
  ir.close();
  dir.close();
}
 
Example #8
Source File: GroupDocument.java    From olat with Apache License 2.0
public static Document createDocument(final SearchResourceContext searchResourceContext, final BusinessGroup businessGroup) {
    final GroupDocument groupDocument = new GroupDocument();

    // Set all known attributes
    groupDocument.setResourceUrl(searchResourceContext.getResourceUrl());
    groupDocument.setLastChange(businessGroup.getLastModified());
    groupDocument.setDocumentType(TYPE);
    groupDocument.setCssIcon(CSSHelper.CSS_CLASS_GROUP);
    groupDocument.setTitle(businessGroup.getName());
    // description is rich text
    groupDocument.setDescription(FilterFactory.unescapeAndFilterHtml(businessGroup.getDescription()));

    if (log.isDebugEnabled()) {
        log.debug(groupDocument.toString());
    }
    return groupDocument.getLuceneDocument();
}
 
Example #9
Source File: BaseXYPointTestCase.java    From lucene-solr with Apache License 2.0
/** test we can search for a polygon with a hole (that excludes the doc) */
public void testPolygonHoleExcludes() throws Exception {
  assumeTrue("Impl does not support polygons", supportsPolygons());
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

  // add a doc with a point
  Document document = new Document();
  addPointToDoc("field", document, 18.313694f, -65.227444f);
  writer.addDocument(document);
  
  // search and verify we found our doc
  IndexReader reader = writer.getReader();
  IndexSearcher searcher = newSearcher(reader);
  XYPolygon inner = new XYPolygon(new float[] { 18.2f, 18.2f, 18.4f, 18.4f, 18.2f },
                              new float[] { -65.3f, -65.2f, -65.2f, -65.3f, -65.3f });
  XYPolygon outer = new XYPolygon(new float[] { 18, 18, 19, 19, 18 },
                              new float[] { -66, -65, -65, -66, -66 }, inner);
  assertEquals(0, searcher.count(newPolygonQuery("field", outer)));

  reader.close();
  writer.close();
  dir.close();
}
 
Example #10
Source File: SearchService.java    From subsonic with GNU General Public License v3.0
@Override
public Document createDocument(MediaFile mediaFile) {
    Document doc = new Document();
    doc.add(new NumericField(FIELD_ID, Field.Store.YES, false).setIntValue(mediaFile.getId()));
    doc.add(new Field(FIELD_MEDIA_TYPE, mediaFile.getMediaType().name(), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));

    if (mediaFile.getTitle() != null) {
        doc.add(new Field(FIELD_TITLE, mediaFile.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
    }
    if (mediaFile.getArtist() != null) {
        doc.add(new Field(FIELD_ARTIST, mediaFile.getArtist(), Field.Store.YES, Field.Index.ANALYZED));
    }
    if (mediaFile.getGenre() != null) {
        doc.add(new Field(FIELD_GENRE, normalizeGenre(mediaFile.getGenre()), Field.Store.NO, Field.Index.ANALYZED));
    }
    if (mediaFile.getYear() != null) {
        doc.add(new NumericField(FIELD_YEAR, Field.Store.NO, true).setIntValue(mediaFile.getYear()));
    }
    if (mediaFile.getFolder() != null) {
        doc.add(new Field(FIELD_FOLDER, mediaFile.getFolder(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
    }

    return doc;
}
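This snippet is written against the pre-4.0 field API (NumericField and the Field.Index constants). On current Lucene versions the same fields would be declared roughly as follows; the mapping is our assumption, with the original field-name constants kept:

// Rough modern-API equivalent of the snippet above (Lucene 6+); our mapping.
Document doc = new Document();
doc.add(new StoredField(FIELD_ID, mediaFile.getId()));      // stored for retrieval
doc.add(new IntPoint(FIELD_ID, mediaFile.getId()));         // indexed for lookups
doc.add(new StringField(FIELD_MEDIA_TYPE, mediaFile.getMediaType().name(), Field.Store.NO));
if (mediaFile.getTitle() != null) {
    doc.add(new TextField(FIELD_TITLE, mediaFile.getTitle(), Field.Store.YES));
}
if (mediaFile.getYear() != null) {
    doc.add(new IntPoint(FIELD_YEAR, mediaFile.getYear())); // enables numeric range queries
}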
 
Example #11
Source File: TestFieldCacheSort.java    From lucene-solr with Apache License 2.0
/** test that we throw an exception on a multi-valued field: wrapping it as SORTED creates a corrupt reader, so use SORTED_SET instead */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for(int i=0; i<5; i++) {
      Document doc = new Document();
      doc.add(new StringField("string", "a"+i, Field.Store.NO));
      doc.add(new StringField("string", "b"+i, Field.Store.NO));
      writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(
      new SortField("string", SortField.Type.STRING),
      SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
                       Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
 
Example #12
Source File: TestPKIndexSplitter.java    From lucene-solr with Apache License 2.0
private Document createDocument(int n, String indexName, 
    int numFields, NumberFormat format) {
  StringBuilder sb = new StringBuilder();
  Document doc = new Document();
  String id = format.format(n);
  doc.add(newStringField("id", id, Field.Store.YES));
  doc.add(newStringField("indexname", indexName, Field.Store.YES));
  sb.append("a");
  sb.append(n);
  doc.add(newTextField("field1", sb.toString(), Field.Store.YES));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(newTextField("field" + (i + 1), sb.toString(), Field.Store.YES));
  }
  return doc;
}
 
Example #13
Source File: TestTaxonomyFacetCounts.java    From lucene-solr with Apache License 2.0
public void testReallyNoNormsForDrillDown() throws Exception {
  Directory dir = newDirectory();
  Directory taxoDir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(new PerFieldSimilarityWrapper() {
      final Similarity sim = new ClassicSimilarity();

      @Override
      public Similarity get(String name) {
        assertEquals("field", name);
        return sim;
      }
    });
  TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);
  FacetsConfig config = new FacetsConfig();

  Document doc = new Document();
  doc.add(newTextField("field", "text", Field.Store.NO));
  doc.add(new FacetField("a", "path"));
  writer.addDocument(config.build(taxoWriter, doc));
  writer.close();
  IOUtils.close(taxoWriter, dir, taxoDir);
}
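The test only exercises the write side. For reference, the read side of a taxonomy-backed facet index follows the usual collector pattern; a sketch assuming dir and taxoDir are still open (in the test above they are closed immediately):

// Sketch of the read side for the taxonomy-backed index built above
// (assumes dir and taxoDir have not been closed yet).
DirectoryReader indexReader = DirectoryReader.open(dir);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
FacetsCollector fc = new FacetsCollector();
FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc);
System.out.println(facets.getTopChildren(10, "a")); // counts under dimension "a"
IOUtils.close(indexReader, taxoReader);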
 
Example #14
Source File: TestTransactionRollback.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();

  // Build an index of records 1 to 100, committing after each batch of 10
  IndexDeletionPolicy sdp = new KeepAllDeletionPolicy();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                        .setIndexDeletionPolicy(sdp));

  for (int currentRecordId = 1; currentRecordId <= 100; currentRecordId++) {
    Document doc = new Document();
    doc.add(newTextField(FIELD_RECORD_ID, "" + currentRecordId, Field.Store.YES));
    w.addDocument(doc);

    if (currentRecordId % 10 == 0) {
      Map<String,String> data = new HashMap<>();
      data.put("index", "records 1-"+currentRecordId);
      w.setLiveCommitData(data.entrySet());
      w.commit();
    }
  }

  w.close();
}
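KeepAllDeletionPolicy keeps every one of those commits alive, which is what makes the rollback under test possible: a later writer can be opened on any named commit point. A hedged sketch of that step (the commit-data key "index" matches the setUp above; the chosen batch is illustrative):

// Roll the index back to a chosen commit point (illustrative sketch; relies on
// the commits recorded by KeepAllDeletionPolicy in setUp above).
IndexCommit target = null;
for (IndexCommit commit : DirectoryReader.listCommits(dir)) {
  if ("records 1-50".equals(commit.getUserData().get("index"))) {
    target = commit;
  }
}
// Opening a writer on an older commit and committing discards later changes.
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                      .setIndexCommit(target));
w.commit();
w.close();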
 
Example #15
Source File: TRECAquaintDocumentIndexer.java    From lucene4ir with Apache License 2.0
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
    super(indexPath, tokenFilterFile, positional);

    try {
        whiteList = Whitelist.relaxed();
        whiteList.addTags("docno");
        whiteList.addTags("doc");
        whiteList.addTags("headline");
        whiteList.addTags("text");
        whiteList.addTags("date_time");
        whiteList.addTags("slug");
    } catch (Exception e){
        System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
    }

    doc = new Document();
    initFields();
    initAQUAINTDoc();
}
 
Example #16
Source File: PortfolioCourseNodeIndexer.java    From olat with Apache License 2.0
@Override
public void doIndex(final SearchResourceContext searchResourceContext, final ICourse course, final CourseNode courseNode, final OlatFullIndexer indexWriter)
        throws IOException {
    if (!portfolioModule.isEnabled())
        return;
    final PortfolioCourseNode portfolioNode = (PortfolioCourseNode) courseNode;
    final RepositoryEntry repoEntry = portfolioNode.getReferencedRepositoryEntry();
    if (repoEntry != null) {
        final SearchResourceContext courseNodeResourceContext = new SearchResourceContext(searchResourceContext);
        courseNodeResourceContext.setBusinessControlFor(courseNode);
        courseNodeResourceContext.setDocumentType(NODE_TYPE);
        courseNodeResourceContext.setDocumentContext(course.getResourceableId() + " " + courseNode.getIdent());
        final OLATResource ores = repoEntry.getOlatResource();
        final PortfolioStructure element = structureManager.loadPortfolioStructure(ores);
        final Document document = PortfolioMapDocument.createDocument(courseNodeResourceContext, element);
        indexWriter.addDocument(document);
    }
}
 
Example #17
Source File: CodePatternSearcher.java    From SnowGraph with Apache License 2.0
private static List<String> search(List<String> contents, String query, int n) throws IOException, ParseException {
    List<String> r=new ArrayList<>();
    Directory dir=new RAMDirectory();
    IndexWriter indexWriter=new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer()));
    for (String method:contents){
        Document document=new Document();
        document.add(new TextField("content",method, Field.Store.YES));
        indexWriter.addDocument(document);
    }
    indexWriter.close();
    QueryParser qp = new QueryParser("content", new EnglishAnalyzer());
    IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
    TopDocs topDocs = indexSearcher.search(qp.parse(query), n);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        r.add(indexSearcher.doc(scoreDoc.doc).get("content"));
    }
    return r;
}
 
Example #18
Source File: TestDocValuesIndexing.java    From lucene-solr with Apache License 2.0
public void testMixedTypesAfterReopenAppend1() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(new NumericDocValuesField("foo", 0));
  w.addDocument(doc);
  w.close();

  IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc2 = new Document();
  doc2.add(new SortedDocValuesField("foo", new BytesRef("hello")));
  expectThrows(IllegalArgumentException.class, () -> {
    w2.addDocument(doc2);
  });

  w2.close();
  dir.close();
}
 
Example #19
Source File: TestIDVersionPostingsFormat.java    From lucene-solr with Apache License 2.0
public void testCannotIndexTermVectors() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
  iwc.setCodec(TestUtil.alwaysPostingsFormat(new IDVersionPostingsFormat()));
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc, false);
  Document doc = new Document();

  FieldType ft = new FieldType(StringAndPayloadField.TYPE);
  ft.setStoreTermVectors(true);
  SingleTokenWithPayloadTokenStream ts = new SingleTokenWithPayloadTokenStream();
  BytesRef payload = new BytesRef(8);
  payload.length = 8;
  IDVersionPostingsFormat.longToBytes(17, payload);
  ts.setValue("foo", payload);
  Field field = new Field("id", ts, ft);
  doc.add(field);
  expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
    w.commit(false);
  });

  w.close();
  dir.close();
}
 
Example #20
Source File: TestMultiDocValues.java    From lucene-solr with Apache License 2.0
public void testNumerics() throws Exception {
  Directory dir = newDirectory();
  Document doc = new Document();
  Field field = new NumericDocValuesField("numbers", 0);
  doc.add(field);
  
  IndexWriterConfig iwc = newIndexWriterConfig(random(), null);
  iwc.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);

  int numDocs = TEST_NIGHTLY ? atLeast(500) : atLeast(50);
  for (int i = 0; i < numDocs; i++) {
    field.setLongValue(random().nextLong());
    iw.addDocument(doc);
    if (random().nextInt(17) == 0) {
      iw.commit();
    }
  }
  DirectoryReader ir = iw.getReader();
  iw.forceMerge(1);
  DirectoryReader ir2 = iw.getReader();
  LeafReader merged = getOnlyLeafReader(ir2);
  iw.close();
  
  NumericDocValues multi = MultiDocValues.getNumericValues(ir, "numbers");
  NumericDocValues single = merged.getNumericDocValues("numbers");
  for (int i = 0; i < numDocs; i++) {
    assertEquals(i, multi.nextDoc());
    assertEquals(i, single.nextDoc());
    assertEquals(single.longValue(), multi.longValue());
  }
  testRandomAdvance(merged.getNumericDocValues("numbers"), MultiDocValues.getNumericValues(ir, "numbers"));
  testRandomAdvanceExact(merged.getNumericDocValues("numbers"), MultiDocValues.getNumericValues(ir, "numbers"), merged.maxDoc());

  ir.close();
  ir2.close();
  dir.close();
}
 
Example #21
Source File: LuceneResultSet.java    From alfresco-repository with GNU Lesser General Public License v3.0
public Document getDocument(int n)
{
    try
    {
        prefetch(n);
        Document doc = hits.doc(n);
        return doc;
    }
    catch (IOException e)
    {
        throw new SearcherException("IO error reading document from the result set", e);
    }
}
 
Example #22
Source File: TestNRTReaderCleanup.java    From lucene-solr with Apache License 2.0
public void testClosingNRTReaderDoesNotCorruptYourIndex() throws IOException {

    // Windows disallows deleting & overwriting files still
    // open for reading:
    assumeFalse("this test can't run on Windows", Constants.WINDOWS);

    MockDirectoryWrapper dir = newMockDirectory();
    
    IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
    LogMergePolicy lmp = new LogDocMergePolicy();
    lmp.setMergeFactor(2);
    iwc.setMergePolicy(lmp);

    RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    doc.add(new TextField("a", "foo", Field.Store.NO));
    w.addDocument(doc);
    w.commit();
    w.addDocument(doc);

    // Get a new reader, but this also sets off a merge:
    IndexReader r = w.getReader();
    w.close();

    // Blow away index and make a new writer:
    for(String name : dir.listAll()) {
      dir.deleteFile(name);
    }

    w = new RandomIndexWriter(random(), dir);
    w.addDocument(doc);
    w.close();
    r.close();
    dir.close();
}
 
Example #23
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0
public void testInvalidProx() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
  assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(w, dir);
}
 
Example #24
Source File: DocumentFactory.java    From airsonic with GNU General Public License v3.0
/**
 * Create a document.
 *
 * @param mediaFile target of document
 * @return document
 * @since legacy
 */
public Document createSongDocument(MediaFile mediaFile) {
    Document doc = new Document();
    fieldId.accept(doc, mediaFile.getId());
    fieldMediatype.accept(doc, mediaFile.getMediaType().name());
    fieldWords.accept(doc, FieldNames.TITLE, mediaFile.getTitle());
    fieldWords.accept(doc, FieldNames.ARTIST, mediaFile.getArtist());
    fieldGenre.accept(doc, mediaFile.getGenre());
    fieldYear.accept(doc, FieldNames.YEAR, mediaFile.getYear());
    fieldFolderPath.accept(doc, mediaFile.getFolder());
    return doc;
}
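The fieldId, fieldMediatype, fieldWords, fieldGenre, fieldYear and fieldFolderPath members are not shown in this excerpt; they are small field-appending helpers. Two plausible shapes, stated purely as an assumption (including the FieldNames constants) rather than the actual airsonic source:

// Assumed shapes for two of the helper members used above (not the actual
// airsonic source); each appends an appropriately typed Lucene field.
private final BiConsumer<Document, Integer> fieldId = (doc, id) ->
    doc.add(new StringField(FieldNames.ID, Integer.toString(id), Field.Store.YES));

private final BiConsumer<Document, String> fieldGenre = (doc, genre) -> {
    if (genre != null) {                                  // genre is optional
        doc.add(new TextField(FieldNames.GENRE, genre, Field.Store.NO));
    }
};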
 
Example #25
Source File: TestSuggestField.java    From lucene-solr with Apache License 2.0
@Test
public void testNRTDeletedDocFiltering() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));

  int num = Math.min(1000, atLeast(10));

  int numLive = 0;
  List<Entry> expectedEntries = new ArrayList<>();
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, num - i));
    if (i % 2 == 0) {
      document.add(newStringField("str_field", "delete", Field.Store.YES));
    } else {
      numLive++;
      expectedEntries.add(new Entry("abc_" + i, num - i));
      document.add(newStringField("str_field", "no_delete", Field.Store.YES));
    }
    iw.addDocument(document);

    if (usually()) {
      iw.commit();
    }
  }

  iw.deleteDocuments(new Term("str_field", "delete"));

  DirectoryReader reader = DirectoryReader.open(iw);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
  assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));

  reader.close();
  iw.close();
}
 
Example #26
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0
public void testUniqueTermCount() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  doc.add(newTextField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO));
  doc.add(newTextField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO));
  writer.addDocument(doc);
  writer.addDocument(doc);
  writer.commit();

  DirectoryReader r = DirectoryReader.open(dir);
  LeafReader r1 = getOnlyLeafReader(r);
  assertEquals(26, r1.terms("field").size());
  assertEquals(10, r1.terms("number").size());
  writer.addDocument(doc);
  writer.commit();
  DirectoryReader r2 = DirectoryReader.openIfChanged(r);
  assertNotNull(r2);
  r.close();

  for(LeafReaderContext s : r2.leaves()) {
    assertEquals(26, s.reader().terms("field").size());
    assertEquals(10, s.reader().terms("number").size());
  }
  r2.close();
  writer.close();
  dir.close();
}
 
Example #27
Source File: Monitor.java    From lucene-solr with Apache License 2.0
/**
 * Match a DocumentBatch against the queries stored in the Monitor, also returning information
 * about which queries were selected by the presearcher, and why.
 *
 * @param docs    a DocumentBatch to match against the index
 * @param factory a {@link MatcherFactory} to use to create a {@link CandidateMatcher} for the match run
 * @param <T>     the type of QueryMatch produced by the CandidateMatcher
 * @return a {@link PresearcherMatches} object containing debug information
 * @throws IOException on IO errors
 */
public <T extends QueryMatch> PresearcherMatches<T> debug(Document[] docs, MatcherFactory<T> factory)
    throws IOException {
  try (DocumentBatch batch = DocumentBatch.of(analyzer, docs)) {
    LeafReader reader = batch.get();
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setQueryCache(null);
    PresearcherQueryCollector<T> collector = new PresearcherQueryCollector<>(factory.createMatcher(searcher));
    long buildTime = queryIndex.search(t -> new ForceNoBulkScoringQuery(presearcher.buildQuery(reader, t)), collector);
    return collector.getMatches(buildTime);
  }
}
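A caller-side sketch of a debug run (the registered query id, query text and document are made up; Monitor, MonitorQuery and the stock QueryMatch.SIMPLE_MATCHER factory are from the same monitor module):

// Illustrative caller for debug(); the query and document here are made up.
try (Monitor monitor = new Monitor(new StandardAnalyzer())) {
  monitor.register(new MonitorQuery("q1", new TermQuery(new Term("body", "lucene"))));

  Document doc = new Document();
  doc.add(new TextField("body", "lucene is a search library", Field.Store.NO));

  // Returns per-query information about why the presearcher selected each query.
  PresearcherMatches<QueryMatch> matches =
      monitor.debug(new Document[] { doc }, QueryMatch.SIMPLE_MATCHER);
}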
 
Example #28
Source File: TestNeedsScores.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  for (int i = 0; i < 5; i++) {
    Document doc = new Document();
    doc.add(new TextField("field", "this is document " + i, Field.Store.NO));
    iw.addDocument(doc);
  }
  reader = iw.getReader();
  searcher = newSearcher(reader);
  iw.close();
}
 
Example #29
Source File: TestIndexSorting.java    From lucene-solr with Apache License 2.0
public void testBadAddIndexes() throws Exception {
  Directory dir = newDirectory();
  Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
  IndexWriterConfig iwc1 = newIndexWriterConfig();
  iwc1.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc1);
  w.addDocument(new Document());
  List<Sort> indexSorts = Arrays.asList(null, new Sort(new SortField("bar", SortField.Type.LONG)));
  for (Sort sort : indexSorts) {
    Directory dir2 = newDirectory();
    IndexWriterConfig iwc2 = newIndexWriterConfig();
    if (sort != null) {
      iwc2.setIndexSort(sort);
    }
    IndexWriter w2 = new IndexWriter(dir2, iwc2);
    w2.addDocument(new Document());
    final IndexReader reader = w2.getReader();
    w2.close();
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> w.addIndexes(dir2));
    assertThat(expected.getMessage(), containsString("cannot change index sort"));
    CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
    for (int i = 0; i < codecReaders.length; ++i) {
      codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
    }
    expected = expectThrows(IllegalArgumentException.class, () -> w.addIndexes(codecReaders));
    assertThat(expected.getMessage(), containsString("cannot change index sort"));

    reader.close();
    dir2.close();
  }
  w.close();
  dir.close();
}
 
Example #30
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java    From alfresco-repository with GNU Lesser General Public License v3.0
public String getPath(int n) throws IOException
{
    // return getStringValue(n, "PATH");
    Document d = document(n, new SingleFieldSelector("PATH", true));
    Field f = d.getField("PATH");
    return f == null ? null : f.stringValue();
}
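The SingleFieldSelector here is the pre-3.x FieldSelector mechanism for loading a single stored field. On current Lucene versions the equivalent would be roughly the following; the mapping is our assumption, and `reader` stands in for an IndexReader field:

// Modern single-stored-field load (Lucene 4+); our mapping of the snippet above.
public String getPath(int n) throws IOException {
    Document d = reader.document(n, Collections.singleton("PATH")); // load only PATH
    return d.get("PATH"); // null if the field is absent
}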