Java Code Examples for org.apache.lucene.index.IndexWriter#addDocument()
The following examples show how to use org.apache.lucene.index.IndexWriter#addDocument(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.
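Before the project examples, here is a minimal, self-contained sketch of the usual addDocument() call sequence against a recent (8.x-era) Lucene API. The index path and field names are illustrative assumptions, not taken from any of the projects below.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
  public static void main(String[] args) throws Exception {
    // "index" is an illustrative path; any Directory implementation works here.
    try (Directory dir = FSDirectory.open(Paths.get("index"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("id", "1", Field.Store.YES));                  // exact-match key, not analyzed
      doc.add(new TextField("body", "the quick brown fox", Field.Store.NO)); // analyzed full text
      writer.addDocument(doc); // buffers the document; it is not searchable yet
      writer.commit();         // makes it durable and visible to newly opened readers
    }
  }
}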
Example 1
Source File: TestSearcherManager.java From lucene-solr with Apache License 2.0 | 6 votes
public void testListenerCalled() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
  final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
  SearcherManager sm = new SearcherManager(iw, false, false, new SearcherFactory());
  sm.addListener(new ReferenceManager.RefreshListener() {
    @Override
    public void beforeRefresh() {
    }
    @Override
    public void afterRefresh(boolean didRefresh) {
      if (didRefresh) {
        afterRefreshCalled.set(true);
      }
    }
  });
  iw.addDocument(new Document());
  iw.commit();
  assertFalse(afterRefreshCalled.get());
  sm.maybeRefreshBlocking();
  assertTrue(afterRefreshCalled.get());
  sm.close();
  iw.close();
  dir.close();
}
Example 2
Source File: TatoebaParser.java From aedict with GNU General Public License v3.0 | 6 votes
private void writeLucene(IndexWriter writer) throws IOException {
  final Set<String> languages = new HashSet<String>();
  int sc = 0;
  for (final Entry<Integer, Sentences> e : sentences.entrySet()) {
    if (e.getValue().bLine == null) {
      System.out.println("Missing B-Line for sentence " + e.getKey() + ", skipping");
    } else {
      languages.addAll(e.getValue().sentences.keySet());
      final Document doc = new Document();
      doc.add(new Field("japanese", e.getValue().japanese, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("translations", e.getValue().getSentences(), Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("jp-deinflected", e.getValue().bLine.dictionaryFormWordList, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(new Field("kana", CompressionTools.compressString(e.getValue().bLine.kana), Field.Store.YES));
      writer.addDocument(doc);
      sc++;
    }
  }
  System.out.println("Lucene indexed " + sc + " example sentences");
  System.out.println("Tatoeba contains sentences in the following languages: " + languages);
}
Example 3
Source File: QueryAutoStopWordAnalyzerTest.java From lucene-solr with Apache License 2.0 | 6 votes
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = new ByteBuffersDirectory();
  appAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(appAnalyzer));
  int numDocs = 200;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    String variedFieldValue = variedFieldValues[i % variedFieldValues.length];
    String repetitiveFieldValue = repetitiveFieldValues[i % repetitiveFieldValues.length];
    doc.add(new TextField("variedField", variedFieldValue, Field.Store.YES));
    doc.add(new TextField("repetitiveField", repetitiveFieldValue, Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.close();
  reader = DirectoryReader.open(dir);
}
Example 4
Source File: TableShardCountCollapserTest.java From incubator-retired-blur with Apache License 2.0 | 6 votes
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);
  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);
  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
Example 5
Source File: HighlighterPhraseTest.java From lucene-solr with Apache License 2.0 | 5 votes
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamSparse(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(FIELD, "did")),
        new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(),
        new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(
        highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
Example 6
Source File: TestSuggestField.java From lucene-solr with Apache License 2.0 | 5 votes
@Test
public void testNRTDeletedDocFiltering() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));
  int num = Math.min(1000, atLeast(10));
  int numLive = 0;
  List<Entry> expectedEntries = new ArrayList<>();
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, num - i));
    if (i % 2 == 0) {
      document.add(newStringField("str_field", "delete", Field.Store.YES));
    } else {
      numLive++;
      expectedEntries.add(new Entry("abc_" + i, num - i));
      document.add(newStringField("str_field", "no_delete", Field.Store.YES));
    }
    iw.addDocument(document);
    if (usually()) {
      iw.commit();
    }
  }
  iw.deleteDocuments(new Term("str_field", "delete"));

  DirectoryReader reader = DirectoryReader.open(iw);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
  assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));

  reader.close();
  iw.close();
}
Example 7
Source File: PrimeDocOverFlowHelper.java From incubator-retired-blur with Apache License 2.0 | 5 votes
private static Directory getDirectoryUpdateRow(String currentRowId) {
  try {
    RAMDirectory directoryUpdateRow = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directoryUpdateRow,
        new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer()));
    Document document = new Document();
    document.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
    document.add(new StringField(BlurConstants.UPDATE_ROW, currentRowId, Store.NO));
    writer.addDocument(document);
    writer.close();
    return directoryUpdateRow;
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
Example 8
Source File: IpColumnReferenceTest.java From crate with Apache License 2.0 | 5 votes
@Override
protected void insertValues(IndexWriter writer) throws Exception {
  addIPv4Values(writer);
  addIPv6Values(writer);

  // Doc without IP_COLUMN field to simulate NULL value
  Document doc = new Document();
  doc.add(new StringField("_id", Integer.toString(20), Field.Store.NO));
  writer.addDocument(doc);
}
Example 9
Source File: TestPhraseQuery.java From lucene-solr with Apache License 2.0 | 5 votes
public void testTopPhrases() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
  String[] docs = ArrayUtil.copyOfSubArray(DOCS, 0, DOCS.length);
  Collections.shuffle(Arrays.asList(docs), random());
  for (String value : DOCS) {
    Document doc = new Document();
    doc.add(new TextField("f", value, Store.NO));
    w.addDocument(doc);
  }
  IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher searcher = newSearcher(r);
  for (Query query : Arrays.asList(
      new PhraseQuery("f", "b", "c"), // common phrase
      new PhraseQuery("f", "e", "f"), // always appear next to each other
      new PhraseQuery("f", "d", "d")  // repeated term
      )) {
    for (int topN = 1; topN <= 2; ++topN) {
      TopScoreDocCollector collector1 = TopScoreDocCollector.create(topN, null, Integer.MAX_VALUE);
      searcher.search(query, collector1);
      ScoreDoc[] hits1 = collector1.topDocs().scoreDocs;
      TopScoreDocCollector collector2 = TopScoreDocCollector.create(topN, null, 1);
      searcher.search(query, collector2);
      ScoreDoc[] hits2 = collector2.topDocs().scoreDocs;
      assertTrue("" + query, hits1.length > 0);
      CheckHits.checkEqual(query, hits1, hits2);
    }
  }
  r.close();
  dir.close();
}
Example 10
Source File: InternalEngine.java From crate with Apache License 2.0 | 5 votes
private void addDocs(final List<ParseContext.Document> docs, final IndexWriter indexWriter) throws IOException {
  if (docs.size() > 1) {
    indexWriter.addDocuments(docs);
  } else {
    indexWriter.addDocument(docs.get(0));
  }
  numDocAppends.inc(docs.size());
}
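Example 10 dispatches between addDocuments(), which writes several documents as one atomic block, and addDocument() for the single-document case. For comparison, here is a minimal sketch of the block form; the field names and the helper class are illustrative assumptions, but the parent-last ordering is what Lucene's block-join queries expect.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;

final class BlockAddSketch {
  // Writes the children followed by their parent as one contiguous, atomic block.
  static void addBlock(IndexWriter writer) throws IOException {
    List<Document> block = new ArrayList<>();
    Document child = new Document();
    child.add(new StringField("type", "child", Field.Store.NO));
    block.add(child);
    Document parent = new Document();
    parent.add(new StringField("type", "parent", Field.Store.NO)); // parent must come last
    block.add(parent);
    writer.addDocuments(block); // one call, one block, all-or-nothing
  }
}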
Example 11
Source File: SecureAtomicReaderTestBase.java From incubator-retired-blur with Apache License 2.0 | 5 votes
private void addDoc(IndexWriter writer, AccessControlWriter accessControlWriter, String read,
    String discover, int doc, String... readMaskFields) throws IOException {
  Iterable<? extends IndexableField> fields = getDoc(doc);
  fields = accessControlWriter.addReadVisiblity(read, fields);
  fields = accessControlWriter.addDiscoverVisiblity(discover, fields);
  if (readMaskFields != null) {
    for (String readMaskField : readMaskFields) {
      fields = accessControlWriter.addReadMask(readMaskField, fields);
    }
  }
  writer.addDocument(accessControlWriter.lastStepBeforeIndexing(fields));
}
Example 12
Source File: TokenSourcesTest.java From lucene-solr with Apache License 2.0 | 4 votes
public void testOverlapWithOffset() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true); // no positions!
    customType.setStoreTermVectorOffsets(true);
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  assertEquals(1, indexReader.numDocs());
  final IndexSearcher indexSearcher = newSearcher(indexReader);
  try {
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    //     new SpanTermQuery(new Term(FIELD, "{fox}")),
    //     new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
    TopDocs hits = indexSearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(),
        new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
Example 13
Source File: Blur024CodecTest.java From incubator-retired-blur with Apache License 2.0 | 4 votes
@Test
public void testSmallDocs() throws IOException {
  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  //   System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {
    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();
    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);
    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  //   System.out.println(reader2.document(i));
  // }
  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
Example 14
Source File: TestMergeSchedulerExternal.java From lucene-solr with Apache License 2.0 | 4 votes
public void testSubclassConcurrentMergeScheduler() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  dir.failOn(new FailOnlyOnMerge());

  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new MyMergeScheduler())
      .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setMergePolicy(newLogMergePolicy());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  infoStream = new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8));
  iwc.setInfoStream(infoStream);

  IndexWriter writer = new IndexWriter(dir, iwc);
  LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
  logMP.setMergeFactor(10);
  for (int i = 0; i < 20; i++) {
    writer.addDocument(doc);
  }

  try {
    ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync();
  } catch (IllegalStateException ise) {
    // OK
  }
  writer.rollback();

  try {
    assertTrue(mergeThreadCreated);
    assertTrue(mergeCalled);
    assertTrue(excCalled);
  } catch (AssertionError ae) {
    System.out.println("TEST FAILED; IW infoStream output:");
    System.out.println(baos.toString(IOUtils.UTF_8));
    throw ae;
  }
  dir.close();
}
Example 15
Source File: FastVectorHighlighterTest.java From lucene-solr with Apache License 2.0 | 4 votes
public void testMultiValuedSortByScore() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  doc.add(new Field("field", "zero if naught", type)); // The first two fields contain the best match
  doc.add(new Field("field", "hero of legend", type)); // but total a lower score (3) than the bottom
  doc.add(new Field("field", "naught of hero", type)); // two fields (4)
  doc.add(new Field("field", "naught of hero", type));
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
  fragmentsBuilder.setDiscreteMultiValueHighlighting(true);
  IndexReader reader = DirectoryReader.open(writer);
  String[] preTags = new String[] { "<b>" };
  String[] postTags = new String[] { "</b>" };
  Encoder encoder = new DefaultEncoder();
  int docId = 0;
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add(clause("field", "hero"), Occur.SHOULD);
  query.add(clause("field", "of"), Occur.SHOULD);
  query.add(clause("field", "legend"), Occur.SHOULD);
  FieldQuery fieldQuery = highlighter.getFieldQuery(query.build(), reader);

  for (FragListBuilder fragListBuilder : new FragListBuilder[] {
      new SimpleFragListBuilder(), new WeightedFragListBuilder() }) {
    String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 20, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);

    bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 28, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);

    bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30000, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder);
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
  }

  reader.close();
  writer.close();
  dir.close();
}
Example 16
Source File: DefaultIndexManager.java From onedev with MIT License | 4 votes
private void indexBlob(IndexWriter writer, Repository repository,
    SymbolExtractor<Symbol> extractor, ObjectId blobId, String blobPath) throws IOException {
  Document document = new Document();
  document.add(new StoredField(BLOB_INDEX_VERSION.name(), getIndexVersion(extractor)));
  document.add(new StringField(BLOB_HASH.name(), blobId.name(), Store.NO));
  document.add(new StringField(BLOB_PATH.name(), blobPath, Store.NO));
  document.add(new BinaryDocValuesField(BLOB_PATH.name(),
      new BytesRef(blobPath.getBytes(StandardCharsets.UTF_8))));

  String blobName = blobPath;
  if (blobPath.indexOf('/') != -1)
    blobName = StringUtils.substringAfterLast(blobPath, "/");
  document.add(new StringField(BLOB_NAME.name(), blobName.toLowerCase(), Store.NO));

  ObjectLoader objectLoader = repository.open(blobId);
  if (objectLoader.getSize() <= MAX_INDEXABLE_SIZE) {
    byte[] bytes = objectLoader.getCachedBytes();
    String content = ContentDetector.convertToText(bytes, blobName);
    if (content != null) {
      document.add(new TextField(BLOB_TEXT.name(), content, Store.NO));
      if (extractor != null) {
        List<Symbol> symbols = null;
        try {
          symbols = extractor.extract(blobName, StringUtils.removeBOM(content));
        } catch (Exception e) {
          logger.trace("Can not extract symbols from blob (hash:" + blobId.name()
              + ", path:" + blobPath + ")", e);
        }
        if (symbols != null) {
          for (Symbol symbol : symbols) {
            String fieldValue = symbol.getName();
            if (fieldValue != null && symbol.isSearchable()) {
              fieldValue = fieldValue.toLowerCase();
              String fieldName;
              if (symbol.isPrimary())
                fieldName = BLOB_PRIMARY_SYMBOLS.name();
              else
                fieldName = BLOB_SECONDARY_SYMBOLS.name();
              document.add(new StringField(fieldName, fieldValue, Store.NO));
            }
          }
          byte[] bytesOfSymbols = SerializationUtils.serialize((Serializable) symbols);
          document.add(new StoredField(BLOB_SYMBOL_LIST.name(), bytesOfSymbols));
        }
      }
    } else {
      logger.debug("Ignore content of binary file '{}'.", blobPath);
    }
  } else {
    logger.debug("Ignore content of large file '{}'.", blobPath);
  }
  writer.addDocument(document);
}
Example 17
Source File: TestReqOptSumScorer.java From lucene-solr with Apache License 2.0 | 4 votes
public void testMaxScoreSegment() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  for (String[] values : Arrays.asList(
      new String[]{ "A" },      // 0
      new String[]{ "A" },      // 1
      new String[]{ },          // 2
      new String[]{ "A", "B" }, // 3
      new String[]{ "A" },      // 4
      new String[]{ "B" },      // 5
      new String[]{ "A", "B" }, // 6
      new String[]{ "B" }       // 7
      )) {
    Document doc = new Document();
    for (String value : values) {
      doc.add(new StringField("foo", value, Store.NO));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
  final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));

  Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextDown(1f));
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextUp(1f));
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, true);
  scorer.setMinCompetitiveScore(Math.nextUp(2f));
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  reader.close();
  dir.close();
}
Example 18
Source File: TestIndexOrDocValuesQuery.java From lucene-solr with Apache License 2.0 | 4 votes
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i < 1000) {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 500; j++) {
        doc.add(new LongPoint("f2", 42L));
        doc.add(new SortedNumericDocValuesField("f2", 42L));
      }
    } else if (i == 1001) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new SortedNumericDocValuesField("f2", 42L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j = 0; j < 100; j++) {
        doc.add(new LongPoint("f2", 2L));
        doc.add(new SortedNumericDocValuesField("f2", 2L));
      }
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();
  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s1.twoPhaseIterator()); // means we use points

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q3 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42),
          SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();
  final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
  final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s3.twoPhaseIterator()); // means we use doc values

  reader.close();
  w.close();
  dir.close();
}
Example 19
Source File: TokenSourcesTest.java From lucene-solr with Apache License 2.0 | 4 votes
public void testOverlapWithPositionsAndOffset() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    //     new SpanTermQuery(new Term(FIELD, "{fox}")),
    //     new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);
    TopDocs hits = indexSearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(),
        new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
Example 20
Source File: IndexBasedSpellCheckerTest.java From lucene-solr with Apache License 2.0 | 4 votes
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
      "jumpin jack flash",
      "Sargent Peppers Lonely Hearts Club Band",
      "Born to Run",
      "Thunder Road",
      "Londons Burning",
      "A Horse with No Name",
      "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());

  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir.toPath());
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()));
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();

  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
      dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);

  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1,
        SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash",
        entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size() == 0);

    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}