Java Code Examples for org.apache.lucene.index.IndexWriter#addDocument()

The following examples show how to use org.apache.lucene.index.IndexWriter#addDocument(). The examples are drawn from open source projects; the source file, project, and license are noted above each example.
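Before the project examples, here is a minimal, self-contained sketch of the basic addDocument() lifecycle (the class name, field names, and the in-memory directory are illustrative assumptions against a recent Lucene 8.x-style API, not taken from any project below): open a writer, buffer a document, commit, then close.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class AddDocumentSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory(); // in-memory index; illustrative choice
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

    Document doc = new Document();
    doc.add(new TextField("body", "hello lucene", Field.Store.YES));
    writer.addDocument(doc); // buffers the document; not yet visible to new readers

    writer.commit();         // makes the added document durable and searchable
    writer.close();
    dir.close();
  }
}

The examples that follow repeat this pattern while varying the Directory implementation, analyzer, and field types.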
Example 1
Source File: TestSearcherManager.java    From lucene-solr with Apache License 2.0
public void testListenerCalled() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(null));
  final AtomicBoolean afterRefreshCalled = new AtomicBoolean(false);
  SearcherManager sm = new SearcherManager(iw, false, false, new SearcherFactory());
  sm.addListener(new ReferenceManager.RefreshListener() {
    @Override
    public void beforeRefresh() {
    }
    @Override
    public void afterRefresh(boolean didRefresh) {
      if (didRefresh) {
        afterRefreshCalled.set(true);
      }
    }
  });
  iw.addDocument(new Document());
  iw.commit();
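  // commit() makes the new document durable but does not refresh the SearcherManager,
  // so the refresh listener has not fired yet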
  assertFalse(afterRefreshCalled.get());
  sm.maybeRefreshBlocking();
  assertTrue(afterRefreshCalled.get());
  sm.close();
  iw.close();
  dir.close();
}
 
Example 2
Source File: TatoebaParser.java    From aedict with GNU General Public License v3.0
private void writeLucene(IndexWriter writer) throws IOException {
    final Set<String> languages = new HashSet<String>();
    int sc = 0;
    for (final Entry<Integer, Sentences> e : sentences.entrySet()) {
        if (e.getValue().bLine == null) {
            System.out.println("Missing B-Line for sentence " + e.getKey() + ", skipping");
        } else {
            languages.addAll(e.getValue().sentences.keySet());
            final Document doc = new Document();
            doc.add(new Field("japanese", e.getValue().japanese, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("translations", e.getValue().getSentences(), Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("jp-deinflected", e.getValue().bLine.dictionaryFormWordList, Field.Store.YES, Field.Index.ANALYZED));
            doc.add(new Field("kana", CompressionTools.compressString(e.getValue().bLine.kana), Field.Store.YES));
            writer.addDocument(doc);
            sc++;
        }
    }
    System.out.println("Lucene indexed " + sc + " example sentences");
    System.out.println("Tatoeba contains sentences in the following languages: " + languages);
}
 
Example 3
Source File: QueryAutoStopWordAnalyzerTest.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  dir = new ByteBuffersDirectory();
  appAnalyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(appAnalyzer));
  int numDocs = 200;
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    String variedFieldValue = variedFieldValues[i % variedFieldValues.length];
    String repetitiveFieldValue = repetitiveFieldValues[i % repetitiveFieldValues.length];
    doc.add(new TextField("variedField", variedFieldValue, Field.Store.YES));
    doc.add(new TextField("repetitiveField", repetitiveFieldValue, Field.Store.YES));
    writer.addDocument(doc);
  }
  writer.close();
  reader = DirectoryReader.open(dir);
}
 
Example 4
Source File: TableShardCountCollapserTest.java    From incubator-retired-blur with Apache License 2.0
private static void createShard(Configuration configuration, int i, Path path, int totalShardCount)
    throws IOException {
  HdfsDirectory hdfsDirectory = new HdfsDirectory(configuration, path);
  IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
  TieredMergePolicy mergePolicy = (TieredMergePolicy) conf.getMergePolicy();
  mergePolicy.setUseCompoundFile(false);
  IndexWriter indexWriter = new IndexWriter(hdfsDirectory, conf);

  Partitioner<IntWritable, IntWritable> partitioner = new HashPartitioner<IntWritable, IntWritable>();
  int partition = partitioner.getPartition(new IntWritable(i), null, totalShardCount);
  assertEquals(i, partition);

  Document doc = getDoc(i);
  indexWriter.addDocument(doc);
  indexWriter.close();
}
 
Example 5
Source File: HighlighterPhraseTest.java    From lucene-solr with Apache License 2.0
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamSparse(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(FIELD, "did")),
        new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);

    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(
        highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 6
Source File: TestSuggestField.java    From lucene-solr with Apache License 2.0
@Test
public void testNRTDeletedDocFiltering() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));

  int num = Math.min(1000, atLeast(10));

  int numLive = 0;
  List<Entry> expectedEntries = new ArrayList<>();
  for (int i = 0; i < num; i++) {
    Document document = new Document();
    document.add(new SuggestField("suggest_field", "abc_" + i, num - i));
    if (i % 2 == 0) {
      document.add(newStringField("str_field", "delete", Field.Store.YES));
    } else {
      numLive++;
      expectedEntries.add(new Entry("abc_" + i, num - i));
      document.add(newStringField("str_field", "no_delete", Field.Store.YES));
    }
    iw.addDocument(document);

    if (usually()) {
      iw.commit();
    }
  }

  iw.deleteDocuments(new Term("str_field", "delete"));

  DirectoryReader reader = DirectoryReader.open(iw);
  SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
  PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
  TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
  assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));

  reader.close();
  iw.close();
}
 
Example 7
Source File: PrimeDocOverFlowHelper.java    From incubator-retired-blur with Apache License 2.0
private static Directory getDirectoryUpdateRow(String currentRowId) {
  try {
    RAMDirectory directoryUpdateRow = new RAMDirectory();
    IndexWriter writer = new IndexWriter(directoryUpdateRow, new IndexWriterConfig(Version.LUCENE_43,
        new KeywordAnalyzer()));
    Document document = new Document();
    document.add(new StringField(BlurConstants.PRIME_DOC, BlurConstants.PRIME_DOC_VALUE, Store.NO));
    document.add(new StringField(BlurConstants.UPDATE_ROW, currentRowId, Store.NO));
    writer.addDocument(document);
    writer.close();
    return directoryUpdateRow;
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
 
Example 8
Source File: IpColumnReferenceTest.java    From crate with Apache License 2.0
@Override
protected void insertValues(IndexWriter writer) throws Exception {
    addIPv4Values(writer);
    addIPv6Values(writer);

    // Doc without IP_COLUMN field to simulate NULL value
    Document doc = new Document();
    doc.add(new StringField("_id", Integer.toString(20), Field.Store.NO));
    writer.addDocument(doc);
}
 
Example 9
Source File: TestPhraseQuery.java    From lucene-solr with Apache License 2.0
public void testTopPhrases() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
  String[] docs = ArrayUtil.copyOfSubArray(DOCS, 0, DOCS.length);
  Collections.shuffle(Arrays.asList(docs), random());
  for (String value : docs) { // iterate the shuffled copy, not the original DOCS
    Document doc = new Document();
    doc.add(new TextField("f", value, Store.NO));
    w.addDocument(doc);
  }
  IndexReader r = DirectoryReader.open(w);
  w.close();
  IndexSearcher searcher = newSearcher(r);
  for (Query query : Arrays.asList(
      new PhraseQuery("f", "b", "c"), // common phrase
      new PhraseQuery("f", "e", "f"), // always appear next to each other
      new PhraseQuery("f", "d", "d")  // repeated term
      )) {
    for (int topN = 1; topN <= 2; ++topN) {
      TopScoreDocCollector collector1 = TopScoreDocCollector.create(topN, null, Integer.MAX_VALUE);
      searcher.search(query, collector1);
      ScoreDoc[] hits1 = collector1.topDocs().scoreDocs;
      TopScoreDocCollector collector2 = TopScoreDocCollector.create(topN, null, 1);
      searcher.search(query, collector2);
      ScoreDoc[] hits2 = collector2.topDocs().scoreDocs;
      assertTrue("" + query, hits1.length > 0);
      CheckHits.checkEqual(query, hits1, hits2);
    }
  }
  r.close();
  dir.close();
}
 
Example 10
Source File: InternalEngine.java    From crate with Apache License 2.0
private void addDocs(final List<ParseContext.Document> docs, final IndexWriter indexWriter) throws IOException {
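    // a multi-document batch is indexed atomically as a contiguous block via addDocuments()
    // (keeps parent/child documents adjacent); a single document goes through addDocument()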
    if (docs.size() > 1) {
        indexWriter.addDocuments(docs);
    } else {
        indexWriter.addDocument(docs.get(0));
    }
    numDocAppends.inc(docs.size());
}
 
Example 11
Source File: SecureAtomicReaderTestBase.java    From incubator-retired-blur with Apache License 2.0
private void addDoc(IndexWriter writer, AccessControlWriter accessControlWriter, String read, String discover,
    int doc, String... readMaskFields) throws IOException {
  Iterable<? extends IndexableField> fields = getDoc(doc);
  fields = accessControlWriter.addReadVisiblity(read, fields);
  fields = accessControlWriter.addDiscoverVisiblity(discover, fields);
  if (readMaskFields != null) {
    for (String readMaskField : readMaskFields) {
      fields = accessControlWriter.addReadMask(readMaskField, fields);
    }
  }
  writer.addDocument(accessControlWriter.lastStepBeforeIndexing(fields));
}
 
Example 12
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
public void testOverlapWithOffset() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    // no positions!
    customType.setStoreTermVectorOffsets(true);
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  assertEquals(1, indexReader.numDocs());
  final IndexSearcher indexSearcher = newSearcher(indexReader);
  try {
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    // new SpanTermQuery(new Term(FIELD, "{fox}")),
    // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);

    TopDocs hits = indexSearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 13
Source File: Blur024CodecTest.java    From incubator-retired-blur with Apache License 2.0
@Test
public void testSmallDocs() throws IOException {

  RAMDirectory directory = new RAMDirectory();
  IndexWriterConfig conf1 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf1.setCodec(new Blur024Codec());
  Random random1 = new Random(1);
  IndexWriter writer1 = new IndexWriter(directory, conf1);
  for (int i = 0; i < 1000; i++) {
    writer1.addDocument(getSmallDoc(random1));
  }
  writer1.close();

  DirectoryReader reader1 = DirectoryReader.open(directory);
  int numDocs1 = reader1.numDocs();
  assertEquals(1000, numDocs1);

  // for (int i = 0; i < numDocs1; i++) {
  // System.out.println(reader1.document(i));
  // }

  IndexWriterConfig conf2 = new IndexWriterConfig(Version.LUCENE_43, new WhitespaceAnalyzer(Version.LUCENE_43));
  conf2.setCodec(new Blur024Codec(1 << 16, CompressionMode.HIGH_COMPRESSION));
  Random random2 = new Random(1);
  IndexWriter writer2 = new IndexWriter(directory, conf2);
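  // the default OpenMode is CREATE_OR_APPEND, so writer2 appends to the existing
  // 1000 documents rather than replacing them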
  for (int i = 0; i < 1000; i++) {
    writer2.addDocument(getSmallDoc(random2));
  }
  writer2.close();

  DirectoryReader reader2 = DirectoryReader.open(directory);
  int numDocs2 = reader2.numDocs();
  assertEquals(2000, numDocs2);

  for (int i = 0; i < 2; i++) {

    long t1 = System.nanoTime();
    long hash1 = 0;
    long hash2 = 0;
    for (int d = 0; d < 1000; d++) {
      Document document1 = reader1.document(d);
      hash1 += document1.hashCode();
    }
    long t2 = System.nanoTime();
    for (int d = 0; d < 1000; d++) {
      Document document2 = reader2.document(d + 1000);
      hash2 += document2.hashCode();
    }
    long t3 = System.nanoTime();

    System.out.println((t3 - t2) / 1000000.0);
    System.out.println((t2 - t1) / 1000000.0);

    System.out.println("doc1 " + hash1);
    System.out.println("doc2 " + hash2);
  }

  // for (int i = 0; i < numDocs2; i++) {
  // System.out.println(reader2.document(i));
  // }

  // long fileLength = directory.fileLength("_0.fdt");

  for (String name : directory.listAll()) {
    if (name.endsWith(".fdt")) {
      System.out.println(name);
      System.out.println(directory.fileLength(name));
    }
  }
}
 
Example 14
Source File: TestMergeSchedulerExternal.java    From lucene-solr with Apache License 2.0
public void testSubclassConcurrentMergeScheduler() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  dir.failOn(new FailOnlyOnMerge());

  Document doc = new Document();
  Field idField = newStringField("id", "", Field.Store.YES);
  doc.add(idField);

  IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()))
    .setMergeScheduler(new MyMergeScheduler())
    .setMaxBufferedDocs(2).setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
    .setMergePolicy(newLogMergePolicy());

  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  infoStream = new PrintStreamInfoStream(new PrintStream(baos, true, IOUtils.UTF_8));
  iwc.setInfoStream(infoStream);

  IndexWriter writer = new IndexWriter(dir, iwc);
  LogMergePolicy logMP = (LogMergePolicy) writer.getConfig().getMergePolicy();
  logMP.setMergeFactor(10);
  for(int i=0;i<20;i++) {
    writer.addDocument(doc);
  }

  try {
    ((MyMergeScheduler) writer.getConfig().getMergeScheduler()).sync();
  } catch (IllegalStateException ise) {
    // OK
  }
  writer.rollback();

  try {
    assertTrue(mergeThreadCreated);
    assertTrue(mergeCalled);
    assertTrue(excCalled);
  } catch (AssertionError ae) {
    System.out.println("TEST FAILED; IW infoStream output:");
    System.out.println(baos.toString(IOUtils.UTF_8));
    throw ae;
  }
  dir.close();
}
 
Example 15
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
public void testMultiValuedSortByScore() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer( random() ) ) );
  Document doc = new Document();
  FieldType type = new FieldType( TextField.TYPE_STORED );
  type.setStoreTermVectorOffsets( true );
  type.setStoreTermVectorPositions( true );
  type.setStoreTermVectors( true );
  type.freeze();
  doc.add( new Field( "field", "zero if naught", type ) ); // The first two fields contain the best match
  doc.add( new Field( "field", "hero of legend", type ) ); // but total a lower score (3) than the bottom
  doc.add( new Field( "field", "naught of hero", type ) ); // two fields (4)
  doc.add( new Field( "field", "naught of hero", type ) );
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();    
  fragmentsBuilder.setDiscreteMultiValueHighlighting( true );
  IndexReader reader = DirectoryReader.open(writer);
  String[] preTags = new String[] { "<b>" };
  String[] postTags = new String[] { "</b>" };
  Encoder encoder = new DefaultEncoder();
  int docId = 0;
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add( clause( "field", "hero" ), Occur.SHOULD);
  query.add( clause( "field", "of" ), Occur.SHOULD);
  query.add( clause( "field", "legend" ), Occur.SHOULD);
  FieldQuery fieldQuery = highlighter.getFieldQuery( query.build(), reader );

  for ( FragListBuilder fragListBuilder : new FragListBuilder[] {
    new SimpleFragListBuilder(), new WeightedFragListBuilder() } ) {
    String[] bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 20, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 28, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 30000, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
  }

  reader.close();
  writer.close();
  dir.close();
}
 
Example 16
Source File: DefaultIndexManager.java    From onedev with MIT License
private void indexBlob(IndexWriter writer, Repository repository, 
		SymbolExtractor<Symbol> extractor, ObjectId blobId, String blobPath) throws IOException {
	Document document = new Document();
	
	document.add(new StoredField(BLOB_INDEX_VERSION.name(), getIndexVersion(extractor)));
	document.add(new StringField(BLOB_HASH.name(), blobId.name(), Store.NO));
	document.add(new StringField(BLOB_PATH.name(), blobPath, Store.NO));
	document.add(new BinaryDocValuesField(BLOB_PATH.name(), new BytesRef(blobPath.getBytes(StandardCharsets.UTF_8))));
	
	String blobName = blobPath;
	if (blobPath.indexOf('/') != -1) 
		blobName = StringUtils.substringAfterLast(blobPath, "/");
	
	document.add(new StringField(BLOB_NAME.name(), blobName.toLowerCase(), Store.NO));
	
	ObjectLoader objectLoader = repository.open(blobId);
	if (objectLoader.getSize() <= MAX_INDEXABLE_SIZE) {
		byte[] bytes = objectLoader.getCachedBytes();
		String content = ContentDetector.convertToText(bytes, blobName);
		if (content != null) {
			document.add(new TextField(BLOB_TEXT.name(), content, Store.NO));
			
			if (extractor != null) {
				List<Symbol> symbols = null;
				try {
					symbols = extractor.extract(blobName, StringUtils.removeBOM(content));
				} catch (Exception e) {
					logger.trace("Can not extract symbols from blob (hash:" + blobId.name() + ", path:" + blobPath + ")", e);
				}
				if (symbols != null) {
					for (Symbol symbol: symbols) {
						String fieldValue = symbol.getName();
						if (fieldValue != null && symbol.isSearchable()) {
							fieldValue = fieldValue.toLowerCase();

							String fieldName;
							if (symbol.isPrimary())
								fieldName = BLOB_PRIMARY_SYMBOLS.name();
							else
								fieldName = BLOB_SECONDARY_SYMBOLS.name();
							document.add(new StringField(fieldName, fieldValue, Store.NO));
						}
					}
					byte[] bytesOfSymbols = SerializationUtils.serialize((Serializable) symbols);
					document.add(new StoredField(BLOB_SYMBOL_LIST.name(), bytesOfSymbols));
				}
			} 
		} else {
			logger.debug("Ignore content of binary file '{}'.", blobPath);
		}
	} else {
		logger.debug("Ignore content of large file '{}'.", blobPath);
	}

	writer.addDocument(document);
}
 
Example 17
Source File: TestReqOptSumScorer.java    From lucene-solr with Apache License 2.0
public void testMaxScoreSegment() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(newLogMergePolicy()));
  for (String[] values : Arrays.asList(
      new String[]{ "A" },            // 0
      new String[]{ "A" },            // 1
      new String[]{ },                // 2
      new String[]{ "A", "B" },       // 3
      new String[]{ "A" },            // 4
      new String[]{ "B" },            // 5
      new String[]{ "A", "B" },       // 6
      new String[]{ "B" }             // 7
  )) {
    Document doc = new Document();
    for (String value : values) {
      doc.add(new StringField("foo", value, Store.NO));
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  w.close();

  IndexReader reader = DirectoryReader.open(dir);
  IndexSearcher searcher = newSearcher(reader);
  final Query reqQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "A")));
  final Query optQ = new ConstantScoreQuery(new TermQuery(new Term("foo", "B")));
  Scorer scorer = reqOptScorer(searcher, reqQ, optQ, false);
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextDown(1f));
  assertEquals(0, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(1, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(4, scorer.iterator().nextDoc());
  assertEquals(1, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, false);
  scorer.setMinCompetitiveScore(Math.nextUp(1f));
  assertEquals(3, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(6, scorer.iterator().nextDoc());
  assertEquals(2, scorer.score(), 0);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  scorer = reqOptScorer(searcher, reqQ, optQ, true);
  scorer.setMinCompetitiveScore(Math.nextUp(2f));
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, scorer.iterator().nextDoc());

  reader.close();
  dir.close();
}
 
Example 18
Source File: TestIndexOrDocValuesQuery.java    From lucene-solr with Apache License 2.0
public void testUseIndexForSelectiveMultiValueQueries() throws IOException {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()
      // relies on costs and PointValues.estimateCost so we need the default codec
      .setCodec(TestUtil.getDefaultCodec()));
  for (int i = 0; i < 2000; ++i) {
    Document doc = new Document();
    if (i < 1000) {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j =0; j < 500; j++) {
        doc.add(new LongPoint("f2", 42L));
        doc.add(new SortedNumericDocValuesField("f2", 42L));
      }
    } else if (i == 1001) {
      doc.add(new StringField("f1", "foo", Store.NO));
      doc.add(new LongPoint("f2", 2L));
      doc.add(new SortedNumericDocValuesField("f2", 42L));
    } else {
      doc.add(new StringField("f1", "bar", Store.NO));
      for (int j =0; j < 100; j++) {
        doc.add(new LongPoint("f2", 2L));
        doc.add(new SortedNumericDocValuesField("f2", 2L));
      }
    }
    w.addDocument(doc);
  }
  w.forceMerge(1);
  IndexReader reader = DirectoryReader.open(w);
  IndexSearcher searcher = newSearcher(reader);
  searcher.setQueryCache(null);

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q1 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 2), SortedNumericDocValuesField.newSlowRangeQuery("f2", 2L, 2L)), Occur.MUST)
      .build();

  final Weight w1 = searcher.createWeight(searcher.rewrite(q1), ScoreMode.COMPLETE, 1);
  final Scorer s1 = w1.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s1.twoPhaseIterator()); // means we use points

  // The term query is less selective, so the IndexOrDocValuesQuery should use points
  final Query q2 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "bar")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();

  final Weight w2 = searcher.createWeight(searcher.rewrite(q2), ScoreMode.COMPLETE, 1);
  final Scorer s2 = w2.scorer(searcher.getIndexReader().leaves().get(0));
  assertNull(s2.twoPhaseIterator()); // means we use points

  // The term query is more selective, so the IndexOrDocValuesQuery should use doc values
  final Query q3 = new BooleanQuery.Builder()
      .add(new TermQuery(new Term("f1", "foo")), Occur.MUST)
      .add(new IndexOrDocValuesQuery(LongPoint.newExactQuery("f2", 42), SortedNumericDocValuesField.newSlowRangeQuery("f2", 42, 42L)), Occur.MUST)
      .build();

  final Weight w3 = searcher.createWeight(searcher.rewrite(q3), ScoreMode.COMPLETE, 1);
  final Scorer s3 = w3.scorer(searcher.getIndexReader().leaves().get(0));
  assertNotNull(s3.twoPhaseIterator()); // means we use doc values

  reader.close();
  w.close();
  dir.close();
}
 
Example 19
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
public void testOverlapWithPositionsAndOffset()
    throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);
    // final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
    // new SpanTermQuery(new Term(FIELD, "{fox}")),
    // new SpanTermQuery(new Term(FIELD, "fox")) }, 0, true);

    TopDocs hits = indexSearcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 20
Source File: IndexBasedSpellCheckerTest.java    From lucene-solr with Apache License 2.0
@Test
@SuppressWarnings({"unchecked"})
public void testAlternateLocation() throws Exception {
  String[] ALT_DOCS = new String[]{
          "jumpin jack flash",
          "Sargent Peppers Lonely Hearts Club Band",
          "Born to Run",
          "Thunder Road",
          "Londons Burning",
          "A Horse with No Name",
          "Sweet Caroline"
  };

  IndexBasedSpellChecker checker = new IndexBasedSpellChecker();
  @SuppressWarnings({"rawtypes"})
  NamedList spellchecker = new NamedList();
  spellchecker.add("classname", IndexBasedSpellChecker.class.getName());
  
  File tmpDir = createTempDir().toFile();
  File indexDir = new File(tmpDir, "spellingIdx");
  //create a standalone index
  File altIndexDir = new File(tmpDir, "alternateIdx" + new Date().getTime());
  Directory dir = newFSDirectory(altIndexDir.toPath());
  IndexWriter iw = new IndexWriter(
      dir,
      new IndexWriterConfig(new WhitespaceAnalyzer())
  );
  for (int i = 0; i < ALT_DOCS.length; i++) {
    Document doc = new Document();
    doc.add(new TextField("title", ALT_DOCS[i], Field.Store.YES));
    iw.addDocument(doc);
  }
  iw.forceMerge(1);
  iw.close();
  dir.close();
  indexDir.mkdirs();
  spellchecker.add(AbstractLuceneSpellChecker.INDEX_DIR, indexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.LOCATION, altIndexDir.getAbsolutePath());
  spellchecker.add(AbstractLuceneSpellChecker.FIELD, "title");
  spellchecker.add(AbstractLuceneSpellChecker.SPELLCHECKER_ARG_NAME, spellchecker);
  SolrCore core = h.getCore();
  String dictName = checker.init(spellchecker, core);
  assertTrue(dictName + " is not equal to " + SolrSpellChecker.DEFAULT_DICTIONARY_NAME,
          dictName.equals(SolrSpellChecker.DEFAULT_DICTIONARY_NAME) == true);
  h.getCore().withSearcher(searcher -> {
    checker.build(core, searcher);

    IndexReader reader = searcher.getIndexReader();
    Collection<Token> tokens = queryConverter.convert("flesh");
    SpellingOptions spellOpts = new SpellingOptions(tokens, reader, 1, SuggestMode.SUGGEST_WHEN_NOT_IN_INDEX, true, 0.5f, null);
    SpellingResult result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    //should be lowercased, b/c we are using a lowercasing analyzer
    Map<String, Integer> suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("flesh is null and it shouldn't be", suggestions != null);
    assertTrue("flesh Size: " + suggestions.size() + " is not: " + 1, suggestions.size() == 1);
    Map.Entry<String, Integer> entry = suggestions.entrySet().iterator().next();
    assertTrue(entry.getKey() + " is not equal to " + "flash", entry.getKey().equals("flash") == true);
    assertTrue(entry.getValue() + " does not equal: " + 1, entry.getValue() == 1);

    //test something not in the spell checker
    spellOpts.tokens = queryConverter.convert("super");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions size should be 0", suggestions.size()==0);

    spellOpts.tokens = queryConverter.convert("Caroline");
    result = checker.getSuggestions(spellOpts);
    assertTrue("result is null and it shouldn't be", result != null);
    suggestions = result.get(spellOpts.tokens.iterator().next());
    assertTrue("suggestions is not null and it should be", suggestions == null);
    return null;
  });
}