org.apache.lucene.document.TextField Java Examples

The following examples show how to use org.apache.lucene.document.TextField. You can vote up the examples you like and vote down the ones you don't, and you can visit the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example #1
Source File: TestIndexReaderFunctions.java    From lucene-solr with Apache License 2.0 7 votes vote down vote up
/** Builds the shared random index over {@code documents} once for the whole suite. */
@BeforeClass
public static void beforeClass() throws Exception {
  dir = newDirectory();
  analyzer = new MockAnalyzer(random());
  IndexWriterConfig config = newIndexWriterConfig(analyzer);
  config.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
  for (String[] fields : documents) {
    Document d = new Document();
    // Each keyword field gets a doc-values twin so tests can both filter and sort on it.
    d.add(new StringField("id", fields[0], Field.Store.NO));
    d.add(new SortedDocValuesField("id", new BytesRef(fields[0])));
    d.add(new StringField("string", fields[5], Field.Store.NO));
    d.add(new SortedDocValuesField("string", new BytesRef(fields[5])));
    d.add(new TextField("text", fields[6], Field.Store.NO));
    writer.addDocument(d);
  }

  reader = writer.getReader();
  searcher = newSearcher(reader);
  writer.close();
}
 
Example #2
Source File: AbstractLuceneQueryVisitorTest.java    From cxf with Apache License 2.0 7 votes vote down vote up
/** Indexes a single document into a temp MMap directory before each test. */
@Before
public void setUp() throws Exception {
    analyzer = new StandardAnalyzer();
    tempDirectory = Files.createTempDirectory("lucene");
    directory = new MMapDirectory(tempDirectory);
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));

    Document document = new Document();
    // Stored, analyzed text field the query visitor tests run against.
    document.add(new Field("contents", "name=text", TextField.TYPE_STORED));
    // Point field for numeric queries plus a stored twin so the value is retrievable.
    document.add(new IntPoint("intfield", 4));
    document.add(new StoredField("intfield", 4));
    writer.addDocument(document);

    writer.close();
    ireader = DirectoryReader.open(directory);
    isearcher = new IndexSearcher(ireader);
}
 
Example #3
Source File: FullTextIndex.java    From jease with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Sets up the in-memory full-text index: analyzer, writer, query parser, and the
 * reusable document/field instances that are refilled for each indexed object.
 */
public FullTextIndex() {
    try {
        objects = new ArrayList<>();

        Analyzer standardAnalyzer = new StandardAnalyzer();
        queryParser = new QueryParser("text", standardAnalyzer);
        queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);

        indexDirectory = new RAMDirectory();
        // Effectively unlimited tokens per field.
        indexWriter = new IndexWriter(indexDirectory,
                new IndexWriterConfig(new LimitTokenCountAnalyzer(standardAnalyzer, Integer.MAX_VALUE)));

        fulltext = new TextField("text", "", Field.Store.NO);
        // Constant field present in every document; used as the base set for NOT queries.
        Field notQueryBase = new TextField("true", "yes", Field.Store.NO);

        document = new Document();
        document.add(fulltext);
        document.add(notQueryBase);
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}
 
Example #4
Source File: PageDocumentTransformer.java    From gravitee-management-rest-api with Apache License 2.0 6 votes vote down vote up
/**
 * Maps a {@link PageEntity} onto a Lucene document: id/type are stored keywords,
 * name/content are analyzed search-only fields, and the owning API id is stored
 * when the page belongs to an API.
 */
@Override
public Document transform(PageEntity page) {
    Document document = new Document();

    document.add(new StringField(FIELD_ID, page.getId(), Field.Store.YES));
    document.add(new StringField(FIELD_TYPE, FIELD_TYPE_VALUE, Field.Store.YES));

    String name = page.getName();
    if (name != null) {
        document.add(new TextField(FIELD_NAME, name, Field.Store.NO));
    }

    String content = page.getContent();
    if (content != null) {
        document.add(new TextField(FIELD_CONTENT, content, Field.Store.NO));
    }

    if (page instanceof ApiPageEntity) {
        String api = ((ApiPageEntity) page).getApi();
        if (api != null) {
            document.add(new StringField(FIELD_API, api, Field.Store.YES));
        }
    }

    return document;
}
 
Example #5
Source File: TestIndexWriterOnDiskFull.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that a "disk full" failure on the very first addDocument leaves the
 * writer in the expected tragic state: both the writer and its file deleter closed.
 */
public void testImmediateDiskFull() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                              .setMaxBufferedDocs(2)
                                              .setMergeScheduler(new ConcurrentMergeScheduler())
                                              .setCommitOnClose(false));
  writer.commit(); // empty commit, to not create confusing situation with first commit
  // Cap the directory at its current size so the next write immediately hits "disk full".
  dir.setMaxSizeInBytes(Math.max(1, dir.sizeInBytes()));
  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  expectThrows(IOException.class, () -> {
    writer.addDocument(doc);
  });
  // The failure must have tragically closed the writer and its deleter.
  assertTrue(writer.isDeleterClosed());
  assertTrue(writer.isClosed());

  dir.close();
}
 
Example #6
Source File: InMemoryLuceneIndex.java    From tutorials with MIT License 6 votes vote down vote up
/**
 * 
 * @param title
 * @param body
 */
public void indexDocument(String title, String body) {

    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    try {
        IndexWriter writter = new IndexWriter(memoryIndex, indexWriterConfig);
        Document document = new Document();

        document.add(new TextField("title", title, Field.Store.YES));
        document.add(new TextField("body", body, Field.Store.YES));
        document.add(new SortedDocValuesField("title", new BytesRef(title)));

        writter.addDocument(document);
        writter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example #7
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Builds a document with one term-vector-enabled field per non-NONE {@link IndexOptions}. */
private Document newDocument() {
  Document doc = new Document();
  for (IndexOptions indexOptions : IndexOptions.values()) {
    if (indexOptions == IndexOptions.NONE) {
      continue;
    }
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    // Enable every term-vector flavor so CheckIndex can cross-check the postings.
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorOffsets(true);
    fieldType.setStoreTermVectorPositions(true);
    fieldType.setStoreTermVectorPayloads(true);
    fieldType.setIndexOptions(indexOptions);
    doc.add(new Field(indexOptions.toString(), "", fieldType));
  }
  return doc;
}
 
Example #8
Source File: SourceSimpleFragmentsBuilder.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Loads the values to highlight for {@code fieldName} straight from the document
 * _source (the field itself is not stored), wrapping each raw value in an
 * unstored Field so the fragments builder can consume them.
 */
@Override
protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException {
    // we know its low level reader, and matching docId, since that's how we call the highlighter with
    SourceLookup sourceLookup = searchContext.lookup().source();
    sourceLookup.setSegmentAndDocument((LeafReaderContext) reader.getContext(), docId);

    List<Object> values = sourceLookup.extractRawValues(hitContext.getSourcePath(mapper.fieldType().names().fullName()));
    if (values.isEmpty()) {
        return EMPTY_FIELDS;
    }
    // One synthetic, unstored field per extracted _source value.
    Field[] fields = new Field[values.size()];
    for (int i = 0; i < values.size(); i++) {
        fields[i] = new Field(mapper.fieldType().names().indexName(), values.get(i).toString(), TextField.TYPE_NOT_STORED);
    }
    return fields;
}
 
Example #9
Source File: SimpleTransLog.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Replays an "add document" translog entry: decodes the docid plus the optional
 * title/body/marker fields, then re-adds the document on the primary.
 */
private void replayAddDocument(Connection c, NodeProcess primary, DataInput in) throws IOException {
  String id = in.readString();

  Document doc = new Document();
  doc.add(new StringField("docid", id, Field.Store.YES));

  String title = readNullableString(in);
  if (title != null) {
    // Exact-match field plus an analyzed copy for full-text search.
    doc.add(new StringField("title", title, Field.Store.NO));
    doc.add(new TextField("titleTokenized", title, Field.Store.NO));
  }
  String body = readNullableString(in);
  if (body != null) {
    doc.add(new TextField("body", body, Field.Store.NO));
  }
  String marker = readNullableString(in);
  if (marker != null) {
    //TestStressNRTReplication.message("xlog: replay marker=" + id);
    doc.add(new StringField("marker", marker, Field.Store.YES));
  }

  // For both add and update originally, we use updateDocument to replay,
  // because the doc could in fact already be in the index:
  // nocommit what if this fails?
  primary.addOrUpdateDocument(c, doc, false);
}
 
Example #10
Source File: IndexSearcherTest.java    From incubator-retired-blur with Apache License 2.0 6 votes vote down vote up
/**
 * Builds the field set for a test document, optionally wrapping it with
 * read and/or discover visibility controls.
 */
private Iterable<? extends IndexableField> getDoc(int docId, String read, String discover, String field1,
    String field2) {
  Document document = new Document();
  document.add(new StringField("id", Integer.toString(docId), Store.YES));
  AccessControlWriter aclWriter = _accessControlFactory.getWriter();
  document.add(new StringField("f1", field1, Store.YES));
  document.add(new StringField("f2", field2, Store.YES));
  document.add(new TextField("text", "constant text", Store.YES));
  Iterable<? extends IndexableField> result = document;
  if (read != null) {
    // Note: wraps the raw document, not any previously wrapped result.
    result = aclWriter.addReadVisiblity(read, document);
  }
  if (discover != null) {
    result = aclWriter.addDiscoverVisiblity(discover, result);
  }
  return result;
}
 
Example #11
Source File: SolrDocumentFetcher.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Returns a collection of the names of all stored fields which can be highlighted the index reader knows about.
 */
public Collection<String> getStoredHighlightFieldNames() {
  synchronized (this) {
    if (storedHighlightFieldNames == null) {
      storedHighlightFieldNames = new LinkedList<>();
      for (FieldInfo fieldInfo : searcher.getFieldInfos()) {
        final String fieldName = fieldInfo.name;
        try {
          SchemaField field = searcher.getSchema().getField(fieldName);
          if (field.stored() && ((field.getType() instanceof org.apache.solr.schema.TextField)
              || (field.getType() instanceof org.apache.solr.schema.StrField))) {
            storedHighlightFieldNames.add(fieldName);
          }
        } catch (RuntimeException e) { // getField() throws a SolrException, but it arrives as a RuntimeException
          log.warn("Field [{}] found in index, but not defined in schema.", fieldName);
        }
      }
    }
    return storedHighlightFieldNames;
  }
}
 
Example #12
Source File: LuceneSearch.java    From zeppelin with Apache License 2.0 6 votes vote down vote up
/**
 * If paragraph is not null, indexes code in the paragraph, otherwise indexes the notebook name.
 *
 * @param id id of the document, different for Note name and paragraph
 * @param noteName name of the note
 * @param p paragraph
 * @return
 */
private Document newDocument(String id, String noteName, Paragraph p) {
  Document doc = new Document();

  Field pathField = new StringField(ID_FIELD, id, Field.Store.YES);
  doc.add(pathField);
  doc.add(new StringField("title", noteName, Field.Store.YES));

  if (null != p) {
    doc.add(new TextField(SEARCH_FIELD_TEXT, p.getText(), Field.Store.YES));
    if (p.getTitle() != null) {
      doc.add(new TextField(SEARCH_FIELD_TITLE, p.getTitle(), Field.Store.YES));
    }
    Date date = p.getDateStarted() != null ? p.getDateStarted() : p.getDateCreated();
    doc.add(new LongField("modified", date.getTime(), Field.Store.NO));
  } else {
    doc.add(new TextField(SEARCH_FIELD_TEXT, noteName, Field.Store.YES));
  }
  return doc;
}
 
Example #13
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Indexing term-vector offsets must be rejected when a custom TermFrequencyAttribute is used. */
public void testInvalidTermVectorOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  type.setStoreTermVectors(true);
  type.setStoreTermVectorOffsets(true); // invalid combination under test

  Document document = new Document();
  document.add(new Field("field",
                         new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                             new int[] {42, 128, 17, 100}),
                         type));

  Exception e = expectThrows(IllegalArgumentException.class, () -> {
    writer.addDocument(document);
  });
  assertEquals("field \"field\": cannot index term vector offsets while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(writer, dir);
}
 
Example #14
Source File: TestPerFieldPostingsFormat2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Indexes 100 docs with two vector-enabled fields under {@code codec}; CheckIndex runs on close. */
private void doTestMixedPostings(Codec codec) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig(new MockAnalyzer(random()));
  config.setCodec(codec);
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  // term vectors on, for the CheckIndex cross-check
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setStoreTermVectorPositions(true);

  Field idField = new Field("id", "", vectorType);
  Field dateField = new Field("date", "", vectorType);
  Document doc = new Document();
  doc.add(idField);
  doc.add(dateField);
  for (int i = 0; i < 100; i++) {
    // Reuse the same Document, refreshing the two field values each round.
    idField.setStringValue(Integer.toString(random().nextInt(50)));
    dateField.setStringValue(Integer.toString(random().nextInt(100)));
    writer.addDocument(doc);
  }
  writer.close();
  dir.close(); // checkindex
}
 
Example #15
Source File: SingleFieldTestDb.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a tiny single-field test index, one document per entry of {@code documents}.
 *
 * @param random    randomness source for the mock directory and analyzer
 * @param documents one document body per array element
 * @param fName     name of the indexed text field
 */
public SingleFieldTestDb(Random random, String[] documents, String fName) {
  try {
    db = new MockDirectoryWrapper(random, new ByteBuffersDirectory());
    docs = documents;
    fieldName = fName;
    // try-with-resources closes the writer even if addDocument throws (original leaked it).
    try (IndexWriter writer = new IndexWriter(db, new IndexWriterConfig(new MockAnalyzer(random)))) {
      for (String content : docs) {
        Document d = new Document();
        d.add(new TextField(fieldName, content, Field.Store.NO));
        writer.addDocument(d);
      }
    }
  } catch (java.io.IOException ioe) {
    // An I/O failure is not a JVM Error; surface it as the standard unchecked wrapper.
    throw new java.io.UncheckedIOException(ioe);
  }
}
 
Example #16
Source File: BlockGroupingTest.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/** Creates one "book" block of atLeast(10) chapter documents; the last one carries the blockEnd marker. */
private static List<Document> createRandomBlock(int book) {
  String bookName = "book" + book;
  int chapterCount = atLeast(10);
  List<Document> block = new ArrayList<>(chapterCount);
  for (int chapter = 0; chapter < chapterCount; chapter++) {
    String chapterText = randomText();
    Document doc = new Document();
    doc.add(new TextField("book", bookName, Field.Store.YES));
    doc.add(new TextField("chapter", "chapter" + chapter, Field.Store.YES));
    doc.add(new TextField("text", chapterText, Field.Store.NO));
    doc.add(new NumericDocValuesField("length", chapterText.length()));
    doc.add(new SortedDocValuesField("book", new BytesRef(bookName)));
    if (chapter == chapterCount - 1) {
      // Sentinel on the last doc of the block, used by block-join queries.
      doc.add(new TextField("blockEnd", "true", Field.Store.NO));
    }
    block.add(doc);
  }
  return block;
}
 
Example #17
Source File: TestDirectoryReaderReopen.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Creates a document encoding {@code n} in its values: the first three fields hold
 * "a&lt;n&gt;", the remaining {@code numFields - 1} text fields hold "a&lt;n&gt; b&lt;n&gt;".
 */
public static Document createDocument(int n, int numFields) {
  Document doc = new Document();
  String shortValue = "a" + n;

  // stored + indexed, but untokenized and without norms
  FieldType untokenizedStored = new FieldType(TextField.TYPE_STORED);
  untokenizedStored.setTokenized(false);
  untokenizedStored.setOmitNorms(true);
  // stored only, not indexed at all
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);

  doc.add(new TextField("field1", shortValue, Field.Store.YES));
  doc.add(new Field("fielda", shortValue, untokenizedStored));
  doc.add(new Field("fieldb", shortValue, storedOnly));

  String longValue = shortValue + " b" + n;
  for (int i = 1; i < numFields; i++) {
    doc.add(new TextField("field" + (i + 1), longValue, Field.Store.YES));
  }
  return doc;
}
 
Example #18
Source File: LuceneExample.java    From yuzhouwan with Apache License 2.0 6 votes vote down vote up
/**
 * Minimal end-to-end Lucene example: writes one document into /tmp/index,
 * then parses a query, runs it, and prints the matching "blog" values.
 */
public static void main(String[] args) throws Exception {
    try (Directory index = new NIOFSDirectory(Paths.get("/tmp/index"))) {
        // index a single document
        try (IndexWriter writer = new IndexWriter(index, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("blog", "yuzhouwan.com", Field.Store.YES));
            doc.add(new StringField("github", "asdf2014", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
        // query it back
        try (DirectoryReader reader = DirectoryReader.open(index)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new QueryParser("blog", new StandardAnalyzer()).parse("yuzhouwan.com");
            for (ScoreDoc hit : searcher.search(query, 1000).scoreDocs) {
                System.out.println(searcher.doc(hit.doc).get("blog"));
            }
        }
    }
}
 
Example #19
Source File: DocumentValueSourceDictionaryTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Generates {@code ndocs} documents keyed by their FIELD_NAME value, each carrying
 * three numeric weight fields, an optional payload, and one or more context values.
 */
private Map<String, Document> generateIndexDocuments(int ndocs) {
  Map<String, Document> docs = new HashMap<>();
  for(int i = 0; i < ndocs ; i++) {
    Field field = new TextField(FIELD_NAME, "field_" + i, Field.Store.YES);
    Field weight1 = new NumericDocValuesField(WEIGHT_FIELD_NAME_1, 10 + i);
    Field weight2 = new NumericDocValuesField(WEIGHT_FIELD_NAME_2, 20 + i);
    Field weight3 = new NumericDocValuesField(WEIGHT_FIELD_NAME_3, 30 + i);
    Field contexts = new StoredField(CONTEXTS_FIELD_NAME, new BytesRef("ctx_"  + i + "_0"));
    Document doc = new Document();
    doc.add(field);
    // even if payload is not required usually have it
    if (usually()) {
      Field payload = new StoredField(PAYLOAD_FIELD_NAME, new BytesRef("payload_" + i));
      doc.add(payload);
    }
    doc.add(weight1);
    doc.add(weight2);
    doc.add(weight3);
    doc.add(contexts);
    // NOTE(review): the same Field instance is mutated and re-added, so every occurrence
    // shares the last bytes value set; atLeast(3) is also re-evaluated each iteration.
    // Presumably intentional test randomness — confirm before relying on exact contexts.
    for(int j = 1; j < atLeast(3); j++) {
      contexts.setBytesValue(new BytesRef("ctx_" + i + "_" + j));
      doc.add(contexts);
    }
    docs.put(field.stringValue(), doc);
  }
  return docs;
}
 
Example #20
Source File: SpatialPrefixTreeTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A PrefixTree pruning optimization gone bad, applicable when optimize=true.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770">LUCENE-4770</a>.
 */
@Test
public void testBadPrefixTreePrune() throws Exception {

  trie = new QuadPrefixTree(ctx, 12);
  TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");
  Document doc = new Document();
  doc.add(new TextField("id", "1", Store.YES));

  // Rectangle that the pruning bug caused to be missed by the intersect query below.
  Shape area = ctx.makeRectangle(-122.82, -122.78, 48.54, 48.56);

  Field[] fields = strategy.createIndexableFields(area, 0.025);
  for (Field field : fields) {
    doc.add(field);
  }
  addDocument(doc);

  // Query rectangle overlapping the indexed shape.
  Point upperleft = ctx.makePoint(-122.88, 48.54);
  Point lowerright = ctx.makePoint(-122.82, 48.62);

  Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, ctx.makeRectangle(upperleft, lowerright)));

  commit();

  TopDocs search = indexSearcher.search(query, 10);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  for (ScoreDoc scoreDoc : scoreDocs) {
    System.out.println(indexSearcher.doc(scoreDoc.doc));
  }

  // Exactly one hit expected; the pruning bug returned zero.
  assertEquals(1, search.totalHits.value);
}
 
Example #21
Source File: TripleIndexCreatorContext.java    From AGDISTIS with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Adds a (subject, predicate, object) triple to the index. If the subject is not
 * indexed yet a new document is created; otherwise the existing document's context
 * is extended with the new object value and the document is replaced in place.
 *
 * @param isUri whether {@code object} is a URI whose label may need resolving
 */
private void addDocumentToIndex(String subject, String predicate, String object, boolean isUri) throws IOException {
	log.info("here again");
	List<Triple> triples = new ArrayList<>();

	try {
		triples = search(subject, null, null, 100);
	} catch (Exception e) {
		// Was silently swallowed; record why we fall back to "new document".
		log.warn("Lookup of existing triples for subject " + subject + " failed", e);
	}
	if (triples.isEmpty()) {
		// Subject unseen so far: index a fresh document.
		Document doc = new Document();
		log.debug(subject + " " + predicate + " " + object);
		doc.add(new StringField(FIELD_NAME_URI, subject, Store.YES));
		doc.add(new TextField(FIELD_NAME_SURFACE_FORM, object, Store.YES));
		doc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
		doc.add(new TextField(FIELD_NAME_CONTEXT, object, Store.YES));
		iwriter.addDocument(doc);
	} else {
		String docID = triples.get(0).subject;
		log.info(triples.toString());
		if (isUri) {
			if (endpoint.isEmpty()) {
				log.info("endpoint empty");
				object = object.replace(nodeType, "");
			} else {
				// Resolve the URI's label via the configured SPARQL endpoint.
				object = sparql(subject);
				log.info("endpoint working");
			}
		}
		// Append the new object value to the stored context of the existing doc.
		String remainContext = triples.get(0).object.concat(" " + object);
		log.info(remainContext);
		Document hitDoc = isearcher.doc(Integer.parseInt(docID));
		Document newDoc = new Document();
		newDoc.add(new StringField(FIELD_NAME_URI, triples.get(0).predicate, Store.YES));
		newDoc.add(new TextField(FIELD_NAME_SURFACE_FORM, hitDoc.get(FIELD_NAME_SURFACE_FORM), Store.YES));
		newDoc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
		newDoc.add(new TextField(FIELD_NAME_CONTEXT, remainContext, Store.YES));
		iwriter.updateDocument(new Term(FIELD_NAME_URI, subject), newDoc);
	}

}
 
Example #22
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Adds one document exercising four field flavors: keyword, text, unindexed-stored, and unstored. */
static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
  // stored but not indexed
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);

  Document doc = new Document();
  doc.add(newStringField("keyword2", "test1", Field.Store.YES));
  doc.add(newTextField("text2", "test1", Field.Store.YES));
  doc.add(newField("unindexed2", "test1", storedOnly));
  doc.add(new TextField("unstored2", "test1", Field.Store.NO));
  writer.addDocument(doc);
}
 
Example #23
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * One document with custom term frequencies (foo=42, bar=128): the postings must
 * report exactly those freqs for doc 0 and then be exhausted.
 */
public void testSingletonTermsOneDoc() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  // Token stream that reports the given custom frequency for each term.
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar"},
                                              new int[] {42, 128}),
                          fieldType);
  doc.add(field);
  w.addDocument(doc);
  IndexReader r = DirectoryReader.open(w);
  PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("bar"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(128, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());

  postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("foo"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(42, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());
  
  IOUtils.close(r, w, dir);
}
 
Example #24
Source File: TestTopDocsCollector.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Checks how the totalHitsThreshold passed to TopScoreDocCollector.create controls
 * whether totalHits is exact (EQUAL_TO) or a lower bound (GREATER_THAN_OR_EQUAL_TO).
 */
public void testRelationVsTopDocsCount() throws Exception {
  try (Directory dir = newDirectory();
      IndexWriter w = new IndexWriter(dir, newIndexWriterConfig().setMergePolicy(NoMergePolicy.INSTANCE))) {
    Document doc = new Document();
    doc.add(new TextField("f", "foo bar", Store.NO));
    // Two flushed segments of 5 matching docs each; NoMergePolicy keeps them separate.
    w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
    w.flush();
    w.addDocuments(Arrays.asList(doc, doc, doc, doc, doc));
    w.flush();
    
    try (IndexReader reader = DirectoryReader.open(w)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Threshold 10 >= total matches: count is exact.
      TopScoreDocCollector collector = TopScoreDocCollector.create(2, null, 10);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertEquals(10, collector.totalHits);
      assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
      
      // Threshold 2 < total matches: counting may stop early, count is a lower bound.
      collector = TopScoreDocCollector.create(2, null, 2);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertTrue(10 >= collector.totalHits);
      assertEquals(TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO, collector.totalHitsRelation);
      
      // numHits 10 forces visiting all hits, so the count is exact again.
      collector = TopScoreDocCollector.create(10, null, 2);
      searcher.search(new TermQuery(new Term("f", "foo")), collector);
      assertEquals(10, collector.totalHits);
      assertEquals(TotalHits.Relation.EQUAL_TO, collector.totalHitsRelation);
    }
  }
}
 
Example #25
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Verifies FastVectorHighlighter fragment extraction at several fragment sizes;
 * fragments should be centered on the highlighted term.
 * (The "highlighed" misspelling is intentional test data — it appears identically
 * in the indexed text and in every expected fragment.)
 */
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  // FVH requires term vectors with positions and offsets.
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new TermQuery(new Term("field", "foo")), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered 
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example #26
Source File: SimpleFragmentsBuilderTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds {@code dir} (OpenMode.CREATE) with a single unstored, term-vector-enabled
 * document and reopens {@code reader} on it, so fragment builders must rely on
 * term vectors rather than stored fields.
 */
protected void makeUnstoredIndex() throws Exception {
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  // Positions + offsets are required by the vector highlighter.
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorOffsets(true);
  customType.setStoreTermVectorPositions(true);
  doc.add(new Field(F, "aaa", customType));
  writer.addDocument(doc);
  writer.close();
  if (reader != null) reader.close();
  reader = DirectoryReader.open(dir);
}
 
Example #27
Source File: JtsPolygonTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Regression test for a PrefixTree pruning optimization gone bad.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770">LUCENE-4770</a>.
 */
@Test
public void testBadPrefixTreePrune() throws Exception {
  // Triangle that the buggy pruning caused intersect queries to miss.
  Shape area = ctx.readShapeFromWkt("POLYGON((-122.83 48.57, -122.77 48.56, -122.79 48.53, -122.83 48.57))");

  SpatialPrefixTree trie = new QuadPrefixTree(ctx, 12);
  TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");

  Document document = new Document();
  document.add(new TextField("id", "1", Store.YES));
  for (Field field : strategy.createIndexableFields(area, 0.025)) {
    document.add(field);
  }
  addDocument(document);

  // Query rectangle overlapping the indexed polygon.
  Point upperLeft = ctx.getShapeFactory().pointXY(-122.88, 48.54);
  Point lowerRight = ctx.getShapeFactory().pointXY(-122.82, 48.62);
  Query query = strategy.makeQuery(
      new SpatialArgs(SpatialOperation.Intersects, ctx.getShapeFactory().rect(upperLeft, lowerRight)));
  commit();

  TopDocs search = indexSearcher.search(query, 10);
  for (ScoreDoc scoreDoc : search.scoreDocs) {
    System.out.println(indexSearcher.doc(scoreDoc.doc));
  }

  // Exactly one hit expected; the pruning bug returned zero.
  assertEquals(1, search.totalHits.value);
}
 
Example #28
Source File: BaseIndex.java    From everywhere with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes a single file: analyzed path, last-modified timestamp, and content
 * (stored only when {@code IS_OPEN_CONTEXT} is enabled). In CREATE mode the
 * document is appended; otherwise it replaces any existing document with the
 * same path. Files with no content are skipped.
 */
private static void indexDoc(IndexWriter writer, FileBean t) throws Exception {
    if (t.getContent() == null) {
        return; // nothing to index
    }
    Document doc = new Document();
    doc.add(new TextField(LuceneConstants.PATH, t.getFilepath(), Field.Store.YES));
    doc.add(new StringField(LuceneConstants.MODIFIED,
            UtilsTool.getDateStrByLastModified(t.getLastModified()), Field.Store.YES));
    doc.add(new TextField(LuceneConstants.CONTENT, t.getContent(),
            CommonConstants.IS_OPEN_CONTEXT ? Field.Store.YES : Field.Store.NO));
    if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
        writer.addDocument(doc);
    } else {
        // Update keyed by file path so re-indexing does not duplicate documents.
        writer.updateDocument(new Term(LuceneConstants.PATH, t.getFilepath()), doc);
    }
}
 
Example #29
Source File: TestCustomNorms.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes documents whose test field repeats its boost value `boost` times
 * (e.g. boost 3 -> "3 3 3"), with a custom Similarity that encodes that boost
 * into the norm; then verifies each stored norm equals the boost parsed back
 * from the stored field value.
 */
public void testFloatNorms() throws IOException {

    Directory dir = newDirectory();
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 2, IndexWriter.MAX_TERM_LENGTH));

    IndexWriterConfig config = newIndexWriterConfig(analyzer);
    // Custom similarity that turns the token count back into the norm value.
    Similarity provider = new MySimProvider();
    config.setSimilarity(provider);
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);
    final LineFileDocs docs = new LineFileDocs(random());
    int num = atLeast(100);
    for (int i = 0; i < num; i++) {
      Document doc = docs.nextDoc();
      int boost = TestUtil.nextInt(random(), 1, 10);
      // Value is the boost repeated `boost` times, e.g. boost=3 -> "3 3 3".
      String value = IntStream.range(0, boost).mapToObj(k -> Integer.toString(boost)).collect(Collectors.joining(" "));
      Field f = new TextField(FLOAT_TEST_FIELD, value, Field.Store.YES);

      doc.add(f);
      writer.addDocument(doc);
      // Remove so the next LineFileDocs document starts clean.
      doc.removeField(FLOAT_TEST_FIELD);
      if (rarely()) {
        writer.commit();
      }
    }
    writer.commit();
    writer.close();
    DirectoryReader open = DirectoryReader.open(dir);
    NumericDocValues norms = MultiDocValues.getNormValues(open, FLOAT_TEST_FIELD);
    assertNotNull(norms);
    for (int i = 0; i < open.maxDoc(); i++) {
      Document document = open.document(i);
      // First token of the stored value is the boost that was encoded into the norm.
      int expected = Integer.parseInt(document.get(FLOAT_TEST_FIELD).split(" ")[0]);
      assertEquals(i, norms.nextDoc());
      assertEquals(expected, norms.longValue());
    }
    open.close();
    dir.close();
    docs.close();
  }
 
Example #30
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes one massive document containing 128K occurrences of the term "a"
 * (with a tiny RAM buffer to force flushing) and verifies the postings report
 * exactly that frequency.
 */
public void testHighFreqTerm() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setRAMBufferSizeMB(0.01));
  // Massive doc that has 128 K a's
  StringBuilder b = new StringBuilder(1024*1024);
  for(int i=0;i<4096;i++) {
    // 4096 iterations x 32 a's per iteration = 131072 (128K) terms.
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
  }
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", b.toString(), customType));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.maxDoc());
  assertEquals(1, reader.numDocs());
  Term t = new Term("field", "a");
  assertEquals(1, reader.docFreq(t));
  PostingsEnum td = TestUtil.docs(random(), reader,
                                  "field",
                                  new BytesRef("a"),
                                  null,
                                  PostingsEnum.FREQS);
  td.nextDoc();
  // Term frequency within the single doc must be exactly 128K.
  assertEquals(128*1024, td.freq());
  reader.close();
  dir.close();
}