org.apache.lucene.document.Field.Index Java Examples

The following examples show how to use org.apache.lucene.document.Field.Index. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: LuceneMessageSearchIndex.java (from james-project, Apache License 2.0; 6 votes)
/**
 * Add the given {@link Flags} to the {@link Document}.
 * <p>
 * Every system flag (converted with {@code toString(Flag)}) and every user flag is added as its
 * own stored, non-analyzed {@code FLAGS_FIELD} value. When the message carries no flags at all,
 * a single empty, non-stored {@code FLAGS_FIELD} value is added instead.
 *
 * @param doc the document that receives the flag fields
 * @param f   the flags to index
 */
private void indexFlags(Document doc, Flags f) {
    Flag[] flags = f.getSystemFlags();
    for (Flag flag : flags) {
        doc.add(new Field(FLAGS_FIELD, toString(flag), Store.YES, Index.NOT_ANALYZED));
    }

    String[] userFlags = f.getUserFlags();
    for (String userFlag : userFlags) {
        doc.add(new Field(FLAGS_FIELD, userFlag, Store.YES, Index.NOT_ANALYZED));
    }

    // if no flags are there we just use an empty field
    if (flags.length == 0 && userFlags.length == 0) {
        doc.add(new Field(FLAGS_FIELD, "", Store.NO, Index.NOT_ANALYZED));
    }
}
 
Example #2
Source File: WebDSLDynamicFieldBridge.java (from webdsl, Apache License 2.0; 5 votes)
/**
 * Adds one Lucene field per dynamic search field found in {@code value}.
 * <p>
 * {@code value} is cast to {@code DynamicSearchFields}; each contained entry becomes a
 * non-stored, non-analyzed field named after the entry, using the term vector setting taken
 * from {@code luceneOptions}. The {@code name} argument is not used.
 */
@Override
  public void set(String name, Object value, Document document, LuceneOptions luceneOptions) {
    final DynamicSearchFields dynamicFields = (DynamicSearchFields) value;
    for (DynamicSearchField searchField : dynamicFields.getDynamicSearchFields_()) {
      final Field luceneField = new Field(
          searchField.fieldName,
          searchField.fieldValue,
          Store.NO,
          Index.NOT_ANALYZED,
          luceneOptions.getTermVector());
      document.add(luceneField);
    }
  }
 
Example #3
Source File: LuceneMessageSearchIndex.java (from james-project, Apache License 2.0; 5 votes)
/**
 * Builds the {@link Document} that holds the {@link Flags} of the given message.
 * <p>
 * The document id has the form {@code flags-<serialized mailbox id>-<uid>}. The serialized
 * mailbox id and the numeric UID are also stored as separate fields before the flags of the
 * message are indexed into the document.
 *
 * @param message the message whose flags are indexed
 * @return the newly created flags document
 */
private Document createFlagsDocument(MailboxMessage message) {
    String serializedMailboxId = message.getMailboxId().serialize();
    long uid = message.getUid().asLong();
    String documentId = "flags-" + serializedMailboxId + "-" + uid;

    Document flagsDocument = new Document();
    flagsDocument.add(new Field(ID_FIELD, documentId, Store.YES, Index.NOT_ANALYZED));
    flagsDocument.add(new Field(MAILBOX_ID_FIELD, serializedMailboxId, Store.YES, Index.NOT_ANALYZED));
    flagsDocument.add(new NumericField(UID_FIELD, Store.YES, true).setLongValue(uid));

    indexFlags(flagsDocument, message.createFlags());
    return flagsDocument;
}
 
Example #4
Source File: ReferenceCountingReadOnlyIndexReaderFactory.java (from alfresco-repository, GNU Lesser General Public License v3.0; 4 votes)
public Field get(int n, FieldSelector fieldSelector) throws IOException
{
    // Resolve the string value for entry n of this field, then wrap it in a
    // non-stored, untokenized Field. The fieldSelector argument is not consulted.
    final String value = getStringValue(n, fieldName);
    final Field field = new Field(fieldName, value, Store.NO, Index.UN_TOKENIZED);
    return field;
}
 
Example #5
Source File: AnchorIndexer.java (from tagme, Apache License 2.0; 4 votes)
/**
 * Builds the anchor index for {@code lang} inside {@code workingDir}.
 * <p>
 * Iterates over the anchors file obtained from {@code WikipediaAnchorParser} and writes one
 * Lucene document for every anchor that {@code Anchor.build} accepts. Each document carries a
 * sequential id, the anchor text, the serialized {@code Anchor}, one {@code FIELD_WID} value per
 * page of the anchor and one {@code FIELD_ORIGINAL} value per original form. The index is
 * optimized before the writer is closed.
 * <p>
 * If anything fails, the writer is rolled back (nothing is committed and the index write lock
 * is released) and the anchor iterator is closed before the exception propagates.
 *
 * @param lang       language whose anchors are indexed
 * @param workingDir directory in which the index is created
 * @throws IOException if reading the input or writing the index fails
 */
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		log.info("Loading support datasets...");
		
		File all_anchors = new WikipediaAnchorParser(lang).getFile();
		long numAnchors = ExternalSortUtils.wcl(all_anchors);
		AnchorIterator iterator = new AnchorIterator(all_anchors);
		try
		{
			IntSet people = new PeopleWIDs(lang).getDataset();
			
			// NOTE(review): this searcher is never closed in this method; whether
			// openWikipediaIndex returns a shared instance is not visible here — confirm.
//			IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
			IndexSearcher articles = openWikipediaIndex(lang);
			//QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
			QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));
			
			IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
			boolean completed = false;
			try
			{
				// A single Document and its three fixed fields are reused for every anchor;
				// only the multi-valued FIELD_WID / FIELD_ORIGINAL entries are added and
				// removed per iteration.
				Document doc = new Document();
				Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
				Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
				Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
				
				doc.add(fId);
				doc.add(fText);
				doc.add(fObject);
				
//				Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
//				Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);
				
				// Counter positions follow the label order given here:
				// 0=lines, 1=anchors, 2=searches, 3=indexed, 4=0-freq, 5=dropped.
				PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq","dropped");
				plog.setEnd(0, numAnchors);
				plog.start("Support datasets loaded, now parsing...");
				int id=0;
				while(iterator.next())
				{
					plog.update(0, iterator.scroll);
					plog.update(1);
					String anchorText = iterator.anchor;
					
					int freq = freq(iterator.originals, articles, queryParser);
					plog.update(2, iterator.originals.size());
					if (freq == 0) {
						plog.update(4);
					}
					
					// NOTE(review): the Anchor is built with the pre-increment id while
					// FIELD_ID below stores the incremented value — confirm this offset is intended.
					Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
					if (anchorObj == null){
						// Rejected anchors do not consume an id.
						plog.update(5);
						continue;
					}
					
					String anchorSerial = Anchor.serialize(anchorObj);
					fId.setValue(Integer.toString(++id));
					fText.setValue(anchorText);
					fObject.setValue(anchorSerial);
					
					for(int page : anchorObj){
						Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
//						fWID.setBoost(iterator.links.get(page));
						doc.add(fWID);
					}
					for(String original : iterator.originals) {
						doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
					}
					
					index.addDocument(doc);
					plog.update(3);
					
					// Reset the per-anchor multi-valued fields before the Document is reused.
					doc.removeFields(FIELD_ORIGINAL);
					doc.removeFields(FIELD_WID);
				}
				plog.stop();
				
				log.info("Now optimizing...");
				index.optimize();
				
				index.close();
				completed = true;
			}
			finally
			{
				// On failure, discard uncommitted changes and release the write lock
				// instead of leaving the writer open.
				if (!completed) {
					index.rollback();
				}
			}
		}
		finally
		{
			// Always release the anchors file, on failure as well as on success.
			iterator.close();
		}
		log.info("Done.");
	}
 
Example #6
Source File: TopicIndexer.java (from tagme, Apache License 2.0; 4 votes)
/**
 * Builds the topic index for {@code lang} inside {@code workingDir}.
 * <p>
 * Walks every document slot of the Wikipedia index and, for each document whose
 * {@code WikipediaIndexer.FIELD_TYPE} is {@code PageType.TOPIC}, writes a document holding the
 * page id, title, abstract, best anchor (empty string when none is known) and one
 * {@code FIELD_CAT} value per category. The index is optimized before the writer is closed.
 * <p>
 * If anything fails, the writer is rolled back (nothing is committed and the index write lock
 * is released) before the exception propagates.
 *
 * @param lang       language whose topics are indexed
 * @param workingDir directory in which the index is created
 * @throws IOException if reading the source index or writing the new index fails
 */
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		
		// NOTE(review): the reader comes from Indexes and is not closed here;
		// presumably Indexes manages its lifetime — confirm.
		IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
		Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();
		
		IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		boolean completed = false;
		try
		{
			// A single Document and its four fixed fields are reused for every topic;
			// only the multi-valued FIELD_CAT entries are added and removed per iteration.
			Document doc = new Document();
			Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
			Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
			Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
			Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
			doc.add(fWID);
			doc.add(fTitle);
			doc.add(fAbstract);
			doc.add(fBestAnchor);
			
			
			int max = articles.maxDoc();
			// Counter positions follow the label order given here: 0=pages, 1=indexed, 2=noBest.
			PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
			plog.setEnd(max);
			plog.start("Start indexing...");
			
			for(int i=0; i<max; i++)
			{
				plog.update(0);
				// maxDoc() also counts deleted slots, and IndexReader.document(int) does not
				// check for deletion itself, so skip deleted documents explicitly.
				if (articles.isDeleted(i)) {
					continue;
				}
				Document oldDoc = articles.document(i);
				PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
				if (type == PageType.TOPIC)
				{
					String widValue = oldDoc.get(WikipediaIndexer.FIELD_WID);
					int wid = Integer.parseInt(widValue);
					fWID.setValue(widValue);
					fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
					fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));
					
					String bestAnchor = bestAnchorMap.get(wid);
					if (bestAnchor == null || bestAnchor.length() == 0) {
						plog.update(2);
					}
					fBestAnchor.setValue(bestAnchor==null?"":bestAnchor);
					
					String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
					if (cats != null) {
						for (String cat : cats) {
							doc.add(new Field(FIELD_CAT, cat, Store.YES, Index.NOT_ANALYZED));
						}
					}
					
					index.addDocument(doc);
					plog.update(1);
					
					// Reset the per-topic categories before the Document is reused.
					doc.removeFields(FIELD_CAT);
				}
			}
			
			plog.stop();
			
			log.info("Now optimizing...");
			index.optimize();
			
			index.close();
			completed = true;
		}
		finally
		{
			// On failure, discard uncommitted changes and release the write lock
			// instead of leaving the writer open.
			if (!completed) {
				index.rollback();
			}
		}
		
		//we cannot call this because the index is still in the temporary dir
		//so TopicDocs will be created using old index
//		log.info("Index Done, now creating WID->DOC_ID map");
//		
//		TopicDocs td = new TopicDocs(lang);
//		td.forceParsing();
		
		log.info("Done.");
	}