Java Code Examples for org.apache.lucene.document.Field.Store#YES

The following examples show how to use org.apache.lucene.document.Field.Store#YES. They are drawn from open source projects; the source file and project are noted above each example.
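As a quick orientation before the project examples, the following minimal sketch (class and field names are illustrative, not taken from any project below) contrasts Store.YES, which keeps the original value in the index so it can be read back from search results, with Store.NO, which indexes the value for matching only.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

public class StoreYesSketch {
  public static Document buildDocument(String id, String body) {
    Document doc = new Document();
    // Indexed as a single token and stored: the exact id can be matched and also read back from a hit.
    doc.add(new StringField("id", id, Store.YES));
    // Analyzed for full-text search; the original text is not kept in the index.
    doc.add(new TextField("body", body, Store.NO));
    return doc;
  }
}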
Example 1
Source File: SimpleDocumentWriter.java    From dremio-oss with Apache License 2.0
private void addToDoc(IndexKey key, String... values){
  Preconditions.checkArgument(key.getValueType() == String.class);
  final boolean sorted = key.isSorted();
  if (sorted) {
    checkIfSorted(key, (Object[]) values);
  }

  checkIfMultiValueField(key, (Object[]) values);

  final String indexFieldName = key.getIndexFieldName();
  final Store stored = key.isStored() ? Store.YES : Store.NO;
  for (final String value : values) {
    if (value == null) {
      continue;
    }
    final String truncatedValue = StringUtils.abbreviate(value, MAX_STRING_LENGTH);
    doc.add(new StringField(indexFieldName, truncatedValue, stored));
  }

  if (sorted && values.length == 1 && values[0] != null) {
    Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}
 
Example 2
Source File: SimpleDocumentWriter.java    From dremio-oss with Apache License 2.0
private void addToDoc(IndexKey key, byte[]... values){
  Preconditions.checkArgument(key.getValueType() == String.class);
  final boolean sorted = key.isSorted();
  if (sorted) {
    checkIfSorted(key, (Object[]) values);
  }

  checkIfMultiValueField(key, (Object[]) values);

  final String indexFieldName = key.getIndexFieldName();
  final Store stored = key.isStored() ? Store.YES : Store.NO;
  for (final byte[] value : values) {
    if (value == null) {
      continue;
    }
    final BytesRef truncatedValue = new BytesRef(value, 0, Math.min(value.length, MAX_STRING_LENGTH));
    doc.add(new StringField(indexFieldName, truncatedValue, stored));
  }

  if (sorted && values.length == 1 && values[0] != null) {
    Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}
 
Example 3
Source File: NewsToDocument.java    From cqunews-web with Apache License 2.0
public static Document newsToDocument(NewsDetailModel news){
	Document document = new Document();
	
	StringField idField = new StringField("id", news.getId(), Store.YES);
	StringField urlField = new StringField("url", news.getUrl(), Store.YES);
	StringField titleField = new StringField("title", news.getTitle(), Store.YES);
	StringField contentField = new StringField("content", news.getContent(), Store.YES);
	StringField timeField = new StringField("time", news.getTime(), Store.YES);
	
	document.add(idField);
	document.add(urlField);
	document.add(titleField);
	document.add(contentField);
	document.add(timeField);

	return document;
}
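Note that the example above indexes even the article content with StringField, which treats the whole value as one unanalyzed token. If full-text search over the content is wanted, a TextField could be stored instead; the following is only a sketch of that variant, not code from the cqunews-web project:

// Sketch only: same model as above, but the content is analyzed for full-text search.
TextField analyzedContentField = new TextField("content", news.getContent(), Store.YES);
document.add(analyzedContentField);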
 
Example 4
Source File: TestOrdValues.java    From lucene-solr with Apache License 2.0
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  Document d = new Document();
  Field f;
  int scoreAndID = i + 1;

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  customType.setOmitNorms(true);
  
  f = newField(ID_FIELD, id2String(scoreAndID), customType); // for debug purposes
  d.add(f);
  d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

  FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED);
  customType2.setOmitNorms(true);
  f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2); // for regular search
  d.add(f);

  f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

  f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

  iw.addDocument(d);
  log("added: " + d);
}
 
Example 5
Source File: RowIndexer.java    From sql-layer with GNU Affero General Public License v3.0
protected void getKeyBytes(Row row) {
    
    byte[] bytes = row.hKey().hKeyBytes();
    keyEncodedString = encodeBytes(bytes, 0, bytes.length);
    Field field = new StringField(IndexedField.KEY_FIELD, keyEncodedString, Store.YES);
    currentDocument.add(field);
}
 
Example 6
Source File: TokenMapperGeneric.java    From stratio-cassandra with Apache License 2.0
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public void addFields(Document document, DecoratedKey partitionKey) {
    ByteBuffer bb = factory.toByteArray(partitionKey.getToken());
    String serialized = ByteBufferUtils.toString(bb);
    Field field = new StringField(FIELD_NAME, serialized, Store.YES);
    document.add(field);
}
 
Example 7
Source File: AnchorIndexer.java    From tagme with Apache License 2.0
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		log.info("Loading support datasets...");
		
		File all_anchors = new WikipediaAnchorParser(lang).getFile();
		long numAnchors = ExternalSortUtils.wcl(all_anchors);
		AnchorIterator iterator = new AnchorIterator(all_anchors);
		
		IntSet people = new PeopleWIDs(lang).getDataset();
		
//		IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
		IndexSearcher articles = openWikipediaIndex(lang);
		//QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
		QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));
		
		IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
		Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
		Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
		
		doc.add(fId);
		doc.add(fText);
		doc.add(fObject);
		
//		Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
//		Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);
		
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq","dropped");
		plog.setEnd(0, numAnchors);
		plog.start("Support datasets loaded, now parsing...");
		int id=0;
		while(iterator.next())
		{
			plog.update(0, iterator.scroll);
			plog.update(1);
			String anchorText = iterator.anchor;
			
			int freq = freq(iterator.originals, articles, queryParser);
			plog.update(2, iterator.originals.size());
			if (freq == 0) plog.update(4);
			
			Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
			if (anchorObj == null){
				plog.update(5);
				continue;
			}
			
			String anchorSerial = Anchor.serialize(anchorObj);
			fId.setValue(Integer.toString(++id));
			fText.setValue(anchorText);
			fObject.setValue(anchorSerial);
			
			for(int page : anchorObj){
				Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
//				fWID.setBoost(iterator.links.get(page));
				doc.add(fWID);
			}
			for(String original : iterator.originals) {
				doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
			}
			
			index.addDocument(doc);
			plog.update(3);
			
			doc.removeFields(FIELD_ORIGINAL);
			doc.removeFields(FIELD_WID);
		}
		plog.stop();
		iterator.close();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		log.info("Done.");
	}
 
Example 8
Source File: TopicIndexer.java    From tagme with Apache License 2.0
@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		
		IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
		Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();
		
		IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
		Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
		Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
		Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
		doc.add(fWID);
		doc.add(fTitle);
		doc.add(fAbstract);
		doc.add(fBestAnchor);
				
		
		int max = articles.maxDoc();
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
		plog.setEnd(max);
		plog.start("Start indexing...");
		
		for(int i=0; i<max; i++)
		{
			plog.update(0);
			Document oldDoc = articles.document(i);
			PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
			if (type == PageType.TOPIC)
			{
				int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
				fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));
				
				String bestAnchor = bestAnchorMap.get(wid);
				if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
				fBestAnchor.setValue(bestAnchor==null?"":bestAnchor);
				
				String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
				if (cats != null) {
					for (int j=0; j<cats.length; j++)
						doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
				}
				
				index.addDocument(doc);
				plog.update(1);
				
				doc.removeFields(FIELD_CAT);
			}
		}
		
		plog.stop();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		
		//we cannot call this because the index is still in the temporary dir
		//so TopicDocs will be created using old index
//		log.info("Index Done, now creating WID->DOC_ID map");
//		
//		TopicDocs td = new TopicDocs(lang);
//		td.forceParsing();
		
		log.info("Done.");
	}
 
Example 9
Source File: TestLegacyFieldCache.java    From lucene-solr with Apache License 2.0
public void testLongFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
  doc.add(field);
  final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final long v;
    switch (random().nextInt(10)) {
      case 0:
        v = Long.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Long.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextLong(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setLongValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, longs.nextDoc());
      assertEquals(values[i], longs.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, longs.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}
 
Example 10
Source File: TestLegacyFieldCache.java    From lucene-solr with Apache License 2.0
public void testIntFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
  doc.add(field);
  final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final int v;
    switch (random().nextInt(10)) {
      case 0:
        v = Integer.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Integer.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextInt(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setIntValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, ints.nextDoc());
      assertEquals(values[i], ints.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, ints.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}
 
Example 11
Source File: SuperQueryTest.java    From incubator-retired-blur with Apache License 2.0
private static IndexableField newStringField(String name, String value) {
  return new StringField(name, value, Store.YES);
}
 
Example 12
Source File: TikaLuceneContentExtractor.java    From cxf with Apache License 2.0
private static Field getContentField(final LuceneDocumentMetadata documentMetadata, final String content) {
    return new TextField(documentMetadata.getContentFieldName(), content, Store.YES);
}
 
Example 13
Source File: PartitionKeyMapper.java    From stratio-cassandra with Apache License 2.0
/**
 * Adds to the specified {@link Document} the {@link Field}s associated with the specified raw partition key.
 *
 * @param document     The document in which the fields are going to be added.
 * @param partitionKey The raw partition key to be converted.
 */
public void addFields(Document document, DecoratedKey partitionKey) {
    String serializedKey = ByteBufferUtils.toString(partitionKey.getKey());
    Field field = new StringField(FIELD_NAME, serializedKey, Store.YES);
    document.add(field);
}
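Because the field is created with Store.YES, the serialized partition key can be read back from a search hit. The lines below are a minimal sketch of that round trip, assuming an open IndexSearcher named searcher and a hit's document id docId (neither is part of the stratio-cassandra example above):

// Assumes `searcher` (an open IndexSearcher) and `docId` (a matching document's id).
Document hit = searcher.doc(docId);
String serializedKey = hit.get(FIELD_NAME); // readable only because the field was stored with Store.YES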