org.apache.lucene.document.Field.Store#YES

Source File: SimpleDocumentWriter.java From dremio-oss with Apache License 2.0

6 votes

private void addToDoc(IndexKey key, String... values){
  Preconditions.checkArgument(key.getValueType() == String.class);
  final boolean sorted = key.isSorted();
  if (sorted) {
    checkIfSorted(key, (Object[]) values);
  }

  checkIfMultiValueField(key, (Object[]) values);

  final String indexFieldName = key.getIndexFieldName();
  final Store stored = key.isStored() ? Store.YES : Store.NO;
  for (final String value : values) {
    if (value == null) {
      continue;
    }
    final String truncatedValue = StringUtils.abbreviate(value, MAX_STRING_LENGTH);
    doc.add(new StringField(indexFieldName, truncatedValue, stored));
  }

  if (sorted && values.length == 1 && values[0] != null) {
    Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}

Source File: SimpleDocumentWriter.java From dremio-oss with Apache License 2.0

6 votes

private void addToDoc(IndexKey key, byte[]... values){
  Preconditions.checkArgument(key.getValueType() == String.class);
  final boolean sorted = key.isSorted();
  if (sorted) {
    checkIfSorted(key, (Object[]) values);
  }

  checkIfMultiValueField(key, (Object[]) values);

  final String indexFieldName = key.getIndexFieldName();
  final Store stored = key.isStored() ? Store.YES : Store.NO;
  for (final byte[] value : values) {
    if (value == null) {
      continue;
    }
    final BytesRef truncatedValue = new BytesRef(value,0, Math.min(value.length, MAX_STRING_LENGTH));
    doc.add(new StringField(indexFieldName, truncatedValue, stored));
  }

  if (sorted && values.length == 1 && values[0] != null) {
    Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}

Source File: NewsToDocument.java From cqunews-web with Apache License 2.0

6 votes

public static Document newsToDocument(NewsDetailModel news){
	Document document = new Document();
	
	StringField idField = new StringField("id",news.getId(),Store.YES);
	StringField urlField = new StringField("url",news.getUrl(),Store.YES);
	StringField titleField = new StringField("title",news.getTitle(),Store.YES);
	StringField contentField = new StringField("content",news.getContent(),Store.YES);
	StringField timeField = new StringField("time",news.getTime(),Store.YES);
	
	document.add(idField);
	document.add(urlField);
	document.add(titleField);
	document.add(contentField);
	document.add(timeField);

	return document;
}

Source File: TestOrdValues.java From lucene-solr with Apache License 2.0

5 votes

private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  Document d = new Document();
  Field f;
  int scoreAndID = i + 1;

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  customType.setOmitNorms(true);
  
  f = newField(ID_FIELD, id2String(scoreAndID), customType); // for debug purposes
  d.add(f);
  d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

  FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED);
  customType2.setOmitNorms(true);
  f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2); // for regular search
  d.add(f);

  f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

  f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

  iw.addDocument(d);
  log("added: " + d);
}

Source File: RowIndexer.java From sql-layer with GNU Affero General Public License v3.0

5 votes

protected void getKeyBytes(Row row) {
    
    byte[] bytes = row.hKey().hKeyBytes();
    keyEncodedString = encodeBytes(bytes, 0, bytes.length);
    Field field = new StringField(IndexedField.KEY_FIELD, keyEncodedString, Store.YES);
    currentDocument.add(field);
}

Source File: TokenMapperGeneric.java From stratio-cassandra with Apache License 2.0

5 votes

/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public void addFields(Document document, DecoratedKey partitionKey) {
    ByteBuffer bb = factory.toByteArray(partitionKey.getToken());
    String serialized = ByteBufferUtils.toString(bb);
    Field field = new StringField(FIELD_NAME, serialized, Store.YES);
    document.add(field);
}

Source File: AnchorIndexer.java From tagme with Apache License 2.0

4 votes

@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		log.info("Loading support datasets...");
		
		File all_anchors = new WikipediaAnchorParser(lang).getFile();
		long numAnchors = ExternalSortUtils.wcl(all_anchors);
		AnchorIterator iterator = new AnchorIterator(all_anchors);
		
		IntSet people = new PeopleWIDs(lang).getDataset();
		
//		IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang));
		IndexSearcher articles = openWikipediaIndex(lang);
		//QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34));
		QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>()));
		
		IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED);
		Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED);
		Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO);
		
		doc.add(fId);
		doc.add(fText);
		doc.add(fObject);
		
//		Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED);
//		Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED);
		
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq","dropped");
		plog.setEnd(0, numAnchors);
		plog.start("Support datasets loaded, now parsing...");
		int id=0;
		while(iterator.next())
		{
			plog.update(0, iterator.scroll);
			plog.update(1);
			String anchorText = iterator.anchor;
			
			int freq = freq(iterator.originals, articles, queryParser);
			plog.update(2, iterator.originals.size());
			if (freq == 0) plog.update(4);
			
			Anchor anchorObj = Anchor.build(id, iterator.links, freq, people);
			if (anchorObj == null){
				plog.update(5);
				continue;
			}
			
			String anchorSerial = Anchor.serialize(anchorObj);
			fId.setValue(Integer.toString(++id));
			fText.setValue(anchorText);
			fObject.setValue(anchorSerial);
			
			for(int page : anchorObj){
				Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED);
//				fWID.setBoost(iterator.links.get(page));
				doc.add(fWID);
			}
			for(String original : iterator.originals) {
				doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED));
			}
			
			index.addDocument(doc);
			plog.update(3);
			
			doc.removeFields(FIELD_ORIGINAL);
			doc.removeFields(FIELD_WID);
		}
		plog.stop();
		iterator.close();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		log.info("Done.");
	}

Source File: TopicIndexer.java From tagme with Apache License 2.0

4 votes

@Override
	public void makeIndex(String lang, File workingDir) throws IOException
	{
		
		IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang));
		Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset();
		
		IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer()));
		Document doc = new Document();
		Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED);
		Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED);
		Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO);
		Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO);
		doc.add(fWID);
		doc.add(fTitle);
		doc.add(fAbstract);
		doc.add(fBestAnchor);
				
		
		int max = articles.maxDoc();
		PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest");
		plog.setEnd(max);
		plog.start("Start indexing...");
		
		for(int i=0; i<max; i++)
		{
			plog.update(0);
			Document oldDoc = articles.document(i);
			PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE));
			if (type == PageType.TOPIC)
			{
				int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID));
				fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT));
				fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE));
				
				String bestAnchor = bestAnchorMap.get(wid);
				if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2);
				fBestAnchor.setValue(bestAnchor==null?"":bestAnchor);
				
				String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT);
				if (cats != null) {
					for (int j=0; j<cats.length; j++)
						doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED));
				}
				
				index.addDocument(doc);
				plog.update(1);
				
				doc.removeFields(FIELD_CAT);
			}
		}
		
		plog.stop();
		
		log.info("Now optimizing...");
		index.optimize();
		
		index.close();
		
		//we cannot call this because the index is still in the temporary dir
		//so TopicDocs will be created using old index
//		log.info("Index Done, now creating WID->DOC_ID map");
//		
//		TopicDocs td = new TopicDocs(lang);
//		td.forceParsing();
		
		log.info("Done.");
	}

Source File: TestLegacyFieldCache.java From lucene-solr with Apache License 2.0

4 votes

public void testLongFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LegacyLongField field = new LegacyLongField("f", 0L, Store.YES);
  doc.add(field);
  final long[] values = new long[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final long v;
    switch (random().nextInt(10)) {
      case 0:
        v = Long.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Long.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextLong(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setLongValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, longs.nextDoc());
      assertEquals(values[i], longs.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, longs.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}

Source File: TestLegacyFieldCache.java From lucene-solr with Apache License 2.0

4 votes

public void testIntFieldCache() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random()));
  cfg.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg);
  Document doc = new Document();
  LegacyIntField field = new LegacyIntField("f", 0, Store.YES);
  doc.add(field);
  final int[] values = new int[TestUtil.nextInt(random(), 1, 10)];
  Set<Integer> missing = new HashSet<>();
  for (int i = 0; i < values.length; ++i) {
    final int v;
    switch (random().nextInt(10)) {
      case 0:
        v = Integer.MIN_VALUE;
        break;
      case 1:
        v = 0;
        break;
      case 2:
        v = Integer.MAX_VALUE;
        break;
      default:
        v = TestUtil.nextInt(random(), -10, 10);
        break;
    }
    values[i] = v;
    if (v == 0 && random().nextBoolean()) {
      // missing
      iw.addDocument(new Document());
      missing.add(i);
    } else {
      field.setIntValue(v);
      iw.addDocument(doc);
    }
  }
  iw.forceMerge(1);
  final DirectoryReader reader = iw.getReader();
  final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER);
  for (int i = 0; i < values.length; ++i) {
    if (missing.contains(i) == false) {
      assertEquals(i, ints.nextDoc());
      assertEquals(values[i], ints.longValue());
    }
  }
  assertEquals(NO_MORE_DOCS, ints.nextDoc());
  reader.close();
  iw.close();
  dir.close();
}

Source File: SuperQueryTest.java From incubator-retired-blur with Apache License 2.0

4 votes

private static IndexableField newStringField(String name, String value) {
  return new StringField(name, value, Store.YES);
}

Source File: TikaLuceneContentExtractor.java From cxf with Apache License 2.0

4 votes

private static Field getContentField(final LuceneDocumentMetadata documentMetadata, final String content) {
    return new TextField(documentMetadata.getContentFieldName(), content, Store.YES);
}

Source File: PartitionKeyMapper.java From stratio-cassandra with Apache License 2.0

2 votes

/**
 * Adds to the specified {@link Document} the {@link Field}s associated to the specified raw partition key.
 *
 * @param document     The document in which the fields are going to be added.
 * @param partitionKey The raw partition key to be converted.
 */
public void addFields(Document document, DecoratedKey partitionKey) {
    String serializedKey = ByteBufferUtils.toString(partitionKey.getKey());
    Field field = new StringField(FIELD_NAME, serializedKey, Store.YES);
    document.add(field);
}

Java Code Examples for org.apache.lucene.document.Field.Store#YES