Java Code Examples for org.apache.lucene.document.Field.Store#YES
The following examples show how to use
org.apache.lucene.document.Field.Store#YES.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: SimpleDocumentWriter.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Adds the given string values to the current document under the key's index field name.
 *
 * <p>Null values are skipped. Every indexed value is abbreviated to {@code MAX_STRING_LENGTH}
 * before being added as a {@link StringField}. When the key is sorted and exactly one non-null
 * value was supplied, a {@link SortedDocValuesField} carrying the unabbreviated value is added
 * as well.
 *
 * @param key index key describing the field; its value type must be {@code String.class}
 * @param values values to index
 */
private void addToDoc(IndexKey key, String... values) {
  Preconditions.checkArgument(key.getValueType() == String.class);

  final boolean sortable = key.isSorted();
  if (sortable) {
    checkIfSorted(key, (Object[]) values);
  }
  checkIfMultiValueField(key, (Object[]) values);

  final String fieldName = key.getIndexFieldName();
  final Store storeMode;
  if (key.isStored()) {
    storeMode = Store.YES;
  } else {
    storeMode = Store.NO;
  }

  for (final String raw : values) {
    if (raw != null) {
      doc.add(new StringField(fieldName, StringUtils.abbreviate(raw, MAX_STRING_LENGTH), storeMode));
    }
  }

  // Doc values for sorting are only written for a single, non-null value.
  if (!sortable || values.length != 1 || values[0] == null) {
    return;
  }
  Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
  doc.add(new SortedDocValuesField(fieldName, new BytesRef(values[0])));
}
Example 2
Source File: SimpleDocumentWriter.java From dremio-oss with Apache License 2.0 | 6 votes |
/**
 * Adds the given binary values to the current document under the key's index field name.
 *
 * <p>Null values are skipped. Every indexed value is cut to at most {@code MAX_STRING_LENGTH}
 * bytes before being added as a {@link StringField}. When the key is sorted and exactly one
 * non-null value was supplied, a {@link SortedDocValuesField} carrying the full byte array is
 * added as well.
 *
 * @param key index key describing the field; its value type must be {@code String.class}
 * @param values values to index
 */
private void addToDoc(IndexKey key, byte[]... values) {
  Preconditions.checkArgument(key.getValueType() == String.class);

  final boolean sortable = key.isSorted();
  if (sortable) {
    checkIfSorted(key, (Object[]) values);
  }
  checkIfMultiValueField(key, (Object[]) values);

  final String fieldName = key.getIndexFieldName();
  final Store storeMode;
  if (key.isStored()) {
    storeMode = Store.YES;
  } else {
    storeMode = Store.NO;
  }

  for (final byte[] raw : values) {
    if (raw != null) {
      final int keptLength = Math.min(raw.length, MAX_STRING_LENGTH);
      doc.add(new StringField(fieldName, new BytesRef(raw, 0, keptLength), storeMode));
    }
  }

  // Doc values for sorting are only written for a single, non-null value.
  if (!sortable || values.length != 1 || values[0] == null) {
    return;
  }
  Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
  doc.add(new SortedDocValuesField(fieldName, new BytesRef(values[0])));
}
Example 3
Source File: NewsToDocument.java From cqunews-web with Apache License 2.0 | 6 votes |
/**
 * Converts a news item into a Lucene document.
 *
 * <p>The id, url, title, content and time of the news item are each added, in that order, as a
 * stored {@link StringField}.
 *
 * @param news the news item to convert
 * @return a new document holding the five fields
 */
public static Document newsToDocument(NewsDetailModel news) {
  // All fields are built before any is added to the document.
  final StringField[] fields = {
    new StringField("id", news.getId(), Store.YES),
    new StringField("url", news.getUrl(), Store.YES),
    new StringField("title", news.getTitle(), Store.YES),
    new StringField("content", news.getContent(), Store.YES),
    new StringField("time", news.getTime(), Store.YES),
  };

  final Document result = new Document();
  for (final StringField field : fields) {
    result.add(field);
  }
  return result;
}
Example 4
Source File: TestOrdValues.java From lucene-solr with Apache License 2.0 | 5 votes |
private static void addDoc(RandomIndexWriter iw, int i) throws Exception { Document d = new Document(); Field f; int scoreAndID = i + 1; FieldType customType = new FieldType(TextField.TYPE_STORED); customType.setTokenized(false); customType.setOmitNorms(true); f = newField(ID_FIELD, id2String(scoreAndID), customType); // for debug purposes d.add(f); d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID)))); FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED); customType2.setOmitNorms(true); f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2); // for regular search d.add(f); f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES); // for function scoring d.add(f); d.add(new NumericDocValuesField(INT_FIELD, scoreAndID)); f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES); // for function scoring d.add(f); d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID))); iw.addDocument(d); log("added: " + d); }
Example 5
Source File: RowIndexer.java From sql-layer with GNU Affero General Public License v3.0 | 5 votes |
/**
 * Encodes the row's hKey bytes, remembers the result in {@code keyEncodedString}, and adds it
 * to the current document as a stored {@link StringField} named {@code IndexedField.KEY_FIELD}.
 *
 * @param row the row whose hKey is indexed
 */
protected void getKeyBytes(Row row) {
  final byte[] hKeyBytes = row.hKey().hKeyBytes();
  keyEncodedString = encodeBytes(hKeyBytes, 0, hKeyBytes.length);
  currentDocument.add(new StringField(IndexedField.KEY_FIELD, keyEncodedString, Store.YES));
}
Example 6
Source File: TokenMapperGeneric.java From stratio-cassandra with Apache License 2.0 | 5 votes |
/** {@inheritDoc} */
@Override
@SuppressWarnings("unchecked")
public void addFields(Document document, DecoratedKey partitionKey) {
  // Turn the partition key's token into bytes, then into its string form.
  final ByteBuffer tokenBytes = factory.toByteArray(partitionKey.getToken());
  final String tokenText = ByteBufferUtils.toString(tokenBytes);
  document.add(new StringField(FIELD_NAME, tokenText, Store.YES));
}
Example 7
Source File: AnchorIndexer.java From tagme with Apache License 2.0 | 4 votes |
@Override public void makeIndex(String lang, File workingDir) throws IOException { log.info("Loading support datasets..."); File all_anchors = new WikipediaAnchorParser(lang).getFile(); long numAnchors = ExternalSortUtils.wcl(all_anchors); AnchorIterator iterator = new AnchorIterator(all_anchors); IntSet people = new PeopleWIDs(lang).getDataset(); // IndexSearcher articles = Indexes.getSearcher(RepositoryDirs.WIKIPEDIA.getPath(lang)); IndexSearcher articles = openWikipediaIndex(lang); //QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new WhitespaceAnalyzer(Version.LUCENE_34)); QueryParser queryParser = new QueryParser(Version.LUCENE_34, WikipediaIndexer.FIELD_BODY, new StandardAnalyzer(Version.LUCENE_34, new HashSet<String>())); IndexWriter index = new IndexWriter(FSDirectory.open(workingDir.getAbsoluteFile()), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer())); Document doc = new Document(); Field fId = new Field(FIELD_ID, "", Store.YES, Index.NOT_ANALYZED); Field fText = new Field(FIELD_TEXT, "", Store.YES, Index.NOT_ANALYZED); Field fObject = new Field(FIELD_OBJECT, "", Store.YES, Index.NO); doc.add(fId); doc.add(fText); doc.add(fObject); // Field fOriginal = new Field(FIELD_ORIGINAL, "", Store.YES, Index.ANALYZED); // Field fWID = new Field(FIELD_WID, "", Store.NO, Index.ANALYZED); PLogger plog = new PLogger(log, Step.TEN_MINUTES, "lines", "anchors", "searches", "indexed", "0-freq","dropped"); plog.setEnd(0, numAnchors); plog.start("Support datasets loaded, now parsing..."); int id=0; while(iterator.next()) { plog.update(0, iterator.scroll); plog.update(1); String anchorText = iterator.anchor; int freq = freq(iterator.originals, articles, queryParser); plog.update(2, iterator.originals.size()); if (freq == 0) plog.update(4); Anchor anchorObj = Anchor.build(id, iterator.links, freq, people); if (anchorObj == null){ plog.update(5); continue; } String anchorSerial = Anchor.serialize(anchorObj); 
fId.setValue(Integer.toString(++id)); fText.setValue(anchorText); fObject.setValue(anchorSerial); for(int page : anchorObj){ Field fWID = new Field(FIELD_WID, Integer.toString(page), Store.YES, Index.NOT_ANALYZED); // fWID.setBoost(iterator.links.get(page)); doc.add(fWID); } for(String original : iterator.originals) { doc.add(new Field(FIELD_ORIGINAL, original, Store.YES, Index.NOT_ANALYZED)); } index.addDocument(doc); plog.update(3); doc.removeFields(FIELD_ORIGINAL); doc.removeFields(FIELD_WID); } plog.stop(); iterator.close(); log.info("Now optimizing..."); index.optimize(); index.close(); log.info("Done."); }
Example 8
Source File: TopicIndexer.java From tagme with Apache License 2.0 | 4 votes |
@Override public void makeIndex(String lang, File workingDir) throws IOException { IndexReader articles = Indexes.getReader(RepositoryDirs.WIKIPEDIA.getPath(lang)); Int2ObjectMap<String> bestAnchorMap = new BestAnchors(lang).getDataset(); IndexWriter index = new IndexWriter(new SimpleFSDirectory(workingDir), new IndexWriterConfig(Version.LUCENE_34, new KeywordAnalyzer())); Document doc = new Document(); Field fWID = new Field(FIELD_WID, "", Store.YES, Index.NOT_ANALYZED); Field fTitle = new Field(FIELD_TITLE, "", Store.YES, Index.NOT_ANALYZED); Field fAbstract = new Field(FIELD_ABSTRACT, "", Store.YES, Index.NO); Field fBestAnchor = new Field(FIELD_BEST_ANCHOR, "", Store.YES, Index.NO); doc.add(fWID); doc.add(fTitle); doc.add(fAbstract); doc.add(fBestAnchor); int max = articles.maxDoc(); PLogger plog = new PLogger(log, Step.TEN_MINUTES, "pages", "indexed", "noBest"); plog.setEnd(max); plog.start("Start indexing..."); for(int i=0; i<max; i++) { plog.update(0); Document oldDoc = articles.document(i); PageType type = PageType.valueOf(oldDoc.get(WikipediaIndexer.FIELD_TYPE)); if (type == PageType.TOPIC) { int wid = Integer.parseInt(oldDoc.get(WikipediaIndexer.FIELD_WID)); fWID.setValue(oldDoc.get(WikipediaIndexer.FIELD_WID)); fAbstract.setValue(oldDoc.get(WikipediaIndexer.FIELD_ABSTRACT)); fTitle.setValue(oldDoc.get(WikipediaIndexer.FIELD_TITLE)); String bestAnchor = bestAnchorMap.get(wid); if (bestAnchor == null || bestAnchor.length() == 0) plog.update(2); fBestAnchor.setValue(bestAnchor==null?"":bestAnchor); String[] cats = oldDoc.getValues(WikipediaIndexer.FIELD_CAT); if (cats != null) { for (int j=0; j<cats.length; j++) doc.add(new Field(FIELD_CAT, cats[j], Store.YES, Index.NOT_ANALYZED)); } index.addDocument(doc); plog.update(1); doc.removeFields(FIELD_CAT); } } plog.stop(); log.info("Now optimizing..."); index.optimize(); index.close(); //we cannot call this because the index is still in the temporary dir //so TopicDocs will be created using old index // 
log.info("Index Done, now creating WID->DOC_ID map"); // // TopicDocs td = new TopicDocs(lang); // td.forceParsing(); log.info("Done."); }
Example 9
Source File: TestLegacyFieldCache.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testLongFieldCache() throws IOException { Directory dir = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); cfg.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); Document doc = new Document(); LegacyLongField field = new LegacyLongField("f", 0L, Store.YES); doc.add(field); final long[] values = new long[TestUtil.nextInt(random(), 1, 10)]; Set<Integer> missing = new HashSet<>(); for (int i = 0; i < values.length; ++i) { final long v; switch (random().nextInt(10)) { case 0: v = Long.MIN_VALUE; break; case 1: v = 0; break; case 2: v = Long.MAX_VALUE; break; default: v = TestUtil.nextLong(random(), -10, 10); break; } values[i] = v; if (v == 0 && random().nextBoolean()) { // missing iw.addDocument(new Document()); missing.add(i); } else { field.setLongValue(v); iw.addDocument(doc); } } iw.forceMerge(1); final DirectoryReader reader = iw.getReader(); final NumericDocValues longs = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_LONG_PARSER); for (int i = 0; i < values.length; ++i) { if (missing.contains(i) == false) { assertEquals(i, longs.nextDoc()); assertEquals(values[i], longs.longValue()); } } assertEquals(NO_MORE_DOCS, longs.nextDoc()); reader.close(); iw.close(); dir.close(); }
Example 10
Source File: TestLegacyFieldCache.java From lucene-solr with Apache License 2.0 | 4 votes |
public void testIntFieldCache() throws IOException { Directory dir = newDirectory(); IndexWriterConfig cfg = newIndexWriterConfig(new MockAnalyzer(random())); cfg.setMergePolicy(newLogMergePolicy()); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, cfg); Document doc = new Document(); LegacyIntField field = new LegacyIntField("f", 0, Store.YES); doc.add(field); final int[] values = new int[TestUtil.nextInt(random(), 1, 10)]; Set<Integer> missing = new HashSet<>(); for (int i = 0; i < values.length; ++i) { final int v; switch (random().nextInt(10)) { case 0: v = Integer.MIN_VALUE; break; case 1: v = 0; break; case 2: v = Integer.MAX_VALUE; break; default: v = TestUtil.nextInt(random(), -10, 10); break; } values[i] = v; if (v == 0 && random().nextBoolean()) { // missing iw.addDocument(new Document()); missing.add(i); } else { field.setIntValue(v); iw.addDocument(doc); } } iw.forceMerge(1); final DirectoryReader reader = iw.getReader(); final NumericDocValues ints = FieldCache.DEFAULT.getNumerics(getOnlyLeafReader(reader), "f", FieldCache.LEGACY_INT_PARSER); for (int i = 0; i < values.length; ++i) { if (missing.contains(i) == false) { assertEquals(i, ints.nextDoc()); assertEquals(values[i], ints.longValue()); } } assertEquals(NO_MORE_DOCS, ints.nextDoc()); reader.close(); iw.close(); dir.close(); }
Example 11
Source File: SuperQueryTest.java From incubator-retired-blur with Apache License 2.0 | 4 votes |
/**
 * Creates a stored {@link StringField}.
 *
 * @param name field name
 * @param value field value
 * @return the new field
 */
private static IndexableField newStringField(String name, String value) {
  final StringField storedField = new StringField(name, value, Store.YES);
  return storedField;
}
Example 12
Source File: TikaLuceneContentExtractor.java From cxf with Apache License 2.0 | 4 votes |
/**
 * Creates the stored {@link TextField} that holds a document's content.
 *
 * @param documentMetadata metadata supplying the name of the content field
 * @param content the content to store in the field
 * @return the new content field
 */
private static Field getContentField(final LuceneDocumentMetadata documentMetadata, final String content) {
  final String contentFieldName = documentMetadata.getContentFieldName();
  return new TextField(contentFieldName, content, Store.YES);
}
Example 13
Source File: PartitionKeyMapper.java From stratio-cassandra with Apache License 2.0 | 2 votes |
/**
 * Adds to the given {@link Document} the {@link Field} derived from the given raw partition key.
 *
 * <p>The key returned by {@code partitionKey.getKey()} is converted to a string and added as a
 * stored {@link StringField} named {@code FIELD_NAME}.
 *
 * @param document The document to which the field is added.
 * @param partitionKey The raw partition key to be converted.
 */
public void addFields(Document document, DecoratedKey partitionKey) {
  final Field keyField =
      new StringField(FIELD_NAME, ByteBufferUtils.toString(partitionKey.getKey()), Store.YES);
  document.add(keyField);
}