org.apache.lucene.document.StringField Java Examples

The following examples show how to use org.apache.lucene.document.StringField. They are drawn from open-source projects; the originating source file, project, and license are listed above each example.
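Before looking at the project examples, note what distinguishes StringField from TextField: a StringField value is indexed as a single, untokenized term (it is not run through the analyzer), which makes it the natural choice for exact-match values such as ids, codes, and flags, while TextField is tokenized for full-text search. The following minimal sketch illustrates that distinction; it assumes a Lucene 8.x-style API (the same generation most of the examples below use), and the class name, field names, and values are purely illustrative.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class StringFieldSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // StringField: indexed as one untokenized term; ideal for exact-match ids and codes.
      doc.add(new StringField("id", "doc-42", Field.Store.YES));
      // TextField: analyzed into tokens; ideal for free text.
      doc.add(new TextField("body", "some free text to analyze", Field.Store.NO));
      writer.addDocument(doc);
      writer.commit();

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // An exact TermQuery matches the StringField because its value was indexed as-is.
        long hits = searcher.search(new TermQuery(new Term("id", "doc-42")), 1).totalHits.value;
        System.out.println("hits=" + hits); // expected: 1
      }
    }
  }
}

The same TermQuery pattern is what many of the examples below rely on: they index an identifier with StringField and then look it up, delete it, or update doc values for it via an exact Term match.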
Example #1
Source File: TestSpanNotQuery.java    From lucene-solr with Apache License 2.0
public void testNoPositions() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(new StringField("foo", "bar", Field.Store.NO));
  iw.addDocument(doc);
  
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher is = new IndexSearcher(ir);
  SpanTermQuery query = new SpanTermQuery(new Term("foo", "bar"));
  SpanTermQuery query2 = new SpanTermQuery(new Term("foo", "baz"));

  IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
    is.search(new SpanNotQuery(query, query2), 5);
  });
  assertTrue(expected.getMessage().contains("was indexed without position data"));

  ir.close();
  dir.close();
}
 
Example #2
Source File: TestIndexSorting.java    From lucene-solr with Apache License 2.0
public void testBadDVUpdate() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG));
  iwc.setIndexSort(indexSort);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  doc.add(new StringField("id", new BytesRef("0"), Store.NO));
  doc.add(new NumericDocValuesField("foo", random().nextInt()));
  w.addDocument(doc);
  w.commit();
  IllegalArgumentException exc = expectThrows(IllegalArgumentException.class,
      () -> w.updateDocValues(new Term("id", "0"), new NumericDocValuesField("foo", -1)));
  assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
  exc = expectThrows(IllegalArgumentException.class,
      () -> w.updateNumericDocValue(new Term("id", "0"), "foo", -1));
  assertEquals(exc.getMessage(), "cannot update docvalues field involved in the index sort, field=foo, sort=<long: \"foo\">");
  w.close();
  dir.close();
}
 
Example #3
Source File: TestTaxonomyFacetCounts.java    From lucene-solr with Apache License 2.0
private void indexTwoDocs(TaxonomyWriter taxoWriter, IndexWriter indexWriter, FacetsConfig config, boolean withContent) throws Exception {
  for (int i = 0; i < 2; i++) {
    Document doc = new Document();
    if (withContent) {
      doc.add(new StringField("f", "a", Field.Store.NO));
    }
    if (config != null) {
      doc.add(new FacetField("A", Integer.toString(i)));
      indexWriter.addDocument(config.build(taxoWriter, doc));
    } else {
      indexWriter.addDocument(doc);
    }
  }
  
  indexWriter.commit();
}
 
Example #4
Source File: SimpleDocumentWriter.java    From dremio-oss with Apache License 2.0
private void addToDoc(IndexKey key, byte[]... values){
  Preconditions.checkArgument(key.getValueType() == String.class);
  final boolean sorted = key.isSorted();
  if (sorted) {
    checkIfSorted(key, (Object[]) values);
  }

  checkIfMultiValueField(key, (Object[]) values);

  final String indexFieldName = key.getIndexFieldName();
  final Store stored = key.isStored() ? Store.YES : Store.NO;
  for (final byte[] value : values) {
    if (value == null) {
      continue;
    }
    final BytesRef truncatedValue = new BytesRef(value,0, Math.min(value.length, MAX_STRING_LENGTH));
    doc.add(new StringField(indexFieldName, truncatedValue, stored));
  }

  if (sorted && values.length == 1 && values[0] != null) {
    Preconditions.checkArgument(key.getSortedValueType() == SearchFieldSorting.FieldType.STRING);
    doc.add(new SortedDocValuesField(indexFieldName, new BytesRef(values[0])));
  }
}
 
Example #5
Source File: LuceneWorkflowInstanceRepository.java    From oodt with Apache License 2.0
private void addInstanceMetadataToDoc(Document doc, Metadata met) {
    if (met != null && met.getMap().keySet().size() > 0) {
        for (String metKey : met.getMap().keySet()) {
            List metVals = met.getAllMetadata(metKey);
            if (metVals != null && metVals.size() > 0) {
                for (Object metVal1 : metVals) {
                    String metVal = (String) metVal1;
                    doc.add(new Field(metKey, metVal, StringField.TYPE_STORED));
                }

                // now index the field name so that we can use it to
                // look it up when converting from doc to
                // WorkflowInstance
                doc.add(new Field("workflow_inst_met_flds", metKey,
                        StringField.TYPE_STORED));

            }
        }
    }
}
 
Example #6
Source File: TestLuceneIndexer.java    From dremio-oss with Apache License 2.0
@Test(expected = StaleSearcherException.class)
public void testSearcherCacheTTL() throws Exception {
  try (LuceneSearchIndex index = new LuceneSearchIndex(null, "multithreaded-search", true, CommitWrapper.NO_OP, 500)) {
    for (int i = 0; i < 10; ++i) {
      final Document doc = new Document();
      doc.add(
          new StringField(CoreIndexedStore.ID_FIELD_NAME, new BytesRef(Integer.toString(i).getBytes()), Store.YES));
      doc.add(new StringField("user", "u1", Field.Store.YES));
      index.add(doc);
    }

    Query query = new TermQuery(new Term("user", "u1"));
    LuceneSearchIndex.SearchHandle searchHandle = index.createSearchHandle();
    List<Doc> docs = index.search(searchHandle, query, 4, new Sort(), 0);
    assertEquals(4, docs.size());

    // sleep to force cache expiry.
    Thread.sleep(1000);

    docs = index.searchAfter(searchHandle, query, 6, new Sort(), docs.get(3));
    assertEquals(6, docs.size());

    searchHandle.close();
  }
}
 
Example #7
Source File: TestSoftDeletesRetentionMergePolicy.java    From lucene-solr with Apache License 2.0
public void testSoftDeleteWithTryUpdateDocValue() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField("soft_delete")
      .setMergePolicy(new SoftDeletesRetentionMergePolicy("soft_delete", MatchAllDocsQuery::new, newLogMergePolicy()));
  IndexWriter writer = new IndexWriter(dir, config);
  SearcherManager sm = new SearcherManager(writer, new SearcherFactory());
  Document d = new Document();
  d.add(new StringField("id", "0", Field.Store.YES));
  writer.addDocument(d);
  sm.maybeRefreshBlocking();
  doUpdate(new Term("id", "0"), writer,
      new NumericDocValuesField("soft_delete", 1), new NumericDocValuesField("other-field", 1));
  sm.maybeRefreshBlocking();
  assertEquals(1, writer.cloneSegmentInfos().size());
  SegmentCommitInfo si = writer.cloneSegmentInfos().info(0);
  assertEquals(1, si.getSoftDelCount());
  assertEquals(1, si.info.maxDoc());
  IOUtils.close(sm, writer, dir);
}
 
Example #8
Source File: InMemoryIndex.java    From SnowGraph with Apache License 2.0
public InMemoryIndex(Map<String,String> id2Text){
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try {
        IndexWriter writer = new IndexWriter(directory, iwc);
        for (String id:id2Text.keySet()) {
            Document doc=new Document();
            doc.add(new StringField("id", id, Field.Store.YES));
            doc.add(new TextField("content", id2Text.get(id), Field.Store.YES));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example #9
Source File: TestMultiTermsEnum.java    From lucene-solr with Apache License 2.0
public void testNoTermsInField() throws Exception {
  Directory directory = new ByteBuffersDirectory();
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())));
  Document document = new Document();
  document.add(new StringField("deleted", "0", Field.Store.YES));
  writer.addDocument(document);

  DirectoryReader reader = DirectoryReader.open(writer);
  writer.close();

  Directory directory2 = new ByteBuffersDirectory();
  writer = new IndexWriter(directory2, new IndexWriterConfig(new MockAnalyzer(random())));
  
  List<LeafReaderContext> leaves = reader.leaves();
  CodecReader[] codecReaders = new CodecReader[leaves.size()];
  for (int i = 0; i < leaves.size(); i++) {
    codecReaders[i] = new MigratingCodecReader((CodecReader) leaves.get(i).reader());
  }

  writer.addIndexes(codecReaders); // <- bang

  IOUtils.close(writer, reader, directory);
}
 
Example #10
Source File: TestNumericDocValuesUpdates.java    From lucene-solr with Apache License 2.0
public void testUpdateAllDeletedSegment() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  Document doc = new Document();
  doc.add(new StringField("id", "doc", Store.NO));
  doc.add(new NumericDocValuesField("f1", 1L));
  writer.addDocument(doc);
  writer.addDocument(doc);
  writer.commit();
  writer.deleteDocuments(new Term("id", "doc")); // delete all docs in the first segment
  writer.addDocument(doc);
  writer.updateNumericDocValue(new Term("id", "doc"), "f1", 2L);
  writer.close();
  
  DirectoryReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.leaves().size());
  NumericDocValues dvs = reader.leaves().get(0).reader().getNumericDocValues("f1");
  assertEquals(0, dvs.nextDoc());
  assertEquals(2, dvs.longValue());
  
  reader.close();
  
  dir.close();
}
 
Example #11
Source File: TestMemoryIndex.java    From lucene-solr with Apache License 2.0
@Test
public void testBuildFromDocument() {

  Document doc = new Document();
  doc.add(new TextField("field1", "some text", Field.Store.NO));
  doc.add(new TextField("field1", "some more text", Field.Store.NO));
  doc.add(new StringField("field2", "untokenized text", Field.Store.NO));

  analyzer.setPositionIncrementGap(100);

  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);

  assertThat(mi.search(new TermQuery(new Term("field1", "text"))), not(0.0f));
  assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
  assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));

  assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
  assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
  assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));

}
 
Example #12
Source File: TestDocValuesStatsCollector.java    From lucene-solr with Apache License 2.0
public void testOneDoc() throws IOException {
  try (Directory dir = newDirectory();
      IndexWriter indexWriter = new IndexWriter(dir, newIndexWriterConfig())) {
    String field = "numeric";
    Document doc = new Document();
    doc.add(new NumericDocValuesField(field, 1));
    doc.add(new StringField("id", "doc1", Store.NO));
    indexWriter.addDocument(doc);

    try (DirectoryReader reader = DirectoryReader.open(indexWriter)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      LongDocValuesStats stats = new LongDocValuesStats(field);
      searcher.search(new MatchAllDocsQuery(), new DocValuesStatsCollector(stats));

      assertEquals(1, stats.count());
      assertEquals(0, stats.missing());
      assertEquals(1, stats.max().longValue());
      assertEquals(1, stats.min().longValue());
      assertEquals(1, stats.sum().longValue());
      assertEquals(1, stats.mean(), 0.0001);
      assertEquals(0, stats.variance(), 0.0001);
      assertEquals(0, stats.stdev(), 0.0001);
    }
  }
}
 
Example #13
Source File: TestFieldCacheSort.java    From lucene-solr with Apache License 2.0
/** Tests that we throw an exception on a multi-valued field; indexing it this way creates a corrupt reader, so use SORTED_SET instead. */
public void testMultiValuedField() throws IOException {
  Directory indexStore = newDirectory();
  IndexWriter writer = new IndexWriter(indexStore, newIndexWriterConfig(new MockAnalyzer(random())));
  for(int i=0; i<5; i++) {
      Document doc = new Document();
      doc.add(new StringField("string", "a"+i, Field.Store.NO));
      doc.add(new StringField("string", "b"+i, Field.Store.NO));
      writer.addDocument(doc);
  }
  writer.forceMerge(1); // enforce one segment to have a higher unique term count in all cases
  writer.close();
  Sort sort = new Sort(
      new SortField("string", SortField.Type.STRING),
      SortField.FIELD_DOC);
  IndexReader reader = UninvertingReader.wrap(DirectoryReader.open(indexStore),
                       Collections.singletonMap("string", Type.SORTED));
  IndexSearcher searcher = new IndexSearcher(reader);
  expectThrows(IllegalStateException.class, () -> {
    searcher.search(new MatchAllDocsQuery(), 500, sort);
  });
  reader.close();
  indexStore.close();
}
 
Example #14
Source File: AnalyzingInfixSuggester.java    From lucene-solr with Apache License 2.0
private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
  String textString = text.utf8ToString();
  Document doc = new Document();
  FieldType ft = getTextFieldType();
  doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
  if (minPrefixChars>0) {
    doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
  }
  doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
  doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
  doc.add(new NumericDocValuesField("weight", weight));
  if (payload != null) {
    doc.add(new BinaryDocValuesField("payloads", payload));
  }
  if (contexts != null) {
    for(BytesRef context : contexts) {
      doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
      doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
    }
  }
  return doc;
}
 
Example #15
Source File: TestEmptyTokenStream.java    From lucene-solr with Apache License 2.0
public void testIndexWriter_LUCENE4656() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));

  TokenStream ts = new EmptyTokenStream();
  assertFalse(ts.hasAttribute(TermToBytesRefAttribute.class));

  Document doc = new Document();
  doc.add(new StringField("id", "0", Field.Store.YES));
  doc.add(new TextField("description", ts));
  
  // this should not fail because we have no TermToBytesRefAttribute
  writer.addDocument(doc);
  
  assertEquals(1, writer.getDocStats().numDocs);

  writer.close();
  directory.close();
}
 
Example #16
Source File: FacetsConfig.java    From lucene-solr with Apache License 2.0
private void processSSDVFacetFields(Map<String,List<SortedSetDocValuesFacetField>> byField, Document doc) throws IOException {
  //System.out.println("process SSDV: " + byField);
  for(Map.Entry<String,List<SortedSetDocValuesFacetField>> ent : byField.entrySet()) {

    String indexFieldName = ent.getKey();
    //System.out.println("  field=" + indexFieldName);

    for(SortedSetDocValuesFacetField facetField : ent.getValue()) {
      FacetLabel cp = new FacetLabel(facetField.dim, facetField.label);
      String fullPath = pathToString(cp.components, cp.length);
      //System.out.println("add " + fullPath);

      // For facet counts:
      doc.add(new SortedSetDocValuesField(indexFieldName, new BytesRef(fullPath)));

      // For drill-down:
      doc.add(new StringField(indexFieldName, fullPath, Field.Store.NO));

      FacetsConfig.DimConfig ft = getDimConfig(facetField.dim);        
      if (ft.requireDimensionDrillDown) {
        doc.add(new StringField(indexFieldName, facetField.dim, Field.Store.NO));
      }
    }
  }
}
 
Example #17
Source File: TestNumericDocValuesUpdates.java    From lucene-solr with Apache License 2.0
public void testUpdateNumericDVFieldWithSameNameAsPostingField() throws Exception {
  // this used to fail because FieldInfos.Builder neglected to update
  // globalFieldMaps.docValuesTypes map
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter writer = new IndexWriter(dir, conf);
  
  Document doc = new Document();
  doc.add(new StringField("f", "mock-value", Store.NO));
  doc.add(new NumericDocValuesField("f", 5));
  writer.addDocument(doc);
  writer.commit();
  writer.updateNumericDocValue(new Term("f", "mock-value"), "f", 17L);
  writer.close();
  
  DirectoryReader r = DirectoryReader.open(dir);
  NumericDocValues ndv = r.leaves().get(0).reader().getNumericDocValues("f");
  assertEquals(0, ndv.nextDoc());
  assertEquals(17, ndv.longValue());
  r.close();
  
  dir.close();
}
 
Example #18
Source File: StrategyTestCase.java    From lucene-solr with Apache License 2.0
protected List<Document> getDocuments(Iterator<SpatialTestData> sampleData) {
  List<Document> documents = new ArrayList<>();
  while (sampleData.hasNext()) {
    SpatialTestData data = sampleData.next();
    Document document = new Document();
    document.add(new StringField("id", data.id, Field.Store.YES));
    document.add(new StringField("name", data.name, Field.Store.YES));
    Shape shape = data.shape;
    shape = convertShapeFromGetDocuments(shape);
    if (shape != null) {
      for (Field f : strategy.createIndexableFields(shape)) {
        document.add(f);
      }
      if (storeShape)//just for diagnostics
        document.add(new StoredField(strategy.getFieldName(), shape.toString()));
    }

    documents.add(document);
  }
  return documents;
}
 
Example #19
Source File: DocumentBuilder.java    From modernmt with Apache License 2.0
public static Document newInstance(UUID owner, long memory, LanguageDirection direction, Reader contentReader) {
    Document document = new Document();
    document.add(new StringField(DOC_ID_FIELD, makeId(memory, direction), Field.Store.NO));
    document.add(new LongField(MEMORY_FIELD, memory, Field.Store.YES));

    if (owner != null) {
        document.add(new LongField(OWNER_MSB_FIELD, owner.getMostSignificantBits(), Field.Store.NO));
        document.add(new LongField(OWNER_LSB_FIELD, owner.getLeastSignificantBits(), Field.Store.NO));
    } else {
        document.add(new LongField(OWNER_MSB_FIELD, 0L, Field.Store.NO));
        document.add(new LongField(OWNER_LSB_FIELD, 0L, Field.Store.NO));
    }

    document.add(new CorpusContentField(makeContentFieldName(direction), contentReader));

    return document;
}
 
Example #20
Source File: TestSimpleIndexManager.java    From nifi with Apache License 2.0
@Test
public void testMultipleWritersSimultaneouslySameIndex() throws IOException {
    final StandardIndexManager mgr = new StandardIndexManager(new RepositoryConfiguration());
    final File dir = new File("target/" + UUID.randomUUID().toString());
    try {
        final EventIndexWriter writer1 = mgr.borrowIndexWriter(dir);
        final EventIndexWriter writer2 = mgr.borrowIndexWriter(dir);

        final Document doc1 = new Document();
        doc1.add(new StringField("id", "1", Store.YES));

        final Document doc2 = new Document();
        doc2.add(new StringField("id", "2", Store.YES));

        writer1.index(doc1, 1000);
        writer2.index(doc2, 1000);
        mgr.returnIndexWriter(writer2);
        mgr.returnIndexWriter(writer1);

        final EventIndexSearcher searcher = mgr.borrowIndexSearcher(dir);
        final TopDocs topDocs = searcher.getIndexSearcher().search(new MatchAllDocsQuery(), 2);
        assertEquals(2, topDocs.totalHits.value);
        mgr.returnIndexSearcher(searcher);
    } finally {
        FileUtils.deleteFile(dir, true);
    }
}
 
Example #21
Source File: IndexDBO_classes.java    From NLIWOD with GNU Affero General Public License v3.0
private void addDocumentToIndex(final Resource resource, final String predicate, final String object) throws IOException {
	Document doc = new Document();
	doc.add(new StringField(FIELD_NAME_SUBJECT, resource.getURI(), Store.YES));
	doc.add(new StringField(FIELD_NAME_PREDICATE, predicate, Store.YES));
	doc.add(new TextField(FIELD_NAME_OBJECT, object, Store.YES));
	iwriter.addDocument(doc);
}
 
Example #22
Source File: TRECCCDocumentIndexer.java    From lucene4ir with Apache License 2.0
private void initFields() {
    docnumField = new StringField(Lucene4IRConstants.FIELD_DOCNUM, "", Field.Store.YES);
    if (indexPositions) {
        titleField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
        textField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
        allField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
        authorField = new TermVectorEnabledTextField(Lucene4IRConstants.FIELD_AUTHOR, "", Field.Store.YES);
    } else {
        titleField = new TextField(Lucene4IRConstants.FIELD_TITLE, "", Field.Store.YES);
        textField = new TextField(Lucene4IRConstants.FIELD_CONTENT, "", Field.Store.YES);
        allField = new TextField(Lucene4IRConstants.FIELD_ALL, "", Field.Store.YES);
        authorField = new TextField(Lucene4IRConstants.FIELD_AUTHOR, "", Field.Store.YES);
    }
}
 
Example #23
Source File: UserDocumentTransformer.java    From gravitee-management-rest-api with Apache License 2.0
@Override
public Document transform(UserEntity user) {
    Document doc = new Document();

    doc.add(new StringField(FIELD_ID, user.getId(), Field.Store.YES));
    doc.add(new StringField(FIELD_TYPE, FIELD_TYPE_VALUE, Field.Store.YES));
    if (user.getSource() != null) {
        doc.add(new StringField(FIELD_SOURCE, user.getSource(), Field.Store.NO));
    }
    if (user.getSourceId() != null) {
        doc.add(new StringField(FIELD_REFERENCE, user.getSourceId(), Field.Store.NO));
    }

    if (user.getDisplayName() != null) {
        doc.add(new StringField(FIELD_DISPLAYNAME, user.getDisplayName(), Field.Store.NO));
        doc.add(new TextField(FIELD_DISPLAYNAME_SPLIT, user.getDisplayName(), Field.Store.NO));
    }
    if (user.getFirstname() != null) {
        doc.add(new StringField(FIELD_FIRSTNAME, user.getFirstname(), Field.Store.NO));
    }

    if (user.getLastname() != null) {
        doc.add(new StringField(FIELD_LASTNAME, user.getLastname(), Field.Store.NO));
    }

    if (user.getEmail() != null) {
        // For security reasons, we remove the domain part of the email
        doc.add(new StringField(FIELD_EMAIL, user.getEmail().substring(0, user.getEmail().indexOf('@')), Field.Store.NO));
    }

    return doc;
}
 
Example #24
Source File: TripleIndexCreatorContext.java    From AGDISTIS with GNU Affero General Public License v3.0
private void addDocumentToIndex(String subject, String predicate, String object, boolean isUri) throws IOException {
	log.info("here again");
	List<Triple> triples = new ArrayList<>();

	try {
		triples = search(subject, null, null, 100);
	} catch (Exception e) {
	}
	if (triples.size() == 0) {
		Document doc = new Document();
		log.debug(subject + " " + predicate + " " + object);
		doc.add(new StringField(FIELD_NAME_URI, subject, Store.YES));
		doc.add(new TextField(FIELD_NAME_SURFACE_FORM, object, Store.YES));
		doc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
		doc.add(new TextField(FIELD_NAME_CONTEXT, object, Store.YES));
		iwriter.addDocument(doc);
	} else {
		String docID = triples.get(0).subject;
		log.info(triples.toString());
		if (isUri) {
			if (endpoint.isEmpty()) {
				log.info("endpoint empty");
				object = object.replace(nodeType, "");
			} else {
				object = sparql(subject);
				log.info("endpoint working");
			}
		}
		String remainContext = triples.get(0).object.concat(" " + object);
		log.info(remainContext);
		Document hitDoc = isearcher.doc(Integer.parseInt(docID));
		Document newDoc = new Document();
		newDoc.add(new StringField(FIELD_NAME_URI, triples.get(0).predicate, Store.YES));
		newDoc.add(new TextField(FIELD_NAME_SURFACE_FORM, hitDoc.get(FIELD_NAME_SURFACE_FORM), Store.YES));
		newDoc.add(new TextField(FIELD_NAME_URI_COUNT, "1", Store.YES));
		newDoc.add(new TextField(FIELD_NAME_CONTEXT, remainContext, Store.YES));
		iwriter.updateDocument(new Term(FIELD_NAME_URI, subject), newDoc);
	}

}
 
Example #25
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0
public void testPreventAddingIndexesWithDifferentSoftDeletesField() throws Exception {
  Directory dir1 = newDirectory();
  IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig().setSoftDeletesField("soft_deletes_1"));
  for (int i = 0; i < 2; i++) {
    Document d = new Document();
    d.add(new StringField("id", "1", Field.Store.YES));
    d.add(new StringField("version", Integer.toString(i), Field.Store.YES));
    w1.softUpdateDocument(new Term("id", "1"), d, new NumericDocValuesField("soft_deletes_1", 1));
  }
  w1.commit();
  w1.close();

  Directory dir2 = newDirectory();
  IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig().setSoftDeletesField("soft_deletes_2"));
  IllegalArgumentException error = expectThrows(IllegalArgumentException.class, () -> w2.addIndexes(dir1));
  assertEquals("cannot configure [soft_deletes_2] as soft-deletes; this index uses [soft_deletes_1] as soft-deletes already",
      error.getMessage());
  w2.close();

  Directory dir3 = newDirectory();
  IndexWriterConfig config = newIndexWriterConfig().setSoftDeletesField("soft_deletes_1");
  IndexWriter w3 = new IndexWriter(dir3, config);
  w3.addIndexes(dir1);
  for (SegmentCommitInfo si : w3.cloneSegmentInfos()) {
    FieldInfo softDeleteField = IndexWriter.readFieldInfos(si).fieldInfo("soft_deletes_1");
    assertTrue(softDeleteField.isSoftDeletesField());
  }
  w3.close();
  IOUtils.close(dir1, dir2, dir3);
}
 
Example #26
Source File: StringFieldTypeDefinition.java    From incubator-retired-blur with Apache License 2.0
@Override
public Iterable<? extends Field> getFieldsForColumn(String family, Column column) {
  String name = getName(family, column.getName());
  Field field = new Field(name, column.getValue(), StringField.TYPE_STORED);
  if (isSortEnable()) {
    return addSort(column, name, field);
  }
  return makeIterable(field);
}
 
Example #27
Source File: SearchEquivalenceTestBase.java    From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  Random random = random();
  directory = newDirectory();
  stopword = "" + randomChar();
  CharacterRunAutomaton stopset = new CharacterRunAutomaton(Automata.makeString(stopword));
  analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false, stopset);
  RandomIndexWriter iw = new RandomIndexWriter(random, directory, analyzer);
  Document doc = new Document();
  Field id = new StringField("id", "", Field.Store.NO);
  Field field = new TextField("field", "", Field.Store.NO);
  doc.add(id);
  doc.add(field);
  
  // index some docs
  int numDocs = TEST_NIGHTLY ? atLeast(1000) : atLeast(100);
  for (int i = 0; i < numDocs; i++) {
    id.setStringValue(Integer.toString(i));
    field.setStringValue(randomFieldContents());
    iw.addDocument(doc);
  }
  
  // delete some docs
  int numDeletes = numDocs/20;
  for (int i = 0; i < numDeletes; i++) {
    Term toDelete = new Term("id", Integer.toString(random.nextInt(numDocs)));
    if (random.nextBoolean()) {
      iw.deleteDocuments(toDelete);
    } else {
      iw.deleteDocuments(new TermQuery(toDelete));
    }
  }
  
  reader = iw.getReader();
  s1 = newSearcher(reader);
  s2 = newSearcher(reader);
  iw.close();
}
 
Example #28
Source File: TestTermQueryPrefixGridStrategy.java    From lucene-solr with Apache License 2.0
@Test
public void testNGramPrefixGridLosAngeles() throws IOException {
  SpatialContext ctx = SpatialContext.GEO;
  TermQueryPrefixTreeStrategy prefixGridStrategy = new TermQueryPrefixTreeStrategy(new QuadPrefixTree(ctx), "geo");

  Shape point = ctx.makePoint(-118.243680, 34.052230);

  Document losAngeles = new Document();
  losAngeles.add(new StringField("name", "Los Angeles", Field.Store.YES));
  for (Field field : prefixGridStrategy.createIndexableFields(point)) {
    losAngeles.add(field);
  }
  losAngeles.add(new StoredField(prefixGridStrategy.getFieldName(), point.toString())); // just for diagnostics

  addDocumentsAndCommit(Arrays.asList(losAngeles));

  // This won't work with simple spatial context...
  SpatialArgsParser spatialArgsParser = new SpatialArgsParser();
  // TODO... use a non polygon query
//    SpatialArgs spatialArgs = spatialArgsParser.parse(
//        "Intersects(POLYGON((-127.00390625 39.8125,-112.765625 39.98828125,-111.53515625 31.375,-125.94921875 30.14453125,-127.00390625 39.8125)))",
//        new SimpleSpatialContext());

//    Query query = prefixGridStrategy.makeQuery(spatialArgs, fieldInfo);
//    SearchResults searchResults = executeQuery(query, 1);
//    assertEquals(1, searchResults.numFound);
}
 
Example #29
Source File: TestClassicSimilarity.java    From lucene-solr with Apache License 2.0
@Override
public void setUp() throws Exception {
  super.setUp();
  directory = newDirectory();
  try (IndexWriter indexWriter = new IndexWriter(directory, newIndexWriterConfig())) {
    Document document = new Document();
    document.add(new StringField("test", "hit", Store.NO));
    indexWriter.addDocument(document);
    indexWriter.commit();
  }
  indexReader = DirectoryReader.open(directory);
  indexSearcher = newSearcher(indexReader);
  indexSearcher.setSimilarity(new ClassicSimilarity());
}
 
Example #30
Source File: BaseDocValuesFormatTestCase.java    From lucene-solr with Apache License 2.0
public void testSortedMergeAwayAllValuesLargeSegment() throws IOException {
  Directory directory = newDirectory();
  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
  iwconfig.setMergePolicy(newLogMergePolicy());
  RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

  Document doc = new Document();
  doc.add(new StringField("id", "1", Field.Store.NO));
  doc.add(new SortedDocValuesField("field", new BytesRef("hello")));
  iwriter.addDocument(doc);
  final int numEmptyDocs = atLeast(1024);
  for (int i = 0; i < numEmptyDocs; ++i) {
    iwriter.addDocument(new Document());
  }
  iwriter.commit();
  iwriter.deleteDocuments(new Term("id", "1"));
  iwriter.forceMerge(1);

  DirectoryReader ireader = iwriter.getReader();
  iwriter.close();

  SortedDocValues dv = getOnlyLeafReader(ireader).getSortedDocValues("field");
  assertEquals(NO_MORE_DOCS, dv.nextDoc());

  ireader.close();
  directory.close();
}