Java Code Examples for org.apache.lucene.document.Document#add()

The following examples show how to use org.apache.lucene.document.Document#add(). Each example is taken from an open-source project; the source file, project, and license are noted above each snippet.
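Before the examples, here is a minimal, self-contained sketch of the pattern they all share: build a Document, attach fields to it with add(), and hand the document to an IndexWriter. The index path and field names below are illustrative placeholders, not taken from any of the projects that follow.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddFieldsSketch {
  public static void main(String[] args) throws Exception {
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      // StringField: indexed as a single token, useful for exact-match ids.
      doc.add(new StringField("id", "42", Field.Store.YES));
      // TextField: analyzed into tokens for full-text search.
      doc.add(new TextField("body", "hello lucene", Field.Store.NO));
      writer.addDocument(doc);
    }
  }
}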
Example 1
Source File: LuceneWorkflowInstanceRepository.java    From oodt with Apache License 2.0
private void addTasksToDoc(Document doc, List tasks) {
    if (tasks != null && tasks.size() > 0) {
        for (Object task1 : tasks) {
            WorkflowTask task = (WorkflowTask) task1;
            doc.add(new Field("task_id", task.getTaskId(), StringField.TYPE_STORED));
            doc.add(new Field("task_name", task.getTaskName(),
                    StringField.TYPE_STORED));
            doc.add(new Field("task_order",
                String.valueOf(task.getOrder()), StringField.TYPE_STORED));
            doc.add(new Field("task_class",
                task.getTaskInstanceClassName(), StringField.TYPE_STORED));

            addConditionsToDoc(task.getTaskId(), task.getConditions(), doc);
            addTaskConfigToDoc(task.getTaskId(), task.getTaskConfig(), doc);
        }
    }
}
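Note that new Field(name, value, StringField.TYPE_STORED), as used above, is the long-hand form of the StringField convenience class; assuming a matching Lucene version, the first add() call could equivalently be written as:

doc.add(new StringField("task_id", task.getTaskId(), Field.Store.YES));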
 
Example 2
Source File: TestBlockPostingsFormat.java    From lucene-solr with Apache License 2.0
/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
  Directory d = newDirectory();
  IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
  for(int i=0;i<25;i++) {
    Document doc = new Document();
    doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO));
    doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO));
    w.addDocument(doc);
  }
  w.forceMerge(1);

  DirectoryReader r = DirectoryReader.open(w);
  assertEquals(1, r.leaves().size());
  FieldReader field = (FieldReader) r.leaves().get(0).reader().terms("field");
  // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
  Stats stats = field.getStats();
  assertEquals(0, stats.floorBlockCount);
  assertEquals(2, stats.nonFloorBlockCount);
  r.close();
  w.close();
  d.close();
}
 
Example 3
Source File: TestDocValuesIndexing.java    From lucene-solr with Apache License 2.0
public void testAddBinaryTwice() throws IOException {
  Analyzer analyzer = new MockAnalyzer(random());

  Directory directory = newDirectory();
  // We don't use RandomIndexWriter because it might add more docvalues than we expect.
  IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
  iwc.setMergePolicy(newLogMergePolicy());
  IndexWriter iwriter = new IndexWriter(directory, iwc);
  Document doc = new Document();
  doc.add(new BinaryDocValuesField("dv", new BytesRef("foo!")));
  iwriter.addDocument(doc);
  
  doc.add(new BinaryDocValuesField("dv", new BytesRef("bar!")));
  expectThrows(IllegalArgumentException.class, () -> {
    iwriter.addDocument(doc);
  });
  
  IndexReader ir = iwriter.getReader();
  assertEquals(1, ir.numDocs());
  ir.close();
  
  iwriter.close();
  directory.close();
}
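The IllegalArgumentException in Example 3 is specific to doc-values: a document may carry at most one doc-values entry per field name. Ordinary indexed fields have no such limit; adding several fields with the same name, as in this small sketch (the field name "tags" is illustrative), simply makes the field multi-valued:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;

static Document multiValuedDoc() {
  Document doc = new Document();
  // Two values under one name: legal for indexed fields, and both are searchable.
  doc.add(new TextField("tags", "lucene", Field.Store.YES));
  doc.add(new TextField("tags", "search", Field.Store.YES));
  return doc;
}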
 
Example 4
Source File: SpatialPrefixTreeTest.java    From lucene-solr with Apache License 2.0
/**
 * A PrefixTree pruning optimization gone bad, applicable when optimize=true.
 * See <a href="https://issues.apache.org/jira/browse/LUCENE-4770">LUCENE-4770</a>.
 */
@Test
public void testBadPrefixTreePrune() throws Exception {

  trie = new QuadPrefixTree(ctx, 12);
  TermQueryPrefixTreeStrategy strategy = new TermQueryPrefixTreeStrategy(trie, "geo");
  Document doc = new Document();
  doc.add(new TextField("id", "1", Store.YES));

  Shape area = ctx.makeRectangle(-122.82, -122.78, 48.54, 48.56);

  Field[] fields = strategy.createIndexableFields(area, 0.025);
  for (Field field : fields) {
    doc.add(field);
  }
  addDocument(doc);

  Point upperleft = ctx.makePoint(-122.88, 48.54);
  Point lowerright = ctx.makePoint(-122.82, 48.62);

  Query query = strategy.makeQuery(new SpatialArgs(SpatialOperation.Intersects, ctx.makeRectangle(upperleft, lowerright)));

  commit();

  TopDocs search = indexSearcher.search(query, 10);
  ScoreDoc[] scoreDocs = search.scoreDocs;
  for (ScoreDoc scoreDoc : scoreDocs) {
    System.out.println(indexSearcher.doc(scoreDoc.doc));
  }

  assertEquals(1, search.totalHits.value);
}
 
Example 5
Source File: TestIndexWriterExceptions.java    From lucene-solr with Apache License 2.0
public void testExceptionOnMergeInit() throws IOException {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()))
    .setMaxBufferedDocs(2)
    .setMergePolicy(newLogMergePolicy());
  ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
  cms.setSuppressExceptions();
  conf.setMergeScheduler(cms);
  ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2);
  TestPoint3 testPoint = new TestPoint3();
  IndexWriter w = RandomIndexWriter.mockIndexWriter(random(), dir, conf, testPoint);
  testPoint.doFail = true;
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  for(int i=0;i<10;i++) {
    try {
      w.addDocument(doc);
    } catch (RuntimeException re) {
      break;
    }
  }

  try {
    ((ConcurrentMergeScheduler) w.getConfig().getMergeScheduler()).sync();
  } catch (IllegalStateException ise) {
    // OK: merge exc causes tragedy
  }
  assertTrue(testPoint.failed);
  w.close();
  dir.close();
}
 
Example 6
Source File: TestTieredMergePolicy.java    From lucene-solr with Apache License 2.0
public void testForceMergeDeletes() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
  TieredMergePolicy tmp = newTieredMergePolicy();
  conf.setMergePolicy(tmp);
  conf.setMaxBufferedDocs(4);
  tmp.setMaxMergeAtOnce(100);
  tmp.setSegmentsPerTier(100);
  tmp.setDeletesPctAllowed(50.0);
  tmp.setForceMergeDeletesPctAllowed(30.0);
  IndexWriter w = new IndexWriter(dir, conf);
  for(int i=0;i<80;i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "aaa " + (i%4), Field.Store.NO));
    w.addDocument(doc);
  }
  assertEquals(80, w.getDocStats().maxDoc);
  assertEquals(80, w.getDocStats().numDocs);

  if (VERBOSE) {
    System.out.println("\nTEST: delete docs");
  }
  w.deleteDocuments(new Term("content", "0"));
  w.forceMergeDeletes();

  assertEquals(80, w.getDocStats().maxDoc);
  assertEquals(60, w.getDocStats().numDocs);

  if (VERBOSE) {
    System.out.println("\nTEST: forceMergeDeletes2");
  }
  ((TieredMergePolicy) w.getConfig().getMergePolicy()).setForceMergeDeletesPctAllowed(10.0);
  w.forceMergeDeletes();
  assertEquals(60, w.getDocStats().maxDoc);
  assertEquals(60, w.getDocStats().numDocs);
  w.close();
  dir.close();
}
 
Example 7
Source File: TestIndexWriterExceptions.java    From lucene-solr with Apache License 2.0
public void testExceptionJustBeforeFlush() throws IOException {
  Directory dir = newDirectory();

  final AtomicBoolean doCrash = new AtomicBoolean();

  Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    @Override
    public TokenStreamComponents createComponents(String fieldName) {
      MockTokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
      tokenizer.setEnableChecks(false); // disable workflow checking as we forcefully close() in exceptional cases.
      TokenStream stream = tokenizer;
      if (doCrash.get()) {
        stream = new CrashingFilter(fieldName, stream);
      }
      return new TokenStreamComponents(tokenizer, stream);
    }
  };

  IndexWriter w = RandomIndexWriter.mockIndexWriter(random(), dir, 
                                                    newIndexWriterConfig(analyzer)
                                                      .setMaxBufferedDocs(2), 
                                                    new TestPoint1());
  Document doc = new Document();
  doc.add(newTextField("field", "a field", Field.Store.YES));
  w.addDocument(doc);

  Document crashDoc = new Document();
  crashDoc.add(newTextField("crash", "do it on token 4", Field.Store.YES));
  doCrash.set(true);
  expectThrows(IOException.class, () -> {
    w.addDocument(crashDoc);
  });

  w.addDocument(doc);
  w.close();
  dir.close();
}
 
Example 8
Source File: TestRangeFacetCounts.java    From lucene-solr with Apache License 2.0
public void testOverlappedEndStart() throws Exception {
  Directory d = newDirectory();
  RandomIndexWriter w = new RandomIndexWriter(random(), d);
  Document doc = new Document();
  NumericDocValuesField field = new NumericDocValuesField("field", 0L);
  doc.add(field);
  for(long l=0;l<100;l++) {
    field.setLongValue(l);
    w.addDocument(doc);
  }
  field.setLongValue(Long.MAX_VALUE);
  w.addDocument(doc);

  IndexReader r = w.getReader();
  w.close();

  FacetsCollector fc = new FacetsCollector();
  IndexSearcher s = newSearcher(r);
  s.search(new MatchAllDocsQuery(), fc);

  Facets facets = new LongRangeFacetCounts("field", fc,
      new LongRange("0-10", 0L, true, 10L, true),
      new LongRange("10-20", 10L, true, 20L, true),
      new LongRange("20-30", 20L, true, 30L, true),
      new LongRange("30-40", 30L, true, 40L, true));
  
  FacetResult result = facets.getTopChildren(10, "field");
  assertEquals("dim=field path=[] value=41 childCount=4\n  0-10 (11)\n  10-20 (11)\n  20-30 (11)\n  30-40 (11)\n",
               result.toString());
  
  r.close();
  d.close();
}
 
Example 9
Source File: FieldFilterPresearcherComponentTestBase.java    From lucene-solr with Apache License 2.0
public void testDebugQueries() throws Exception {
  try (Monitor monitor = newMonitor()) {
    monitor.register(new MonitorQuery("1", parse("test"), null, Collections.singletonMap("language", "en")));

    Document enDoc = new Document();
    enDoc.add(newTextField(TEXTFIELD, "this is a test", Field.Store.NO));
    enDoc.add(newTextField("language", "en", Field.Store.NO));

    PresearcherMatches<QueryMatch> matches = monitor.debug(enDoc, QueryMatch.SIMPLE_MATCHER);
    assertFalse(matches.match("1", 0).presearcherMatches.isEmpty());
  }
}
 
Example 10
Source File: LuceneIndexFromTriples.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License
/**
 * Indexes the file passed as a parameter, writing to the index passed as a parameter.
 * Each predication is indexed as an individual document, with the fields "subject",
 * "predicate", and "object".
 *
 * @throws IOException if the triples file cannot be read or the index cannot be written
 */
static void indexDoc(IndexWriter fsWriter, File triplesTextFile) throws IOException {
  BufferedReader theReader = new BufferedReader(new FileReader(triplesTextFile));
  int linecnt = 0;
  String lineIn;
  while ((lineIn = theReader.readLine()) != null)  {   
    java.util.StringTokenizer theTokenizer = new java.util.StringTokenizer(lineIn,"\t");
    // Output progress counter.
    if( ( ++linecnt % 10000 == 0 ) || ( linecnt < 10000 && linecnt % 1000 == 0 ) ){
      VerbatimLogger.info((linecnt) + " ... ");
    }
    try {
      if (theTokenizer.countTokens() < 3) {
        VerbatimLogger.warning(
            "Line in predication file does not have three delimited fields: " + lineIn + "\n");
        continue;
      }

      String subject = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");
      String predicate = theTokenizer.nextToken().trim().toUpperCase().replaceAll(" ", "_");
      String object = theTokenizer.nextToken().trim().toLowerCase().replaceAll(" ", "_");

      Document doc = new Document();
      doc.add(new TextField("subject", subject, Field.Store.YES));
      doc.add(new TextField("predicate", predicate, Field.Store.YES));
      doc.add(new TextField("object", object, Field.Store.YES));
      doc.add(new TextField("predication",subject+predicate+object, Field.Store.NO));
      fsWriter.addDocument(doc);
    }
    catch (Exception e) {
      System.out.println(lineIn);
      e.printStackTrace();
    }
  }
  VerbatimLogger.info("\n");  // Newline after line counter prints.
  theReader.close();
}
 
Example 11
Source File: TestPerFieldPostingsFormat2.java    From lucene-solr with Apache License 2.0
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
  for (int i = 0; i < numDocs; i++) {
    Document doc = new Document();
    doc.add(newTextField("content", "bbb", Field.Store.NO));
    writer.addDocument(doc);
  }
}
 
Example 12
Source File: IndexApi.java    From Pydev with Eclipse Public License 1.0
private Document createDocument(Map<String, String> fieldsToIndex) {
    Document doc = new Document();

    Set<Entry<String, String>> entrySet = fieldsToIndex.entrySet();
    for (Entry<String, String> entry : entrySet) {
        doc.add(new StringField(entry.getKey(), entry.getValue(), Field.Store.YES));
    }

    return doc;
}
 
Example 13
Source File: IndexBilingualFiles.java    From semanticvectors with BSD 3-Clause "New" or "Revised" License
/**
 * Makes a document for a pair of files.
 * <p>
 * The document has three fields:
 * <ul>
 * <li><code>filename</code>--name of the file, as a stored field;
 * to get the full path for each pair, add the language-specific prefix.
 * <li><code>contents_LANGUAGE1</code>--containing the full contents
 * of the file in LANGUAGE1, as a Reader field; e.g., contents_en.
 * <li><code>contents_LANGUAGE2</code>--containing the full contents
 * of the file in LANGUAGE2, as a Reader field; e.g., contents_fr.
 * </ul>
 */
protected Document fileBilingualDocument(File file1, File file2)
    throws java.io.IOException {
  // Make a new, empty document.
  Document doc = new Document();

  // Add the path of the file as a stored (but not indexed) field named "filename".
  doc.add(new StoredField("filename", file1.getPath()));

  // Add the contents of the files to fields named "contents_LANGUAGE1" and
  // "contents_LANGUAGE2". Specifying a Reader means the text of the file is
  // tokenized and indexed, but not stored. Note that FileReader expects the
  // file to be in the system's default encoding; if that's not the case,
  // searching for special characters will fail.
  doc.add(new TextField("contents_" + LANGUAGE1, new FileReader(file1)));
  doc.add(new TextField("contents_" + LANGUAGE2, new FileReader(file2)));

  return doc;
}
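As the comments in Example 13 warn, FileReader reads in the platform default encoding. When the files' encoding is known (UTF-8 is assumed here purely for illustration), a variant of the same add() calls can state it explicitly:

import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

// Same fields as in Example 13, but with the charset spelled out instead of
// inherited from the platform default.
Reader reader1 = new InputStreamReader(new FileInputStream(file1), StandardCharsets.UTF_8);
Reader reader2 = new InputStreamReader(new FileInputStream(file2), StandardCharsets.UTF_8);
doc.add(new TextField("contents_" + LANGUAGE1, reader1));
doc.add(new TextField("contents_" + LANGUAGE2, reader2));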
 
Example 14
Source File: TestTermVectorsWriter.java    From lucene-solr with Apache License 2.0
public void testDoubleOffsetCounting() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd", customType);
  doc.add(f);
  doc.add(f);
  Field f2 = newField("field", "", customType);
  doc.add(f2);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertNotNull(vector);
  TermsEnum termsEnum = vector.iterator();
  assertNotNull(termsEnum.next());
  assertEquals("", termsEnum.term().utf8ToString());

  // Token "" occurred once
  assertEquals(1, termsEnum.totalTermFreq());

  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  // Token "abcd" occurred three times
  assertEquals(new BytesRef("abcd"), termsEnum.next());
  dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
  assertEquals(3, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(4, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
  assertNull(termsEnum.next());
  r.close();
  dir.close();
}
 
Example 15
Source File: TestCachingTokenFilter.java    From lucene-solr with Apache License 2.0
public void testCaching() throws IOException {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  AtomicInteger resetCount = new AtomicInteger(0);
  TokenStream stream = new TokenStream() {
    private int index = 0;
    private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

    @Override
    public void reset() throws IOException {
      super.reset();
      resetCount.incrementAndGet();
    }

    @Override
    public boolean incrementToken() {
      if (index == tokens.length) {
        return false;
      } else {
        clearAttributes();
        termAtt.append(tokens[index++]);
        offsetAtt.setOffset(0,0);
        return true;
      }        
    }
    
  };

  stream = new CachingTokenFilter(stream);

  doc.add(new TextField("preanalyzed", stream));

  // 1) we consume all tokens twice before we add the doc to the index
  assertFalse(((CachingTokenFilter)stream).isCached());
  stream.reset();
  assertFalse(((CachingTokenFilter) stream).isCached());
  checkTokens(stream);
  stream.reset();  
  checkTokens(stream);
  assertTrue(((CachingTokenFilter)stream).isCached());

  // 2) now add the document to the index and verify if all tokens are indexed
  //    don't reset the stream here, the DocumentWriter should do that implicitly
  writer.addDocument(doc);
  
  IndexReader reader = writer.getReader();
  PostingsEnum termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                                        "preanalyzed",
                                                                        new BytesRef("term1"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(0, termPositions.nextPosition());

  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term2"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(2, termPositions.freq());
  assertEquals(1, termPositions.nextPosition());
  assertEquals(3, termPositions.nextPosition());
  
  termPositions = MultiTerms.getTermPostingsEnum(reader,
                                                   "preanalyzed",
                                                   new BytesRef("term3"));
  assertTrue(termPositions.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  assertEquals(1, termPositions.freq());
  assertEquals(2, termPositions.nextPosition());
  reader.close();
  writer.close();
  // 3) reset stream and consume tokens again
  stream.reset();
  checkTokens(stream);

  assertEquals(1, resetCount.get());

  dir.close();
}
 
Example 16
Source File: TestSloppyPhraseQuery.java    From lucene-solr with Apache License 2.0
public void testInfiniteFreq2() throws Exception {
  String document = 
    "So much fun to be had in my head " +
    "No more sunshine " +
    "So much fun just lying in my bed " +
    "No more sunshine " +
    "I can't face the sunlight and the dirt outside " +
    "Wanna stay in 666 where this darkness don't lie " +
    "Drug drug druggy " +
    "Got a feeling sweet like honey " +
    "Drug drug druggy " +
    "Need sensation like my baby " +
    "Show me your scars you're so aware " +
    "I'm not barbaric I just care " +
    "Drug drug drug " +
    "I need a reflection to prove I exist " +
    "No more sunshine " +
    "I am a victim of designer blitz " +
    "No more sunshine " +
    "Dance like a robot when you're chained at the knee " +
    "The C.I.A say you're all they'll ever need " +
    "Drug drug druggy " +
    "Got a feeling sweet like honey " +
    "Drug drug druggy " +
    "Need sensation like my baby " +
    "Snort your lines you're so aware " +
    "I'm not barbaric I just care " +
    "Drug drug druggy " +
    "Got a feeling sweet like honey " +
    "Drug drug druggy " +
    "Need sensation like my baby";
      
  Directory dir = newDirectory();

  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newField("lyrics", document, new FieldType(TextField.TYPE_NOT_STORED)));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();

  IndexSearcher is = newSearcher(ir);

  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 4);
  builder.setSlop(5);
  PhraseQuery pq = builder.build();
  // "drug the drug"~5
  assertSaneScoring(pq, is);
  ir.close();
  dir.close();
}
 
Example 17
Source File: TestIndexSorting.java    From lucene-solr with Apache License 2.0
public void testAddIndexes(boolean withDeletes, boolean useReaders) throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc1 = newIndexWriterConfig();
  Sort indexSort = new Sort(new SortField("foo", SortField.Type.LONG), new SortField("bar", SortField.Type.LONG));
  iwc1.setIndexSort(indexSort);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc1);
  final int numDocs = atLeast(100);
  for (int i = 0; i < numDocs; ++i) {
    Document doc = new Document();
    doc.add(new StringField("id", Integer.toString(i), Store.NO));
    doc.add(new NumericDocValuesField("foo", random().nextInt(20)));
    doc.add(new NumericDocValuesField("bar", random().nextInt(20)));
    w.addDocument(doc);
  }
  if (withDeletes) {
    for (int i = random().nextInt(5); i < numDocs; i += TestUtil.nextInt(random(), 1, 5)) {
      w.deleteDocuments(new Term("id", Integer.toString(i)));
    }
  }
  if (random().nextBoolean()) {
    w.forceMerge(1);
  }
  final IndexReader reader = w.getReader();
  w.close();

  Directory dir2 = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  if (indexSort != null && random().nextBoolean()) {
    // test congruent index sort
    iwc.setIndexSort(new Sort(new SortField("foo", SortField.Type.LONG)));
  } else {
    iwc.setIndexSort(indexSort);
  }
  IndexWriter w2 = new IndexWriter(dir2, iwc);

  if (useReaders) {
    CodecReader[] codecReaders = new CodecReader[reader.leaves().size()];
    for (int i = 0; i < codecReaders.length; ++i) {
      codecReaders[i] = (CodecReader) reader.leaves().get(i).reader();
    }
    w2.addIndexes(codecReaders);
  } else {
    w2.addIndexes(dir);
  }
  final IndexReader reader2 = w2.getReader();
  final IndexSearcher searcher = newSearcher(reader);
  final IndexSearcher searcher2 = newSearcher(reader2);
  for (int i = 0; i < numDocs; ++i) {
    Query query = new TermQuery(new Term("id", Integer.toString(i)));
    final TopDocs topDocs = searcher.search(query, 1);
    final TopDocs topDocs2 = searcher2.search(query, 1);
    assertEquals(topDocs.totalHits.value, topDocs2.totalHits.value);
    if (topDocs.totalHits.value == 1) {
      NumericDocValues dvs1 = MultiDocValues.getNumericValues(reader, "foo");
      int hitDoc1 = topDocs.scoreDocs[0].doc;
      assertEquals(hitDoc1, dvs1.advance(hitDoc1));
      long value1 = dvs1.longValue();
      NumericDocValues dvs2 = MultiDocValues.getNumericValues(reader2, "foo");
      int hitDoc2 = topDocs2.scoreDocs[0].doc;
      assertEquals(hitDoc2, dvs2.advance(hitDoc2));
      long value2 = dvs2.longValue();
      assertEquals(value1, value2);
    }
  }

  IOUtils.close(reader, reader2, w2, dir, dir2);
}
 
Example 18
Source File: LuceneSearcher.java    From SnowGraph with Apache License 2.0
public void index(boolean overWrite) throws IOException {

    if (!overWrite && new File(path).exists())
        return;

    Directory dir = FSDirectory.open(Paths.get(path));
    Analyzer analyzer = new EnglishAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, iwc);

    // Query the graph database for all nodes flagged as text-bearing.
    Session session = context.connection.session();
    String stat = "match (n) where exists(n." + TextExtractor.IS_TEXT + ") and n." + TextExtractor.IS_TEXT + "=true return id(n), labels(n)[0], n." + TextExtractor.TITLE + ", n." + TextExtractor.TEXT;
    StatementResult rs = session.run(stat);
    int c = 0;
    while (rs.hasNext()) {
        Record item = rs.next();
        String label = item.get("labels(n)[0]").asString();
        if (!(label.equals(StackOverflowExtractor.QUESTION) || label.equals(StackOverflowExtractor.ANSWER) || label.equals(JiraExtractor.ISSUE)))
            continue;
        String org_content = item.get("n." + TextExtractor.TEXT).asString();
        String title = item.get("n." + TextExtractor.TITLE).asString();
        String content = dealWithDocument("<html><title>" + title + "</title>" + org_content + "</html>");
        if (content.length() > 0) {
            // One Lucene document per text node.
            Document document = new Document();
            long id = item.get("id(n)").asLong();
            document.add(new StringField("id", "" + id, Field.Store.YES));
            document.add(new StringField("type", item.get("labels(n)[0]").asString(), Field.Store.YES));
            document.add(new StringField("title", title, Field.Store.YES));
            document.add(new TextField("content", content, Field.Store.YES));
            document.add(new TextField("org_content", org_content, Field.Store.YES));
            // Collect the ids of nodes this one mentions or references.
            Set<Long> nodes = new HashSet<>();
            Session session1 = context.connection.session();
            StatementResult rs1 = session1.run("match (a)-[:" + ApiMentionExtractor.API_NAME_MENTION + "|" + ReferenceExtractor.REFERENCE + "]->(b) where id(a)=" + id + " and exists(b." + LINEExtractor.LINE_VEC + ") return distinct id(b)");
            while (rs1.hasNext()) {
                Record item1 = rs1.next();
                nodes.add(item1.get("id(b)").asLong());
            }
            session1.close();
            String nodeSet = StringUtils.join(nodes, " ").trim();
            document.add(new StringField("node_set", nodeSet, Field.Store.YES));
            writer.addDocument(document);
            System.out.println(c + ": " + nodes.size());
            c++;
        }
    }
    session.close();

    writer.close();
}
 
Example 19
Source File: TestAddIndexes.java    From lucene-solr with Apache License 2.0
private void addDoc(IndexWriter writer) throws IOException
{
    Document doc = new Document();
    doc.add(newTextField("content", "aaa", Field.Store.NO));
    writer.addDocument(doc);
}
 
Example 20
Source File: ClusteringKeyMapper.java    From stratio-cassandra with Apache License 2.0
/**
 * Adds to the specified document the clustering key contained in the specified cell name.
 *
 * @param document The document where the clustering key is going to be added.
 * @param cellName A cell name containing the clustering key to be added.
 */
public final void addFields(Document document, CellName cellName) {
    String serializedKey = ByteBufferUtils.toString(cellName.toByteBuffer());
    Field field = new StringField(FIELD_NAME, serializedKey, Field.Store.YES);
    document.add(field);
}