Java Code Examples for org.apache.lucene.document.TextField#TYPE_NOT_STORED

The following examples show how to use org.apache.lucene.document.TextField#TYPE_NOT_STORED. All of them are drawn from the lucene-solr test suite; the source file is noted above each snippet.
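Every snippet below follows one pattern: TextField.TYPE_NOT_STORED is a frozen FieldType template for text that is tokenized and indexed but not stored, so each test copies it into a mutable FieldType, adjusts index options or term vector flags, and indexes a Field built from it. As a quick orientation, here is a minimal self-contained sketch of that pattern. It is ours, not taken from lucene-solr; the class name TypeNotStoredSketch and the choice of ByteBuffersDirectory and StandardAnalyzer are our own, and Lucene 8.x APIs are assumed:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class TypeNotStoredSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new ByteBuffersDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));

    // TYPE_NOT_STORED is frozen; copy it before customizing.
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); // e.g. drop positions/offsets
    ft.setStoreTermVectors(true);                    // e.g. add term vectors
    ft.freeze();

    Document doc = new Document();
    // Indexed and searchable, but the raw text is not retrievable from the index.
    doc.add(new Field("body", "some analyzed but unstored text", ft));
    w.addDocument(doc);

    w.close();
    dir.close();
  }
}

If the original value must also be retrievable, TextField.TYPE_STORED is the stored counterpart.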
Example 1
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0
public void testFieldInvertState() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(228, fis.getMaxTermFrequency());
  assertEquals(2, fis.getUniqueTermCount());
  assertEquals(0, fis.getNumOverlap());
  assertEquals(287, fis.getLength());

  IOUtils.close(w, dir);
}
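Example 1 above (and Examples 4, 5, and 12 below) build the field from a CannedTermFreqs token stream. That helper is defined elsewhere in TestCustomTermFreq.java and is not reproduced on this page; a plausible minimal equivalent, assuming only the public TokenStream and TermFrequencyAttribute APIs rather than the actual test code, would be:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute;

final class CannedTermFreqs extends TokenStream {
  private final String[] terms;
  private final int[] freqs;
  private int upto;
  // CharTermAttribute carries the term text; TermFrequencyAttribute the custom frequency.
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final TermFrequencyAttribute termFreqAtt = addAttribute(TermFrequencyAttribute.class);

  CannedTermFreqs(String[] terms, int[] freqs) {
    this.terms = terms;
    this.freqs = freqs;
  }

  @Override
  public boolean incrementToken() {
    if (upto == terms.length) {
      return false;
    }
    clearAttributes();
    termAtt.append(terms[upto]);
    termFreqAtt.setTermFrequency(freqs[upto]);
    upto++;
    return true;
  }

  @Override
  public void reset() {
    upto = 0;
  }
}

With DOCS_AND_FREQS, the postings frequency of a repeated term is the sum of its custom frequencies, which is exactly what Examples 1 and 12 assert: 42 + 17 = 59 for "foo" and 128 + 100 = 228 for "bar".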
 
Example 2
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0
private void checkTokens(Token[] tokens) throws IOException {
  Directory dir = newDirectory();
  // iwc is an IndexWriterConfig field initialized elsewhere in this test class
  RandomIndexWriter riw = new RandomIndexWriter(random(), dir, iwc);
  boolean success = false;
  try {
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // store some term vectors for the CheckIndex cross-check
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorOffsets(true);
   
    Document doc = new Document();
    doc.add(new Field("body", new CannedTokenStream(tokens), ft));
    riw.addDocument(doc);
    riw.close();
    success = true;
  } finally {
    if (success) {
      IOUtils.close(dir);
    } else {
      IOUtils.closeWhileHandlingException(riw, dir);
    }
  }
}
 
Example 3
Source File: TestSimilarity2.java    From lucene-solr with Apache License 2.0
/** make sure all sims work if TF is omitted */
public void testOmitTF() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS);
  ft.freeze();
  Field f = newField("foo", "bar", ft);
  doc.add(f);
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  
  // sims is the test class's list of Similarity implementations under test
  for (Similarity sim : sims) {
    is.setSimilarity(sim);
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
    assertEquals(1, is.search(query.build(), 10).totalHits.value);
  }
  ir.close();
  dir.close();
}
 
Example 4
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0
public void testOverflowInt() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS);
  
  Document doc = new Document();
  doc.add(new Field("field", "this field should be indexed", fieldType));
  w.addDocument(doc);

  Document doc2 = new Document();
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar"},
                                              new int[] {3, Integer.MAX_VALUE}),
                          fieldType);
  doc2.add(field);
  expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc2);});

  IndexReader r = DirectoryReader.open(w);
  assertEquals(1, r.numDocs());

  IOUtils.close(r, w, dir);
}
 
Example 5
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0
public void testInvalidTermVectorPositions() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  fieldType.setStoreTermVectors(true);
  fieldType.setStoreTermVectorPositions(true);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalArgumentException.class, () -> {w.addDocument(doc);});
  assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(w, dir);
}
 
Example 6
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0
public void testIllegalOffsetsWithoutVectors() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(false);
  ft.setStoreTermVectorOffsets(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot index term vector offsets when term vectors are not indexed (field=\"field\")", expected.getMessage());
  
  w.close();
  dir.close();
}
 
Example 7
Source File: TestFieldInvertState.java    From lucene-solr with Apache License 2.0
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  Field field = new Field("field",
                          new CannedTokenStream(new Token("a", 0, 1),
                                                new Token("b", 2, 3),
                                                new Token("c", 4, 5)),
                          TextField.TYPE_NOT_STORED);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(1, fis.getMaxTermFrequency());
  assertEquals(3, fis.getUniqueTermCount());
  assertEquals(0, fis.getNumOverlap());
  assertEquals(3, fis.getLength());
  IOUtils.close(w, dir);
}
 
Example 8
Source File: TestSimilarity2.java    From lucene-solr with Apache License 2.0
/** make sure we can retrieve when norms are disabled */
public void testNoNorms() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  ft.freeze();
  doc.add(newField("foo", "bar", ft));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);
  
  // sims is the test class's list of Similarity implementations under test
  for (Similarity sim : sims) {
    is.setSimilarity(sim);
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(new TermQuery(new Term("foo", "bar")), BooleanClause.Occur.SHOULD);
    assertEquals(1, is.search(query.build(), 10).totalHits.value);
  }
  ir.close();
  dir.close();
}
 
Example 9
Source File: TestTermVectorsWriter.java    From lucene-solr with Apache License 2.0
public void testDoubleOffsetCounting2() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(5, dpEnum.startOffset());
  assertEquals(9, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example 10
Source File: HighlighterPhraseTest.java    From lucene-solr with Apache License 2.0
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    // FIELD and TokenStreamSparse are a constant and a custom TokenStream defined in this test class
    document.add(new Field(FIELD, new TokenStreamSparse(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(FIELD, "did")),
        new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);

    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(
        highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 11
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
public void testTermVectorWithoutOffsetsDoesntWork()
    throws IOException, InvalidTokenOffsetsException {
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(null));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectors(true);
    customType.setStoreTermVectorOffsets(false);
    customType.setStoreTermVectorPositions(true);
    // FIELD and OverlappingTokenStream are a constant and a custom TokenStream defined in this test class
    document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertNull(tokenStream);
  }
  finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 12
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0
public void testRepeatTermsOneDoc() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  w.addDocument(doc);
  IndexReader r = DirectoryReader.open(w);
  PostingsEnum postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("bar"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(228, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());

  postings = MultiTerms.getTermPostingsEnum(r, "field", new BytesRef("foo"), (int) PostingsEnum.FREQS);
  assertNotNull(postings);
  assertEquals(0, postings.nextDoc());
  assertEquals(59, postings.freq());
  assertEquals(NO_MORE_DOCS, postings.nextDoc());
  
  IOUtils.close(r, w, dir);
}
 
Example 13
Source File: Test2BPostings.java    From lucene-solr with Apache License 2.0
@Nightly
public void test() throws Exception {
  BaseDirectoryWrapper dir = newFSDirectory(createTempDir("2BPostings"));
  if (dir instanceof MockDirectoryWrapper) {
    ((MockDirectoryWrapper)dir).setThrottling(MockDirectoryWrapper.Throttling.NEVER);
  }

  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH)
      .setRAMBufferSizeMB(256.0)
      .setMergeScheduler(new ConcurrentMergeScheduler())
      .setMergePolicy(newLogMergePolicy(false, 10))
      .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
  
  IndexWriter w = new IndexWriter(dir, iwc);

  MergePolicy mp = w.getConfig().getMergePolicy();
  if (mp instanceof LogByteSizeMergePolicy) {
   // 1 petabyte:
   ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
  }

  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setOmitNorms(true);
  ft.setIndexOptions(IndexOptions.DOCS);
  Field field = new Field("field", new MyTokenStream(), ft);
  doc.add(field);
  
  final int numDocs = (Integer.MAX_VALUE / 26) + 1;
  for (int i = 0; i < numDocs; i++) {
    w.addDocument(doc);
    if (VERBOSE && i % 100000 == 0) {
      System.out.println(i + " of " + numDocs + "...");
    }
  }
  w.forceMerge(1);
  w.close();
  dir.close();
}
 
Example 14
Source File: SimpleFragmentsBuilderTest.java    From lucene-solr with Apache License 2.0
protected void makeUnstoredIndex() throws Exception {
  // dir, analyzerW, reader, and the field name F are members of the enclosing test class
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzerW).setOpenMode(OpenMode.CREATE));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorOffsets(true);
  customType.setStoreTermVectorPositions(true);
  doc.add( new Field( F, "aaa", customType) );
  //doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
  writer.addDocument( doc );
  writer.close();
  if (reader != null) reader.close();
  reader = DirectoryReader.open(dir);
}
 
Example 15
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0
public void testBasic() throws Exception {
  Directory dir = newDirectory();
  
  // iwc is an IndexWriterConfig field initialized elsewhere in this test class
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
  Document doc = new Document();

  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  if (random().nextBoolean()) {
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(random().nextBoolean());
    ft.setStoreTermVectorOffsets(random().nextBoolean());
  }
  Token[] tokens = new Token[] {
    makeToken("a", 1, 0, 6),
    makeToken("b", 1, 8, 9),
    makeToken("a", 1, 9, 17),
    makeToken("c", 1, 19, 50),
  };
  doc.add(new Field("content", new CannedTokenStream(tokens), ft));

  w.addDocument(doc);
  IndexReader r = w.getReader();
  w.close();

  PostingsEnum dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("a"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(2, dp.freq());
  assertEquals(0, dp.nextPosition());
  assertEquals(0, dp.startOffset());
  assertEquals(6, dp.endOffset());
  assertEquals(2, dp.nextPosition());
  assertEquals(9, dp.startOffset());
  assertEquals(17, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("b"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(1, dp.freq());
  assertEquals(1, dp.nextPosition());
  assertEquals(8, dp.startOffset());
  assertEquals(9, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  dp = MultiTerms.getTermPostingsEnum(r, "content", new BytesRef("c"));
  assertNotNull(dp);
  assertEquals(0, dp.nextDoc());
  assertEquals(1, dp.freq());
  assertEquals(3, dp.nextPosition());
  assertEquals(19, dp.startOffset());
  assertEquals(50, dp.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dp.nextDoc());

  r.close();
  dir.close();
}
 
Example 16
Source File: TestFieldInvertState.java    From lucene-solr with Apache License 2.0
public void testRandom() throws Exception {
  int numUniqueTokens = TestUtil.nextInt(random(), 1, 25);
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();

  int numTokens = atLeast(10000);
  Token[] tokens = new Token[numTokens];
  Map<Character,Integer> counts = new HashMap<>();
  int numStacked = 0;
  int maxTermFreq = 0;
  int pos = -1;
  for (int i=0;i<numTokens;i++) {
    char tokenChar = (char) ('a' + random().nextInt(numUniqueTokens));
    Integer oldCount = counts.get(tokenChar);
    int newCount;
    if (oldCount == null) {
      newCount = 1;
    } else {
      newCount = 1 + oldCount;
    }
    counts.put(tokenChar, newCount);
    maxTermFreq = Math.max(maxTermFreq, newCount);
    
    Token token = new Token(Character.toString(tokenChar), 2*i, 2*i+1);
    
    if (i > 0 && random().nextInt(7) == 3) {
      token.setPositionIncrement(0);
      numStacked++;
    } else {
      pos++;
    }
    tokens[i] = token;
  }

  Field field = new Field("field",
                          new CannedTokenStream(tokens),
                          TextField.TYPE_NOT_STORED);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(maxTermFreq, fis.getMaxTermFrequency());
  assertEquals(counts.size(), fis.getUniqueTermCount());
  assertEquals(numStacked, fis.getNumOverlap());
  assertEquals(numTokens, fis.getLength());
  assertEquals(pos, fis.getPosition());
  
  IOUtils.close(w, dir);
}
 
Example 17
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
public void testBooleanPhraseWithSynonym() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_NOT_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Token syn = new Token("httpwwwfacebookcom", 6, 29);
  syn.setPositionIncrement(0);
  CannedTokenStream ts = new CannedTokenStream(
      new Token("test", 0, 4),
      new Token("http", 6, 10),
      syn,
      new Token("www", 13, 16),
      new Token("facebook", 17, 25),
      new Token("com", 26, 29)
  );
  Field field = new Field("field", ts, type);
  doc.add(field);
  doc.add(new StoredField("field", "Test: http://www.facebook.com"));
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  
  // query1: match
  PhraseQuery pq = new PhraseQuery("field", "test", "http", "www", "facebook", "com");
  FieldQuery fieldQuery  = highlighter.getFieldQuery(pq, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  
  // query2: match
  PhraseQuery pq2 = new PhraseQuery("field", "test", "httpwwwfacebookcom", "www", "facebook", "com");
  fieldQuery  = highlighter.getFieldQuery(pq2, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  
  // query3: OR query1 and query2 together
  BooleanQuery.Builder bq = new BooleanQuery.Builder();
  bq.add(pq, BooleanClause.Occur.SHOULD);
  bq.add(pq2, BooleanClause.Occur.SHOULD);
  fieldQuery  = highlighter.getFieldQuery(bq.build(), reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("<b>Test: http://www.facebook.com</b>", bestFragments[0]);
  
  reader.close();
  writer.close();
  dir.close();
}
 
Example 18
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0
public void testPayloads() throws Exception {
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  FieldType myFieldType = new FieldType(TextField.TYPE_NOT_STORED);
  myFieldType.setStoreTermVectors(true);
  myFieldType.setStoreTermVectorOffsets(true);
  myFieldType.setStoreTermVectorPositions(true);
  myFieldType.setStoreTermVectorPayloads(true);

  // curOffset and getToken(...) are helpers in this test class; getToken builds tokens carrying payloads
  curOffset = 0;

  Token[] tokens = new Token[] {
    getToken("foxes"),
    getToken("can"),
    getToken("jump"),
    getToken("high")
  };

  Document doc = new Document();
  doc.add(new Field("field", new CannedTokenStream(tokens), myFieldType));
  writer.addDocument(doc);

  IndexReader reader = writer.getReader();
  writer.close();
  assertEquals(1, reader.numDocs());

  TokenStream ts = TokenSources.getTermVectorTokenStreamOrNull("field", reader.getTermVectors(0), -1);

  CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = ts.getAttribute(OffsetAttribute.class);
  PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);

  ts.reset();
  for(Token token : tokens) {
    assertTrue(ts.incrementToken());
    assertEquals(token.toString(), termAtt.toString());
    assertEquals(token.getPositionIncrement(), posIncAtt.getPositionIncrement());
    assertEquals(token.getPayload(), payloadAtt.getPayload());
    assertEquals(token.startOffset(), offsetAtt.startOffset());
    assertEquals(token.endOffset(), offsetAtt.endOffset());
  }

  assertFalse(ts.incrementToken());

  reader.close();
  dir.close();
}
 
Example 19
Source File: BaseTermVectorsFormatTestCase.java    From lucene-solr with Apache License 2.0
public void testPostingsEnumFreqs() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new Analyzer() {
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
      return new TokenStreamComponents(new MockTokenizer());
    }
  });
  IndexWriter iw = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setStoreTermVectors(true);
  doc.add(new Field("foo", "bar bar", ft));
  iw.addDocument(doc);
  DirectoryReader reader = DirectoryReader.open(iw);
  
  Terms terms = getOnlyLeafReader(reader).getTermVector(0, "foo");
  TermsEnum termsEnum = terms.iterator();
  assertNotNull(termsEnum);
  assertEquals(new BytesRef("bar"), termsEnum.next());
  
  // simple use (FREQS)
  PostingsEnum postings = termsEnum.postings(null);
  assertEquals(-1, postings.docID());
  assertEquals(0, postings.nextDoc());
  assertEquals(2, postings.freq());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
  
  // termsenum reuse (FREQS)
  PostingsEnum postings2 = termsEnum.postings(postings);
  assertNotNull(postings2);
  // and it had better work
  assertEquals(-1, postings2.docID());
  assertEquals(0, postings2.nextDoc());
  assertEquals(2, postings2.freq());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
  
  // asking for docs only: ok
  PostingsEnum docsOnly = termsEnum.postings(null, PostingsEnum.NONE);
  assertEquals(-1, docsOnly.docID());
  assertEquals(0, docsOnly.nextDoc());
  // we don't define what it is, but if it's something else, we should look into it?
  assertTrue(docsOnly.freq() == 1 || docsOnly.freq() == 2);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly.nextDoc());
  // reuse that too
  PostingsEnum docsOnly2 = termsEnum.postings(docsOnly, PostingsEnum.NONE);
  assertNotNull(docsOnly2);
  // and it had better work
  assertEquals(-1, docsOnly2.docID());
  assertEquals(0, docsOnly2.nextDoc());
  // we don't define what it is, but if it's something else, we should look into it?
  assertTrue(docsOnly2.freq() == 1 || docsOnly2.freq() == 2);
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, docsOnly2.nextDoc());
  
  // asking for any flags: ok
  for (int flag : new int[] { NONE, FREQS, POSITIONS, PAYLOADS, OFFSETS, ALL }) {
    postings = termsEnum.postings(null, flag);
    assertEquals(-1, postings.docID());
    assertEquals(0, postings.nextDoc());
    if (flag != NONE) {
      assertEquals(2, postings.freq());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings.nextDoc());
    // reuse that too
    postings2 = termsEnum.postings(postings, flag);
    assertNotNull(postings2);
    // and it had better work
    assertEquals(-1, postings2.docID());
    assertEquals(0, postings2.nextDoc());
    if (flag != NONE) {
      assertEquals(2, postings2.freq());
    }
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, postings2.nextDoc());
  }
  
  iw.close();
  reader.close();
  dir.close();
}
 
Example 20
Source File: TestIndexWriterMerging.java    From lucene-solr with Apache License 2.0
public void testForceMergeDeletes3() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
          .setMaxBufferedDocs(2)
          .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH)
          .setMergePolicy(newLogMergePolicy(50))
  );

  FieldType customType = new FieldType();
  customType.setStored(true);

  FieldType customType1 = new FieldType(TextField.TYPE_NOT_STORED);
  customType1.setTokenized(false);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);
  
  Document document = new Document();
  Field storedField = newField("stored", "stored", customType);
  document.add(storedField);
  Field termVectorField = newField("termVector", "termVector", customType1);
  document.add(termVectorField);
  Field idField = newStringField("id", "", Field.Store.NO);
  document.add(idField);
  for(int i=0;i<98;i++) {
    idField.setStringValue("" + i);
    writer.addDocument(document);
  }
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  assertEquals(98, ir.maxDoc());
  assertEquals(98, ir.numDocs());
  ir.close();
  
  IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
    .setMergePolicy(NoMergePolicy.INSTANCE);
  writer = new IndexWriter(dir, dontMergeConfig);
  for(int i=0;i<98;i+=2) {
    writer.deleteDocuments(new Term("id", "" + i));
  }
  writer.close();
  ir = DirectoryReader.open(dir);
  assertEquals(49, ir.numDocs());
  ir.close();

  writer = new IndexWriter(
      dir,
      newIndexWriterConfig(new MockAnalyzer(random()))
         .setMergePolicy(newLogMergePolicy(3))
  );
  writer.forceMergeDeletes(false);
  writer.close();
  ir = DirectoryReader.open(dir);
  assertEquals(49, ir.maxDoc());
  assertEquals(49, ir.numDocs());
  ir.close();
  dir.close();
}