Java Code Examples for org.apache.lucene.document.FieldType#setStoreTermVectorPositions()

The following examples show how to use org.apache.lucene.document.FieldType#setStoreTermVectorPositions(). You can vote up the examples you find helpful or vote down the ones you don't, and you can go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.
Example 1
Source File: TestPayloadsOnVectors.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Verifies that enabling term vector payloads without term vector positions
 * is rejected: adding the document must throw IllegalArgumentException.
 */
public void testPayloadsWithoutPositions() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory);

  // Payloads require positions; deliberately leave positions disabled.
  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(false);
  vectorType.setStoreTermVectorPayloads(true);
  vectorType.setStoreTermVectorOffsets(random().nextBoolean());

  Document document = new Document();
  document.add(new Field("field", "foo", vectorType));

  expectThrows(IllegalArgumentException.class, () -> indexWriter.addDocument(document));

  indexWriter.close();
  directory.close();
}
 
Example 2
Source File: TestBlockPostingsFormat2.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a document containing one empty text field per non-NONE
 * {@code IndexOptions} value, each with full term vectors enabled so that
 * CheckIndex can cross-check postings against the stored vectors.
 */
private Document newDocument() {
  Document result = new Document();
  for (IndexOptions indexOption : IndexOptions.values()) {
    if (indexOption != IndexOptions.NONE) {
      FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
      // Full term vectors for the CheckIndex cross-check (for now).
      fieldType.setStoreTermVectors(true);
      fieldType.setStoreTermVectorPositions(true);
      fieldType.setStoreTermVectorOffsets(true);
      fieldType.setStoreTermVectorPayloads(true);
      fieldType.setIndexOptions(indexOption);
      result.add(new Field(indexOption.toString(), "", fieldType));
    }
  }
  return result;
}
 
Example 3
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Flushes without allowing merges and checks the resulting segment count:
 * 19 docs with a 2-doc buffer must leave exactly 10 segments on disk.
 */
public void testFlushWithNoMerging() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer =
      new IndexWriter(
          directory,
          newIndexWriterConfig(new MockAnalyzer(random()))
              .setMaxBufferedDocs(2)
              .setMergePolicy(newLogMergePolicy(10)));

  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  Document document = new Document();
  document.add(newField("field", "aaa", vectorType));

  for (int docCount = 0; docCount < 19; docCount++) {
    writer.addDocument(document);
  }
  writer.flush(false, true);
  writer.close();

  // Since we flushed without allowing merges we should now have 10 segments.
  SegmentInfos segmentInfos = SegmentInfos.readLatestCommit(directory);
  assertEquals(10, segmentInfos.size());
  directory.close();
}
 
Example 4
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes the given canned tokens into a fresh directory (offsets indexed,
 * full term vectors stored for the CheckIndex cross-check), making sure all
 * resources are released even when indexing fails.
 */
private void checkTokens(Token[] tokens) throws IOException {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwc);
  boolean indexed = false;
  try {
    FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
    fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // Store term vectors so CheckIndex can cross-check them.
    fieldType.setStoreTermVectors(true);
    fieldType.setStoreTermVectorPositions(true);
    fieldType.setStoreTermVectorOffsets(true);

    Document document = new Document();
    document.add(new Field("body", new CannedTokenStream(tokens), fieldType));
    writer.addDocument(document);
    writer.close();
    indexed = true;
  } finally {
    if (indexed) {
      IOUtils.close(directory);
    } else {
      // Writer was not closed yet; close both, swallowing secondary errors.
      IOUtils.closeWhileHandlingException(writer, directory);
    }
  }
}
 
Example 5
Source File: TestCustomTermFreq.java    From lucene-solr with Apache License 2.0 6 votes vote down vote up
/**
 * Indexing a field that combines a custom TermFrequencyAttribute with term
 * vector positions must fail with a descriptive IllegalArgumentException.
 */
public void testInvalidTermVectorPositions() throws Exception {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(new MockAnalyzer(random())));

  FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
  vectorType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);

  Document document = new Document();
  document.add(
      new Field(
          "field",
          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                              new int[] {42, 128, 17, 100}),
          vectorType));

  Exception e = expectThrows(IllegalArgumentException.class, () -> writer.addDocument(document));
  assertEquals("field \"field\": cannot index term vector positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(writer, directory);
}
 
Example 6
Source File: ClassificationTestBase.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Prepares the shared classification fixtures: a random index writer over a
 * fresh directory, the field names used by subclasses, and a stored text
 * field type with full term vectors.
 */
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  indexWriter = new RandomIndexWriter(random(), dir);

  textFieldName = "text";
  categoryFieldName = "cat";
  booleanFieldName = "bool";

  ft = new FieldType(TextField.TYPE_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
}
 
Example 7
Source File: TestTermVectors.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Creates a one-field document ({@code "c" -> "aaa"}) with full term vectors stored. */
private Document createDoc() {
  final FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);

  Document document = new Document();
  document.add(newField("c", "aaa", vectorType));
  return document;
}
 
Example 8
Source File: TestPostingsOffsets.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Adds the same field name twice to one document (offsets indexed, full term
 * vectors) and verifies the index closes cleanly; CheckIndex runs when the
 * directory is closed.
 */
public void testAddFieldTwice() throws Exception {
  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory);

  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

  Document document = new Document();
  String content = "here is more content with aaa aaa aaa";
  document.add(new Field("content3", content, vectorType));
  document.add(new Field("content3", content, vectorType));
  writer.addDocument(document);
  writer.close();
  directory.close(); // checkindex
}
 
Example 9
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * A term vector with positions but without offsets must not yield a token
 * stream: TokenSources.getTermVectorTokenStreamOrNull is expected to return
 * null for such a field.
 */
public void testTermVectorWithoutOffsetsDoesntWork()
    throws IOException, InvalidTokenOffsetsException {
  final Directory directory = newDirectory();
  final IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));
  try {
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectorOffsets(false); // the crucial bit: no offsets
    final Document document = new Document();
    document.add(new Field(FIELD, new OverlappingTokenStream(), vectorType));
    writer.addDocument(document);
  } finally {
    writer.close();
  }
  final IndexReader reader = DirectoryReader.open(directory);
  try {
    assertEquals(1, reader.numDocs());
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, reader.getTermVectors(0), -1);
    assertNull(tokenStream);
  } finally {
    reader.close();
    directory.close();
  }
}
 
Example 10
Source File: HighlighterPhraseTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Highlights a sparse-position token stream: the span-near query matches no
 * documents, and the highlighter must produce the same fragment from the
 * stored term vector as from a freshly created token stream.
 */
public void testSparseSpan() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter writer = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectorOffsets(true);
    final Document document = new Document();
    document.add(new Field(FIELD, new TokenStreamSparse(), vectorType));
    writer.addDocument(document);
  } finally {
    writer.close();
  }
  final IndexReader reader = DirectoryReader.open(directory);
  try {
    assertEquals(1, reader.numDocs());
    final IndexSearcher searcher = newSearcher(reader);
    final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
        new SpanTermQuery(new Term(FIELD, "did")),
        new SpanTermQuery(new Term(FIELD, "jump")) }, 0, true);

    // Positions are sparse, so the strict phrase must not match.
    TopDocs hits = searcher.search(phraseQuery, 1);
    assertEquals(0, hits.totalHits.value);

    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream fromVector =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, reader.getTermVectors(0), -1);
    // Term-vector-based highlighting must match fresh-stream highlighting.
    assertEquals(
        highlighter.getBestFragment(new TokenStreamSparse(), TEXT),
        highlighter.getBestFragment(fromVector, TEXT));
  } finally {
    reader.close();
    directory.close();
  }
}
 
Example 11
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Indexes one massive document (128K occurrences of "a") with a tiny RAM
 * buffer, then verifies docFreq and the postings frequency for that term.
 */
public void testHighFreqTerm() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setRAMBufferSizeMB(0.01));
  // Massive value: 4096 iterations x 32 a's = 128K terms.
  StringBuilder text = new StringBuilder(1024 * 1024);
  for (int i = 0; i < 4096; i++) {
    for (int chunk = 0; chunk < 4; chunk++) {
      text.append(" a a a a a a a a");
    }
  }
  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  Document document = new Document();
  document.add(newField("field", text.toString(), vectorType));
  writer.addDocument(document);
  writer.close();

  IndexReader reader = DirectoryReader.open(directory);
  assertEquals(1, reader.maxDoc());
  assertEquals(1, reader.numDocs());
  Term term = new Term("field", "a");
  assertEquals(1, reader.docFreq(term));
  PostingsEnum postings = TestUtil.docs(random(), reader,
                                        "field",
                                        new BytesRef("a"),
                                        null,
                                        PostingsEnum.FREQS);
  postings.nextDoc();
  assertEquals(128 * 1024, postings.freq());
  reader.close();
  directory.close();
}
 
Example 12
Source File: TestTermVectors.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the shared index for the term vector tests: 1000 docs whose term
 * vector flavor cycles with the doc number (positions when i is even,
 * offsets when i is divisible by 3, both when divisible by 6), plus a
 * second field without term vectors.
 */
@BeforeClass
public static void beforeClass() throws Exception {
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true))
          .setMergePolicy(newLogMergePolicy()));
  for (int i = 0; i < 1000; i++) {
    final boolean divisibleBy2 = i % 2 == 0;
    final boolean divisibleBy3 = i % 3 == 0;
    FieldType vectorType = new FieldType(TextField.TYPE_STORED);
    // Every doc stores vectors; positions/offsets depend on the doc number.
    vectorType.setStoreTermVectors(true);
    if (divisibleBy2) {
      vectorType.setStoreTermVectorPositions(true);
    }
    if (divisibleBy3) {
      vectorType.setStoreTermVectorOffsets(true);
    }
    Document doc = new Document();
    doc.add(new Field("field", English.intToEnglish(i), vectorType));
    // test no term vectors too
    doc.add(new TextField("noTV", English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
}
 
Example 13
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/**
 * FastVectorHighlighter must honor queries wrapped in FunctionScoreQuery:
 * the wrapped term is highlighted and fragments are centered around the
 * match for several fragment sizes.
 */
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(new MockAnalyzer(random())));

  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.freeze();

  Document document = new Document();
  document.add(new Field("field",
      "This is a test where foo is highlighed and should be highlighted", vectorType));
  writer.addDocument(document);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery = highlighter.getFieldQuery(
      new FunctionScoreQuery(new TermQuery(new Term("field", "foo")),
          DoubleValuesSource.constant(1)),
      reader);
  // highlighted results are centered
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  directory.close();
}
 
Example 14
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
/** Builds a stored text FieldType with term vectors and the requested extras. */
private static FieldType storedTermVectorType(boolean withOffsets, boolean withPositions) {
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectors(true);
  if (withOffsets) {
    type.setStoreTermVectorOffsets(true);
  }
  if (withPositions) {
    type.setStoreTermVectorPositions(true);
  }
  return type;
}

/**
 * Writes enough documents (with four term vector flavors plus one field
 * without vectors) to produce multiple segments, then closes cleanly.
 */
public void testTermVectors() throws Exception {
  Directory directory = newDirectory();
  // set up writer
  IndexWriter writer = new IndexWriter(
      directory,
      newIndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
  // want to get some more segments here; scale the doc count by the merge factor
  int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor();
  FieldType vectorsOnly = storedTermVectorType(false, false);
  FieldType vectorsWithOffsets = storedTermVectorType(true, false);
  FieldType vectorsWithPositions = storedTermVectorType(false, true);
  FieldType vectorsWithBoth = storedTermVectorType(true, true);
  String value = "one two two three three three";
  for (int i = 0; i < 5 * mergeFactor; i++) {
    Document doc = new Document();
    doc.add(new TextField("tvnot", value, Field.Store.YES));
    doc.add(new Field("termvector", value, vectorsOnly));
    doc.add(new Field("tvoffset", value, vectorsWithOffsets));
    doc.add(new Field("tvposition", value, vectorsWithPositions));
    doc.add(new Field("tvpositionoffset", value, vectorsWithBoth));

    writer.addDocument(doc);
  }
  writer.close();
  directory.close();
}
 
Example 15
Source File: TestTermVectorsWriter.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Indexes the same "abcd" field instance several times (with an empty-valued
 * instance in between) and verifies that term vector offsets accumulate
 * across the repeated field values instead of resetting per value.
 */
public void testDoubleOffsetCounting() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(StringField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  // Same Field instance added three times, plus one empty-valued instance.
  Field f = newField("field", "abcd", customType);
  doc.add(f);
  doc.add(f);
  Field f2 = newField("field", "", customType);
  doc.add(f2);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  Terms vector = r.getTermVectors(0).terms("field");
  assertNotNull(vector);
  TermsEnum termsEnum = vector.iterator();
  // Terms come back in sorted order, so "" is first.
  assertNotNull(termsEnum.next());
  assertEquals("", termsEnum.term().utf8ToString());

  // Token "" occurred once
  assertEquals(1, termsEnum.totalTermFreq());

  // The empty value followed two 4-char "abcd" values, so its start and
  // end offsets are both 8 — offsets carried over, not reset.
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  // Token "abcd" occurred three times
  assertEquals(new BytesRef("abcd"), termsEnum.next());
  dpEnum = termsEnum.postings(dpEnum, PostingsEnum.ALL);
  assertEquals(3, termsEnum.totalTermFreq());

  // Occurrences at offsets [0,4), [4,8), [8,12): accumulated across values.
  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(4, dpEnum.startOffset());
  assertEquals(8, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());

  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());
  assertNull(termsEnum.next());
  r.close();
  dir.close();
}
 
Example 16
Source File: TestAddIndexes.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Regression test for a hang on close: builds a source index mixing docs
 * with full term vectors and stored-only docs, then addIndexes it into a
 * second directory using a serial merge scheduler and an aggressive
 * byte-size merge policy.
 */
public void testHangOnClose() throws IOException {
  Directory source = newDirectory();
  LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
  mergePolicy.setNoCFSRatio(0.0);
  mergePolicy.setMergeFactor(100);
  IndexWriter writer = new IndexWriter(source, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMaxBufferedDocs(5).setMergePolicy(mergePolicy));

  // 60 docs carrying full term vectors...
  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  String content = "aaa bbb ccc ddd eee fff ggg hhh iii";
  Document vectorDoc = new Document();
  vectorDoc.add(newField("content", content, vectorType));
  for (int i = 0; i < 60; i++) {
    writer.addDocument(vectorDoc);
  }

  // ...then 10 docs whose field is stored only (four copies per doc).
  FieldType storedOnly = new FieldType();
  storedOnly.setStored(true);
  Document storedDoc = new Document();
  for (int copy = 0; copy < 4; copy++) {
    storedDoc.add(newField("content", content, storedOnly));
  }
  for (int i = 0; i < 10; i++) {
    writer.addDocument(storedDoc);
  }
  writer.close();

  Directory target = newDirectory();
  mergePolicy = new LogByteSizeMergePolicy();
  mergePolicy.setMinMergeMB(0.0001);
  mergePolicy.setNoCFSRatio(0.0);
  mergePolicy.setMergeFactor(4);
  writer = new IndexWriter(target, newIndexWriterConfig(new MockAnalyzer(random()))
      .setMergeScheduler(new SerialMergeScheduler()).setMergePolicy(mergePolicy));
  writer.addIndexes(source);
  writer.close();
  source.close();
  target.close();
}
 
Example 17
Source File: TokenSourcesTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Overlapping tokens with both positions and offsets in the term vector:
 * a disjunction over the overlapping terms matches, and highlighting from
 * the stored term vector covers the whole overlapped region.
 */
public void testOverlapWithPositionsAndOffset()
    throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(null));
  try {
    FieldType vectorType = new FieldType(TextField.TYPE_NOT_STORED);
    vectorType.setStoreTermVectors(true);
    vectorType.setStoreTermVectorPositions(true);
    vectorType.setStoreTermVectorOffsets(true);
    final Document document = new Document();
    document.add(new Field(FIELD, new OverlappingTokenStream(), vectorType));
    writer.addDocument(document);
  } finally {
    writer.close();
  }
  final IndexReader reader = DirectoryReader.open(directory);
  try {
    assertEquals(1, reader.numDocs());
    final IndexSearcher searcher = newSearcher(reader);
    // Either the overlapping "{fox}" token or the plain "fox" may match.
    final DisjunctionMaxQuery query = new DisjunctionMaxQuery(
        Arrays.asList(
            new SpanTermQuery(new Term(FIELD, "{fox}")),
            new SpanTermQuery(new Term(FIELD, "fox"))),
        1);

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(query));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, reader.getTermVectors(0), -1);
    assertEquals("<B>the fox</B> did not jump",
        highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    reader.close();
    directory.close();
  }
}
 
Example 18
Source File: LuceneTestCase.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Creates a {@link Field}, usually with exactly the given type but sometimes
 * with randomly upgraded options (stored, term vectors), keeping the choice
 * consistent per field name for the duration of the test run via
 * {@code fieldToType}.
 *
 * @param random source of randomness for the occasional upgrade
 * @param name   field name; defensively copied to defeat interning reliance
 * @param value  field value, passed through to {@code createField}
 * @param type   requested type; may be merged with a previously registered
 *               type for the same name
 */
public synchronized static Field newField(Random random, String name, Object value, FieldType type) {

    // Defeat any consumers that illegally rely on intern'd
    // strings (we removed this from Lucene a while back):
    name = new String(name);

    FieldType prevType = fieldToType.get(name);

    // Usually keep the requested params; also never randomize unindexed
    // fields or fields whose type was already fixed earlier in the run.
    if (usually(random) || type.indexOptions() == IndexOptions.NONE || prevType != null) {
      // most of the time, don't modify the params
      if (prevType == null) {
        fieldToType.put(name, new FieldType(type));
      } else {
        // A type was registered for this name before: merge term vector
        // options so the field stays consistent across documents.
        type = mergeTermVectorOptions(type, prevType);
      }

      return createField(name, value, type);
    }

    // TODO: once all core & test codecs can index
    // offsets, sometimes randomly turn on offsets if we are
    // already indexing positions...

    FieldType newType = new FieldType(type);
    if (!newType.stored() && random.nextBoolean()) {
      newType.setStored(true); // randomly store it
    }

    // Randomly turn on term vector options, but always do
    // so consistently for the same field name:
    if (!newType.storeTermVectors() && random.nextBoolean()) {
      newType.setStoreTermVectors(true);
      if (!newType.storeTermVectorPositions()) {
        newType.setStoreTermVectorPositions(random.nextBoolean());
        
        // Payloads only make sense when positions are on.
        if (newType.storeTermVectorPositions()) {
          if (!newType.storeTermVectorPayloads()) {
            newType.setStoreTermVectorPayloads(random.nextBoolean());
          }
        }
      }
      
      if (!newType.storeTermVectorOffsets()) {
        newType.setStoreTermVectorOffsets(random.nextBoolean());
      }

      if (VERBOSE) {
        System.out.println("NOTE: LuceneTestCase: upgrade name=" + name + " type=" + newType);
      }
    }
    // Freeze and remember the (possibly upgraded) type so later fields with
    // the same name reuse it.
    newType.freeze();
    fieldToType.put(name, newType);

    // TODO: we need to do this, but smarter, ie, most of
    // the time we set the same value for a given field but
    // sometimes (rarely) we change it up:
    /*
    if (newType.omitNorms()) {
      newType.setOmitNorms(random.nextBoolean());
    }
    */
    
    return createField(name, value, newType);
  }
 
Example 19
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * CommonTermsQuery highlighting via FastVectorHighlighter: the query terms
 * ("text", "long", "very") are all wrapped in bold tags in both matching
 * documents.
 */
public void testCommonTermsQueryHighlight() throws IOException {
  Directory directory = newDirectory();
  IndexWriter writer = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
      .setMergePolicy(newLogMergePolicy())); // don't reorder doc ids
  FieldType vectorType = new FieldType(TextField.TYPE_STORED);
  vectorType.setStoreTermVectors(true);
  vectorType.setStoreTermVectorPositions(true);
  vectorType.setStoreTermVectorOffsets(true);
  vectorType.freeze();
  String[] texts = {
      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
      "JFK has been shot", "John Kennedy has been shot",
      "This text has a typo in referring to Keneddy",
      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
  for (String text : texts) {
    Document doc = new Document();
    doc.add(new Field("field", text, vectorType));
    writer.addDocument(doc);
  }
  CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
  query.add(new Term("field", "text"));
  query.add(new Term("field", "long"));
  query.add(new Term("field", "very"));

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  IndexSearcher searcher = newSearcher(reader);
  TopDocs hits = searcher.search(query, 10);
  assertEquals(2, hits.totalHits.value);

  FieldQuery fieldQuery = highlighter.getFieldQuery(query, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, 1, "field", 1000, 1);
  assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);

  fieldQuery = highlighter.getFieldQuery(query, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, 0, "field", 1000, 1);
  assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);

  reader.close();
  writer.close();
  directory.close();
}
 
Example 20
Source File: TestTermVectorsReader.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the shared 5-document test index: generates sorted random token
 * positions/offsets for each test term, then indexes one field per entry in
 * {@code testFields} with the term vector flavor dictated by
 * {@code testFieldsStorePos} / {@code testFieldsStoreOff}, and finally
 * captures the newest segment and its FieldInfos for the reader tests.
 */
@Override
public void setUp() throws Exception {
  super.setUp();
  /*
  for (int i = 0; i < testFields.length; i++) {
    fieldInfos.add(testFields[i], true, true, testFieldsStorePos[i], testFieldsStoreOff[i]);
  }
  */

  Arrays.sort(testTerms);
  int tokenUpto = 0;
  Random rnd = random();
  for (int i = 0; i < testTerms.length; i++) {
    positions[i] = new int[TERM_FREQ];
    // first position must be 0
    for (int j = 0; j < TERM_FREQ; j++) {
      // positions are always sorted in increasing order
      positions[i][j] = (int) (j * 10 + rnd.nextDouble() * 10);
      TestToken token = tokens[tokenUpto++] = new TestToken();
      token.text = testTerms[i];
      token.pos = positions[i][j];
      token.startOffset = j * 10;
      token.endOffset = j * 10 + testTerms[i].length();
    }
  }
  Arrays.sort(tokens);

  dir = newDirectory();
  // Disable merging surprises: no compound files, large merge factor,
  // unlimited buffered docs, so segment layout stays predictable.
  IndexWriter writer = new IndexWriter(
      dir,
      newIndexWriterConfig(new MyAnalyzer()).
          setMaxBufferedDocs(-1).
          setMergePolicy(newLogMergePolicy(false, 10))
          .setUseCompoundFile(false)
  );

  Document doc = new Document();
  for(int i=0;i<testFields.length;i++) {
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    if (testFieldsStorePos[i] && testFieldsStoreOff[i]) {
      // positions + offsets
      customType.setStoreTermVectors(true);
      customType.setStoreTermVectorPositions(true);
      customType.setStoreTermVectorOffsets(true);
    }
    else if (testFieldsStorePos[i] && !testFieldsStoreOff[i]) {
      // positions only
      customType.setStoreTermVectors(true);
      customType.setStoreTermVectorPositions(true);
    }
    else if (!testFieldsStorePos[i] && testFieldsStoreOff[i]) {
      // NOTE(review): this "offsets only" branch also enables positions,
      // making it identical to the positions+offsets branch above — looks
      // unintentional; confirm whether positions should be off here.
      customType.setStoreTermVectors(true);
      customType.setStoreTermVectorPositions(true);
      customType.setStoreTermVectorOffsets(true);
    }
    else {
      // plain term vectors, no positions or offsets
      customType.setStoreTermVectors(true);
    }
    doc.add(new Field(testFields[i], "", customType));
  }

  //Create 5 documents for testing, they all have the same
  //terms
  for(int j=0;j<5;j++) {
    writer.addDocument(doc);
  }
  writer.commit();
  seg = writer.newestSegment();
  writer.close();

  fieldInfos = IndexWriter.readFieldInfos(seg);
}