Java Code Examples for org.apache.lucene.document.TextField#TYPE_STORED

The following examples show how to use org.apache.lucene.document.TextField#TYPE_STORED. Each example is drawn from an open source project; the source file and license are noted above the code. Note that helpers such as newDirectory(), newField(), newIndexWriterConfig(), and random() in the lucene-solr examples come from Lucene's test infrastructure (LuceneTestCase and related classes), not from the core API.
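Before the examples, here is a minimal sketch of the pattern nearly all of them share. TextField.TYPE_STORED is a shared, frozen FieldType preset (indexed, tokenized, stored), so it cannot be modified in place; instead you copy it with the FieldType copy constructor, flip the flags you need (term vectors, tokenization, norms), and build Fields from the copy. The class and field names below (TypeStoredSketch, "body") are illustrative, not part of the Lucene API.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

public class TypeStoredSketch {
  public static Document exampleDoc() {
    // Copy the frozen preset rather than mutating it (it is shared and frozen).
    FieldType withVectors = new FieldType(TextField.TYPE_STORED);
    withVectors.setStoreTermVectors(true);          // opt in to term vectors
    withVectors.setStoreTermVectorPositions(true);
    withVectors.setStoreTermVectorOffsets(true);
    withVectors.freeze();                           // guard against later accidental changes

    Document doc = new Document();
    // "body" is an illustrative field name, not anything required by Lucene.
    doc.add(new Field("body", "some analyzed, stored text", withVectors));
    return doc;
  }
}

Freezing the copy is optional, but several examples below do it for the same reason: once a Field has been built from a FieldType, silently changing that type would be a bug waiting to happen.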
Example 1
Source File: TestDirectoryReaderReopen.java    From lucene-solr with Apache License 2.0
public static Document createDocument(int n, int numFields) {
  StringBuilder sb = new StringBuilder();
  Document doc = new Document();
  sb.append("a");
  sb.append(n);
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setTokenized(false);
  customType2.setOmitNorms(true);
  FieldType customType3 = new FieldType();
  customType3.setStored(true);
  doc.add(new TextField("field1", sb.toString(), Field.Store.YES));
  doc.add(new Field("fielda", sb.toString(), customType2));
  doc.add(new Field("fieldb", sb.toString(), customType3));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new TextField("field" + (i+1), sb.toString(), Field.Store.YES));
  }
  return doc;
}
 
Example 2
Source File: TestIndexWriterOnDiskFull.java    From lucene-solr with Apache License 2.0
public void testImmediateDiskFull() throws IOException {
  MockDirectoryWrapper dir = newMockDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                              .setMaxBufferedDocs(2)
                                              .setMergeScheduler(new ConcurrentMergeScheduler())
                                              .setCommitOnClose(false));
  writer.commit(); // empty commit, so we don't create a confusing situation with the first commit
  dir.setMaxSizeInBytes(Math.max(1, dir.sizeInBytes()));
  final Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType));
  expectThrows(IOException.class, () -> {
    writer.addDocument(doc);
  });
  assertTrue(writer.isDeleterClosed());
  assertTrue(writer.isClosed());

  dir.close();
}
 
Example 3
Source File: TestMultiTermConstantScore.java    From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {
  String[] data = new String[] { "A 1 2 3 4 5 6", "Z       4 5 6", null,
      "B   2   4 5 6", "Y     3   5 6", null, "C     3     6",
      "X       4 5 6" };

  small = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), small, 
      newIndexWriterConfig(
          new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy()));

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  for (int i = 0; i < data.length; i++) {
    Document doc = new Document();
    doc.add(newField("id", String.valueOf(i), customType));// Field.Keyword("id",String.valueOf(i)));
    doc.add(newField("all", "all", customType));// Field.Keyword("all","all"));
    if (null != data[i]) {
      doc.add(newTextField("data", data[i], Field.Store.YES));// Field.Text("data",data[i]));
    }
    writer.addDocument(doc);
  }

  reader = writer.getReader();
  writer.close();
}
 
Example 4
Source File: DocHelper.java    From lucene-solr with Apache License 2.0
public static Document createDocument(int n, String indexName, int numFields) {
  StringBuilder sb = new StringBuilder();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);

  FieldType customType1 = new FieldType(StringField.TYPE_STORED);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);

  final Document doc = new Document();
  doc.add(new Field("id", Integer.toString(n), customType1));
  doc.add(new Field("indexname", indexName, customType1));
  sb.append("a");
  sb.append(n);
  doc.add(new Field("field1", sb.toString(), customType));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new Field("field" + (i + 1), sb.toString(), customType));
  }
  return doc;
}
 
Example 5
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0
public void testFlushWithNoMerging() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(
                                       dir,
                                       newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setMaxBufferedDocs(2)
                                       .setMergePolicy(newLogMergePolicy(10))
                                       );
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", "aaa", customType));
  for(int i=0;i<19;i++)
    writer.addDocument(doc);
  writer.flush(false, true);
  writer.close();
  SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
  // Since we flushed w/o allowing merging we should now
  // have 10 segments
  assertEquals(10, sis.size());
  dir.close();
}
 
Example 6
Source File: ClassificationTestBase.java    From lucene-solr with Apache License 2.0
@Override
@Before
public void setUp() throws Exception {
  super.setUp();
  dir = newDirectory();
  indexWriter = new RandomIndexWriter(random(), dir);
  textFieldName = "text";
  categoryFieldName = "cat";
  booleanFieldName = "bool";
  ft = new FieldType(TextField.TYPE_STORED);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  ft.setStoreTermVectorPositions(true);
}
 
Example 7
Source File: ResetableDocumentStoredFieldVisitor.java    From incubator-retired-blur with Apache License 2.0
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  final FieldType ft = new FieldType(TextField.TYPE_STORED);
  ft.setStoreTermVectors(fieldInfo.hasVectors());
  ft.setIndexed(fieldInfo.isIndexed());
  ft.setOmitNorms(fieldInfo.omitsNorms());
  ft.setIndexOptions(fieldInfo.getIndexOptions());
  doc.add(new Field(fieldInfo.name, value, ft));
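  // Rough heap accounting: Java strings use 2 bytes per UTF-16 char; _emptyString is
  // presumably a per-String overhead constant defined elsewhere in this class.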
  size += _emptyString * 2;
  size += fieldInfo.name.length() * 2;
  size += value.length() * 2;
}
 
Example 8
Source File: TestDirectoryReader.java    From lucene-solr with Apache License 2.0
public void testTermVectors() throws Exception {
  Directory d = newDirectory();
  // set up writer
  IndexWriter writer = new IndexWriter(
                                       d,
                                       newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setMergePolicy(newLogMergePolicy())
                                       );
  // want to get some more segments here
  // new termvector fields
  int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor();
  FieldType customType5 = new FieldType(TextField.TYPE_STORED);
  customType5.setStoreTermVectors(true);
  FieldType customType6 = new FieldType(TextField.TYPE_STORED);
  customType6.setStoreTermVectors(true);
  customType6.setStoreTermVectorOffsets(true);
  FieldType customType7 = new FieldType(TextField.TYPE_STORED);
  customType7.setStoreTermVectors(true);
  customType7.setStoreTermVectorPositions(true);
  FieldType customType8 = new FieldType(TextField.TYPE_STORED);
  customType8.setStoreTermVectors(true);
  customType8.setStoreTermVectorOffsets(true);
  customType8.setStoreTermVectorPositions(true);
  for (int i = 0; i < 5 * mergeFactor; i++) {
    Document doc = new Document();
    doc.add(new TextField("tvnot", "one two two three three three", Field.Store.YES));
    doc.add(new Field("termvector", "one two two three three three", customType5));
    doc.add(new Field("tvoffset", "one two two three three three", customType6));
    doc.add(new Field("tvposition", "one two two three three three", customType7));
    doc.add(new Field("tvpositionoffset", "one two two three three three", customType8));
    
    writer.addDocument(doc);
  }
  writer.close();
  d.close();
}
 
Example 9
Source File: TestTermVectors.java    From lucene-solr with Apache License 2.0
@BeforeClass
public static void beforeClass() throws Exception {                  
  directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), directory, newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true)).setMergePolicy(newLogMergePolicy()));
  //writer.setNoCFSRatio(1.0);
  //writer.infoStream = System.out;
  for (int i = 0; i < 1000; i++) {
    Document doc = new Document();
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    int mod3 = i % 3;
    int mod2 = i % 2;
    if (mod2 == 0 && mod3 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorOffsets(true);
      ft.setStoreTermVectorPositions(true);
    } else if (mod2 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorPositions(true);
    } else if (mod3 == 0) {
      ft.setStoreTermVectors(true);
      ft.setStoreTermVectorOffsets(true);
    } else {
      ft.setStoreTermVectors(true);
    }
    doc.add(new Field("field", English.intToEnglish(i), ft));
    //test no term vectors too
    doc.add(new TextField("noTV", English.intToEnglish(i), Field.Store.YES));
    writer.addDocument(doc);
  }
  reader = writer.getReader();
  writer.close();
}
 
Example 10
Source File: TestOrdValues.java    From lucene-solr with Apache License 2.0
private static void addDoc(RandomIndexWriter iw, int i) throws Exception {
  Document d = new Document();
  Field f;
  int scoreAndID = i + 1;

  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setTokenized(false);
  customType.setOmitNorms(true);
  
  f = newField(ID_FIELD, id2String(scoreAndID), customType); // for debug purposes
  d.add(f);
  d.add(new SortedDocValuesField(ID_FIELD, new BytesRef(id2String(scoreAndID))));

  FieldType customType2 = new FieldType(TextField.TYPE_NOT_STORED);
  customType2.setOmitNorms(true);
  f = newField(TEXT_FIELD, "text of doc" + scoreAndID + textLine(i), customType2); // for regular search
  d.add(f);

  f = new LegacyIntField(INT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(INT_FIELD, scoreAndID));

  f = new LegacyFloatField(FLOAT_FIELD, scoreAndID, Store.YES); // for function scoring
  d.add(f);
  d.add(new NumericDocValuesField(FLOAT_FIELD, Float.floatToRawIntBits(scoreAndID)));

  iw.addDocument(d);
  log("added: " + d);
}
 
Example 11
Source File: TestIndexWriter.java    From lucene-solr with Apache License 2.0
public void testHighFreqTerm() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setRAMBufferSizeMB(0.01));
  // Massive doc that has 128 K a's
  StringBuilder b = new StringBuilder(1024*1024);
  for(int i=0;i<4096;i++) {
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
    b.append(" a a a a a a a a");
  }
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  doc.add(newField("field", b.toString(), customType));
  writer.addDocument(doc);
  writer.close();

  IndexReader reader = DirectoryReader.open(dir);
  assertEquals(1, reader.maxDoc());
  assertEquals(1, reader.numDocs());
  Term t = new Term("field", "a");
  assertEquals(1, reader.docFreq(t));
  PostingsEnum td = TestUtil.docs(random(), reader,
                                  "field",
                                  new BytesRef("a"),
                                  null,
                                  PostingsEnum.FREQS);
  td.nextDoc();
  assertEquals(128*1024, td.freq());
  reader.close();
  dir.close();
}
 
Example 12
Source File: AbstractTestCase.java    From lucene-solr with Apache License 2.0
protected void make1dmfIndex( Analyzer analyzer, String... values ) throws Exception {
  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer).setOpenMode(OpenMode.CREATE));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorOffsets(true);
  customType.setStoreTermVectorPositions(true);
  for( String value: values ) {
    doc.add( new Field( F, value, customType) );
  }
  writer.addDocument( doc );
  writer.close();
  if (reader != null) reader.close();
  reader = DirectoryReader.open(dir);
}
 
Example 13
Source File: TestIndexOptions.java    From lucene-solr with Apache License 2.0
private void doTestChangeIndexOptionsAddIndexesCodecReader(IndexOptions from, IndexOptions to) throws IOException {
  Directory dir1 = newDirectory();
  IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig());
  FieldType ft1 = new FieldType(TextField.TYPE_STORED);
  ft1.setIndexOptions(from);
  w1.addDocument(Collections.singleton(new Field("foo", "bar", ft1)));

  Directory dir2 = newDirectory();
  IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig());
  FieldType ft2 = new FieldType(TextField.TYPE_STORED);
  ft2.setIndexOptions(to);
  w2.addDocument(Collections.singleton(new Field("foo", "bar", ft2)));

  try (CodecReader cr = (CodecReader) getOnlyLeafReader(DirectoryReader.open(w2))) {
    if (from == IndexOptions.NONE || to == IndexOptions.NONE || from == to) {
      w1.addIndexes(cr); // no exception
      w1.forceMerge(1);
      try (LeafReader r = getOnlyLeafReader(DirectoryReader.open(w1))) {
        IndexOptions expected = from == IndexOptions.NONE ? to : from;
        assertEquals(expected, r.getFieldInfos().fieldInfo("foo").getIndexOptions());
      }
    } else {
      IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
          () -> w1.addIndexes(cr));
      assertEquals("cannot change field \"foo\" from index options=" + from +
          " to inconsistent index options=" + to, e.getMessage());
    }
  }

  IOUtils.close(w1, w2, dir1, dir2);
}
 
Example 14
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);

  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1)), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example 15
Source File: TestIndexWriterMerging.java    From lucene-solr with Apache License 2.0
public void testForceMergeDeletes() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                              .setMaxBufferedDocs(2)
                                              .setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH));
  Document document = new Document();

  FieldType customType = new FieldType();
  customType.setStored(true);

  FieldType customType1 = new FieldType(TextField.TYPE_STORED);
  customType1.setTokenized(false);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);
  
  Field idField = newStringField("id", "", Field.Store.NO);
  document.add(idField);
  Field storedField = newField("stored", "stored", customType);
  document.add(storedField);
  Field termVectorField = newField("termVector", "termVector", customType1);
  document.add(termVectorField);
  for(int i=0;i<10;i++) {
    idField.setStringValue("" + i);
    writer.addDocument(document);
  }
  writer.close();

  IndexReader ir = DirectoryReader.open(dir);
  assertEquals(10, ir.maxDoc());
  assertEquals(10, ir.numDocs());
  ir.close();

  IndexWriterConfig dontMergeConfig = new IndexWriterConfig(new MockAnalyzer(random()))
    .setMergePolicy(NoMergePolicy.INSTANCE);
  writer = new IndexWriter(dir, dontMergeConfig);
  writer.deleteDocuments(new Term("id", "0"));
  writer.deleteDocuments(new Term("id", "7"));
  writer.close();
  
  ir = DirectoryReader.open(dir);
  assertEquals(8, ir.numDocs());
  ir.close();

  writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                  .setMergePolicy(newLogMergePolicy()));
  assertEquals(8, writer.getDocStats().numDocs);
  assertEquals(10, writer.getDocStats().maxDoc);
  writer.forceMergeDeletes();
  assertEquals(8, writer.getDocStats().numDocs);
  writer.close();
  ir = DirectoryReader.open(dir);
  assertEquals(8, ir.maxDoc());
  assertEquals(8, ir.numDocs());
  ir.close();
  dir.close();
}
 
Example 16
Source File: TestConsistentFieldNumbers.java    From lucene-solr with Apache License 2.0
@Test
public void testSameFieldNumbersAcrossSegments() throws Exception {
  for (int i = 0; i < 2; i++) {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                                 .setMergePolicy(NoMergePolicy.INSTANCE));

    Document d1 = new Document();
    d1.add(new TextField("f1", "first field", Field.Store.YES));
    d1.add(new TextField("f2", "second field", Field.Store.YES));
    writer.addDocument(d1);

    if (i == 1) {
      writer.close();
      writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))
                                       .setMergePolicy(NoMergePolicy.INSTANCE));
    } else {
      writer.commit();
    }

    Document d2 = new Document();
    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
    customType2.setStoreTermVectors(true);
    d2.add(new TextField("f2", "second field", Field.Store.NO));
    d2.add(new Field("f1", "first field", customType2));
    d2.add(new TextField("f3", "third field", Field.Store.NO));
    d2.add(new TextField("f4", "fourth field", Field.Store.NO));
    writer.addDocument(d2);

    writer.close();

    SegmentInfos sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(2, sis.size());

    FieldInfos fis1 = IndexWriter.readFieldInfos(sis.info(0));
    FieldInfos fis2 = IndexWriter.readFieldInfos(sis.info(1));

    assertEquals("f1", fis1.fieldInfo(0).name);
    assertEquals("f2", fis1.fieldInfo(1).name);
    assertEquals("f1", fis2.fieldInfo(0).name);
    assertEquals("f2", fis2.fieldInfo(1).name);
    assertEquals("f3", fis2.fieldInfo(2).name);
    assertEquals("f4", fis2.fieldInfo(3).name);

    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
    writer.forceMerge(1);
    writer.close();

    sis = SegmentInfos.readLatestCommit(dir);
    assertEquals(1, sis.size());

    FieldInfos fis3 = IndexWriter.readFieldInfos(sis.info(0));

    assertEquals("f1", fis3.fieldInfo(0).name);
    assertEquals("f2", fis3.fieldInfo(1).name);
    assertEquals("f3", fis3.fieldInfo(2).name);
    assertEquals("f4", fis3.fieldInfo(3).name);
    dir.close();
  }
}
 
Example 17
Source File: TestConsistentFieldNumbers.java    From lucene-solr with Apache License 2.0
private Field getField(int number) {
  int mode = number % 16;
  String fieldName = "" + number;
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  
  FieldType customType2 = new FieldType(TextField.TYPE_STORED);
  customType2.setTokenized(false);
  
  FieldType customType3 = new FieldType(TextField.TYPE_NOT_STORED);
  customType3.setTokenized(false);
  
  FieldType customType4 = new FieldType(TextField.TYPE_NOT_STORED);
  customType4.setTokenized(false);
  customType4.setStoreTermVectors(true);
  customType4.setStoreTermVectorOffsets(true);
  
  FieldType customType5 = new FieldType(TextField.TYPE_NOT_STORED);
  customType5.setStoreTermVectors(true);
  customType5.setStoreTermVectorOffsets(true);

  FieldType customType6 = new FieldType(TextField.TYPE_STORED);
  customType6.setTokenized(false);
  customType6.setStoreTermVectors(true);
  customType6.setStoreTermVectorOffsets(true);

  FieldType customType7 = new FieldType(TextField.TYPE_NOT_STORED);
  customType7.setTokenized(false);
  customType7.setStoreTermVectors(true);
  customType7.setStoreTermVectorOffsets(true);

  FieldType customType8 = new FieldType(TextField.TYPE_STORED);
  customType8.setTokenized(false);
  customType8.setStoreTermVectors(true);
  customType8.setStoreTermVectorPositions(true);

  FieldType customType9 = new FieldType(TextField.TYPE_NOT_STORED);
  customType9.setStoreTermVectors(true);
  customType9.setStoreTermVectorPositions(true);

  FieldType customType10 = new FieldType(TextField.TYPE_STORED);
  customType10.setTokenized(false);
  customType10.setStoreTermVectors(true);
  customType10.setStoreTermVectorPositions(true);

  FieldType customType11 = new FieldType(TextField.TYPE_NOT_STORED);
  customType11.setTokenized(false);
  customType11.setStoreTermVectors(true);
  customType11.setStoreTermVectorPositions(true);

  FieldType customType12 = new FieldType(TextField.TYPE_STORED);
  customType12.setStoreTermVectors(true);
  customType12.setStoreTermVectorOffsets(true);
  customType12.setStoreTermVectorPositions(true);

  FieldType customType13 = new FieldType(TextField.TYPE_NOT_STORED);
  customType13.setStoreTermVectors(true);
  customType13.setStoreTermVectorOffsets(true);
  customType13.setStoreTermVectorPositions(true);

  FieldType customType14 = new FieldType(TextField.TYPE_STORED);
  customType14.setTokenized(false);
  customType14.setStoreTermVectors(true);
  customType14.setStoreTermVectorOffsets(true);
  customType14.setStoreTermVectorPositions(true);

  FieldType customType15 = new FieldType(TextField.TYPE_NOT_STORED);
  customType15.setTokenized(false);
  customType15.setStoreTermVectors(true);
  customType15.setStoreTermVectorOffsets(true);
  customType15.setStoreTermVectorPositions(true);
  
  switch (mode) {
    case 0: return new Field(fieldName, "some text", customType);
    case 1: return new TextField(fieldName, "some text", Field.Store.NO);
    case 2: return new Field(fieldName, "some text", customType2);
    case 3: return new Field(fieldName, "some text", customType3);
    case 4: return new Field(fieldName, "some text", customType4);
    case 5: return new Field(fieldName, "some text", customType5);
    case 6: return new Field(fieldName, "some text", customType6);
    case 7: return new Field(fieldName, "some text", customType7);
    case 8: return new Field(fieldName, "some text", customType8);
    case 9: return new Field(fieldName, "some text", customType9);
    case 10: return new Field(fieldName, "some text", customType10);
    case 11: return new Field(fieldName, "some text", customType11);
    case 12: return new Field(fieldName, "some text", customType12);
    case 13: return new Field(fieldName, "some text", customType13);
    case 14: return new Field(fieldName, "some text", customType14);
    case 15: return new Field(fieldName, "some text", customType15);
    default: return null;
  }
}
 
Example 18
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
private void matchedFieldsTestCase( boolean useMatchedFields, boolean fieldMatch, String fieldValue, String expected, Query... queryClauses ) throws IOException {
  Document doc = new Document();
  FieldType stored = new FieldType( TextField.TYPE_STORED );
  stored.setStoreTermVectorOffsets( true );
  stored.setStoreTermVectorPositions( true );
  stored.setStoreTermVectors( true );
  stored.freeze();
  FieldType matched = new FieldType( TextField.TYPE_NOT_STORED );
  matched.setStoreTermVectorOffsets( true );
  matched.setStoreTermVectorPositions( true );
  matched.setStoreTermVectors( true );
  matched.freeze();
  doc.add( new Field( "field", fieldValue, stored ) );               // Whitespace tokenized with English stop words
  doc.add( new Field( "field_exact", fieldValue, matched ) );        // Whitespace tokenized without stop words
  doc.add( new Field( "field_super_exact", fieldValue, matched ) );  // Whitespace tokenized without toLower
  doc.add( new Field( "field_characters", fieldValue, matched ) );   // Each letter is a token
  doc.add( new Field( "field_tripples", fieldValue, matched ) );     // Every three letters is a token
  doc.add( new Field( "field_sliced", fieldValue.substring( 0,       // Sliced at 10 chars then analyzed just like field
    Math.min( fieldValue.length() - 1 , 10 ) ), matched ) );
  doc.add( new Field( "field_der_red", new CannedTokenStream(        // Hacky field containing "der" and "red" at pos = 0
        token( "der", 1, 0, 3 ),
        token( "red", 0, 0, 3 )
      ), matched ) );

  final Map<String, Analyzer> fieldAnalyzers = new TreeMap<>();
  fieldAnalyzers.put( "field", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET ) );
  fieldAnalyzers.put( "field_exact", new MockAnalyzer( random() ) );
  fieldAnalyzers.put( "field_super_exact", new MockAnalyzer( random(), MockTokenizer.WHITESPACE, false ) );
  fieldAnalyzers.put( "field_characters", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp(".").toAutomaton() ), true ) );
  fieldAnalyzers.put( "field_tripples", new MockAnalyzer( random(), new CharacterRunAutomaton( new RegExp("...").toAutomaton() ), true ) );
  fieldAnalyzers.put( "field_sliced", fieldAnalyzers.get( "field" ) );
  fieldAnalyzers.put( "field_der_red", fieldAnalyzers.get( "field" ) );  // This is required even though we provide a token stream
  Analyzer analyzer = new DelegatingAnalyzerWrapper(Analyzer.PER_FIELD_REUSE_STRATEGY) {
    public Analyzer getWrappedAnalyzer(String fieldName) {
      return fieldAnalyzers.get( fieldName );
    }
  };

  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(analyzer));
  writer.addDocument( doc );

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  FragListBuilder fragListBuilder = new SimpleFragListBuilder();
  FragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();
  IndexReader reader = DirectoryReader.open(writer);
  String[] preTags = new String[] { "<b>" };
  String[] postTags = new String[] { "</b>" };
  Encoder encoder = new DefaultEncoder();
  int docId = 0;
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  for ( Query clause : queryClauses ) {
    query.add( clause, Occur.MUST );
  }
  FieldQuery fieldQuery = new FieldQuery( query.build(), reader, true, fieldMatch );
  String[] bestFragments;
  if ( useMatchedFields ) {
    Set< String > matchedFields = new HashSet<>();
    matchedFields.add( "field" );
    matchedFields.add( "field_exact" );
    matchedFields.add( "field_super_exact" );
    matchedFields.add( "field_characters" );
    matchedFields.add( "field_tripples" );
    matchedFields.add( "field_sliced" );
    matchedFields.add( "field_der_red" );
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", matchedFields, 25, 1,
      fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
  } else {
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 25, 1,
      fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
  }
  assertEquals( expected, bestFragments[ 0 ] );

  reader.close();
  writer.close();
  dir.close();
}
 
Example 19
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
public void testMultiValuedSortByScore() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter( dir, newIndexWriterConfig(new MockAnalyzer( random() ) ) );
  Document doc = new Document();
  FieldType type = new FieldType( TextField.TYPE_STORED );
  type.setStoreTermVectorOffsets( true );
  type.setStoreTermVectorPositions( true );
  type.setStoreTermVectors( true );
  type.freeze();
  doc.add( new Field( "field", "zero if naught", type ) ); // The first two fields contain the best match
  doc.add( new Field( "field", "hero of legend", type ) ); // but total a lower score (3) than the bottom
  doc.add( new Field( "field", "naught of hero", type ) ); // two fields (4)
  doc.add( new Field( "field", "naught of hero", type ) );
  writer.addDocument(doc);

  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  ScoreOrderFragmentsBuilder fragmentsBuilder = new ScoreOrderFragmentsBuilder();    
  fragmentsBuilder.setDiscreteMultiValueHighlighting( true );
  IndexReader reader = DirectoryReader.open(writer);
  String[] preTags = new String[] { "<b>" };
  String[] postTags = new String[] { "</b>" };
  Encoder encoder = new DefaultEncoder();
  int docId = 0;
  BooleanQuery.Builder query = new BooleanQuery.Builder();
  query.add( clause( "field", "hero" ), Occur.SHOULD);
  query.add( clause( "field", "of" ), Occur.SHOULD);
  query.add( clause( "field", "legend" ), Occur.SHOULD);
  FieldQuery fieldQuery = highlighter.getFieldQuery( query.build(), reader );

  for ( FragListBuilder fragListBuilder : new FragListBuilder[] {
    new SimpleFragListBuilder(), new WeightedFragListBuilder() } ) {
    String[] bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 20, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 28, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
    bestFragments = highlighter.getBestFragments( fieldQuery, reader, docId, "field", 30000, 1,
        fragListBuilder, fragmentsBuilder, preTags, postTags, encoder );
    assertEquals("<b>hero</b> <b>of</b> <b>legend</b>", bestFragments[0]);
  }

  reader.close();
  writer.close();
  dir.close();
}
 
Example 20
Source File: FastVectorHighlighterTest.java    From lucene-solr with Apache License 2.0
public void testCommonTermsQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET))
      .setMergePolicy(newLogMergePolicy())); // don't reorder doc ids
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  String[] texts = {
      "Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot",
      "This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy",
      "JFK has been shot", "John Kennedy has been shot",
      "This text has a typo in referring to Keneddy",
      "wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", "y z x y z a b", "lets is a the lets is a the lets is a the lets" };
  for (int i = 0; i < texts.length; i++) {
    Document doc = new Document();
    Field field = new Field("field", texts[i], type);
    doc.add(field);
    writer.addDocument(doc);
  }
  CommonTermsQuery query = new CommonTermsQuery(Occur.MUST, Occur.SHOULD, 2);
  query.add(new Term("field", "text"));
  query.add(new Term("field", "long"));
  query.add(new Term("field", "very"));
 
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  IndexReader reader = DirectoryReader.open(writer);
  IndexSearcher searcher = newSearcher(reader);
  TopDocs hits = searcher.search(query, 10);
  assertEquals(2, hits.totalHits.value);
  FieldQuery fieldQuery  = highlighter.getFieldQuery(query, reader);
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, 1, "field", 1000, 1);
  assertEquals("This piece of <b>text</b> refers to Kennedy at the beginning then has a longer piece of <b>text</b> that is <b>very</b> <b>long</b> in the middle and finally ends with another reference to Kennedy", bestFragments[0]);

  fieldQuery  = highlighter.getFieldQuery(query, reader);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, 0, "field", 1000, 1);
  assertEquals("Hello this is a piece of <b>text</b> that is <b>very</b> <b>long</b> and contains too much preamble and the meat is really here which says kennedy has been shot", bestFragments[0]);

  reader.close();
  writer.close();
  dir.close();
}