Java Code Examples for org.apache.lucene.document.FieldType

The following examples show how to use org.apache.lucene.document.FieldType. They are extracted from open source projects; the source project, file, and license are noted above each example.
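
Most of the examples below follow the same pattern: start from one of the built-in types (for example TextField.TYPE_NOT_STORED or StoredField.TYPE), switch on the extra index options or term-vector flags you need, optionally freeze() the type, and pass it to a Field constructor. The snippet below is a minimal sketch of that pattern; the method name newDocumentWithCustomType and the field name "body" are illustrative only and do not come from any of the projects listed here.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexOptions;

private static Document newDocumentWithCustomType(String text) {
  // copy a built-in type: indexed, tokenized, not stored
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  ft.freeze(); // make the configuration immutable before indexing with it

  Document doc = new Document();
  doc.add(new Field("body", text, ft));
  return doc;
}
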
Example 1
Source Project: lucene-solr   Source File: TestBlockPostingsFormat2.java    License: Apache License 2.0
private Document newDocument() {
  Document doc = new Document();
  for (IndexOptions option : IndexOptions.values()) {
    if (option == IndexOptions.NONE) {
      continue;
    }
    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    ft.setStoreTermVectorPayloads(true);
    ft.setIndexOptions(option);
    doc.add(new Field(option.toString(), "", ft));
  }
  return doc;
}
 
Example 2
Source Project: Elasticsearch   Source File: FieldMapper.java    License: Apache License 2.0
public static String termVectorOptionsToString(FieldType fieldType) {
    if (!fieldType.storeTermVectors()) {
        return "no";
    } else if (!fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
        return "yes";
    } else if (fieldType.storeTermVectorOffsets() && !fieldType.storeTermVectorPositions()) {
        return "with_offsets";
    } else {
        StringBuilder builder = new StringBuilder("with");
        if (fieldType.storeTermVectorPositions()) {
            builder.append("_positions");
        }
        if (fieldType.storeTermVectorOffsets()) {
            builder.append("_offsets");
        }
        if (fieldType.storeTermVectorPayloads()) {
            builder.append("_payloads");
        }
        return builder.toString();
    }
}
 
Example 3
Source Project: lucene-solr   Source File: TestTermVectorsWriter.java    License: Apache License 2.0
public void testNoAbortOnBadTVSettings() throws Exception {
  Directory dir = newDirectory();
  // Don't use RandomIndexWriter because we want to be sure both docs go to 1 seg:
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter iw = new IndexWriter(dir, iwc);

  Document doc = new Document();
  iw.addDocument(doc);
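  // StoredField.TYPE is stored-only (not indexed), so enabling term vectors on it is invalid and addDocument below will throw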
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectors(true);
  ft.freeze();
  doc.add(new Field("field", "value", ft));

  expectThrows(IllegalArgumentException.class, () -> {
    iw.addDocument(doc);
  });

  IndexReader r = DirectoryReader.open(iw);

  // Make sure the exc didn't lose our first document:
  assertEquals(1, r.numDocs());
  iw.close();
  r.close();
  dir.close();
}
 
Example 4
public static Document Document(File f)
     throws java.io.FileNotFoundException {
  Document doc = new Document();
  doc.add(new StoredField("path", f.getPath()));
  doc.add(new StoredField("modified",
                    DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE)));
  
  //create new FieldType to store term positions (TextField is not sufficiently configurable)
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  ft.setTokenized(true);
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  Field contentsField = new Field("contents", new FileReader(f), ft);

  doc.add(contentsField);
  return doc;
}
 
Example 5
Source Project: lucene-solr   Source File: TestDocValuesIndexing.java    License: Apache License 2.0
public void testExcIndexingDocBeforeDocValues() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  IndexWriter w = new IndexWriter(dir, iwc);
  Document doc = new Document();
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setDocValuesType(DocValuesType.SORTED);
  ft.freeze();
  Field field = new Field("test", "value", ft);
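  // replace the field's token stream with one that always throws, so indexing this document fails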
  field.setTokenStream(new TokenStream() {
      @Override
      public boolean incrementToken() {
        throw new RuntimeException("no");
      }
    });
  doc.add(field);
  expectThrows(RuntimeException.class, () -> {
    w.addDocument(doc);
  });

  w.addDocument(new Document());
  w.close();
  dir.close();
}
 
Example 6
Source Project: lucene-solr   Source File: TestSloppyPhraseQuery.java    License: Apache License 2.0
public void testInfiniteFreq1() throws Exception {
  String document = "drug druggy drug drug drug";
  
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  Document doc = new Document();
  doc.add(newField("lyrics", document, new FieldType(TextField.TYPE_NOT_STORED)));
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  
  IndexSearcher is = newSearcher(ir);
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 3);
  builder.setSlop(1);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertSaneScoring(pq, is);
  ir.close();
  dir.close();
}
 
Example 7
Source Project: lucene-solr   Source File: TestCustomTermFreq.java    License: Apache License 2.0
public void testFieldInvertState() throws Exception {
  Directory dir = newDirectory();
  IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
  iwc.setSimilarity(NeverForgetsSimilarity.INSTANCE);
  IndexWriter w = new IndexWriter(dir, iwc);

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
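  // CannedTermFreqs supplies custom term frequencies: foo totals 42 + 17 = 59, bar totals 128 + 100 = 228 (see the assertions below)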
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  w.addDocument(doc);
  FieldInvertState fis = NeverForgetsSimilarity.INSTANCE.lastState;
  assertEquals(228, fis.getMaxTermFrequency());
  assertEquals(2, fis.getUniqueTermCount());
  assertEquals(0, fis.getNumOverlap());
  assertEquals(287, fis.getLength());

  IOUtils.close(w, dir);
}
 
Example 8
Source Project: lucene-solr   Source File: TestPostingsOffsets.java    License: Apache License 2.0
public void testLegalbutVeryLargeOffsets() throws Exception {
  Directory dir = newDirectory();
  IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
  Document doc = new Document();
  Token t1 = new Token("foo", 0, Integer.MAX_VALUE-500);
  if (random().nextBoolean()) {
    t1.setPayload(new BytesRef("test"));
  }
  Token t2 = new Token("foo", Integer.MAX_VALUE-500, Integer.MAX_VALUE);
  TokenStream tokenStream = new CannedTokenStream(
      new Token[] { t1, t2 }
  );
  FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
  ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
  // store some term vectors for the checkindex cross-check
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorPositions(true);
  ft.setStoreTermVectorOffsets(true);
  Field field = new Field("foo", tokenStream, ft);
  doc.add(field);
  iw.addDocument(doc);
  iw.close();
  dir.close();
}
 
Example 9
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

  Field title = new Field("title", "", ft);
  Field text = new Field("text", "", ft);
  Field category = new Field("category", "", ft);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
 
Example 10
Source Project: lucene-solr   Source File: TestUnifiedHighlighter.java    License: Apache License 2.0
private IndexReader indexSomeFields() throws IOException {
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
  FieldType ft = new FieldType();
  ft.setIndexOptions(IndexOptions.NONE);
  ft.setTokenized(false);
  ft.setStored(true);
  ft.freeze();

  Field title = new Field("title", "", ft);
  Field text = new Field("text", "", ft);
  Field category = new Field("category", "", ft);

  Document doc = new Document();
  doc.add(title);
  doc.add(text);
  doc.add(category);
  title.setStringValue("This is the title field.");
  text.setStringValue("This is the text field. You can put some text if you want.");
  category.setStringValue("This is the category field.");
  iw.addDocument(doc);

  IndexReader ir = iw.getReader();
  iw.close();
  return ir;
}
 
Example 11
Source Project: lucene-solr   Source File: DocMaker.java    License: Apache License 2.0
public DocState(boolean reuseFields, FieldType ft, FieldType bodyFt) {

      this.reuseFields = reuseFields;
      
      if (reuseFields) {
        fields =  new HashMap<>();
        numericFields = new HashMap<>();
        
        // Initialize the map with the default fields.
        fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyFt));
        fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", ft));
        fields.put(DATE_FIELD, new Field(DATE_FIELD, "", ft));
        fields.put(ID_FIELD, new StringField(ID_FIELD, "", Field.Store.YES));
        fields.put(NAME_FIELD, new Field(NAME_FIELD, "", ft));

        numericFields.put(DATE_MSEC_FIELD, new LongPoint(DATE_MSEC_FIELD, 0L));
        numericFields.put(TIME_SEC_FIELD, new IntPoint(TIME_SEC_FIELD, 0));
        
        doc = new Document();
      } else {
        numericFields = null;
        fields = null;
        doc = null;
      }
    }
 
Example 12
private Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {

    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStored(true);

    Document doc = new Document();

    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
        Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
      doc.add(f);
    }

    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));

    return doc;
  }
 
Example 13
Source Project: lucene-solr   Source File: TestMemoryIndex.java    License: Apache License 2.0
public void testIndexingPointsAndDocValues() throws Exception {
  FieldType type = new FieldType();
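  // index as a 1-dimensional point with 4 bytes per dimension ("term" encodes to 4 UTF-8 bytes)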
  type.setDimensions(1, 4);
  type.setDocValuesType(DocValuesType.BINARY);
  type.freeze();
  Document doc = new Document();
  byte[] packedPoint = "term".getBytes(StandardCharsets.UTF_8);
  doc.add(new BinaryPoint("field", packedPoint, type));
  MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
  LeafReader leafReader = mi.createSearcher().getIndexReader().leaves().get(0).reader();

  assertEquals(1, leafReader.getPointValues("field").size());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMinPackedValue());
  assertArrayEquals(packedPoint, leafReader.getPointValues("field").getMaxPackedValue());

  BinaryDocValues dvs = leafReader.getBinaryDocValues("field");
  assertEquals(0, dvs.nextDoc());
  assertEquals("term", dvs.binaryValue().utf8ToString());
}
 
Example 14
Source Project: lucene-solr   Source File: AnalyzingInfixSuggester.java    License: Apache License 2.0
private Document buildDocument(BytesRef text, Set<BytesRef> contexts, long weight, BytesRef payload) throws IOException {
  String textString = text.utf8ToString();
  Document doc = new Document();
  FieldType ft = getTextFieldType();
  doc.add(new Field(TEXT_FIELD_NAME, textString, ft));
  if (minPrefixChars>0) {
    doc.add(new Field(TEXTGRAMS_FIELD_NAME, textString, ft));
  }
  doc.add(new StringField(EXACT_TEXT_FIELD_NAME, textString, Field.Store.NO));
  doc.add(new BinaryDocValuesField(TEXT_FIELD_NAME, text));
  doc.add(new NumericDocValuesField("weight", weight));
  if (payload != null) {
    doc.add(new BinaryDocValuesField("payloads", payload));
  }
  if (contexts != null) {
    for(BytesRef context : contexts) {
      doc.add(new StringField(CONTEXTS_FIELD_NAME, context, Field.Store.NO));
      doc.add(new SortedSetDocValuesField(CONTEXTS_FIELD_NAME, context));
    }
  }
  return doc;
}
 
Example 15
Source Project: lucene-solr   Source File: TestTermVectorsReader.java    License: Apache License 2.0
public void testIllegalVectorPositionsWithoutIndexed() throws Exception {
  Directory dir = newDirectory();
  MockAnalyzer a = new MockAnalyzer(random());
  a.setEnableChecks(false);
  RandomIndexWriter w = new RandomIndexWriter(random(), dir, a);
  FieldType ft = new FieldType(StoredField.TYPE);
  ft.setStoreTermVectorPositions(true);
  Document doc = new Document();
  doc.add(new Field("field", "value", ft));
  
  IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
    w.addDocument(doc);
  });
  assertEquals("cannot store term vector positions for a field that is not indexed (field=\"field\")", expected.getMessage());
  
  w.close();
  dir.close();
}
 
Example 16
Source Project: lucene-solr   Source File: DocHelper.java    License: Apache License 2.0
public static Document createDocument(int n, String indexName, int numFields) {
  StringBuilder sb = new StringBuilder();
  FieldType customType = new FieldType(TextField.TYPE_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);

  FieldType customType1 = new FieldType(StringField.TYPE_STORED);
  customType1.setStoreTermVectors(true);
  customType1.setStoreTermVectorPositions(true);
  customType1.setStoreTermVectorOffsets(true);

  final Document doc = new Document();
  doc.add(new Field("id", Integer.toString(n), customType1));
  doc.add(new Field("indexname", indexName, customType1));
  sb.append("a");
  sb.append(n);
  doc.add(new Field("field1", sb.toString(), customType));
  sb.append(" b");
  sb.append(n);
  for (int i = 1; i < numFields; i++) {
    doc.add(new Field("field" + (i + 1), sb.toString(), customType));
  }
  return doc;
}
 
Example 17
private void addDocumentBlock(int id, int count, IndexWriter writer) throws IOException {
  FieldType fieldType = new FieldType();
  fieldType.setIndexed(true);
  fieldType.setOmitNorms(true);
  fieldType.setTokenized(false);
  fieldType.setStored(true);

  FieldType fieldTypeNoIndex = new FieldType();
  fieldTypeNoIndex.setStored(true);
  fieldTypeNoIndex.setIndexed(false);

  for (int i = 0; i < count; i++) {
    Document document = new Document();
    document.add(new Field("id", Integer.toString(id), fieldType));
    document.add(new Field("field", Integer.toString(i), fieldType));
    for (int j = 0; j < 100; j++) {
      document.add(new Field("field" + j, "testing here testing here testing here testing here testing here testing here testing here", fieldTypeNoIndex));
    }
    writer.addDocument(document);
  }
}
 
Example 18
@Override
public void configure(String fieldNameForThisInstance, Map<String, String> properties, Configuration configuration) {
  String precisionStepStr = properties.get(NUMERIC_PRECISION_STEP);
  if (precisionStepStr != null) {
    _precisionStep = Integer.parseInt(precisionStepStr);
    _typeStored = new FieldType(LongField.TYPE_STORED);
    _typeStored.setNumericPrecisionStep(_precisionStep);
    _typeStored.freeze();
    _typeNotStored = new FieldType(LongField.TYPE_NOT_STORED);
    _typeNotStored.setNumericPrecisionStep(_precisionStep);
    _typeNotStored.freeze();
  } else {
    _typeStored = LongField.TYPE_STORED;
    _typeNotStored = LongField.TYPE_NOT_STORED;
  }
}
 
Example 19
Source Project: lucene-solr   Source File: PresearcherTestBase.java    License: Apache License 2.0
public void testNonStringTermHandling() throws IOException {

    FieldType ft = new FieldType();
    ft.setTokenized(true);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    try (Monitor monitor = newMonitor()) {
      monitor.register(new MonitorQuery("1", new TermQuery(new Term("f", NON_STRING_TERM))));

      Document doc = new Document();
      doc.add(new Field("f", new NonStringTokenStream(), ft));
      MatchingQueries<QueryMatch> m = monitor.match(doc, QueryMatch.SIMPLE_MATCHER);
      assertEquals(1, m.getMatchCount());
      assertEquals(1, m.getQueriesRun());
    }

  }
 
Example 20
Source Project: jstarcraft-core   Source File: StringIndexConverter.java    License: Apache License 2.0
@Override
public Iterable<IndexableField> convert(LuceneContext context, String path, Field field, LuceneIndex annotation, Type type, Object data) {
    Collection<IndexableField> indexables = new LinkedList<>();
    FieldType configuration = new FieldType();
    configuration.setIndexOptions(IndexOptions.DOCS);
    if (annotation.analyze()) {
        configuration.setTokenized(true);

        LuceneTerm negative = annotation.negative();
        if (negative.offset()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        } else if (negative.position()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        } else if (negative.frequency()) {
            configuration.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        }

        LuceneTerm positive = annotation.positive();
        if (positive.offset()) {
            configuration.setStoreTermVectorOffsets(true);
        }
        if (positive.position()) {
            configuration.setStoreTermVectorPositions(true);
        }
        if (positive.frequency()) {
            configuration.setStoreTermVectors(true);
        }
    }
    indexables.add(new org.apache.lucene.document.Field(path, (String) data, configuration));
    return indexables;
}
 
Example 21
Source Project: Elasticsearch   Source File: GeoPointFieldMapper.java    License: Apache License 2.0
@Override
public GeoPointFieldMapper build(BuilderContext context, String simpleName, MappedFieldType fieldType,
                                 MappedFieldType defaultFieldType, Settings indexSettings, ContentPath.Type pathType, DoubleFieldMapper latMapper,
                                 DoubleFieldMapper lonMapper, StringFieldMapper geoHashMapper, MultiFields multiFields, Explicit<Boolean> ignoreMalformed,
                                 CopyTo copyTo) {
    fieldType.setTokenized(false);
    if (context.indexCreatedVersion().before(Version.V_2_3_0)) {
        fieldType.setNumericPrecisionStep(GeoPointField.PRECISION_STEP);
        fieldType.setNumericType(FieldType.NumericType.LONG);
    }
    setupFieldType(context);
    return new GeoPointFieldMapper(simpleName, fieldType, defaultFieldType, indexSettings, pathType, latMapper, lonMapper,
            geoHashMapper, multiFields, ignoreMalformed, copyTo);
}
 
Example 22
Source Project: lucene-solr   Source File: TestCustomTermFreq.java    License: Apache License 2.0
public void testInvalidProx() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

  Document doc = new Document();
  FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
  Field field = new Field("field",
                          new CannedTermFreqs(new String[] {"foo", "bar", "foo", "bar"},
                                              new int[] {42, 128, 17, 100}),
                          fieldType);
  doc.add(field);
  Exception e = expectThrows(IllegalStateException.class, () -> {w.addDocument(doc);});
  assertEquals("field \"field\": cannot index positions while using custom TermFrequencyAttribute", e.getMessage());
  IOUtils.close(w, dir);
}
 
Example 23
Source Project: lucene-solr   Source File: TestSloppyPhraseQuery.java    License: Apache License 2.0
public void testSlopWithHoles() throws Exception {  
  Directory dir = newDirectory();
  RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setOmitNorms(true);
  Field f = new Field("lyrics", "", customType);
  Document doc = new Document();
  doc.add(f);
  f.setStringValue("drug drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy druggy drug");
  iw.addDocument(doc);
  f.setStringValue("drug druggy drug druggy drug");
  iw.addDocument(doc);
  IndexReader ir = iw.getReader();
  iw.close();
  IndexSearcher is = newSearcher(ir);

  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.add(new Term("lyrics", "drug"), 1);
  builder.add(new Term("lyrics", "drug"), 4);
  PhraseQuery pq = builder.build();
  // "drug the drug"~1
  assertEquals(1, is.search(pq, 4).totalHits.value);
  builder.setSlop(1);
  pq = builder.build();
  assertEquals(3, is.search(pq, 4).totalHits.value);
  builder.setSlop(2);
  pq = builder.build();
  assertEquals(4, is.search(pq, 4).totalHits.value);
  ir.close();
  dir.close();
}
 
Example 24
Source Project: lucene-solr   Source File: FastVectorHighlighterTest.java    License: Apache License 2.0
public void testSimpleHighlightTest() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);
  
  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();
  
  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new TermQuery(new Term("field", "foo")), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered 
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example 25
Source Project: lucene-solr   Source File: FastVectorHighlighterTest.java    License: Apache License 2.0
public void testFunctionScoreQueryHighlight() throws IOException {
  Directory dir = newDirectory();
  IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType type = new FieldType(TextField.TYPE_STORED);
  type.setStoreTermVectorOffsets(true);
  type.setStoreTermVectorPositions(true);
  type.setStoreTermVectors(true);
  type.freeze();
  Field field = new Field("field", "This is a test where foo is highlighed and should be highlighted", type);

  doc.add(field);
  writer.addDocument(doc);
  FastVectorHighlighter highlighter = new FastVectorHighlighter();

  IndexReader reader = DirectoryReader.open(writer);
  int docId = 0;
  FieldQuery fieldQuery  = highlighter.getFieldQuery( new FunctionScoreQuery(new TermQuery(new Term("field", "foo")), DoubleValuesSource.constant(1)), reader );
  String[] bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 54, 1);
  // highlighted results are centered
  assertEquals("This is a test where <b>foo</b> is highlighed and should be highlighted", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 52, 1);
  assertEquals("This is a test where <b>foo</b> is highlighed and should be", bestFragments[0]);
  bestFragments = highlighter.getBestFragments(fieldQuery, reader, docId, "field", 30, 1);
  assertEquals("a test where <b>foo</b> is highlighed", bestFragments[0]);
  reader.close();
  writer.close();
  dir.close();
}
 
Example 26
Source Project: lucene-solr   Source File: TestTermVectorsWriter.java    License: Apache License 2.0
public void testEndOffsetPositionCharAnalyzer() throws Exception {
  Directory dir = newDirectory();
  IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
  Document doc = new Document();
  FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
  customType.setStoreTermVectors(true);
  customType.setStoreTermVectorPositions(true);
  customType.setStoreTermVectorOffsets(true);
  Field f = newField("field", "abcd   ", customType);
  doc.add(f);
  doc.add(f);
  w.addDocument(doc);
  w.close();

  IndexReader r = DirectoryReader.open(dir);
  TermsEnum termsEnum = r.getTermVectors(0).terms("field").iterator();
  assertNotNull(termsEnum.next());
  PostingsEnum dpEnum = termsEnum.postings(null, PostingsEnum.ALL);
  assertEquals(2, termsEnum.totalTermFreq());

  assertTrue(dpEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS);
  dpEnum.nextPosition();
  assertEquals(0, dpEnum.startOffset());
  assertEquals(4, dpEnum.endOffset());

  dpEnum.nextPosition();
  assertEquals(8, dpEnum.startOffset());
  assertEquals(12, dpEnum.endOffset());
  assertEquals(DocIdSetIterator.NO_MORE_DOCS, dpEnum.nextDoc());

  r.close();
  dir.close();
}
 
Example 27
Source Project: lucene-solr   Source File: HighlighterPhraseTest.java    License: Apache License 2.0
public void testConcurrentPhrase() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox jumped";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();
    FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectorPositions(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, new TokenStreamConcurrent(), customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(FIELD, "fox", "jumped");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));

    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals(highlighter.getBestFragment(new TokenStreamConcurrent(),
        TEXT), highlighter.getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 28
Source Project: lucene-solr   Source File: HighlighterPhraseTest.java    License: Apache License 2.0
public void testSparsePhraseWithNoPositions() throws IOException, InvalidTokenOffsetsException {
  final String TEXT = "the fox did not jump";
  final Directory directory = newDirectory();
  final IndexWriter indexWriter = new IndexWriter(directory,
      newIndexWriterConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false)));
  try {
    final Document document = new Document();

    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setStoreTermVectorOffsets(true);
    customType.setStoreTermVectors(true);
    document.add(new Field(FIELD, TEXT, customType));
    indexWriter.addDocument(document);
  } finally {
    indexWriter.close();
  }
  final IndexReader indexReader = DirectoryReader.open(directory);
  try {
    assertEquals(1, indexReader.numDocs());
    final IndexSearcher indexSearcher = newSearcher(indexReader);
    final PhraseQuery phraseQuery = new PhraseQuery(1, FIELD, "did", "jump");
    TopDocs hits = indexSearcher.search(phraseQuery, 1);
    assertEquals(1, hits.totalHits.value);
    final Highlighter highlighter = new Highlighter(
        new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
        new QueryScorer(phraseQuery));
    final TokenStream tokenStream =
        TokenSources.getTermVectorTokenStreamOrNull(FIELD, indexReader.getTermVectors(0), -1);
    assertEquals("the fox <B>did</B> not <B>jump</B>", highlighter
        .getBestFragment(tokenStream, TEXT));
  } finally {
    indexReader.close();
    directory.close();
  }
}
 
Example 29
Source Project: lucene-solr   Source File: SolrDocumentFetcher.java    License: Apache License 2.0
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
  Predicate<String> readAsBytes = ResultContext.READASBYTES.get();
  if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) {
    final FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(fieldInfo.hasVectors());
    ft.setOmitNorms(fieldInfo.omitsNorms());
    ft.setIndexOptions(fieldInfo.getIndexOptions());
    Objects.requireNonNull(value, "String value should not be null");
    doc.add(new StoredField(fieldInfo.name, value, ft));
  } else {
    super.stringField(fieldInfo, value);
  }

}
 
Example 30
Source Project: mmseg4j-solr   Source File: UseLucene.java    License: Apache License 2.0
private Document createDoc(int id) {
	Document doc = new Document();
	FieldType ft = new FieldType();
	ft.setTokenized(true);
	ft.setStored(true);
	ft.setIndexOptions(IndexOptions.DOCS);
	doc.add(new Field("id", "" + id, ft));

	FieldType ft2 = new FieldType();
	ft2.setTokenized(true);
	ft2.setStored(true);
	ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
	doc.add(new Field("name", "echo ensh id " + id, ft2));
	return doc;
}