Java Code Examples for org.apache.lucene.index.memory.MemoryIndex#addField()

The following examples show how to use org.apache.lucene.index.memory.MemoryIndex#addField(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: CrawlerTask.java    From JPPF with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the user-specified query expression against the content of the current page
 * and, when at least one hit is found, records the page as a matched link.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final Query parsedQuery = new QueryParser("contents", new StandardAnalyzer()).parse(query);

  // Index the page content in memory, under the same field name the parser targets.
  final MemoryIndex memoryIndex = new MemoryIndex();
  final Link pageLink = new Link(url);
  final PageData page = new SimpleHttpClientParser().load(pageLink);
  memoryIndex.addField("contents", page.getData().toString(), new StandardAnalyzer());

  final Hits hits = memoryIndex.createSearcher().search(parsedQuery);
  @SuppressWarnings("rawtypes")
  final Iterator hitIterator = hits.iterator();
  if (!hitIterator.hasNext()) {
    // No hit at all: the page is not recorded.
    return;
  }

  float relevance = 0f;
  do {
    final Hit hit = (Hit) hitIterator.next();
    // Each hit contributes its score times 100, rounded to one decimal place.
    relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
  } while (hitIterator.hasNext());
  matchedLinks.add(new LinkMatch(url, relevance));
}
 
Example 2
Source File: MultiDocumentPercolatorIndex.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Adds the fields of the given document to the supplied memory index.
 * A field is skipped when it has no index options and is named {@code UidFieldMapper.NAME},
 * or when it yields no token stream.
 *
 * @param d           the document whose fields are indexed
 * @param analyzer    the analyzer used to obtain each field's token stream
 * @param memoryIndex the index the fields are added to
 * @return the same {@code memoryIndex} instance that was passed in
 * @throws ElasticsearchException if an {@link IOException} occurs while handling a token stream
 */
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField docField : d.getFields()) {
        final boolean unindexedUid = docField.fieldType().indexOptions() == IndexOptions.NONE
                && docField.name().equals(UidFieldMapper.NAME);
        if (unindexedUid) {
            continue;
        }
        // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
        // like the indexer does
        try (TokenStream stream = docField.tokenStream(analyzer, null)) {
            if (stream != null) {
                memoryIndex.addField(docField.name(), stream, docField.boost());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
 
Example 3
Source File: SingleDocumentPercolatorIndex.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Indexes the root document of {@code parsedDocument} into the cached memory index
 * and initializes the percolate context with a searcher over that index.
 * A field is skipped when it has no index options and is named {@code UidFieldMapper.NAME},
 * or when it yields no token stream.
 *
 * @param context        the percolate context to initialize
 * @param parsedDocument the document to index
 * @throws ElasticsearchException if resolving the analyzer or handling a token stream fails
 */
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
    final MemoryIndex index = cache.get();
    for (IndexableField docField : parsedDocument.rootDoc().getFields()) {
        final boolean unindexedUid = docField.fieldType().indexOptions() == IndexOptions.NONE
                && docField.name().equals(UidFieldMapper.NAME);
        if (unindexedUid) {
            continue;
        }
        try {
            // Resolved inside the try so that lookup failures are wrapped like any other failure.
            final Analyzer indexAnalyzer = context.mapperService()
                    .documentMapper(parsedDocument.type())
                    .mappers()
                    .indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream stream = docField.tokenStream(indexAnalyzer, null)) {
                if (stream != null) {
                    index.addField(docField.name(), stream, docField.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + docField.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(index), parsedDocument);
}
 
Example 4
Source File: ShardTermVectorsService.java    From Elasticsearch with Apache License 2.0 6 votes vote down vote up
/**
 * Builds term vectors by indexing the values of the requested fields into a
 * {@link MemoryIndex} and reading the fields back from it.
 *
 * @param getFields        the candidate fields and their values
 * @param withOffsets      flag passed to the {@link MemoryIndex} constructor
 * @param perFieldAnalyzer optional per-field analyzer settings, forwarded to {@code getAnalyzerAtField}
 * @param fields           names of the fields that were actually requested
 * @return the fields read from the in-memory index
 * @throws IOException if reading the fields from the in-memory index fails
 */
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    // Store the document in a memory index.
    final MemoryIndex memoryIndex = new MemoryIndex(withOffsets);
    for (GetField candidate : getFields) {
        final String fieldName = candidate.getName();
        // some fields are returned even when not asked for, eg. _timestamp
        if (fields.contains(fieldName)) {
            final Analyzer fieldAnalyzer = getAnalyzerAtField(fieldName, perFieldAnalyzer);
            for (Object value : candidate.getValues()) {
                memoryIndex.addField(fieldName, value.toString(), fieldAnalyzer);
            }
        }
    }
    // Read the vectors back from the index.
    final Fields termVectors = MultiFields.getFields(memoryIndex.createSearcher().getIndexReader());
    return termVectors;
}
 
Example 5
Source File: DocumentBatch.java    From lucene-solr with Apache License 2.0 5 votes vote down vote up
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
  MemoryIndex memoryindex = new MemoryIndex(true, true);
  for (IndexableField field : doc) {
    memoryindex.addField(field, analyzer);
  }
  memoryindex.freeze();
  reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
 
Example 6
Source File: TestMultipassPresearcher.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a single query on field "f", then checks that the presearcher builds the
 * expected boolean query for an in-memory document containing the term "test".
 */
public void testQueryBuilder() throws IOException {

    final IndexWriterConfig writerConfig = new IndexWriterConfig(new KeywordAnalyzer());
    final Presearcher presearcher = createPresearcher();

    final Directory directory = new ByteBuffersDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    // Hand our own writer to the monitor so the test can open a reader on it below.
    final MonitorConfiguration monitorConfig = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return indexWriter;
      }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, monitorConfig)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader queryIndexReader = DirectoryReader.open(indexWriter, false, false)) {

        final MemoryIndex docIndex = new MemoryIndex();
        docIndex.addField("f", "this is a test document", WHITESPACE);
        final LeafReader docReader = (LeafReader) docIndex.createSearcher().getIndexReader();

        final QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(queryIndexReader);

        final BooleanQuery actual = (BooleanQuery) presearcher.buildQuery(docReader, termFilter);

        // One required sub-query per suffixed field, f_0 through f_3.
        final BooleanQuery pass0 = new BooleanQuery.Builder()
            .add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build();
        final BooleanQuery pass1 = new BooleanQuery.Builder()
            .add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build();
        final BooleanQuery pass2 = new BooleanQuery.Builder()
            .add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build();
        final BooleanQuery pass3 = new BooleanQuery.Builder()
            .add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build();
        final BooleanQuery allPasses = new BooleanQuery.Builder()
            .add(must(pass0))
            .add(must(pass1))
            .add(must(pass2))
            .add(must(pass3))
            .build();
        final TermQuery anyToken = new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"));
        final BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(allPasses))
            .add(should(anyToken))
            .build();

        assertEquals(expected, actual);
      }

    }

  }
 
Example 7
Source File: TestTermPresearcher.java    From lucene-solr with Apache License 2.0 4 votes vote down vote up
/**
 * Registers a single query on field "f", then checks that the presearcher builds the
 * expected boolean query for an in-memory document with fields "f" and "g".
 */
public void testQueryBuilder() throws IOException {

    final Presearcher presearcher = createPresearcher();

    final IndexWriterConfig writerConfig = new IndexWriterConfig(new KeywordAnalyzer());
    final Directory directory = new ByteBuffersDirectory();
    final IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
    // Hand our own writer to the monitor so the test can open a reader on it below.
    final MonitorConfiguration monitorConfig = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return indexWriter;
      }
    };

    try (Monitor monitor = new Monitor(ANALYZER, presearcher, monitorConfig)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader queryIndexReader = DirectoryReader.open(indexWriter, false, false)) {

        final MemoryIndex docIndex = new MemoryIndex();
        docIndex.addField("f", "this is a test document", WHITESPACE);
        docIndex.addField("g", "#######", ANALYZER); // analyzes away to empty field
        final LeafReader docReader = (LeafReader) docIndex.createSearcher().getIndexReader();

        final QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(queryIndexReader);

        final BooleanQuery actual = (BooleanQuery) presearcher.buildQuery(docReader, termFilter);

        final BooleanQuery fieldTerms = new BooleanQuery.Builder()
            .add(should(new TermInSetQuery("f", new BytesRef("test")))).build();
        final TermQuery anyToken = new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"));
        final BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(fieldTerms))
            .add(should(anyToken))
            .build();

        assertEquals(expected, actual);

      }

    }

  }