Java Code Examples for org.apache.lucene.index.memory.MemoryIndex

The following examples show how to use org.apache.lucene.index.memory.MemoryIndex, Lucene's high-performance single-document main-memory index. These examples are extracted from open source projects; the original project and source file are noted above each example where available.
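Before the project examples, here is a minimal, self-contained sketch of the core API. The class name, field name, and sample text are illustrative only, and the snippet assumes a recent Lucene release in which QueryParser lives in the lucene-queryparser module:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class MemoryIndexDemo {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // a MemoryIndex holds exactly one document, entirely in RAM
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);

        // search() scores the query against that single document; 0.0f means no match
        Query query = new QueryParser("content", analyzer).parse("salmons");
        float score = index.search(query);
        System.out.println(score > 0.0f ? "match, score=" + score : "no match");
    }
}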
Example 1
Source Project: JPPF   Source File: CrawlerTask.java    License: Apache License 2.0
/**
 * Search for the user-specified query expression in the current page.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
  final Query q = parser.parse(query);

  final MemoryIndex index = new MemoryIndex();
  final Link link = new Link(url);
  final PageData pageData = new SimpleHttpClientParser().load(link);
  index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
  final IndexSearcher searcher = index.createSearcher();
  final Hits hits = searcher.search(q);
  @SuppressWarnings("rawtypes")
  final Iterator it = hits.iterator();
  float relevance = 0f;
  if (it.hasNext()) {
    while (it.hasNext()) {
      final Hit hit = (Hit) it.next();
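      // convert the raw score into a percentage with one decimal place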
      relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
    }
    matchedLinks.add(new LinkMatch(url, relevance));
  }
}
 
Example 2
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
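        // skip the internal _uid field when it is not indexed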
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
 
Example 3
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
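    // reuse the thread-local MemoryIndex instead of allocating one per percolate request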
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
 
Example 4
Source Project: Elasticsearch   Source File: ShardTermVectorsService.java    License: Apache License 2.0
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
 
Example 5
Source Project: Elasticsearch   Source File: PercolatorService.java    License: Apache License 2.0
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
                         PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
                         HighlightPhase highlightPhase, ClusterService clusterService,
                         AggregationPhase aggregationPhase, ScriptService scriptService,
                         MappingUpdatedAction mappingUpdatedAction) {
    super(settings);
    this.indexNameExpressionResolver = indexNameExpressionResolver;
    this.parseFieldMatcher = new ParseFieldMatcher(settings);
    this.indicesService = indicesService;
    this.pageCacheRecycler = pageCacheRecycler;
    this.bigArrays = bigArrays;
    this.clusterService = clusterService;
    this.highlightPhase = highlightPhase;
    this.aggregationPhase = aggregationPhase;
    this.scriptService = scriptService;
    this.mappingUpdatedAction = mappingUpdatedAction;
    this.sortParseElement = new SortParseElement();

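    // cap how much memory each thread-local MemoryIndex may retain for reuse (default 1 MB)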
    final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
    cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            // TODO: should we expose payloads as an option? should offsets be turned on always?
            return new ExtendedMemoryIndex(true, false, maxReuseBytes);
        }
    };
    single = new SingleDocumentPercolatorIndex(cache);
    multi = new MultiDocumentPercolatorIndex(cache);

    percolatorTypes = new IntObjectHashMap<>(6);
    percolatorTypes.put(countPercolator.id(), countPercolator);
    percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
    percolatorTypes.put(matchPercolator.id(), matchPercolator);
    percolatorTypes.put(queryPercolator.id(), queryPercolator);
    percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
    percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
 
Example 6
Source Project: lucene-solr   Source File: MemoryIndexOffsetStrategy.java    License: Apache License 2.0
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
  super(components, analyzer);
  boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
  memoryIndex = new MemoryIndex(true, storePayloads); // true == store offsets
  memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
  // preFilter for MemoryIndex
  preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
 
Example 7
Source Project: lucene-solr   Source File: DocumentBatch.java    License: Apache License 2.0
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
  MemoryIndex memoryindex = new MemoryIndex(true, true);
  for (IndexableField field : doc) {
    memoryindex.addField(field, analyzer);
  }
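  // freeze the index: no further fields can be added, and it can then be searched from multiple threads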
  memoryindex.freeze();
  reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
 
Example 8
Source Project: lucene4ir   Source File: ExampleStatsApp.java    License: Apache License 2.0
public void buildTermVector(int docid) throws IOException {

    Set<String> fieldList = new HashSet<>();
    fieldList.add("content");

    Document doc = reader.document(docid, fieldList);
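    // build a single-document in-memory index from the stored fields of the retrieved document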
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    Terms t = mr.leaves().get(0).reader().terms("content");

    if ((t != null) && (t.size() > 0)) {
        TermsEnum te = t.iterator();
        BytesRef term = null;

        System.out.println(t.size());

        while ((term = te.next()) != null) {
            System.out.println("BytesRef: " + term.utf8ToString());
            System.out.println("docFreq: " + te.docFreq());
            System.out.println("totalTermFreq: " + te.totalTermFreq());

        }

    }
}
 
Example 9
Source Project: solr-redis   Source File: TestTaggedQuery.java    License: Apache License 2.0
@Test
public void testRewrite() throws IOException {
  MemoryIndex memoryIndex = new MemoryIndex();

  TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
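  // rewrite() needs an IndexReader; the empty MemoryIndex provides one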
  Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());

  assertTrue(rewrittenQuery instanceof TermQuery);
  assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
  assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
 
Example 10
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
 
Example 11
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
    super("percolate", searcher);
    this.rootDocMemoryIndex = rootDocMemoryIndex;
}
 
Example 12
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
 
Example 13
public DocEngineSearcher(MemoryIndex memoryIndex) {
    super("percolate", memoryIndex.createSearcher());
    this.memoryIndex = memoryIndex;
}
 
Example 14
Source Project: lucene-solr   Source File: TestMultipassPresearcher.java    License: Apache License 2.0
public void testQueryBuilder() throws IOException {

    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Presearcher presearcher = createPresearcher();

    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return writer;
      }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader reader = DirectoryReader.open(writer, false, false)) {

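        // index the candidate document in memory and expose it as a LeafReader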
        MemoryIndex mindex = new MemoryIndex();
        mindex.addField("f", "this is a test document", WHITESPACE);
        LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();

        QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);

        BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
        BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(new BooleanQuery.Builder()
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build()))
                .build()))
            .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
            .build();

        assertEquals(expected, q);
      }

    }

}
 
Example 15
Source Project: lucene-solr   Source File: TestTermPresearcher.java    License: Apache License 2.0
public void testQueryBuilder() throws IOException {

    Presearcher presearcher = createPresearcher();

    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return writer;
      }
    };

    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader reader = DirectoryReader.open(writer, false, false)) {

        MemoryIndex mindex = new MemoryIndex();
        mindex.addField("f", "this is a test document", WHITESPACE);
        mindex.addField("g", "#######", ANALYZER); // analyzes away to empty field
        LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();

        QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);

        BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
        BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(new BooleanQuery.Builder()
                .add(should(new TermInSetQuery("f", new BytesRef("test")))).build()))
            .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
            .build();

        assertEquals(expected, q);

      }

    }

}
 
Example 16
Source Project: lucene4ir   Source File: ExampleStatsApp.java    License: Apache License 2.0
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {

    Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();

    Document doc = reader.document(docid, fields);

    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    for (LeafReaderContext leafContext : mr.leaves()) {

        LeafReader leaf = leafContext.reader();

        for (String field : fields) {
            Map<String, List<Integer>> termToPositions = new HashMap<>();

            Terms t = leaf.terms(field);

            if (t != null) {
                fieldToTermVector.put(field, termToPositions);
                TermsEnum tenum = t.iterator();

                BytesRef termBytes = null;
                PostingsEnum postings = null;
                while ((termBytes = tenum.next()) != null) {

                    List<Integer> positions = new ArrayList<>();
                    termToPositions.put(termBytes.utf8ToString(), positions);
                    postings = tenum.postings(postings);
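                    // a MemoryIndex contains exactly one document, so advance to doc id 0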
                    postings.advance(0);

                    for (int i = 0; i < postings.freq(); i++) {
                        positions.add(postings.nextPosition());
                    }
                }
            }
        }
    }
    return fieldToTermVector;
}