Java Code Examples for org.apache.lucene.index.memory.MemoryIndex

The following examples show how to use org.apache.lucene.index.memory.MemoryIndex, Lucene's high-performance single-document main-memory index. These examples are extracted from open source projects; the original project and source file are noted above each example where available.
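Before the project examples, here is a minimal, self-contained sketch of the core API. The class name, field name, and sample text are illustrative only, and the snippet assumes a recent Lucene release in which QueryParser lives in the lucene-queryparser module:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class MemoryIndexDemo {
    public static void main(String[] args) throws Exception {
        StandardAnalyzer analyzer = new StandardAnalyzer();

        // a MemoryIndex holds exactly one document, entirely in RAM
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);

        // search() scores the query against that single document; 0.0f means no match
        Query query = new QueryParser("content", analyzer).parse("salmons");
        float score = index.search(query);
        System.out.println(score > 0.0f ? "match, score=" + score : "no match");
    }
}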
Example 1
Source Project: JPPF   Source File: CrawlerTask.java    License: Apache License 2.0
/**
 * Search for the user-specified query expression in the current page.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
  final Query q = parser.parse(query);

  final MemoryIndex index = new MemoryIndex();
  final Link link = new Link(url);
  final PageData pageData = new SimpleHttpClientParser().load(link);
  index.addField("contents", pageData.getData().toString(), new StandardAnalyzer());
  final IndexSearcher searcher = index.createSearcher();
  final Hits hits = searcher.search(q);
  @SuppressWarnings("rawtypes")
  final Iterator it = hits.iterator();
  float relevance = 0f;
  if (it.hasNext()) {
    while (it.hasNext()) {
      final Hit hit = (Hit) it.next();
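      // convert the raw score into a percentage with one decimal place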
      relevance += ((float) Math.round(hit.getScore() * 1000)) / 10;
    }
    matchedLinks.add(new LinkMatch(url, relevance));
  }
}
 
Example 2
MemoryIndex indexDoc(ParseContext.Document d, Analyzer analyzer, MemoryIndex memoryIndex) {
    for (IndexableField field : d.getFields()) {
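        // skip the internal _uid field when it is not indexed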
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
    return memoryIndex;
}
 
Example 3
@Override
public void prepare(PercolateContext context, ParsedDocument parsedDocument) {
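    // reuse the thread-local MemoryIndex instead of allocating one per percolate request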
    MemoryIndex memoryIndex = cache.get();
    for (IndexableField field : parsedDocument.rootDoc().getFields()) {
        if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }
        try {
            Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
            // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
            // like the indexer does
            try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                if (tokenStream != null) {
                    memoryIndex.addField(field.name(), tokenStream, field.boost());
                }
            }
        } catch (Exception e) {
            throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
        }
    }
    context.initialize(new DocEngineSearcher(memoryIndex), parsedDocument);
}
 
Example 4
Source Project: Elasticsearch   Source File: ShardTermVectorsService.java    License: Apache License 2.0
private Fields generateTermVectors(Collection<GetField> getFields, boolean withOffsets, @Nullable Map<String, String> perFieldAnalyzer, Set<String> fields)
        throws IOException {
    /* store document in memory index */
    MemoryIndex index = new MemoryIndex(withOffsets);
    for (GetField getField : getFields) {
        String field = getField.getName();
        if (fields.contains(field) == false) {
            // some fields are returned even when not asked for, eg. _timestamp
            continue;
        }
        Analyzer analyzer = getAnalyzerAtField(field, perFieldAnalyzer);
        for (Object text : getField.getValues()) {
            index.addField(field, text.toString(), analyzer);
        }
    }
    /* and read vectors from it */
    return MultiFields.getFields(index.createSearcher().getIndexReader());
}
 
Example 5
Source Project: Elasticsearch   Source File: PercolatorService.java    License: Apache License 2.0
@Inject
public PercolatorService(Settings settings, IndexNameExpressionResolver indexNameExpressionResolver, IndicesService indicesService,
                         PageCacheRecycler pageCacheRecycler, BigArrays bigArrays,
                         HighlightPhase highlightPhase, ClusterService clusterService,
                         AggregationPhase aggregationPhase, ScriptService scriptService,
                         MappingUpdatedAction mappingUpdatedAction) {
    super(settings);
    this.indexNameExpressionResolver = indexNameExpressionResolver;
    this.parseFieldMatcher = new ParseFieldMatcher(settings);
    this.indicesService = indicesService;
    this.pageCacheRecycler = pageCacheRecycler;
    this.bigArrays = bigArrays;
    this.clusterService = clusterService;
    this.highlightPhase = highlightPhase;
    this.aggregationPhase = aggregationPhase;
    this.scriptService = scriptService;
    this.mappingUpdatedAction = mappingUpdatedAction;
    this.sortParseElement = new SortParseElement();

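    // cap how much memory each thread-local MemoryIndex may retain for reuse (default 1 MB)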
    final long maxReuseBytes = settings.getAsBytesSize("indices.memory.memory_index.size_per_thread", new ByteSizeValue(1, ByteSizeUnit.MB)).bytes();
    cache = new CloseableThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            // TODO: should we expose payloads as an option? should offsets be turned on always?
            return new ExtendedMemoryIndex(true, false, maxReuseBytes);
        }
    };
    single = new SingleDocumentPercolatorIndex(cache);
    multi = new MultiDocumentPercolatorIndex(cache);

    percolatorTypes = new IntObjectHashMap<>(6);
    percolatorTypes.put(countPercolator.id(), countPercolator);
    percolatorTypes.put(queryCountPercolator.id(), queryCountPercolator);
    percolatorTypes.put(matchPercolator.id(), matchPercolator);
    percolatorTypes.put(queryPercolator.id(), queryPercolator);
    percolatorTypes.put(scoringPercolator.id(), scoringPercolator);
    percolatorTypes.put(topMatchingPercolator.id(), topMatchingPercolator);
}
 
Example 6
Source Project: lucene-solr   Source File: MemoryIndexOffsetStrategy.java    License: Apache License 2.0
public MemoryIndexOffsetStrategy(UHComponents components, Analyzer analyzer) {
  super(components, analyzer);
  boolean storePayloads = components.getPhraseHelper().hasPositionSensitivity(); // might be needed
  memoryIndex = new MemoryIndex(true, storePayloads); // true == store offsets
  memIndexLeafReader = (LeafReader) memoryIndex.createSearcher().getIndexReader(); // appears to be re-usable
  // preFilter for MemoryIndex
  preMemIndexFilterAutomaton = buildCombinedAutomaton(components);
}
 
Example 7
Source Project: lucene-solr   Source File: DocumentBatch.java    License: Apache License 2.0
private SingletonDocumentBatch(Analyzer analyzer, Document doc) {
  MemoryIndex memoryindex = new MemoryIndex(true, true);
  for (IndexableField field : doc) {
    memoryindex.addField(field, analyzer);
  }
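  // freeze the index: no further fields can be added, and it can then be searched from multiple threads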
  memoryindex.freeze();
  reader = (LeafReader) memoryindex.createSearcher().getIndexReader();
}
 
Example 8
Source Project: lucene4ir   Source File: ExampleStatsApp.java    License: Apache License 2.0
public void buildTermVector(int docid) throws IOException {

    Set<String> fieldList = new HashSet<>();
    fieldList.add("content");

    Document doc = reader.document(docid, fieldList);
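    // build a single-document in-memory index from the stored fields of the retrieved document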
    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    Terms t = mr.leaves().get(0).reader().terms("content");

    if ((t != null) && (t.size() > 0)) {
        TermsEnum te = t.iterator();
        BytesRef term = null;

        System.out.println(t.size());

        while ((term = te.next()) != null) {
            System.out.println("BytesRef: " + term.utf8ToString());
            System.out.println("docFreq: " + te.docFreq());
            System.out.println("totalTermFreq: " + te.totalTermFreq());

        }

    }
}
 
Example 9
Source Project: solr-redis   Source File: TestTaggedQuery.java    License: Apache License 2.0
@Test
public void testRewrite() throws IOException {
  MemoryIndex memoryIndex = new MemoryIndex();

  TaggedQuery taggedQuery = new TaggedQuery(new TermQuery(new Term("field", "value")), "tag");
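  // rewrite() needs an IndexReader; the empty MemoryIndex provides one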
  Query rewrittenQuery = taggedQuery.rewrite(memoryIndex.createSearcher().getTopReaderContext().reader());

  assertTrue(rewrittenQuery instanceof TermQuery);
  assertEquals("field", ((TermQuery) rewrittenQuery).getTerm().field());
  assertEquals("value", ((TermQuery) rewrittenQuery).getTerm().text());
}
 
Example 10
MultiDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
 
Example 11
private DocSearcher(IndexSearcher searcher, MemoryIndex rootDocMemoryIndex) {
    super("percolate", searcher);
    this.rootDocMemoryIndex = rootDocMemoryIndex;
}
 
Example 12
SingleDocumentPercolatorIndex(CloseableThreadLocal<MemoryIndex> cache) {
    this.cache = cache;
}
 
Example 13
public DocEngineSearcher(MemoryIndex memoryIndex) {
    super("percolate", memoryIndex.createSearcher());
    this.memoryIndex = memoryIndex;
}
 
Example 14
Source Project: lucene-solr   Source File: TestMultipassPresearcher.java    License: Apache License 2.0
public void testQueryBuilder() throws IOException {

    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Presearcher presearcher = createPresearcher();

    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return writer;
      }
    };
    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader reader = DirectoryReader.open(writer, false, false)) {

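        // index the candidate document in memory and expose it as a LeafReader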
        MemoryIndex mindex = new MemoryIndex();
        mindex.addField("f", "this is a test document", WHITESPACE);
        LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();

        QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);

        BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
        BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(new BooleanQuery.Builder()
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_0", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_1", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_2", new BytesRef("test")))).build()))
                .add(must(new BooleanQuery.Builder().add(should(new TermInSetQuery("f_3", new BytesRef("test")))).build()))
                .build()))
            .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
            .build();

        assertEquals(expected, q);
      }

    }

}
 
Example 15
Source Project: lucene-solr   Source File: TestTermPresearcher.java    License: Apache License 2.0
public void testQueryBuilder() throws IOException {

    Presearcher presearcher = createPresearcher();

    IndexWriterConfig iwc = new IndexWriterConfig(new KeywordAnalyzer());
    Directory dir = new ByteBuffersDirectory();
    IndexWriter writer = new IndexWriter(dir, iwc);
    MonitorConfiguration config = new MonitorConfiguration(){
      @Override
      public IndexWriter buildIndexWriter() {
        return writer;
      }
    };

    try (Monitor monitor = new Monitor(ANALYZER, presearcher, config)) {

      monitor.register(new MonitorQuery("1", parse("f:test")));

      try (IndexReader reader = DirectoryReader.open(writer, false, false)) {

        MemoryIndex mindex = new MemoryIndex();
        mindex.addField("f", "this is a test document", WHITESPACE);
        mindex.addField("g", "#######", ANALYZER); // analyzes away to empty field
        LeafReader docsReader = (LeafReader) mindex.createSearcher().getIndexReader();

        QueryIndex.QueryTermFilter termFilter = new QueryIndex.QueryTermFilter(reader);

        BooleanQuery q = (BooleanQuery) presearcher.buildQuery(docsReader, termFilter);
        BooleanQuery expected = new BooleanQuery.Builder()
            .add(should(new BooleanQuery.Builder()
                .add(should(new TermInSetQuery("f", new BytesRef("test")))).build()))
            .add(should(new TermQuery(new Term("__anytokenfield", "__ANYTOKEN__"))))
            .build();

        assertEquals(expected, q);

      }

    }

}
 
Example 16
Source Project: lucene4ir   Source File: ExampleStatsApp.java    License: Apache License 2.0
public Map<String, Map<String, List<Integer>>> buildTermVectorWithPosition(int docid, Set<String> fields) throws IOException {

    Map<String, Map<String, List<Integer>>> fieldToTermVector = new HashMap<>();

    Document doc = reader.document(docid, fields);

    MemoryIndex mi = MemoryIndex.fromDocument(doc, new StandardAnalyzer());
    IndexReader mr = mi.createSearcher().getIndexReader();

    for (LeafReaderContext leafContext : mr.leaves()) {

        LeafReader leaf = leafContext.reader();

        for (String field : fields) {
            Map<String, List<Integer>> termToPositions = new HashMap<>();

            Terms t = leaf.terms(field);

            if (t != null) {
                fieldToTermVector.put(field, termToPositions);
                TermsEnum tenum = t.iterator();

                BytesRef termBytes = null;
                PostingsEnum postings = null;
                while ((termBytes = tenum.next()) != null) {

                    List<Integer> positions = new ArrayList<>();
                    termToPositions.put(termBytes.utf8ToString(), positions);
                    postings = tenum.postings(postings);
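                    // a MemoryIndex contains exactly one document, so advance to doc id 0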
                    postings.advance(0);

                    for (int i = 0; i < postings.freq(); i++) {
                        positions.add(postings.nextPosition());
                    }
                }
            }
        }
    }
    return fieldToTermVector;
}