Java Code Examples for org.apache.lucene.search.Hits

The following examples show how to use org.apache.lucene.search.Hits. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: uyuni   Source File: NGramQueryParserTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses {@code query} with an NGramQueryParser on the "name" field, runs it
 * against the index in {@code dir} and logs the query together with every hit.
 *
 * @param dir index directory handed to the IndexSearcher
 * @param query raw query text to parse
 * @param useMust passed straight through to the NGramQueryParser constructor
 * @return the hits returned by the searcher
 * @throws Exception if opening the index, parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer ngramAnalyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", ngramAnalyzer, useMust);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    Query parsed = ngramParser.parse(query);
    Hits results = indexSearcher.search(parsed);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsed.toString());
    log.info("Hits.length() = " + results.length());

    // Dump each hit (document id and stored document) at debug level.
    int pos = 0;
    while (pos < results.length()) {
        log.debug("Document<" + results.id(pos) + "> = " + results.doc(pos));
        pos++;
    }
    return results;
}
 
Example 2
Source Project: uyuni   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Counts the leading hits whose score is at least {@code score_threshold},
 * stopping at the first hit that scores below it.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, from the start, that meet the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea for later: threshold relative to the top score instead of
    // comparing every hit against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}
 
Example 3
Source Project: uyuni   Source File: IndexManager.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Logs, at debug level, the parsed query, the index name and details for the
 * first ten hits: document, score, Lucene explanation and the field that
 * MatchingField reports as the match.
 *
 * @param indexName name of the index that was searched (only logged)
 * @param hits search results to describe
 * @param searcher searcher used to produce the explanation for each hit
 * @param q the query that produced {@code hits}
 * @param queryTerms terms handed to MatchingField
 * @throws IOException if a hit or its explanation cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Only the first ten hits are reported, so stop there instead of
    // walking the remaining hits without doing anything.
    for (int i = 0; i < hits.length() && i < 10; i++) {
        Document doc = hits.doc(i);
        float score = hits.score(i);
        int docId = hits.id(i);
        Explanation ex = searcher.explain(q, docId);
        log.debug("Looking at hit<" + i + ", " + docId + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        String fieldName = match.getFieldName();
        String fieldValue = match.getFieldValue();
        log.debug("Guessing that matched fieldName is " + fieldName + " = " +
                fieldValue);
    }
}
 
Example 4
Source Project: JPPF   Source File: CrawlerTask.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Runs the user-specified query against the content of the current page and,
 * if at least one hit is found, records the page with the sum of its hit relevances.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser queryParser = new QueryParser("contents", new StandardAnalyzer());
  final Query parsedQuery = queryParser.parse(query);

  // Index the page content in memory so the query can be evaluated against it.
  final MemoryIndex memoryIndex = new MemoryIndex();
  final PageData page = new SimpleHttpClientParser().load(new Link(url));
  memoryIndex.addField("contents", page.getData().toString(), new StandardAnalyzer());

  final Hits found = memoryIndex.createSearcher().search(parsedQuery);
  @SuppressWarnings("rawtypes")
  final Iterator hitIterator = found.iterator();
  if (!hitIterator.hasNext()) {
    return;
  }

  float total = 0f;
  do {
    final Hit current = (Hit) hitIterator.next();
    total += ((float) Math.round(current.getScore() * 1000)) / 10;
  } while (hitIterator.hasNext());
  matchedLinks.add(new LinkMatch(url, total));
}
 
Example 5
Source Project: JPPF   Source File: CrawlerTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Test searching with Lucene: parses {@code search} against the "contents"
 * field, prints the number of results and then up to {@code max} hits with
 * their relevance and stored "url" field.
 * @param search the Lucene query text.
 * @param max the maximum number of results to show.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneSearch(final String search, final int max) throws Exception {
  print("Searching for: " + search);
  print("  max results: " + max);

  final IndexSearcher is = new IndexSearcher(index);
  try {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());

    final Query query = parser.parse(search);
    final Hits hits = is.search(query);

    print("    results: " + hits.length());

    for (int i = 0; i < Math.min(hits.length(), max); i++) {
      final float relevance = ((float) Math.round(hits.score(i) * 1000)) / 10;
      final String url = hits.doc(i).getField("url").stringValue();
      print("No " + (i + 1) + " with relevance " + relevance + "% : " + url);
    }
  } finally {
    // Release the searcher even when parsing or searching throws.
    is.close();
  }
}
 
Example 6
/**
 * Parses {@code query} with an NGramQueryParser on the "name" field, runs it
 * against the index in {@code dir} and logs the query together with every hit.
 *
 * @param dir index directory handed to the IndexSearcher
 * @param query raw query text to parse
 * @param useMust passed straight through to the NGramQueryParser constructor
 * @return the hits returned by the searcher
 * @throws Exception if opening the index, parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer ngramAnalyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", ngramAnalyzer, useMust);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    Query parsed = ngramParser.parse(query);
    Hits results = indexSearcher.search(parsed);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsed.toString());
    log.info("Hits.length() = " + results.length());

    // Dump each hit (document id and stored document) at debug level.
    int pos = 0;
    while (pos < results.length()) {
        log.debug("Document<" + results.id(pos) + "> = " + results.doc(pos));
        pos++;
    }
    return results;
}
 
Example 7
Source Project: spacewalk   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Counts the leading hits whose score is at least {@code score_threshold},
 * stopping at the first hit that scores below it.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, from the start, that meet the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea for later: threshold relative to the top score instead of
    // comparing every hit against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}
 
Example 8
Source Project: spacewalk   Source File: IndexManager.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Logs, at debug level, the parsed query, the index name and details for the
 * first ten hits: document, score, Lucene explanation and the field that
 * MatchingField reports as the match.
 *
 * @param indexName name of the index that was searched (only logged)
 * @param hits search results to describe
 * @param searcher searcher used to produce the explanation for each hit
 * @param q the query that produced {@code hits}
 * @param queryTerms terms handed to MatchingField
 * @throws IOException if a hit or its explanation cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Only the first ten hits are reported, so stop there instead of
    // walking the remaining hits without doing anything.
    for (int i = 0; i < hits.length() && i < 10; i++) {
        Document doc = hits.doc(i);
        float score = hits.score(i);
        int docId = hits.id(i);
        Explanation ex = searcher.explain(q, docId);
        log.debug("Looking at hit<" + i + ", " + docId + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        String fieldName = match.getFieldName();
        String fieldValue = match.getFieldValue();
        log.debug("Guessing that matched fieldName is " + fieldName + " = " +
                fieldValue);
    }
}
 
Example 9
Source Project: RDFS   Source File: TestMixedDirectory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Searches {@code dir} for the term "apache" in field "content" and asserts
 * that exactly {@code expectedHits} documents match and that each "id" value
 * from 0 to numHits - 1 occurs exactly once among the hits.
 *
 * @param dir index directory to search
 * @param expectedHits number of hits the search must return
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // Tally how often each document id shows up in the results.
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // Close the searcher even when an assertion or read fails.
    searcher.close();
  }
}
 
Example 10
Source Project: hadoop-gpu   Source File: TestMixedDirectory.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Searches {@code dir} for the term "apache" in field "content" and asserts
 * that exactly {@code expectedHits} documents match and that each "id" value
 * from 0 to numHits - 1 occurs exactly once among the hits.
 *
 * @param dir index directory to search
 * @param expectedHits number of hits the search must return
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // Tally how often each document id shows up in the results.
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // Close the searcher even when an assertion or read fails.
    searcher.close();
  }
}
 
Example 11
Source Project: uyuni   Source File: NGramQueryParserTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Runs a series of free-form, field-qualified queries against the ngram index
 * (with useMust enabled) and checks the number of hits each one returns.
 *
 * @throws Exception if a search fails
 */
public void testFreeFormSearch() throws Exception {
    boolean useMust = true;
    String[] queries = {
        // name "spell" AND description does NOT contain "another"
        "name:spell -description:another",
        // name "virt" AND description MUST have "factory" in it
        "name:virt +description:factory",
        // name "virt", with an unqualified description clause
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        "name:virt -description:factory",
    };
    int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int n = 0; n < queries.length; n++) {
        Hits found = performSearch(this.ngramDir, queries[n], useMust);
        displayHits(found);
        assertTrue(found.length() == expectedCounts[n]);
    }
}
 
Example 12
Source Project: uyuni   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses {@code query} against the "name" field using {@code alyz} and
 * searches the index in {@code dir}.
 *
 * @param dir index directory to search
 * @param alyz analyzer given to the QueryParser
 * @param query query text to parse
 * @return the hits returned by the searcher
 * @throws Exception if opening the index, parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}
 
Example 13
Source Project: uyuni   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Logs, at info level, the position, score, "name" and "description" of every hit.
 *
 * @param hits search results to log
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int pos = 0;
    while (pos < hits.length()) {
        Document hitDoc = hits.doc(pos);
        String hitName = hitDoc.get("name");
        String hitDescription = hitDoc.get("description");
        String line = "Hit<" + pos + "> Score< " + hits.score(pos) + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">";
        log.info(line);
        pos++;
    }
}
 
Example 14
/**
 * Wrap a lucene search result with node support.
 * Stores the collaborators and creates the prefetch bit set with an initial
 * size equal to the number of hits.
 *
 * @param hits Hits
 * @param searcher Searcher
 * @param nodeService nodeService
 * @param tenantService tenant service
 * @param searchParameters SearchParameters
 * @param config - lucene config
 */
public LuceneResultSet(Hits hits, Searcher searcher, NodeService nodeService, TenantService tenantService, SearchParameters searchParameters,
        LuceneConfig config)
{
    // Search state
    this.hits = hits;
    this.searcher = searcher;
    this.searchParameters = searchParameters;
    this.config = config;

    // Supporting services
    this.nodeService = nodeService;
    this.tenantService = tenantService;

    this.prefetch = new BitSet(hits.length());
}
 
Example 15
/**
 * Runs a series of free-form, field-qualified queries against the ngram index
 * (with useMust enabled) and checks the number of hits each one returns.
 *
 * @throws Exception if a search fails
 */
public void testFreeFormSearch() throws Exception {
    boolean useMust = true;
    String[] queries = {
        // name "spell" AND description does NOT contain "another"
        "name:spell -description:another",
        // name "virt" AND description MUST have "factory" in it
        "name:virt +description:factory",
        // name "virt", with an unqualified description clause
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        "name:virt -description:factory",
    };
    int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int n = 0; n < queries.length; n++) {
        Hits found = performSearch(this.ngramDir, queries[n], useMust);
        displayHits(found);
        assertTrue(found.length() == expectedCounts[n]);
    }
}
 
Example 16
Source Project: spacewalk   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses {@code query} against the "name" field using {@code alyz} and
 * searches the index in {@code dir}.
 *
 * @param dir index directory to search
 * @param alyz analyzer given to the QueryParser
 * @param query query text to parse
 * @return the hits returned by the searcher
 * @throws Exception if opening the index, parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}
 
Example 17
Source Project: spacewalk   Source File: NGramTestSetup.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Logs, at info level, the position, score, "name" and "description" of every hit.
 *
 * @param hits search results to log
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int pos = 0;
    while (pos < hits.length()) {
        Document hitDoc = hits.doc(pos);
        String hitName = hitDoc.get("name");
        String hitDescription = hitDoc.get("description");
        String line = "Hit<" + pos + "> Score< " + hits.score(pos) + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">";
        log.info(line);
        pos++;
    }
}
 
Example 18
Source Project: RDFS   Source File: TestDistributionPolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens every shard's index, searches the combined reader and asserts that
 * "apache" no longer matches in field "content", that "hadoop" matches
 * {@code numDocsPerRun / 2} documents, and that exactly the odd "id" values
 * each appear once among those hits.
 *
 * @param shards shards whose directories are opened and searched together
 * @throws IOException if an index cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  IndexSearcher searcher = new IndexSearcher(reader);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(0, hits.length());

    hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
    assertEquals(numDocsPerRun / 2, hits.length());

    // Tally how often each document id shows up in the results.
    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      counts[Integer.parseInt(doc.get("id"))]++;
    }

    for (int i = 0; i < numDocsPerRun; i++) {
      if (i % 2 == 0) {
        assertEquals(0, counts[i]);
      } else {
        assertEquals(1, counts[i]);
      }
    }
  } finally {
    // Release both resources even when an assertion or read fails.
    try {
      searcher.close();
    } finally {
      reader.close();
    }
  }
}
 
Example 19
Source Project: hadoop-gpu   Source File: TestDistributionPolicy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Opens every shard's index, searches the combined reader and asserts that
 * "apache" no longer matches in field "content", that "hadoop" matches
 * {@code numDocsPerRun / 2} documents, and that exactly the odd "id" values
 * each appear once among those hits.
 *
 * @param shards shards whose directories are opened and searched together
 * @throws IOException if an index cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  IndexSearcher searcher = new IndexSearcher(reader);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(0, hits.length());

    hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
    assertEquals(numDocsPerRun / 2, hits.length());

    // Tally how often each document id shows up in the results.
    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
      Document doc = hits.doc(i);
      counts[Integer.parseInt(doc.get("id"))]++;
    }

    for (int i = 0; i < numDocsPerRun; i++) {
      if (i % 2 == 0) {
        assertEquals(0, counts[i]);
      } else {
        assertEquals(1, counts[i]);
      }
    }
  } finally {
    // Release both resources even when an assertion or read fails.
    try {
      searcher.close();
    } finally {
      reader.close();
    }
  }
}
 
Example 20
Source Project: uyuni   Source File: NGramQueryParserTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs several single-term queries (with useMust disabled) and checks, for
 * each, how many hits pass thresholdHits and how many hits are returned in
 * total; then checks the current top hit for a boosted kernel-hugemem query.
 *
 * @throws Exception if a search fails
 */
public void testBasicSearch() throws Exception {
    boolean useMust = false;
    String[] terms = {"spell", "aspelll", "aspell", "pel", "gtk"};
    int[] expectedAboveThreshold = {5, 4, 4, 8, 7};
    int[] expectedTotal = {16, 17, 17, 16, 17};

    for (int n = 0; n < terms.length; n++) {
        Hits found = performSearch(this.ngramDir, terms[n], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == expectedAboveThreshold[n]);
        assertTrue(found.length() == expectedTotal[n]);
    }

    // A search for kernel-hugemem should ideally rank kernel-hugemem first,
    //   but at the moment kernel-hugemem-devel is the top match.  This check
    //   pins the current behaviour as a placeholder while fixes are explored.
    String boosted = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits kernelHits = performSearch(this.ngramDir, boosted, useMust);
    displayHits(kernelHits);
    assertTrue(thresholdHits(kernelHits) == 3);
    assertTrue(kernelHits.length() == 20);
    String topName = kernelHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}
 
Example 21
Source Project: uyuni   Source File: IndexManager.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Converts hits into Result objects, choosing the Result subtype from
 * {@code indexName}, and stops at the first hit that isScoreAcceptable rejects.
 *
 * @param indexName name of the searched index; selects the Result type to build
 * @param hits search results to convert
 * @param queryTerms terms handed to MatchingField
 * @param query query text, used for score filtering, doc summaries and MatchingField
 * @param lang language passed to lookupDocSummary for doc results
 * @return the converted results, in hit order
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        // Check the score first so a rejected hit's document is never loaded.
        // Everything after the first rejected hit is dropped as well
        // (presumably hits arrive in descending score order - confirm).
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        Document doc = hits.doc(x);
        float score = hits.score(x);
        Result pr;
        if (indexName.equals(BuilderFactory.DOCS_TYPE)) {
            DocResult docResult = new DocResult(x, score, doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.equals(BuilderFactory.HARDWARE_DEVICE_TYPE)) {
            pr = new HardwareDeviceResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.SNAPSHOT_TAG_TYPE)) {
            pr = new SnapshotTagResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.SERVER_CUSTOM_INFO_TYPE)) {
            pr = new ServerCustomInfoResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.XCCDF_IDENT_TYPE)) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    score);
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    score);
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + score + ", Result = " + pr);
        }
        /**
         * matchingField will help the webUI to understand what field was responsible
         * for this match.  Later implementation should use "Explanation" to determine
         * field, for now we will simply grab one term and return its field.
         */
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // Best effort: a result without matching-field info is still returned.
            log.error("Caught exception: ", e);
        }
        // Every branch above assigns pr, so it can be added unconditionally.
        retval.add(pr);
        // NOTE(review): this check runs after the add and compares with ==, so
        // up to maxHits + 1 results can be returned - confirm that is intended.
        if (maxHits > 0 && x == maxHits) {
            break;
        }
    }
    return retval;
}
 
Example 22
Source Project: uyuni   Source File: IndexManager.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decides whether the hit at position {@code x} scores high enough to be kept.
 * Which threshold applies depends on {@code indexName} and, for errata
 * searches, on the first field name found in the query.
 *
 * @param indexName name of the searched index; selects the threshold to apply
 * @param hits search results
 * @param x position of the hit within {@code hits}
 * @param queryIn query text; its first field name selects the errata threshold
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException if the score cannot be read
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    // Doc searches are never filtered unless filterDocResults is set.
    if ((indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) &&
            (!filterDocResults)) {
        return true;
    }
    // Read the score once; every remaining branch compares against it.
    float score = hits.score(x);
    /**
     * Dropping matches which are a poor fit.
     * system searches are filtered based on "system_score_threshold"
     * other searches will return 10 best matches, then filter anything below
     * "score_threshold"
     */
    if ((indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0)) {
        if (score < system_score_threshold) {
            if (log.isDebugEnabled()) {
                log.debug("hits.score(" + x + ") is " + score);
                log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "system_score_threshold = " + system_score_threshold);
            }
            return false;
        }
    }
    else if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        // Only errata searches need the first field name, so look it up here.
        // Comparing from the literal keeps a null field name from throwing.
        String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);
        if ("name".equals(guessMainQueryTerm)) {
            if (score < errata_advisory_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + score);
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                }
                return false;
            }
        }
        else {
            if (score < errata_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + score);
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_score_threshold = " +
                        errata_score_threshold);
                }
                return false;
            }
        }
    }
    else if (((score < score_threshold) && (x > 10)) ||
            (score < 0.001)) {
        /**
         * Dropping matches which are a poor fit.
         * First term is configurable, it allows matches like spelling errors or
         * suggestions to be possible.
         * Second term is intended to get rid of hits with a near-zero score.
         */
        if (log.isDebugEnabled()) {
            log.debug("hits.score(" + x + ") is " + score);
            log.debug("Filtering out search results from " + x + " to " +
                    hits.length() + ", due to their score being below " +
                    "score_threshold = " + score_threshold);
        }
        return false;
    }
    return true;
}
 
Example 23
/**
 * Searches the type fragment index for {@code s} on the "SplittedTypeShortName"
 * field and collects the "TypeShortName" of qualifying hits.
 * The first {@code k} hits are kept when their score is at least {@code thres1};
 * later hits are kept while their score is at least {@code thres2}, and the
 * scan stops at the first later hit that falls below {@code thres2}.
 *
 * @param s query text
 * @param thres1 minimum score for a hit among the first {@code k}
 * @param thres2 minimum score for a hit after the first {@code k}
 * @param k number of leading hits judged against {@code thres1}
 * @return the collected type short names; empty when the query cannot be parsed
 * @throws Exception if the index cannot be opened or searched
 */
public  ArrayList<String> searchType(String s, double thres1, double thres2, int k) throws Exception
{
	ArrayList<String> typeNames = new ArrayList<String>();

	Query query;
	try {
		QueryParser qp = new QueryParser("SplittedTypeShortName", new StandardAnalyzer());
		query = qp.parse(s);
	} catch (ParseException e) {
		// Nothing can be searched without a parsed query; previously a null
		// query was still handed to the searcher after this failure.
		e.printStackTrace();
		return typeNames;
	}

	IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index");
	try {
		Hits hits = searcher.search(query);

		System.out.println("find " + hits.length() + " answars!");
		for (int i = 0; i < hits.length(); i++) {
			float score = hits.score(i);
			if (i < k) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				// A low score among the first k hits is skipped, not a stop condition.
				if (score >= thres1) {
					System.out.println("Score>=thres1("+thres1+") ---" + typeShortName + " : " + score);
					typeNames.add(typeShortName);
				}
			}
			else if (score >= thres2) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				typeNames.add(typeShortName);
			}
			else {
				break;
			}
		}
	} finally {
		// The searcher was never closed before; release the index on every exit path.
		searcher.close();
	}
	return typeNames;
}
 
Example 24
/**
 * Runs several single-term queries (with useMust disabled) and checks, for
 * each, how many hits pass thresholdHits and how many hits are returned in
 * total; then checks the current top hit for a boosted kernel-hugemem query.
 *
 * @throws Exception if a search fails
 */
public void testBasicSearch() throws Exception {
    boolean useMust = false;
    String[] terms = {"spell", "aspelll", "aspell", "pel", "gtk"};
    int[] expectedAboveThreshold = {5, 4, 4, 8, 7};
    int[] expectedTotal = {16, 17, 17, 16, 17};

    for (int n = 0; n < terms.length; n++) {
        Hits found = performSearch(this.ngramDir, terms[n], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == expectedAboveThreshold[n]);
        assertTrue(found.length() == expectedTotal[n]);
    }

    // A search for kernel-hugemem should ideally rank kernel-hugemem first,
    //   but at the moment kernel-hugemem-devel is the top match.  This check
    //   pins the current behaviour as a placeholder while fixes are explored.
    String boosted = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits kernelHits = performSearch(this.ngramDir, boosted, useMust);
    displayHits(kernelHits);
    assertTrue(thresholdHits(kernelHits) == 3);
    assertTrue(kernelHits.length() == 20);
    String topName = kernelHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}
 
Example 25
Source Project: spacewalk   Source File: IndexManager.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Converts hits into Result objects, choosing the Result subtype from
 * {@code indexName}, and stops at the first hit that isScoreAcceptable rejects.
 *
 * @param indexName name of the searched index; selects the Result type to build
 * @param hits search results to convert
 * @param queryTerms terms handed to MatchingField
 * @param query query text, used for score filtering, doc summaries and MatchingField
 * @param lang language passed to lookupDocSummary for doc results
 * @return the converted results, in hit order
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        // Check the score first so a rejected hit's document is never loaded.
        // Everything after the first rejected hit is dropped as well
        // (presumably hits arrive in descending score order - confirm).
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        Document doc = hits.doc(x);
        float score = hits.score(x);
        Result pr;
        if (indexName.equals(BuilderFactory.DOCS_TYPE)) {
            DocResult docResult = new DocResult(x, score, doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.equals(BuilderFactory.HARDWARE_DEVICE_TYPE)) {
            pr = new HardwareDeviceResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.SNAPSHOT_TAG_TYPE)) {
            pr = new SnapshotTagResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.SERVER_CUSTOM_INFO_TYPE)) {
            pr = new ServerCustomInfoResult(x, score, doc);
        }
        else if (indexName.equals(BuilderFactory.XCCDF_IDENT_TYPE)) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    score);
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    score);
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + score + ", Result = " + pr);
        }
        /**
         * matchingField will help the webUI to understand what field was responsible
         * for this match.  Later implementation should use "Explanation" to determine
         * field, for now we will simply grab one term and return its field.
         */
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // Best effort: a result without matching-field info is still returned.
            log.error("Caught exception: ", e);
        }
        // Every branch above assigns pr, so it can be added unconditionally.
        retval.add(pr);
        // NOTE(review): this check runs after the add and compares with ==, so
        // up to maxHits + 1 results can be returned - confirm that is intended.
        if (maxHits > 0 && x == maxHits) {
            break;
        }
    }
    return retval;
}
 
Example 26
Source Project: spacewalk   Source File: IndexManager.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decides whether the hit at position {@code x} scores high enough to be kept.
 * Which threshold applies depends on {@code indexName} and, for errata
 * searches, on the first field name found in the query.
 *
 * @param indexName name of the searched index; selects the threshold to apply
 * @param hits search results
 * @param x position of the hit within {@code hits}
 * @param queryIn query text; its first field name selects the errata threshold
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException if the score cannot be read
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    // Doc searches are never filtered unless filterDocResults is set.
    if ((indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) &&
            (!filterDocResults)) {
        return true;
    }
    // Read the score once; every remaining branch compares against it.
    float score = hits.score(x);
    /**
     * Dropping matches which are a poor fit.
     * system searches are filtered based on "system_score_threshold"
     * other searches will return 10 best matches, then filter anything below
     * "score_threshold"
     */
    if ((indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0)) {
        if (score < system_score_threshold) {
            if (log.isDebugEnabled()) {
                log.debug("hits.score(" + x + ") is " + score);
                log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "system_score_threshold = " + system_score_threshold);
            }
            return false;
        }
    }
    else if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        // Only errata searches need the first field name, so look it up here.
        // Comparing from the literal keeps a null field name from throwing.
        String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);
        if ("name".equals(guessMainQueryTerm)) {
            if (score < errata_advisory_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + score);
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                }
                return false;
            }
        }
        else {
            if (score < errata_score_threshold) {
                if (log.isDebugEnabled()) {
                    log.debug("hits.score(" + x + ") is " + score);
                    log.debug("Filtering out search results from " + x + " to " +
                        hits.length() + ", due to their score being below " +
                        "errata_score_threshold = " +
                        errata_score_threshold);
                }
                return false;
            }
        }
    }
    else if (((score < score_threshold) && (x > 10)) ||
            (score < 0.001)) {
        /**
         * Dropping matches which are a poor fit.
         * First term is configurable, it allows matches like spelling errors or
         * suggestions to be possible.
         * Second term is intended to get rid of hits with a near-zero score.
         */
        if (log.isDebugEnabled()) {
            log.debug("hits.score(" + x + ") is " + score);
            log.debug("Filtering out search results from " + x + " to " +
                    hits.length() + ", due to their score being below " +
                    "score_threshold = " + score_threshold);
        }
        return false;
    }
    return true;
}
 
Example 27
Source Project: coming   Source File: 387581_IndexTaskTest_0_t.java    License: MIT License 3 votes vote down vote up
/**
 * Searches the "contents" field for "test" and expects two matching documents.
 *
 * @throws Exception if parsing or searching fails
 */
public void testSearch() throws Exception {
    QueryParser contentsParser = new QueryParser("contents", analyzer);
    Query testQuery = contentsParser.parse("test");

    Hits found = searcher.search(testQuery);

    assertEquals("Find document(s)", 2, found.length());
}
 
Example 28
Source Project: coming   Source File: 387581_IndexTaskTest_0_s.java    License: MIT License 3 votes vote down vote up
/**
 * Searches the "contents" field for "test", using the static
 * QueryParser.parse helper, and expects two matching documents.
 *
 * @throws Exception if parsing or searching fails
 */
public void testSearch() throws Exception {
    Hits found = searcher.search(QueryParser.parse("test", "contents", analyzer));

    assertEquals("Find document(s)", 2, found.length());
}