org.apache.lucene.search.Hits Java Examples

The following examples show how to use org.apache.lucene.search.Hits. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: NGramQueryParserTest.java    From uyuni with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses {@code query} with an {@link NGramQueryParser} bound to the "name" field,
 * runs it against the index in {@code dir} and logs the query plus every hit.
 *
 * @param dir index directory to search
 * @param query raw query text
 * @param useMust forwarded unchanged to the NGramQueryParser constructor
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer ngramAnalyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", ngramAnalyzer, useMust);
    // NOTE(review): the searcher is never closed here; presumably the returned
    // Hits still needs it -- confirm before adding a close().
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    Query parsedQuery = ngramParser.parse(query);
    Hits found = indexSearcher.search(parsedQuery);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsedQuery.toString());
    log.info("Hits.length() = " + found.length());

    int position = 0;
    while (position < found.length()) {
        log.debug("Document<" + found.id(position) + "> = " + found.doc(position));
        position++;
    }
    return found;
}
 
Example #2
Source File: TestMixedDirectory.java    From hadoop-gpu with Apache License 2.0 6 votes vote down vote up
/**
 * Searches {@code dir} for documents whose "content" field contains "apache" and
 * asserts that exactly {@code expectedHits} documents match, and that every id in
 * {@code 0 .. expectedHits-1} (read from the "id" field) occurs exactly once.
 *
 * @param dir the index directory to verify
 * @param expectedHits the number of matching documents expected
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // docs[id] counts how often each document id was returned
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // release the searcher even when an assertion or read fails
    searcher.close();
  }
}
 
Example #3
Source File: NGramTestSetup.java    From uyuni with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Counts the leading hits whose score is at least {@code score_threshold}.
 * Counting stops at the first hit that falls below the threshold.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, starting at index 0, that meet the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea kept from the original author: thresholding could be made relative,
    // i.e. take the top score and filter on the distance from it instead of
    // comparing against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}
 
Example #4
Source File: TestMixedDirectory.java    From RDFS with Apache License 2.0 6 votes vote down vote up
/**
 * Searches {@code dir} for documents whose "content" field contains "apache" and
 * asserts that exactly {@code expectedHits} documents match, and that every id in
 * {@code 0 .. expectedHits-1} (read from the "id" field) occurs exactly once.
 *
 * @param dir the index directory to verify
 * @param expectedHits the number of matching documents expected
 * @throws IOException if the index cannot be read
 */
private void verify(Directory dir, int expectedHits) throws IOException {
  IndexSearcher searcher = new IndexSearcher(dir);
  try {
    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    int numHits = hits.length();

    assertEquals(expectedHits, numHits);

    // docs[id] counts how often each document id was returned
    int[] docs = new int[numHits];
    for (int i = 0; i < numHits; i++) {
      Document hit = hits.doc(i);
      docs[Integer.parseInt(hit.get("id"))]++;
    }
    for (int i = 0; i < numHits; i++) {
      assertEquals(1, docs[i]);
    }
  } finally {
    // release the searcher even when an assertion or read fails
    searcher.close();
  }
}
 
Example #5
Source File: IndexManager.java    From spacewalk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Writes debug output for the first (at most 10) hits: the document, its score,
 * the searcher's explanation and the field that {@code MatchingField} reports
 * as the match.
 *
 * @param indexName name of the index that was searched (only logged)
 * @param hits search results to describe
 * @param searcher searcher used to explain each hit
 * @param q the parsed query
 * @param queryTerms terms of the query, passed to MatchingField
 * @throws IOException if a hit or its explanation cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    final int maxExplained = 10;
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Stop after maxExplained hits instead of spinning over the rest of the
    // result set doing nothing.
    for (int i = 0; i < maxExplained && i < hits.length(); i++) {
        Document doc = hits.doc(i);
        float score = hits.score(i);
        int docId = hits.id(i);
        Explanation ex = searcher.explain(q, docId);
        log.debug("Looking at hit<" + i + ", " + docId + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        String fieldName = match.getFieldName();
        String fieldValue = match.getFieldValue();
        log.debug("Guessing that matched fieldName is " + fieldName + " = " +
                fieldValue);
    }
}
 
Example #6
Source File: CrawlerTask.java    From JPPF with Apache License 2.0 6 votes vote down vote up
/**
 * Search for the user-specified query expression in the current page. When the
 * page produces at least one hit, the page url is added to {@code matchedLinks}
 * together with the summed relevance of all hits.
 * @throws Exception if an error occurs.
 */
private void search() throws Exception {
  final QueryParser contentsParser = new QueryParser("contents", new StandardAnalyzer());
  final Query parsedQuery = contentsParser.parse(query);

  // index the page content in memory, then query that single-document index
  final MemoryIndex pageIndex = new MemoryIndex();
  final Link pageLink = new Link(url);
  final PageData page = new SimpleHttpClientParser().load(pageLink);
  pageIndex.addField("contents", page.getData().toString(), new StandardAnalyzer());
  final IndexSearcher pageSearcher = pageIndex.createSearcher();
  final Hits found = pageSearcher.search(parsedQuery);
  @SuppressWarnings("rawtypes")
  final Iterator hitIterator = found.iterator();
  if (!hitIterator.hasNext()) {
    return;
  }

  float relevance = 0f;
  do {
    final Hit current = (Hit) hitIterator.next();
    // score scaled by 1000, rounded, then divided by 10
    relevance += ((float) Math.round(current.getScore() * 1000)) / 10;
  } while (hitIterator.hasNext());
  matchedLinks.add(new LinkMatch(url, relevance));
}
 
Example #7
Source File: CrawlerTest.java    From JPPF with Apache License 2.0 6 votes vote down vote up
/**
 * Test searching with Lucene: parses {@code search} against the "contents" field,
 * prints the total number of results and then the url and relevance of at most
 * {@code max} hits.
 * @param search the Lucene query text.
 * @param max the maximum number of results to show.
 * @throws Exception if an error is thrown while executing.
 */
public static void luceneSearch(final String search, final int max) throws Exception {
  print("Searching for: " + search);
  print("  max results: " + max);

  final IndexSearcher is = new IndexSearcher(index);
  try {
    final QueryParser parser = new QueryParser("contents", new StandardAnalyzer());

    final Query query = parser.parse(search);
    final Hits hits = is.search(query);

    print("    results: " + hits.length());

    // the bound does not change while printing, so compute it once
    final int shown = Math.min(hits.length(), max);
    for (int i = 0; i < shown; i++) {
      final float relevance = ((float) Math.round(hits.score(i) * 1000)) / 10;
      final String url = hits.doc(i).getField("url").stringValue();
      print("No " + (i + 1) + " with relevance " + relevance + "% : " + url);
    }
  } finally {
    // close the searcher even when parsing or searching throws
    is.close();
  }
}
 
Example #8
Source File: NGramTestSetup.java    From spacewalk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Counts the leading hits whose score is at least {@code score_threshold}.
 * Counting stops at the first hit that falls below the threshold.
 *
 * @param hits search results to inspect
 * @return number of consecutive hits, starting at index 0, that meet the threshold
 * @throws IOException if a score cannot be read
 */
protected int thresholdHits(Hits hits) throws IOException {
    // Idea kept from the original author: thresholding could be made relative,
    // i.e. take the top score and filter on the distance from it instead of
    // comparing against an absolute value.
    int accepted = 0;
    while (accepted < hits.length() && hits.score(accepted) >= score_threshold) {
        accepted++;
    }
    return accepted;
}
 
Example #9
Source File: NGramQueryParserTest.java    From spacewalk with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Parses {@code query} with an {@link NGramQueryParser} bound to the "name" field,
 * runs it against the index in {@code dir} and logs the query plus every hit.
 *
 * @param dir index directory to search
 * @param query raw query text
 * @param useMust forwarded unchanged to the NGramQueryParser constructor
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, String query, boolean useMust)
    throws Exception {

    NGramAnalyzer ngramAnalyzer = new NGramAnalyzer(min_ngram, max_ngram);
    NGramQueryParser ngramParser = new NGramQueryParser("name", ngramAnalyzer, useMust);
    // NOTE(review): the searcher is never closed here; presumably the returned
    // Hits still needs it -- confirm before adding a close().
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    Query parsedQuery = ngramParser.parse(query);
    Hits found = indexSearcher.search(parsedQuery);

    log.info("Original Query = " + query);
    log.info("Parsed Query = " + parsedQuery.toString());
    log.info("Hits.length() = " + found.length());

    int position = 0;
    while (position < found.length()) {
        log.debug("Document<" + found.id(position) + "> = " + found.doc(position));
        position++;
    }
    return found;
}
 
Example #10
Source File: NGramQueryParserTest.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks the number of hits returned for free-form queries that combine the
 * "name" and "description" fields using the -, +, implicit, OR and AND forms.
 */
public void testFreeFormSearch() throws Exception {
    final boolean useMust = true;
    final String[] queries = {
        // name "spell" AND description does NOT contain "another"
        "name:spell -description:another",
        // name "virt" AND description MUST have "factory" in it
        "name:virt +description:factory",
        // original note: grab all packages with name "virt"
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        "name:virt -description:factory"
    };
    final int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int q = 0; q < queries.length; q++) {
        Hits found = performSearch(this.ngramDir, queries[q], useMust);
        displayHits(found);
        assertTrue(found.length() == expectedCounts[q]);
    }
}
 
Example #11
Source File: TestDistributionPolicy.java    From hadoop-gpu with Apache License 2.0 5 votes vote down vote up
/**
 * Opens every shard, searches the combined index and asserts that no document
 * contains "apache" in its "content" field, that {@code numDocsPerRun / 2}
 * documents contain "hadoop", and that those are exactly the odd-numbered ids.
 *
 * @param shards the shards whose directories make up the index
 * @throws IOException if a shard cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  try {
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
      Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
      assertEquals(0, hits.length());

      hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
      assertEquals(numDocsPerRun / 2, hits.length());

      // counts[id] records how often each document id was returned
      int[] counts = new int[numDocsPerRun];
      for (int i = 0; i < hits.length(); i++) {
        Document doc = hits.doc(i);
        counts[Integer.parseInt(doc.get("id"))]++;
      }

      for (int i = 0; i < numDocsPerRun; i++) {
        if (i % 2 == 0) {
          assertEquals(0, counts[i]);
        } else {
          assertEquals(1, counts[i]);
        }
      }
    } finally {
      // release the searcher even when an assertion fails
      searcher.close();
    }
  } finally {
    reader.close();
  }
}
 
Example #12
Source File: TestDistributionPolicy.java    From RDFS with Apache License 2.0 5 votes vote down vote up
/**
 * Opens every shard, searches the combined index and asserts that no document
 * contains "apache" in its "content" field, that {@code numDocsPerRun / 2}
 * documents contain "hadoop", and that those are exactly the odd-numbered ids.
 *
 * @param shards the shards whose directories make up the index
 * @throws IOException if a shard cannot be opened or read
 */
private void verify(Shard[] shards) throws IOException {
  // verify the index
  IndexReader[] readers = new IndexReader[shards.length];
  for (int i = 0; i < shards.length; i++) {
    Directory dir =
        new FileSystemDirectory(fs, new Path(shards[i].getDirectory()),
            false, conf);
    readers[i] = IndexReader.open(dir);
  }

  IndexReader reader = new MultiReader(readers);
  try {
    IndexSearcher searcher = new IndexSearcher(reader);
    try {
      Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
      assertEquals(0, hits.length());

      hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
      assertEquals(numDocsPerRun / 2, hits.length());

      // counts[id] records how often each document id was returned
      int[] counts = new int[numDocsPerRun];
      for (int i = 0; i < hits.length(); i++) {
        Document doc = hits.doc(i);
        counts[Integer.parseInt(doc.get("id"))]++;
      }

      for (int i = 0; i < numDocsPerRun; i++) {
        if (i % 2 == 0) {
          assertEquals(0, counts[i]);
        } else {
          assertEquals(1, counts[i]);
        }
      }
    } finally {
      // release the searcher even when an assertion fails
      searcher.close();
    }
  } finally {
    reader.close();
  }
}
 
Example #13
Source File: NGramTestSetup.java    From spacewalk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Logs one info line per hit containing its index, score and the values of its
 * "name" and "description" fields.
 *
 * @param hits search results to log
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int index = 0;
    while (index < hits.length()) {
        Document hitDoc = hits.doc(index);
        String hitName = hitDoc.get("name");
        String hitDescription = hitDoc.get("description");
        float hitScore = hits.score(index);
        log.info("Hit<" + index + "> Score< " + hitScore + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">");
        index++;
    }
}
 
Example #14
Source File: NGramTestSetup.java    From spacewalk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses {@code query} against the "name" field using {@code alyz} and searches
 * the index in {@code dir} with the result.
 *
 * @param dir index directory to search
 * @param alyz analyzer handed to the query parser
 * @param query raw query text
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}
 
Example #15
Source File: NGramQueryParserTest.java    From spacewalk with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks the number of hits returned for free-form queries that combine the
 * "name" and "description" fields using the -, +, implicit, OR and AND forms.
 */
public void testFreeFormSearch() throws Exception {
    final boolean useMust = true;
    final String[] queries = {
        // name "spell" AND description does NOT contain "another"
        "name:spell -description:another",
        // name "virt" AND description MUST have "factory" in it
        "name:virt +description:factory",
        // original note: grab all packages with name "virt"
        "name:virt description:factory",
        "name:virt OR description:factory",
        "name:virt AND description:factory",
        "name:virt -description:factory"
    };
    final int[] expectedCounts = {2, 2, 4, 4, 1, 2};

    for (int q = 0; q < queries.length; q++) {
        Hits found = performSearch(this.ngramDir, queries[q], useMust);
        displayHits(found);
        assertTrue(found.length() == expectedCounts[q]);
    }
}
 
Example #16
Source File: LuceneResultSet.java    From alfresco-repository with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Wraps a Lucene search result with node support. All arguments are stored
 * as-is, and the prefetch bit set is created with an initial size equal to the
 * number of hits.
 *
 * @param hits Hits
 * @param searcher Searcher
 * @param nodeService nodeService
 * @param tenantService tenant service
 * @param searchParameters SearchParameters
 * @param config - lucene config
 */
public LuceneResultSet(Hits hits, Searcher searcher, NodeService nodeService, TenantService tenantService, SearchParameters searchParameters,
        LuceneConfig config)
{
    // search state
    this.hits = hits;
    this.searcher = searcher;
    this.searchParameters = searchParameters;
    this.config = config;

    // collaborating services
    this.nodeService = nodeService;
    this.tenantService = tenantService;

    this.prefetch = new BitSet(hits.length());
}
 
Example #17
Source File: IndexManager.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Writes debug output for the first (at most 10) hits: the document, its score,
 * the searcher's explanation and the field that {@code MatchingField} reports
 * as the match.
 *
 * @param indexName name of the index that was searched (only logged)
 * @param hits search results to describe
 * @param searcher searcher used to explain each hit
 * @param q the parsed query
 * @param queryTerms terms of the query, passed to MatchingField
 * @throws IOException if a hit or its explanation cannot be read
 */
private void debugExplainResults(String indexName, Hits hits, IndexSearcher searcher,
        Query q, Set<Term> queryTerms)
    throws IOException {
    final int maxExplained = 10;
    log.debug("Parsed Query is " + q.toString());
    log.debug("Looking at index:  " + indexName);
    // Stop after maxExplained hits instead of spinning over the rest of the
    // result set doing nothing.
    for (int i = 0; i < maxExplained && i < hits.length(); i++) {
        Document doc = hits.doc(i);
        float score = hits.score(i);
        int docId = hits.id(i);
        Explanation ex = searcher.explain(q, docId);
        log.debug("Looking at hit<" + i + ", " + docId + ", " + score +
                ">: " + doc);
        log.debug("Explanation: " + ex);
        MatchingField match = new MatchingField(q.toString(), doc, queryTerms);
        String fieldName = match.getFieldName();
        String fieldValue = match.getFieldValue();
        log.debug("Guessing that matched fieldName is " + fieldName + " = " +
                fieldValue);
    }
}
 
Example #18
Source File: NGramTestSetup.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Logs one info line per hit containing its index, score and the values of its
 * "name" and "description" fields.
 *
 * @param hits search results to log
 * @throws IOException if a hit cannot be read
 */
protected void displayHits(Hits hits) throws IOException {
    int index = 0;
    while (index < hits.length()) {
        Document hitDoc = hits.doc(index);
        String hitName = hitDoc.get("name");
        String hitDescription = hitDoc.get("description");
        float hitScore = hits.score(index);
        log.info("Hit<" + index + "> Score< " + hitScore + ">  name = <" +
                hitName + "> description = <" + hitDescription + ">");
        index++;
    }
}
 
Example #19
Source File: NGramTestSetup.java    From uyuni with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses {@code query} against the "name" field using {@code alyz} and searches
 * the index in {@code dir} with the result.
 *
 * @param dir index directory to search
 * @param alyz analyzer handed to the query parser
 * @param query raw query text
 * @return the hits produced by the search
 * @throws Exception if parsing or searching fails
 */
public Hits performSearch(Directory dir, Analyzer alyz, String query) throws Exception {
    QueryParser nameParser = new QueryParser("name", alyz);
    IndexSearcher indexSearcher = new IndexSearcher(dir);
    return indexSearcher.search(nameParser.parse(query));
}
 
Example #20
Source File: NGramQueryParserTest.java    From spacewalk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a series of single-term searches and checks, for each one, both the
 * number of hits accepted by {@code thresholdHits} and the total hit count, then
 * checks the current top hit of a boosted "kernel-hugemem" query.
 */
public void testBasicSearch() throws Exception {
    final boolean useMust = false;
    final String[] terms = {"spell", "aspelll", "aspell", "pel", "gtk"};
    final int[] aboveThreshold = {5, 4, 4, 8, 7};
    final int[] totalHits = {16, 17, 17, 16, 17};

    for (int t = 0; t < terms.length; t++) {
        Hits found = performSearch(this.ngramDir, terms[t], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == aboveThreshold[t]);
        assertTrue(found.length() == totalHits[t]);
    }

    // A search for kernel-hugemem should return kernel-hugemem as the top hit,
    //   but currently kernel-hugemem-devel is matching instead.  This check
    //   is a placeholder while ways to fix that are explored.
    String boostedQuery = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits boostedHits = performSearch(this.ngramDir, boostedQuery, useMust);
    displayHits(boostedHits);
    assertTrue(thresholdHits(boostedHits) == 3);
    assertTrue(boostedHits.length() == 20);
    String topName = boostedHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}
 
Example #21
Source File: SearchInTypeShortName.java    From gAnswer with BSD 3-Clause "New" or "Revised" License 4 votes vote down vote up
/**
 * Searches the type-fragment index for {@code s} on the "SplittedTypeShortName"
 * field and collects the "TypeShortName" of the hits that score high enough.
 * The first {@code k} hits are kept when their score is at least {@code thres1};
 * a low score among them does not stop the scan. Later hits are kept while their
 * score is at least {@code thres2}, and the scan stops at the first one below it.
 *
 * @param s the query text
 * @param thres1 minimum score for the first {@code k} hits
 * @param thres2 minimum score for hits after the first {@code k}
 * @param k number of leading hits judged against {@code thres1}
 * @return the accepted type short names; empty when the query cannot be parsed
 * @throws Exception if the index cannot be opened or read
 */
public  ArrayList<String> searchType(String s, double thres1, double thres2, int k) throws Exception
{
	ArrayList<String> typeNames = new ArrayList<String>();

	IndexSearcher searcher = new IndexSearcher(Globals.localPath+"data/DBpedia2016/lucene/type_fragment_index");
	try {
		Query query;
		try {
			QueryParser qp = new QueryParser("SplittedTypeShortName", new StandardAnalyzer());
			query = qp.parse(s);
		} catch (ParseException e) {
			// An unparsable query used to fall through and search with a null
			// query; report it and return no matches instead.
			e.printStackTrace();
			return typeNames;
		}

		Hits hits = searcher.search(query);
		System.out.println("find " + hits.length() + " answars!");
		for (int i = 0; i < hits.length(); i++) {
			float score = hits.score(i);
			if (i < k) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				if (score >= thres1) {
					System.out.println("Score>=thres1("+thres1+") ---" + typeShortName + " : " + score);
					typeNames.add(typeShortName);
				}
			} else if (score >= thres2) {
				String typeShortName = hits.doc(i).get("TypeShortName");
				System.out.println("<<<<---" + typeShortName + " : " + score);
				typeNames.add(typeShortName);
			} else {
				break;
			}
		}
	} finally {
		// all hits are consumed above, so the searcher can be released here
		searcher.close();
	}
	return typeNames;
}
 
Example #22
Source File: IndexManager.java    From spacewalk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Converts hits into {@code Result} objects until a hit fails
 * {@code isScoreAcceptable} or, when {@code maxHits} is positive, until
 * {@code maxHits} results have been collected. The concrete result type depends
 * on {@code indexName}.
 *
 * @param indexName name of the index that was searched
 * @param hits search results to convert
 * @param queryTerms terms of the query, passed to MatchingField
 * @param query the query text
 * @param lang language passed to the doc-summary lookup
 * @return the converted results, in hit order
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        // load the document only once the hit is known to be kept
        Document doc = hits.doc(x);
        Result pr;
        if (indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) {
            DocResult docResult = new DocResult(x, hits.score(x), doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0) {
            pr = new HardwareDeviceResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) {
            pr = new SnapshotTagResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) {
            pr = new ServerCustomInfoResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.XCCDF_IDENT_TYPE) == 0) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    hits.score(x));
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    hits.score(x));
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + hits.score(x) + ", Result = " + pr);
        }
        // matchingField helps the webUI understand which field was responsible
        // for this match.  A later implementation should use "Explanation" to
        // determine the field; for now one term is picked and its field returned.
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // best effort: the result is still returned without matching-field info
            log.error("Caught exception: ", e);
        }
        retval.add(pr);
        // The old check (x == maxHits) fired one iteration late and returned
        // maxHits + 1 results; cap on the number actually collected.
        if (maxHits > 0 && retval.size() >= maxHits) {
            break;
        }
    }
    return retval;
}
 
Example #23
Source File: IndexManager.java    From spacewalk with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decides whether the hit at position {@code x} scores high enough to be kept.
 * Doc-index hits are always accepted while {@code filterDocResults} is off.
 * System-type indexes are checked against {@code system_score_threshold} and
 * errata against one of the two errata thresholds. Every other index rejects
 * scores below 0.001, and scores below {@code score_threshold} once {@code x}
 * is greater than 10.
 *
 * @param indexName name of the index that was searched
 * @param hits search results
 * @param x position of the hit to judge
 * @param queryIn the query text, used to guess the main query field
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException if the score cannot be read
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);

    boolean docsIndex = indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0;
    if (docsIndex && !filterDocResults) {
        return true;
    }

    // System searches are filtered on "system_score_threshold".
    boolean systemIndex =
            (indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0);
    if (systemIndex) {
        if (hits.score(x) < system_score_threshold) {
            logScoreRejection(hits, x,
                    "system_score_threshold = " + system_score_threshold);
            return false;
        }
        return true;
    }

    if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        if (guessMainQueryTerm.compareTo("name") == 0) {
            if (hits.score(x) < errata_advisory_score_threshold) {
                logScoreRejection(hits, x,
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                return false;
            }
            return true;
        }
        if (hits.score(x) < errata_score_threshold) {
            logScoreRejection(hits, x,
                    "errata_score_threshold = " + errata_score_threshold);
            return false;
        }
        return true;
    }

    // First term is configurable and leaves room for matches such as spelling
    // errors or suggestions; second term drops hits with a negligible score.
    boolean belowConfigured = (hits.score(x) < score_threshold) && (x > 10);
    if (belowConfigured || (hits.score(x) < 0.001)) {
        logScoreRejection(hits, x, "score_threshold = " + score_threshold);
        return false;
    }
    return true;
}

/** Emits the two debug lines that describe why results from position x on are dropped. */
private void logScoreRejection(Hits hits, int x, String thresholdDescription)
    throws IOException {
    if (log.isDebugEnabled()) {
        log.debug("hits.score(" + x + ") is " + hits.score(x));
        log.debug("Filtering out search results from " + x + " to " +
                hits.length() + ", due to their score being below " +
                thresholdDescription);
    }
}
 
Example #24
Source File: IndexManager.java    From uyuni with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Decides whether the hit at position {@code x} scores high enough to be kept.
 * Doc-index hits are always accepted while {@code filterDocResults} is off.
 * System-type indexes are checked against {@code system_score_threshold} and
 * errata against one of the two errata thresholds. Every other index rejects
 * scores below 0.001, and scores below {@code score_threshold} once {@code x}
 * is greater than 10.
 *
 * @param indexName name of the index that was searched
 * @param hits search results
 * @param x position of the hit to judge
 * @param queryIn the query text, used to guess the main query field
 * @return  true - score is acceptable
 *          false - score is NOT acceptable
 * @throws IOException if the score cannot be read
 */
private boolean isScoreAcceptable(String indexName, Hits hits, int x, String queryIn)
    throws IOException {
    String guessMainQueryTerm = MatchingField.getFirstFieldName(queryIn);

    boolean docsIndex = indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0;
    if (docsIndex && !filterDocResults) {
        return true;
    }

    // System searches are filtered on "system_score_threshold".
    boolean systemIndex =
            (indexName.compareTo(BuilderFactory.SERVER_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) ||
            (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) ||
            (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0);
    if (systemIndex) {
        if (hits.score(x) < system_score_threshold) {
            logScoreRejection(hits, x,
                    "system_score_threshold = " + system_score_threshold);
            return false;
        }
        return true;
    }

    if (indexName.compareTo(BuilderFactory.ERRATA_TYPE) == 0) {
        if (guessMainQueryTerm.compareTo("name") == 0) {
            if (hits.score(x) < errata_advisory_score_threshold) {
                logScoreRejection(hits, x,
                        "errata_advisory_score_threshold = " +
                        errata_advisory_score_threshold);
                return false;
            }
            return true;
        }
        if (hits.score(x) < errata_score_threshold) {
            logScoreRejection(hits, x,
                    "errata_score_threshold = " + errata_score_threshold);
            return false;
        }
        return true;
    }

    // First term is configurable and leaves room for matches such as spelling
    // errors or suggestions; second term drops hits with a negligible score.
    boolean belowConfigured = (hits.score(x) < score_threshold) && (x > 10);
    if (belowConfigured || (hits.score(x) < 0.001)) {
        logScoreRejection(hits, x, "score_threshold = " + score_threshold);
        return false;
    }
    return true;
}

/** Emits the two debug lines that describe why results from position x on are dropped. */
private void logScoreRejection(Hits hits, int x, String thresholdDescription)
    throws IOException {
    if (log.isDebugEnabled()) {
        log.debug("hits.score(" + x + ") is " + hits.score(x));
        log.debug("Filtering out search results from " + x + " to " +
                hits.length() + ", due to their score being below " +
                thresholdDescription);
    }
}
 
Example #25
Source File: IndexManager.java    From uyuni with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Converts hits into {@code Result} objects until a hit fails
 * {@code isScoreAcceptable} or, when {@code maxHits} is positive, until
 * {@code maxHits} results have been collected. The concrete result type depends
 * on {@code indexName}.
 *
 * @param indexName name of the index that was searched
 * @param hits search results to convert
 * @param queryTerms terms of the query, passed to MatchingField
 * @param query the query text
 * @param lang language passed to the doc-summary lookup
 * @return the converted results, in hit order
 * @throws IOException if a hit cannot be read
 */
private List<Result> processHits(String indexName, Hits hits, Set<Term> queryTerms,
        String query, String lang)
    throws IOException {
    List<Result> retval = new ArrayList<Result>();
    for (int x = 0; x < hits.length(); x++) {
        if (!isScoreAcceptable(indexName, hits, x, query)) {
            break;
        }
        // load the document only once the hit is known to be kept
        Document doc = hits.doc(x);
        Result pr;
        if (indexName.compareTo(BuilderFactory.DOCS_TYPE) == 0) {
            DocResult docResult = new DocResult(x, hits.score(x), doc);
            String summary = lookupDocSummary(doc, query, lang);
            if (summary != null) {
                docResult.setSummary(summary);
            }
            pr = docResult;
        }
        else if (indexName.compareTo(BuilderFactory.HARDWARE_DEVICE_TYPE) == 0) {
            pr = new HardwareDeviceResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SNAPSHOT_TAG_TYPE)  == 0) {
            pr = new SnapshotTagResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.SERVER_CUSTOM_INFO_TYPE) == 0) {
            pr = new ServerCustomInfoResult(x, hits.score(x), doc);
        }
        else if (indexName.compareTo(BuilderFactory.XCCDF_IDENT_TYPE) == 0) {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("identifier").stringValue(),
                    hits.score(x));
        }
        else {
            pr = new Result(x,
                    doc.getField("id").stringValue(),
                    doc.getField("name").stringValue(),
                    hits.score(x));
        }
        if (log.isDebugEnabled()) {
            log.debug("Hit[" + x + "] Score = " + hits.score(x) + ", Result = " + pr);
        }
        // matchingField helps the webUI understand which field was responsible
        // for this match.  A later implementation should use "Explanation" to
        // determine the field; for now one term is picked and its field returned.
        try {
            MatchingField match = new MatchingField(query, doc, queryTerms);
            pr.setMatchingField(match.getFieldName());
            pr.setMatchingFieldValue(match.getFieldValue());
            log.info("hit[" + x + "] matchingField is being set to: <" +
                pr.getMatchingField() + "> based on passed in query field.  " +
                "matchingFieldValue = " + pr.getMatchingFieldValue());
        }
        catch (Exception e) {
            // best effort: the result is still returned without matching-field info
            log.error("Caught exception: ", e);
        }
        retval.add(pr);
        // The old check (x == maxHits) fired one iteration late and returned
        // maxHits + 1 results; cap on the number actually collected.
        if (maxHits > 0 && retval.size() >= maxHits) {
            break;
        }
    }
    return retval;
}
 
Example #26
Source File: NGramQueryParserTest.java    From uyuni with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Runs a series of single-term searches and checks, for each one, both the
 * number of hits accepted by {@code thresholdHits} and the total hit count, then
 * checks the current top hit of a boosted "kernel-hugemem" query.
 */
public void testBasicSearch() throws Exception {
    final boolean useMust = false;
    final String[] terms = {"spell", "aspelll", "aspell", "pel", "gtk"};
    final int[] aboveThreshold = {5, 4, 4, 8, 7};
    final int[] totalHits = {16, 17, 17, 16, 17};

    for (int t = 0; t < terms.length; t++) {
        Hits found = performSearch(this.ngramDir, terms[t], useMust);
        displayHits(found);
        assertTrue(thresholdHits(found) == aboveThreshold[t]);
        assertTrue(found.length() == totalHits[t]);
    }

    // A search for kernel-hugemem should return kernel-hugemem as the top hit,
    //   but currently kernel-hugemem-devel is matching instead.  This check
    //   is a placeholder while ways to fix that are explored.
    String boostedQuery = "((name:kernel-hugemem)^2 (description:kernel-hugemem) " +
        "(filename:kernel-hugemem))";
    Hits boostedHits = performSearch(this.ngramDir, boostedQuery, useMust);
    displayHits(boostedHits);
    assertTrue(thresholdHits(boostedHits) == 3);
    assertTrue(boostedHits.length() == 20);
    String topName = boostedHits.doc(0).get("name");
    assertTrue(topName.compareToIgnoreCase("kernel-hugemem-devel") == 0);
}
 
Example #27
Source File: 387581_IndexTaskTest_0_s.java    From coming with MIT License 3 votes vote down vote up
/**
 * Searches the "contents" field for "test" and expects exactly two matching
 * documents.
 *
 * @throws Exception if the query cannot be parsed or the search fails
 */
public void testSearch() throws Exception {
    // Use a QueryParser instance rather than the deprecated static
    // QueryParser.parse(String, String, Analyzer).
    Query query = new QueryParser("contents", analyzer).parse("test");

    Hits hits = searcher.search(query);

    assertEquals("Find document(s)", 2, hits.length());
}
 
Example #28
Source File: 387581_IndexTaskTest_0_t.java    From coming with MIT License 3 votes vote down vote up
/**
 * Searches the "contents" field for "test" and expects exactly two matching
 * documents.
 *
 * @throws Exception if the query cannot be parsed or the search fails
 */
public void testSearch() throws Exception {
    final QueryParser contentsParser = new QueryParser("contents", analyzer);
    final Hits found = searcher.search(contentsParser.parse("test"));
    assertEquals("Find document(s)", 2, found.length());
}