package org.webdsl.search;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

/**
 * Special implementation of the MultiFieldQueryParser (based on the Lucene 3.1.0 version of
 * that class).
 *
 * <p>It treats a special case, namely when terms are filtered out for some(!) of the fields
 * (when using a per-field analyzer wrapper where not every field analyzer uses a stopword
 * filter) and the default operator is set to AND. In that case, {@link #parse(String)} creates
 * two sub queries and takes the union of these. One query is parsed using the normal
 * implementation in MultiFieldQueryParser, the other query does not contain the clauses for
 * terms that are filtered out for at least one field, but not for all fields.
 *
 * <p>Example where fields 'title' and 'description' use a stopword filter.
 * Query to parse: <code>the best project</code>, parsed result:
 * <pre>
 * (
 *   +(title:best description:best authors.name:best)
 *   +(title:project description:project authors.name:project)
 * )(
 *   +(authors.name:the)
 *   +(title:best description:best authors.name:best)
 *   +(title:project description:project authors.name:project)
 * )
 * </pre>
 *
 * <p><b>Known limitation (13 July 2012):</b> this parser might do harm for cases where tokens
 * are removed by some analyzer. Example: if some field 'year' uses an analyzer which only keeps
 * year tokens (for simplicity say: <code>[0-9]{4}</code>), and it is combined with other fields
 * using a standard analyzer, queries like 'eelco 2007' get transformed to
 * <pre>
 * (+(tags.name:2007 venue:2007 title:2007 authors.nameTag:2007 year:2007 abstract:2007))
 * (+(tags.name:eelco venue:eelco title:eelco authors.nameTag:eelco abstract:eelco)
 *  +(tags.name:2007 venue:2007 title:2007 authors.nameTag:2007 year:2007 abstract:2007))
 * </pre>
 * and thus will match all publications with year 2007, no matter what other terms are used!
 * A workaround is to use an analyzer at query time that keeps the other tokens in place, or not
 * to query the problematic field ('year' in the example) together with the other fields, but in
 * a separate clause instead.
 *
 * <p>The parser keeps per-parse state in instance fields that are mutated while parsing, so a
 * single instance must not be used for concurrent {@code parse} calls.
 *
 * @author Elmer van Chastelet
 */
public class SpecialMultiFieldQueryParser extends MultiFieldQueryParser {

    /**
     * Set during the first parse pass when, with AND as default operator, a term produced a
     * query for some but not all fields. Signals {@link #parse(String)} to build the union.
     */
    private boolean defaultAndnStopword = false;

    /** True while the second ("fix") pass runs; makes field-query creation fall back to the superclass. */
    private boolean inQueryFix = false;

    /**
     * Creates a parser over the given fields without boosts.
     *
     * @param matchVersion Lucene version passed on to the superclass
     * @param fields       fields a term without explicit field is expanded to
     * @param analyzer     analyzer passed on to the superclass
     */
    public SpecialMultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) {
        super(matchVersion, fields, analyzer);
    }

    /**
     * Creates a parser over the given fields with optional per-field boosts.
     *
     * @param matchVersion Lucene version passed on to the superclass
     * @param fields       fields a term without explicit field is expanded to
     * @param analyzer     analyzer passed on to the superclass
     * @param boosts       boost per field name; fields without an entry are not boosted
     */
    public SpecialMultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer,
            Map<String, Float> boosts) {
        super(matchVersion, fields, analyzer, boosts);
    }

    /**
     * Builds the query for a single term or phrase.
     *
     * <p>For a term with an explicit field, and for every term during the second ("fix") pass,
     * this simply delegates to the superclass. Otherwise the term is expanded over all
     * configured fields (applying boosts). If the default operator is AND and at least one, but
     * not every, field yields a query, the term is dropped from this pass and the parser is
     * flagged so that {@link #parse(String)} adds the regular query as an alternative.
     *
     * @param field     explicit field of the term, or {@code null} to expand over all fields
     * @param queryText text of the term
     * @param quoted    whether the term was quoted in the query string
     * @return the query for the term, or {@code null} if the term contributes no clause
     * @throws ParseException if the superclass fails to build a field query
     */
    @Override
    protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
        if (inQueryFix || field != null) {
            return super.getFieldQuery(field, queryText, quoted);
        }

        List<BooleanClause> clauses = new ArrayList<BooleanClause>();
        for (String fieldName : fields) {
            Query q = super.getFieldQuery(fieldName, queryText, quoted);
            if (q == null) {
                continue;
            }
            // If the user passed a map of boosts, apply the boost registered for this field.
            if (boosts != null) {
                Float boost = boosts.get(fieldName);
                if (boost != null) {
                    q.setBoost(boost.floatValue());
                }
            }
            clauses.add(new BooleanClause(q, Occur.SHOULD));
        }

        if (clauses.isEmpty()) {
            // No field produced a query for this term (e.g. a stopword for every field). Both
            // passes would drop it alike, so no special treatment is needed.
            return null;
        }
        if (getDefaultOperator() == AND_OPERATOR && clauses.size() != fields.length) {
            // Term was dropped for some, but not all, fields: special treatment needed in case
            // of the AND operator. Leave the term out here and request the second pass.
            defaultAndnStopword = true;
            return null;
        }
        return getBooleanQuery(clauses, true);
    }

    /**
     * Parses the query string. If the first pass had to leave out a term that only part of the
     * fields dropped (see {@link #getFieldQuery(String, String, boolean)}), the query string is
     * parsed a second time with the regular superclass behaviour and both results are combined
     * as optional clauses of one boolean query.
     *
     * @param query the query string to parse
     * @return the parsed query, or the union of the two passes
     * @throws ParseException if the query string cannot be parsed
     */
    @Override
    public Query parse(String query) throws ParseException {
        // Start every parse from a clean state so that a previous call (successful or failed)
        // cannot influence this one.
        defaultAndnStopword = false;
        inQueryFix = false;
        try {
            Query q = super.parse(query);
            if (!defaultAndnStopword) {
                return q;
            }

            inQueryFix = true;
            Query fix = super.parse(query);

            List<BooleanClause> clauses = new ArrayList<BooleanClause>();
            clauses.add(new BooleanClause(q, Occur.SHOULD));
            clauses.add(new BooleanClause(fix, Occur.SHOULD));
            return getBooleanQuery(clauses);
        } finally {
            inQueryFix = false;
            defaultAndnStopword = false;
        }
    }
}