Java Code Examples for org.jsoup.select.Selector

The following are the top-voted examples showing how to use org.jsoup.select.Selector. These examples are extracted from open-source projects. You can vote up the examples you like, and your votes will be used in our system to generate more good examples.
Example 1
Project: zongtui-webcrawler   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Inspects the token at the head of {@code tq} and hands off to the matching handler.
 * The checks run in a fixed order and the first one that matches wins.
 *
 * @throws Selector.SelectorParseException if no handler recognises the upcoming token
 */
private void findElements() {
    if (tq.matches("@")) {
        consumeAttribute();
        return;
    }
    if (tq.matches("*")) {
        allElements();
        return;
    }
    // Something shaped like name(args), optionally followed by more input.
    if (tq.matchesRegex("\\w+\\(.*\\).*")) {
        consumeOperatorFunction();
        return;
    }
    if (tq.matchesWord()) {
        byTag();
        return;
    }
    // A purely numeric bracket such as [3] is tested before the generic '[' case below.
    if (tq.matchesRegex("\\[\\d+\\]")) {
        byNth();
        return;
    }
    if (tq.matches("[")) {
        String predicateText = tq.chompBalanced('[', ']');
        evals.add(consumePredicates(predicateText));
        return;
    }
    // Nothing matched: report the query and what is left of it.
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
 
Example 2
Project: zongtui-webcrawler   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Resolves a predicate function call found at the head of {@code predicatesQueue}.
 * <p>
 * Each key of {@code FUNCTION_MAPPING} is tried in iteration order; the first key that can be
 * chomped from the queue selects the function. Its parenthesised argument text is then split
 * into parameters and stripped of quotes.
 *
 * @param predicatesQueue queue positioned at the function name
 * @return the evaluator built by the mapped function when the first parameter starts with
 *         {@code @} (the {@code @} is removed before the call); {@code null} otherwise
 * @throws Selector.SelectorParseException if no key of {@code FUNCTION_MAPPING} matches
 */
private Evaluator byFunction(XTokenQueue predicatesQueue) {
    for (Map.Entry<String, FunctionEvaluator> candidate : FUNCTION_MAPPING.entrySet()) {
        if (!predicatesQueue.matchChomp(candidate.getKey())) {
            continue;
        }
        String rawArguments = predicatesQueue.chompBalanced('(', ')');
        List<String> arguments = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(rawArguments));

        // NOTE(review): get(0) is unchecked - confirm parseFuncionParams never yields an empty list.
        String firstArgument = arguments.get(0);
        if (!firstArgument.startsWith("@")) {
            return null;
        }
        arguments.set(0, firstArgument.substring(1));
        return candidate.getValue().call(arguments.toArray(new String[0]));
    }

    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
}
 
Example 3
Project: zongtui-webcrawler   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Parses a {@code regex(...)} function call and stores the resulting operator in
 * {@code elementOperator}.
 * <p>
 * Accepted argument lists (after quote trimming):
 * <ul>
 * <li>{@code regex(expr)}</li>
 * <li>{@code regex(@attr, expr)}</li>
 * <li>{@code regex(expr, group)}</li>
 * <li>{@code regex(@attr, expr, group)}</li>
 * </ul>
 * The first {@code "regex(".length()} characters of {@code remainder} are skipped without
 * being checked, so the caller is expected to pass text that starts with {@code regex(}.
 *
 * @param remainder the full function text; must end with {@code )}
 * @throws Selector.SelectorParseException if the argument list matches none of the forms above,
 *         including a three-argument call whose first argument does not start with {@code @}
 * @throws NumberFormatException if a {@code group} argument is not a valid integer
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
    String argumentText = remainder.substring("regex(".length(), remainder.length() - 1);
    List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));
    boolean attributeFirst = !params.isEmpty() && params.get(0).startsWith("@");

    if (params.size() == 1) {
        elementOperator = new ElementOperator.Regex(params.get(0));
    } else if (params.size() == 2 && attributeFirst) {
        elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1));
    } else if (params.size() == 2) {
        elementOperator = new ElementOperator.Regex(params.get(0), null, Integer.parseInt(params.get(1)));
    } else if (params.size() == 3 && attributeFirst) {
        // Only strip the leading '@' when it is really there; previously the first character
        // was dropped unconditionally, mangling a name such as "href" into "ref".
        elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1), Integer.parseInt(params.get(2)));
    } else {
        // Pass the user text as a format argument, never as part of the format string:
        // a '%' inside the query must not be interpreted as a format specifier.
        throw new Selector.SelectorParseException("Unknown usage for regex()%s", remainder);
    }
}
 
Example 4
Project: xsoup   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Looks at the next token in {@code tq} and delegates to the handler responsible for it.
 * Handlers are tried in order; the first test that succeeds decides.
 *
 * @throws Selector.SelectorParseException if the upcoming token fits none of the known forms
 */
private void findElements() {
    if (tq.matches("@")) {
        consumeAttribute();
        return;
    }
    if (tq.matches("*")) {
        allElements();
        return;
    }
    // Text of the form name(args), possibly with trailing input.
    if (tq.matchesRegex("\\w+\\(.*\\).*")) {
        consumeOperatorFunction();
        return;
    }
    if (tq.matchesWord()) {
        byTag();
        return;
    }
    // Digits-only bracket ([n]) takes priority over the general predicate bracket.
    if (tq.matchesRegex("\\[\\d+\\]")) {
        byNth();
        return;
    }
    if (tq.matches("[")) {
        String predicateText = tq.chompBalanced('[', ']');
        evals.add(consumePredicates(predicateText));
        return;
    }
    // Unhandled token.
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
 
Example 5
Project: xsoup   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Matches the head of {@code predicatesQueue} against the function names registered in
 * {@code FUNCTION_MAPPING} and builds the evaluator for the first name that matches.
 * <p>
 * After the name is chomped, the balanced {@code (...)} text is split into parameters and
 * quotes are trimmed from them.
 *
 * @param predicatesQueue queue positioned at the function name
 * @return the mapped function's evaluator when the first parameter begins with {@code @}
 *         (the prefix is stripped before the call); {@code null} when it does not
 * @throws Selector.SelectorParseException if none of the registered names matches
 */
private Evaluator byFunction(XTokenQueue predicatesQueue) {
    for (Map.Entry<String, FunctionEvaluator> registered : FUNCTION_MAPPING.entrySet()) {
        if (!predicatesQueue.matchChomp(registered.getKey())) {
            continue;
        }
        String argumentText = predicatesQueue.chompBalanced('(', ')');
        List<String> args = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));

        // NOTE(review): assumes at least one parameter is present - verify against parseFuncionParams.
        String head = args.get(0);
        if (!head.startsWith("@")) {
            return null;
        }
        args.set(0, head.substring(1));
        return registered.getValue().call(args.toArray(new String[0]));
    }

    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
}
 
Example 6
Project: xsoup   File: XPathParser.java   Source Code and License 6 votes vote down vote up
/**
 * Interprets a {@code regex(...)} function call and assigns the matching
 * {@code ElementOperator.Regex} to {@code elementOperator}.
 * <p>
 * Supported argument lists (quotes are trimmed first):
 * <ul>
 * <li>{@code regex(expr)}</li>
 * <li>{@code regex(@attr, expr)}</li>
 * <li>{@code regex(expr, group)}</li>
 * <li>{@code regex(@attr, expr, group)}</li>
 * </ul>
 * The leading {@code "regex(".length()} characters of {@code remainder} are cut off without
 * verification; callers are expected to pass text beginning with {@code regex(}.
 *
 * @param remainder the complete function text; must end with {@code )}
 * @throws Selector.SelectorParseException if the arguments fit none of the supported forms,
 *         including three arguments where the first lacks the {@code @} prefix
 * @throws NumberFormatException if a {@code group} argument cannot be parsed as an integer
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
    String argumentText = remainder.substring("regex(".length(), remainder.length() - 1);
    List<String> params = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));
    boolean attributeFirst = !params.isEmpty() && params.get(0).startsWith("@");

    if (params.size() == 1) {
        elementOperator = new ElementOperator.Regex(params.get(0));
    } else if (params.size() == 2 && attributeFirst) {
        elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1));
    } else if (params.size() == 2) {
        elementOperator = new ElementOperator.Regex(params.get(0), null, Integer.parseInt(params.get(1)));
    } else if (params.size() == 3 && attributeFirst) {
        // The '@' is verified before being stripped, matching the two-argument branch;
        // without the check a plain name silently lost its first character.
        elementOperator = new ElementOperator.Regex(params.get(1), params.get(0).substring(1), Integer.parseInt(params.get(2)));
    } else {
        // User text goes in as a format argument so a '%' in the query cannot be
        // mistaken for a format specifier by the exception's message formatting.
        throw new Selector.SelectorParseException("Unknown usage for regex()%s", remainder);
    }
}
 
Example 7
Project: sipsoup   File: CacheCSSFunction.java   Source Code and License 5 votes vote down vote up
/**
 * Parses one bracketed attribute selector from {@code tq} and appends the corresponding
 * evaluator to {@code evals}.
 * <p>
 * The bracket contents are split into a key (everything up to one of {@code AttributeEvals})
 * and an optional operator plus value. With no operator, a key starting with {@code ^} yields
 * an attribute-name-prefix evaluator, any other key an attribute-presence evaluator.
 *
 * @throws Selector.SelectorParseException if text follows the key but does not start with
 *         one of {@code =}, {@code !=}, {@code ^=}, {@code $=}, {@code *=}, {@code ~=}
 */
private void byAttribute() {
    TokenQueue contents = new TokenQueue(tq.chompBalanced('[', ']'));
    String key = contents.consumeToAny(AttributeEvals);
    Validate.notEmpty(key);
    contents.consumeWhitespace();

    final Evaluator attributeEval;
    if (contents.isEmpty()) {
        // No operator: test for the attribute itself.
        if (key.startsWith("^")) {
            attributeEval = new Evaluator.AttributeStarting(key.substring(1));
        } else {
            attributeEval = new Evaluator.Attribute(key);
        }
    } else if (contents.matchChomp("=")) {
        attributeEval = new Evaluator.AttributeWithValue(key, contents.remainder());
    } else if (contents.matchChomp("!=")) {
        attributeEval = new Evaluator.AttributeWithValueNot(key, contents.remainder());
    } else if (contents.matchChomp("^=")) {
        attributeEval = new Evaluator.AttributeWithValueStarting(key, contents.remainder());
    } else if (contents.matchChomp("$=")) {
        attributeEval = new Evaluator.AttributeWithValueEnding(key, contents.remainder());
    } else if (contents.matchChomp("*=")) {
        attributeEval = new Evaluator.AttributeWithValueContaining(key, contents.remainder());
    } else if (contents.matchChomp("~=")) {
        // The value is compiled as a regular expression.
        attributeEval = new Evaluator.AttributeWithValueMatching(key, Pattern.compile(contents.remainder()));
    } else {
        throw new Selector.SelectorParseException(
                "Could not parse attribute query '%s': unexpected token at '%s'", query, contents.remainder());
    }
    evals.add(attributeEval);
}
 
Example 8
Project: sipsoup   File: CacheCSSFunction.java   Source Code and License 5 votes vote down vote up
/**
 * Parses the argument of an nth-style pseudo selector (the text up to the next {@code )}) into
 * an {@code (a, b)} pair and appends the matching evaluator to {@code evals}.
 * <p>
 * {@code odd} gives (2, 1) and {@code even} gives (2, 0). Otherwise the argument is matched
 * against {@code NTH_AB} and then {@code NTH_B}; a leading {@code +} is stripped before
 * numbers are parsed.
 *
 * @param backwards selects the "last" evaluator variants when {@code true}
 * @param ofType    selects the "of type" evaluator variants when {@code true}
 * @throws Selector.SelectorParseException if the argument matches none of the accepted forms
 */
private void cssNthChild(boolean backwards, boolean ofType) {
    String spec = tq.chompTo(")").trim().toLowerCase();
    Matcher stepAndOffset = NTH_AB.matcher(spec);
    Matcher offsetOnly = NTH_B.matcher(spec);
    final int a;
    final int b;
    if ("odd".equals(spec)) {
        a = 2;
        b = 1;
    } else if ("even".equals(spec)) {
        a = 2;
        b = 0;
    } else if (stepAndOffset.matches()) {
        // Group 3 decides whether a step is present, but the number is read from group 1.
        if (stepAndOffset.group(3) != null) {
            a = Integer.parseInt(stepAndOffset.group(1).replaceFirst("^\\+", ""));
        } else {
            a = 1;
        }
        if (stepAndOffset.group(4) != null) {
            b = Integer.parseInt(stepAndOffset.group(4).replaceFirst("^\\+", ""));
        } else {
            b = 0;
        }
    } else if (offsetOnly.matches()) {
        a = 0;
        b = Integer.parseInt(offsetOnly.group().replaceFirst("^\\+", ""));
    } else {
        throw new Selector.SelectorParseException("Could not parse nth-index '%s': unexpected format", spec);
    }

    final Evaluator nthEval;
    if (ofType) {
        if (backwards) {
            nthEval = new Evaluator.IsNthLastOfType(a, b);
        } else {
            nthEval = new Evaluator.IsNthOfType(a, b);
        }
    } else {
        if (backwards) {
            nthEval = new Evaluator.IsNthLastChild(a, b);
        } else {
            nthEval = new Evaluator.IsNthChild(a, b);
        }
    }
    evals.add(nthEval);
}
 
Example 9
Project: zongtui-webcrawler   File: XPathParser.java   Source Code and License 5 votes vote down vote up
/**
 * Parses the text of a predicate (the part between {@code [} and {@code ]}, or inside a
 * parenthesised group) into a single evaluator.
 * <p>
 * The text is read as operands separated by {@code and} / {@code or}. An operand is a
 * parenthesised sub-expression (parsed recursively), an {@code @attribute} test, or a
 * function call. Each operand is fed to an {@code EvaluatorStack} together with the operator
 * that preceded it.
 *
 * @param queue the predicate text to parse
 * @return the evaluator left on top of the stack after {@code mergeOr()}
 * @throws IllegalArgumentException if two operands follow each other without an operator
 * @throws Selector.SelectorParseException if an operand has none of the recognised forms
 */
private Evaluator consumePredicates(String queue) {
    XTokenQueue pending = new XTokenQueue(queue);
    EvaluatorStack stack = new EvaluatorStack();
    Operation pendingOperation = null;

    // Whitespace is skipped once up front and again after every token.
    for (pending.consumeWhitespace(); !pending.isEmpty(); pending.consumeWhitespace()) {
        if (pending.matchChomp("and")) {
            pendingOperation = Operation.AND;
            continue;
        }
        if (pending.matchChomp("or")) {
            pendingOperation = Operation.OR;
            continue;
        }

        // An operand: every operand except the first must have been preceded by an operator.
        if (pendingOperation == null && stack.size() > 0) {
            throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", pending.remainder()));
        }

        Evaluator operand;
        if (pending.matches("(")) {
            operand = consumePredicates(pending.chompBalanced('(', ')'));
        } else if (pending.matches("@")) {
            operand = byAttribute(pending);
        } else if (pending.matchesRegex("\\w+.*")) {
            operand = byFunction(pending);
        } else {
            throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, pending.remainder());
        }
        stack.calc(operand, pendingOperation);
        // The operator has been used up by this operand.
        pendingOperation = null;
    }

    stack.mergeOr();
    return stack.peek();
}
 
Example 10
Project: xsoup   File: XPathParser.java   Source Code and License 5 votes vote down vote up
/**
 * Turns predicate text into one evaluator.
 * <p>
 * The input is scanned as a sequence of operands joined by {@code and} / {@code or}.
 * Operands are either a parenthesised group (handled by a recursive call), an
 * {@code @attribute} test, or a function call; each is passed to an {@code EvaluatorStack}
 * along with the operator seen just before it.
 *
 * @param queue the predicate text to parse
 * @return whatever {@code peek()} returns from the stack once {@code mergeOr()} has run
 * @throws IllegalArgumentException if an operand directly follows another operand
 * @throws Selector.SelectorParseException if an operand cannot be recognised
 */
private Evaluator consumePredicates(String queue) {
    XTokenQueue tokens = new XTokenQueue(queue);
    EvaluatorStack operands = new EvaluatorStack();
    Operation operator = null;

    // Skip whitespace before the first token and after each one.
    for (tokens.consumeWhitespace(); !tokens.isEmpty(); tokens.consumeWhitespace()) {
        if (tokens.matchChomp("and")) {
            operator = Operation.AND;
            continue;
        }
        if (tokens.matchChomp("or")) {
            operator = Operation.OR;
            continue;
        }

        // From here on we are reading an operand; only the first may come without an operator.
        if (operator == null && operands.size() > 0) {
            throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", tokens.remainder()));
        }

        Evaluator parsed;
        if (tokens.matches("(")) {
            parsed = consumePredicates(tokens.chompBalanced('(', ')'));
        } else if (tokens.matches("@")) {
            parsed = byAttribute(tokens);
        } else if (tokens.matchesRegex("\\w+.*")) {
            parsed = byFunction(tokens);
        } else {
            throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tokens.remainder());
        }
        operands.calc(parsed, operator);
        // Reset so the next operand needs its own operator.
        operator = null;
    }

    operands.mergeOr();
    return operands.peek();
}
 
Example 11
Project: sipsoup   File: CacheCSSFunction.java   Source Code and License 4 votes vote down vote up
/**
 * Recognises the simple selector at the head of {@code tq} and handles it, trying each
 * supported form in order.
 * <p>
 * Some prefixes are consumed here with {@code matchChomp} before their handler runs; others
 * are only tested with {@code matches} / {@code matchesWord}, so the handler is called with
 * the queue still positioned at the prefix.
 *
 * @throws Selector.SelectorParseException if the upcoming text matches none of the forms
 */
private void findElements() {
    if (tq.matchChomp("#")) {
        byId();
    } else if (tq.matchChomp(".")) {
        byClass();
    } else if (tq.matchesWord()) {
        byTag();
    } else if (tq.matches("[")) {
        byAttribute();
    } else if (tq.matchChomp("*")) {
        allElements();
    } else if (tq.matchChomp(":lt(")) {
        // Index-based pseudo selectors.
        indexLessThan();
    } else if (tq.matchChomp(":gt(")) {
        indexGreaterThan();
    } else if (tq.matchChomp(":eq(")) {
        indexEquals();
    } else if (tq.matches(":has(")) {
        // The following pseudo selectors are left unconsumed for their handlers.
        has();
    } else if (tq.matches(":contains(")) {
        contains(false);
    } else if (tq.matches(":containsOwn(")) {
        contains(true);
    } else if (tq.matches(":matches(")) {
        matches(false);
    } else if (tq.matches(":matchesOwn(")) {
        matches(true);
    } else if (tq.matches(":not(")) {
        not();
    } else if (tq.matchChomp(":nth-child(")) {
        // nth-style selectors: arguments are (backwards, ofType).
        cssNthChild(false, false);
    } else if (tq.matchChomp(":nth-last-child(")) {
        cssNthChild(true, false);
    } else if (tq.matchChomp(":nth-of-type(")) {
        cssNthChild(false, true);
    } else if (tq.matchChomp(":nth-last-of-type(")) {
        cssNthChild(true, true);
    } else if (tq.matchChomp(":first-child")) {
        // Argument-free structural pseudo selectors map directly to an evaluator.
        evals.add(new Evaluator.IsFirstChild());
    } else if (tq.matchChomp(":last-child")) {
        evals.add(new Evaluator.IsLastChild());
    } else if (tq.matchChomp(":first-of-type")) {
        evals.add(new Evaluator.IsFirstOfType());
    } else if (tq.matchChomp(":last-of-type")) {
        evals.add(new Evaluator.IsLastOfType());
    } else if (tq.matchChomp(":only-child")) {
        evals.add(new Evaluator.IsOnlyChild());
    } else if (tq.matchChomp(":only-of-type")) {
        evals.add(new Evaluator.IsOnlyOfType());
    } else if (tq.matchChomp(":empty")) {
        evals.add(new Evaluator.IsEmpty());
    } else if (tq.matchChomp(":root")) {
        evals.add(new Evaluator.IsRoot());
    } else {
        // Unhandled token.
        throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query,
                tq.remainder());
    }
}
 
Example 12
Project: zongtui-webcrawler   File: XPathParser.java   Source Code and License 4 votes vote down vote up
/**
 * Parses an {@code @attribute} predicate from {@code cq} and returns its evaluator.
 * <p>
 * A leading {@code @} is chomped if present, then the key is read up to the first comparison
 * operator. With nothing after the key, {@code *} yields an "any attribute" evaluator and any
 * other key an attribute-presence evaluator. Otherwise the operator selects the evaluator,
 * and the value has its quotes trimmed.
 * <p>
 * Special case: {@code @class=value} with no space in the value becomes a class evaluator
 * rather than an exact attribute-value comparison.
 *
 * @param cq queue positioned at the attribute predicate
 * @return the evaluator for the predicate
 * @throws Selector.SelectorParseException if text follows the key but is not introduced by
 *         one of {@code =}, {@code !=}, {@code ^=}, {@code $=}, {@code *=}, {@code ~=}
 */
private Evaluator byAttribute(XTokenQueue cq) {
    cq.matchChomp("@");
    String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~=");
    Validate.notEmpty(key);
    cq.consumeWhitespace();

    // No operator: presence tests only.
    if (cq.isEmpty()) {
        if ("*".equals(key)) {
            return new XEvaluators.HasAnyAttribute();
        }
        return new Evaluator.Attribute(key);
    }

    if (cq.matchChomp("=")) {
        String rawValue = chompEqualValue(cq);
        boolean classKey = key.equals("class");
        String expected = XTokenQueue.trimQuotes(rawValue);
        // A single class name is matched as one class out of all the element's classes.
        if (classKey && !expected.contains(" ")) {
            return new Evaluator.Class(expected);
        }
        return new Evaluator.AttributeWithValue(key, expected);
    }
    if (cq.matchChomp("!=")) {
        return new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("^=")) {
        return new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("$=")) {
        return new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("*=")) {
        return new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("~=")) {
        // The value is compiled as a regular expression.
        return new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq))));
    }
    throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq));
}
 
Example 13
Project: xsoup   File: XPathParser.java   Source Code and License 4 votes vote down vote up
/**
 * Builds the evaluator for an {@code @attribute} predicate read from {@code cq}.
 * <p>
 * An optional leading {@code @} is consumed and the key is taken up to the first comparison
 * operator. When nothing follows the key, the result tests for any attribute ({@code *}) or
 * for the named attribute. When an operator follows, the quote-trimmed value is compared
 * using the evaluator for that operator.
 * <p>
 * {@code @class=value} is treated specially: if the value contains no space, a class
 * evaluator is returned instead of an exact attribute-value comparison.
 *
 * @param cq queue positioned at the attribute predicate
 * @return the evaluator for the predicate
 * @throws Selector.SelectorParseException if the text after the key does not begin with
 *         one of {@code =}, {@code !=}, {@code ^=}, {@code $=}, {@code *=}, {@code ~=}
 */
private Evaluator byAttribute(XTokenQueue cq) {
    cq.matchChomp("@");
    String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~=");
    Validate.notEmpty(key);
    cq.consumeWhitespace();

    // Bare key, no comparison.
    if (cq.isEmpty()) {
        if ("*".equals(key)) {
            return new XEvaluators.HasAnyAttribute();
        }
        return new Evaluator.Attribute(key);
    }

    if (cq.matchChomp("=")) {
        String quoted = chompEqualValue(cq);
        boolean isClassAttribute = key.equals("class");
        String wanted = XTokenQueue.trimQuotes(quoted);
        // Lets a query pick one class out of an element's full class list.
        if (isClassAttribute && !wanted.contains(" ")) {
            return new Evaluator.Class(wanted);
        }
        return new Evaluator.AttributeWithValue(key, wanted);
    }
    if (cq.matchChomp("!=")) {
        return new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("^=")) {
        return new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("$=")) {
        return new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("*=")) {
        return new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("~=")) {
        // Regular-expression match on the attribute value.
        return new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq))));
    }
    throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq));
}
 
Example 14
Project: common   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Runs a {@link Selector} CSS query with this element as the starting context and returns
 * every match. The result may contain this element itself as well as any of its children.
 * <p>
 * Because several filters can be combined in one query, this is generally more powerful than
 * the DOM-style {@code getElementBy*} methods. For example:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - {@code a} tags that have an {@code href} attribute
 * <li>{@code el.select("a[href*=example.com]")} - links that (loosely) point to example.com
 * </ul>
 * <p>
 * The query syntax is documented in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the matching elements; empty if nothing matches
 * @throws Selector.SelectorParseException (unchecked) if the query is not valid
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    final Elements matched = Selector.select(cssQuery, this);
    return matched;
}
 
Example 15
Project: astor   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Finds all elements matching a {@link Selector} CSS query, searching from this element.
 * This element and any of its children are candidates for the result.
 * <p>
 * A single query can combine multiple filters, which generally makes this method more
 * powerful than the DOM-type {@code getElementBy*} methods, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - links, i.e. {@code a} tags carrying an {@code href} attribute
 * <li>{@code el.select("a[href*=example.com]")} - links loosely pointing to example.com
 * </ul>
 * <p>
 * See {@link org.jsoup.select.Selector} for the query syntax.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the elements that match; an empty collection when none do
 * @throws Selector.SelectorParseException (unchecked) when the CSS query is invalid
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    final Elements matched = Selector.select(cssQuery, this);
    return matched;
}
 
Example 16
Project: astor   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Evaluates a {@link Selector} CSS query against this element and its children and returns
 * the matches. This element is the starting context and may itself be part of the result.
 * <p>
 * Queries can chain several filters, so this is generally a more powerful tool than the
 * DOM-type {@code getElementBy*} methods. Examples:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - every {@code a} tag with an {@code href} attribute
 * <li>{@code el.select("a[href*=example.com]")} - links pointing, loosely, to example.com
 * </ul>
 * <p>
 * Refer to {@link org.jsoup.select.Selector} for the full query syntax.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return matching elements, or an empty collection if there are none
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    final Elements hits = Selector.select(cssQuery, this);
    return hits;
}
 
Example 17
Project: astor   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Returns the first element matching a {@link Selector} CSS query, searching from this element.
 * <p>The result is effectively that of {@code element.select(query).first()}, but the query
 * stops executing at the first hit, which makes this form more efficient.</p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first matching element, or <b>{@code null}</b> when nothing matches
 */
public Element selectFirst(String cssQuery) {
    final Element firstMatch = Selector.selectFirst(cssQuery, this);
    return firstMatch;
}
 
Example 18
Project: astor   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Collects the elements that satisfy a {@link Selector} CSS query, using this element as the
 * starting context. Matches may include this element or any of its children.
 * <p>
 * Since one query can combine multiple filters, this is generally more powerful than the
 * DOM-type {@code getElementBy*} methods. For instance:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - links ({@code a} tags that have an {@code href} attribute)
 * <li>{@code el.select("a[href*=example.com]")} - links pointing to example.com (loosely)
 * </ul>
 * <p>
 * The supported query syntax is described in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the matched elements; empty when no element matches
 * @throws Selector.SelectorParseException (unchecked) if the CSS query cannot be parsed
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    final Elements selected = Selector.select(cssQuery, this);
    return selected;
}
 
Example 19
Project: astor   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Finds the first element that matches a {@link Selector} CSS query, with this element as the
 * starting context.
 * <p>Effectively equivalent to {@code element.select(query).first()}, but more efficient
 * because query execution ends as soon as the first hit is found.</p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first match, or <b>{@code null}</b> if no element matches
 */
public Element selectFirst(String cssQuery) {
    final Element found = Selector.selectFirst(cssQuery, this);
    return found;
}
 
Example 20
Project: JabRefAutocomplete   File: Element.java   Source Code and License 2 votes vote down vote up
/**
 * Runs a {@link Selector} query with this element as the starting context and returns the
 * matches, which may include this element or any of its children.
 * <p>
 * Multiple filters can be combined in one query, so this is generally more powerful than the
 * DOM-type {@code getElementBy*} methods, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - links ({@code a} tags with an {@code href} attribute)
 * <li>{@code el.select("a[href*=example.com]")} - links pointing (loosely) to example.com
 * </ul>
 * <p>
 * The query syntax is documented in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param query a {@link Selector} query
 * @return the matching elements; empty if none match
 * @see org.jsoup.select.Selector
 */
public Elements select(String query) {
    final Elements matched = Selector.select(query, this);
    return matched;
}