org.jsoup.select.Selector Java Examples

The following examples show how to use org.jsoup.select.Selector. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: AbstractHTMLProcessor.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Validates that {@code value} can be parsed as a CSS selector.
 * <p>
 * When the property supports Expression Language and the value contains it, the value is
 * reported as valid without being parsed. Otherwise the value is run as a selector against
 * an empty HTML document and is invalid only if that raises a
 * {@link Selector.SelectorParseException}.
 * </p>
 *
 * @param subject the name reported as the subject of the validation result
 * @param value the candidate selector text
 * @param context used to ask whether Expression Language applies to this value
 * @return a result whose explanation is {@code null} when the selector parsed successfully
 */
@Override
public ValidationResult validate(final String subject, final String value, final ValidationContext context) {
    final boolean usesExpressionLanguage =
            context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value);
    if (usesExpressionLanguage) {
        return new ValidationResult.Builder()
                .subject(subject)
                .input(value)
                .explanation("Expression Language Present")
                .valid(true)
                .build();
    }

    String invalidReason;
    try {
        // The document content is irrelevant; select() is called only to exercise the selector parser.
        Jsoup.parse("<html></html>").select(value);
        invalidReason = null;
    } catch (final Selector.SelectorParseException e) {
        invalidReason = "\"" + value + "\" is an invalid CSS selector";
    }

    return new ValidationResult.Builder()
            .subject(subject)
            .input(value)
            .explanation(invalidReason)
            .valid(invalidReason == null)
            .build();
}
 
Example #2
Source File: XPathParser.java    From xsoup with MIT License 6 votes vote down vote up
/**
 * Parses a {@code regex(...)} function call and stores the resulting operator in
 * {@code elementOperator}.
 * <p>
 * Accepted argument counts are one, two and three. With two arguments, a first argument
 * starting with {@code @} has that prefix stripped and is passed as the second constructor
 * argument; otherwise the second argument is parsed as an integer.
 * NOTE(review): the stripped name is presumably an attribute and the integer a capture-group
 * index -- confirm against {@code ElementOperator.Regex}.
 * </p>
 *
 * @param remainder the full function text, expected to start with {@code regex(} and end with {@code )}
 * @throws Selector.SelectorParseException if the number of arguments is not 1, 2 or 3
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
    // Strip the leading "regex(" and the trailing ")" to get the raw argument list.
    final String rawArgs = remainder.substring("regex(".length(), remainder.length() - 1);
    final List<String> args = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(rawArgs));
    switch (args.size()) {
        case 1:
            elementOperator = new ElementOperator.Regex(args.get(0));
            break;
        case 2:
            if (args.get(0).startsWith("@")) {
                elementOperator = new ElementOperator.Regex(args.get(1), args.get(0).substring(1));
            } else {
                elementOperator = new ElementOperator.Regex(args.get(0), null, Integer.parseInt(args.get(1)));
            }
            break;
        case 3:
            // The first argument always loses its first character here, without an "@" check.
            elementOperator = new ElementOperator.Regex(args.get(1), args.get(0).substring(1), Integer.parseInt(args.get(2)));
            break;
        default:
            throw new Selector.SelectorParseException("Unknown usage for regex()" + remainder);
    }
}
 
Example #3
Source File: XPathParser.java    From zongtui-webcrawler with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Looks at the head of {@code tq} and hands off to the handler for the first pattern that
 * matches. The checks run in a fixed order: the function-call pattern is tested before the
 * plain-word check, and the numeric {@code [n]} pattern before the generic {@code [} check.
 *
 * @throws Selector.SelectorParseException if none of the patterns matches
 */
private void findElements() {
    if (tq.matches("@")) {
        consumeAttribute();
        return;
    }
    if (tq.matches("*")) {
        allElements();
        return;
    }
    if (tq.matchesRegex("\\w+\\(.*\\).*")) {
        consumeOperatorFunction();
        return;
    }
    if (tq.matchesWord()) {
        byTag();
        return;
    }
    if (tq.matchesRegex("\\[\\d+\\]")) {
        byNth();
        return;
    }
    if (tq.matches("[")) {
        evals.add(consumePredicates(tq.chompBalanced('[', ']')));
        return;
    }
    // Nothing recognised the token at the head of the queue.
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
 
Example #4
Source File: XPathParser.java    From zongtui-webcrawler with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Builds an evaluator from a function-style predicate at the head of {@code predicatesQueue}.
 * <p>
 * The first key of {@code FUNCTION_MAPPING} that can be chomped from the queue selects the
 * function. Its parenthesised arguments are parsed and, if the first one starts with
 * {@code @}, that prefix is removed and the mapped {@code FunctionEvaluator} is called.
 * </p>
 *
 * @param predicatesQueue queue positioned at the function name
 * @return the evaluator produced by the mapped function, or {@code null} when the first
 *         argument does not start with {@code @}
 * @throws Selector.SelectorParseException if no known function name is at the head of the queue
 */
private Evaluator byFunction(XTokenQueue predicatesQueue) {
    for (Map.Entry<String, FunctionEvaluator> candidate : FUNCTION_MAPPING.entrySet()) {
        if (!predicatesQueue.matchChomp(candidate.getKey())) {
            continue;
        }
        final String rawArgs = predicatesQueue.chompBalanced('(', ')');
        final List<String> args = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(rawArgs));

        final String firstArg = args.get(0);
        if (!firstArg.startsWith("@")) {
            return null;
        }
        // Drop the leading "@" before handing the arguments to the function.
        args.set(0, firstArg.substring(1));
        return candidate.getValue().call(args.toArray(new String[0]));
    }

    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
}
 
Example #5
Source File: XPathParser.java    From zongtui-webcrawler with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Interprets the text of a {@code regex(...)} call and assigns the matching
 * {@code ElementOperator.Regex} to {@code elementOperator}.
 * <p>
 * One, two or three arguments are accepted. In the two-argument form the first argument is
 * checked for a leading {@code @}: if present it is stripped and passed second; if absent,
 * the second argument is parsed with {@link Integer#parseInt(String)}.
 * NOTE(review): the meaning of each constructor argument is not visible here -- verify
 * against {@code ElementOperator.Regex}.
 * </p>
 *
 * @param remainder function text beginning with {@code regex(} and ending with {@code )}
 * @throws Selector.SelectorParseException when the argument count is anything other than 1, 2 or 3
 */
private void functionRegex(String remainder) {
    Validate.isTrue(remainder.endsWith(")"), "Unclosed bracket for function! " + remainder);
    // Keep only what sits between "regex(" and the closing ")".
    final String argumentText = remainder.substring("regex(".length(), remainder.length() - 1);
    final List<String> arguments = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));
    switch (arguments.size()) {
        case 1:
            elementOperator = new ElementOperator.Regex(arguments.get(0));
            break;
        case 2:
            if (arguments.get(0).startsWith("@")) {
                elementOperator = new ElementOperator.Regex(arguments.get(1), arguments.get(0).substring(1));
            } else {
                elementOperator = new ElementOperator.Regex(arguments.get(0), null, Integer.parseInt(arguments.get(1)));
            }
            break;
        case 3:
            // No "@" check in this form: the first character of the first argument is always dropped.
            elementOperator = new ElementOperator.Regex(arguments.get(1), arguments.get(0).substring(1), Integer.parseInt(arguments.get(2)));
            break;
        default:
            throw new Selector.SelectorParseException("Unknown usage for regex()" + remainder);
    }
}
 
Example #6
Source File: XPathParser.java    From xsoup with MIT License 6 votes vote down vote up
/**
 * Resolves a function predicate into an {@code Evaluator}.
 * <p>
 * Iterates over {@code FUNCTION_MAPPING} and uses the first entry whose key can be consumed
 * from the queue. The balanced {@code (...)} section that follows is parsed into arguments;
 * a first argument starting with {@code @} has the prefix stripped before the mapped
 * function is called with all arguments.
 * </p>
 *
 * @param predicatesQueue queue whose head should be a known function name
 * @return the mapped function's result, or {@code null} if the first argument lacks the
 *         {@code @} prefix
 * @throws Selector.SelectorParseException if no key of {@code FUNCTION_MAPPING} matches
 */
private Evaluator byFunction(XTokenQueue predicatesQueue) {
    for (Map.Entry<String, FunctionEvaluator> mapping : FUNCTION_MAPPING.entrySet()) {
        if (!predicatesQueue.matchChomp(mapping.getKey())) {
            continue;
        }
        final String argumentText = predicatesQueue.chompBalanced('(', ')');
        final List<String> arguments = XTokenQueue.trimQuotes(XTokenQueue.parseFuncionParams(argumentText));

        final String head = arguments.get(0);
        if (!head.startsWith("@")) {
            return null;
        }
        // Pass the name without its "@" marker.
        arguments.set(0, head.substring(1));
        return mapping.getValue().call(arguments.toArray(new String[0]));
    }

    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, predicatesQueue.remainder());
}
 
Example #7
Source File: XPathParser.java    From xsoup with MIT License 6 votes vote down vote up
/**
 * Dispatches on the token at the head of {@code tq}, calling the handler for the first
 * pattern that matches. Order matters: the function-call regex is tried before
 * {@code matchesWord()}, and the {@code [digits]} regex before the bare {@code [} check.
 *
 * @throws Selector.SelectorParseException if the head of the queue matches no known pattern
 */
private void findElements() {
    if (tq.matches("@")) {
        consumeAttribute();
        return;
    }
    if (tq.matches("*")) {
        allElements();
        return;
    }
    if (tq.matchesRegex("\\w+\\(.*\\).*")) {
        consumeOperatorFunction();
        return;
    }
    if (tq.matchesWord()) {
        byTag();
        return;
    }
    if (tq.matchesRegex("\\[\\d+\\]")) {
        byNth();
        return;
    }
    if (tq.matches("[")) {
        evals.add(consumePredicates(tq.chompBalanced('[', ']')));
        return;
    }
    // Unhandled token.
    throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
 
Example #8
Source File: AbstractHTMLProcessor.java    From nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Checks whether {@code value} is a syntactically valid CSS selector.
 * <p>
 * Values that contain Expression Language (where the property supports it) are accepted
 * without parsing. Any other value is passed to {@code select} on an empty HTML document;
 * a {@link Selector.SelectorParseException} marks it invalid.
 * </p>
 *
 * @param subject the subject recorded in the validation result
 * @param value the selector text to check
 * @param context consulted for Expression Language support and presence
 * @return the validation result; its explanation is {@code null} for a parseable selector
 */
@Override
public ValidationResult validate(final String subject, final String value, final ValidationContext context) {
    final boolean expressionLanguageInUse =
            context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(value);
    if (expressionLanguageInUse) {
        return new ValidationResult.Builder()
                .subject(subject)
                .input(value)
                .explanation("Expression Language Present")
                .valid(true)
                .build();
    }

    String failure;
    try {
        // Only the selector parser matters here; the empty document is a placeholder.
        Jsoup.parse("<html></html>").select(value);
        failure = null;
    } catch (final Selector.SelectorParseException e) {
        failure = "\"" + value + "\" is an invalid CSS selector";
    }

    return new ValidationResult.Builder()
            .subject(subject)
            .input(value)
            .explanation(failure)
            .valid(failure == null)
            .build();
}
 
Example #9
Source File: XPathParser.java    From zongtui-webcrawler with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses the contents of a predicate (the text between {@code [} and {@code ]}) into a
 * single {@code Evaluator}.
 * <p>
 * The text is read as a sequence of terms separated by {@code and} / {@code or}. A term is
 * either a parenthesised sub-expression (parsed recursively), an {@code @attribute} test,
 * or a function call. Each term is pushed onto an {@code EvaluatorStack} together with the
 * operator that preceded it.
 * </p>
 *
 * @param queue the predicate text to parse
 * @return the evaluator on top of the stack after {@code mergeOr()} has been applied
 * @throws IllegalArgumentException if two terms follow each other without an operator
 * @throws Selector.SelectorParseException if a term starts with an unexpected token
 */
private Evaluator consumePredicates(String queue) {
    final XTokenQueue tokens = new XTokenQueue(queue);
    final EvaluatorStack stack = new EvaluatorStack();
    Operation pendingOperation = null;
    // Whitespace is skipped before the first token and again after every iteration.
    for (tokens.consumeWhitespace(); !tokens.isEmpty(); tokens.consumeWhitespace()) {
        if (tokens.matchChomp("and")) {
            pendingOperation = Operation.AND;
            continue;
        }
        if (tokens.matchChomp("or")) {
            pendingOperation = Operation.OR;
            continue;
        }

        if (pendingOperation == null && stack.size() > 0) {
            throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", tokens.remainder()));
        }

        final Evaluator term;
        if (tokens.matches("(")) {
            term = consumePredicates(tokens.chompBalanced('(', ')'));
        } else if (tokens.matches("@")) {
            term = byAttribute(tokens);
        } else if (tokens.matchesRegex("\\w+.*")) {
            term = byFunction(tokens);
        } else {
            throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tokens.remainder());
        }
        stack.calc(term, pendingOperation);
        // The operator has been applied; the next term needs a new one.
        pendingOperation = null;
    }
    stack.mergeOr();
    return stack.peek();
}
 
Example #10
Source File: XPathParser.java    From xsoup with MIT License 5 votes vote down vote up
/**
 * Turns predicate text into one {@code Evaluator}.
 * <p>
 * Terms (a parenthesised group, an {@code @attribute} test or a function call) are read one
 * by one; {@code and} / {@code or} between them set the operation that is passed to
 * {@code EvaluatorStack.calc} with the following term. Parenthesised groups are handled by
 * a recursive call.
 * </p>
 *
 * @param queue the text of the predicate
 * @return the stack's top evaluator once {@code mergeOr()} has run
 * @throws IllegalArgumentException if a term directly follows another with no operator between
 * @throws Selector.SelectorParseException if the next token cannot start a term
 */
private Evaluator consumePredicates(String queue) {
    final XTokenQueue remaining = new XTokenQueue(queue);
    final EvaluatorStack evaluators = new EvaluatorStack();
    Operation operator = null;
    // The update clause keeps the "skip whitespace after each step" behaviour in one place.
    for (remaining.consumeWhitespace(); !remaining.isEmpty(); remaining.consumeWhitespace()) {
        if (remaining.matchChomp("and")) {
            operator = Operation.AND;
            continue;
        }
        if (remaining.matchChomp("or")) {
            operator = Operation.OR;
            continue;
        }

        if (operator == null && evaluators.size() > 0) {
            throw new IllegalArgumentException(String.format("Need AND/OR between two predicate! %s", remaining.remainder()));
        }

        final Evaluator parsed;
        if (remaining.matches("(")) {
            parsed = consumePredicates(remaining.chompBalanced('(', ')'));
        } else if (remaining.matches("@")) {
            parsed = byAttribute(remaining);
        } else if (remaining.matchesRegex("\\w+.*")) {
            parsed = byFunction(remaining);
        } else {
            throw new Selector.SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, remaining.remainder());
        }
        evaluators.calc(parsed, operator);
        // Consume the operator so it is not reused for the next term.
        operator = null;
    }
    evaluators.mergeOr();
    return evaluators.peek();
}
 
Example #11
Source File: TestGetHTMLElement.java    From localization_nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Expects {@code select} to throw {@link Selector.SelectorParseException} when given a
 * selector that is not valid CSS.
 */
@Test(expected = Selector.SelectorParseException.class)
public void testCSSSelectorSyntaxValidator() throws IOException {
    final File weatherPage = new File("src/test/resources/Weather.html");
    final String charsetName = StandardCharsets.UTF_8.name();
    final Document parsedPage = Jsoup.parse(weatherPage, charsetName);
    // The call below is the one expected to throw.
    parsedPage.select("---invalidCssSelector");
}
 
Example #12
Source File: XPathParser.java    From xsoup with MIT License 4 votes vote down vote up
/**
 * Parses an {@code @attribute} predicate from {@code cq} into an {@code Evaluator}.
 * <p>
 * The attribute name is read up to the first comparison operator. With nothing after the
 * name, the result tests for the attribute's presence ({@code @*} meaning any attribute).
 * Otherwise the operator selects the evaluator type and the quoted value is unquoted first.
 * For {@code @class=value}, a value without spaces yields a class evaluator rather than an
 * exact attribute-value match.
 * </p>
 *
 * @param cq queue positioned at the {@code @} of the attribute predicate
 * @return the evaluator for the attribute test
 * @throws Selector.SelectorParseException if the name is followed by an unrecognised operator
 */
private Evaluator byAttribute(XTokenQueue cq) {
    cq.matchChomp("@");
    final String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val)
    Validate.notEmpty(key);
    cq.consumeWhitespace();

    // No operator: presence test only.
    if (cq.isEmpty()) {
        if ("*".equals(key)) {
            return new XEvaluators.HasAnyAttribute();
        }
        return new Evaluator.Attribute(key);
    }

    if (cq.matchChomp("=")) {
        final String unquoted = XTokenQueue.trimQuotes(chompEqualValue(cq));
        // To support selecting one class out of all.
        if (key.equals("class") && !unquoted.contains(" ")) {
            return new Evaluator.Class(unquoted);
        }
        return new Evaluator.AttributeWithValue(key, unquoted);
    }
    if (cq.matchChomp("!=")) {
        return new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("^=")) {
        return new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("$=")) {
        return new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("*=")) {
        return new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("~=")) {
        return new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq))));
    }
    throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq));
}
 
Example #13
Source File: TestGetHTMLElement.java    From nifi with Apache License 2.0 4 votes vote down vote up
/**
 * Verifies that selecting with an invalid CSS selector raises
 * {@link Selector.SelectorParseException}.
 */
@Test(expected = Selector.SelectorParseException.class)
public void testCSSSelectorSyntaxValidator() throws IOException {
    final File htmlFile = new File("src/test/resources/Weather.html");
    final String encoding = StandardCharsets.UTF_8.name();
    final Document document = Jsoup.parse(htmlFile, encoding);
    // This selector is the one the test expects to be rejected.
    document.select("---invalidCssSelector");
}
 
Example #14
Source File: XPathParser.java    From zongtui-webcrawler with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Builds the {@code Evaluator} for an {@code @attribute} predicate read from {@code cq}.
 * <p>
 * After the attribute name, an empty queue means a presence test ({@code @*} matches any
 * attribute). Otherwise one of {@code = != ^= $= *= ~=} must follow and determines which
 * evaluator is created from the unquoted value. {@code @class=value} with a space-free
 * value produces a class evaluator.
 * </p>
 *
 * @param cq queue whose head is the {@code @} introducing the attribute
 * @return the evaluator matching the parsed attribute test
 * @throws Selector.SelectorParseException if none of the supported operators follows the name
 */
private Evaluator byAttribute(XTokenQueue cq) {
    cq.matchChomp("@");
    final String key = cq.consumeToAny("=", "!=", "^=", "$=", "*=", "~="); // eq, not, start, end, contain, match, (no val)
    Validate.notEmpty(key);
    cq.consumeWhitespace();

    // Nothing left after the name: test for the attribute's existence.
    if (cq.isEmpty()) {
        if ("*".equals(key)) {
            return new XEvaluators.HasAnyAttribute();
        }
        return new Evaluator.Attribute(key);
    }

    if (cq.matchChomp("=")) {
        final String expected = XTokenQueue.trimQuotes(chompEqualValue(cq));
        // To support selecting one class out of all.
        if (key.equals("class") && !expected.contains(" ")) {
            return new Evaluator.Class(expected);
        }
        return new Evaluator.AttributeWithValue(key, expected);
    }
    if (cq.matchChomp("!=")) {
        return new Evaluator.AttributeWithValueNot(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("^=")) {
        return new Evaluator.AttributeWithValueStarting(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("$=")) {
        return new Evaluator.AttributeWithValueEnding(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("*=")) {
        return new Evaluator.AttributeWithValueContaining(key, XTokenQueue.trimQuotes(chompEqualValue(cq)));
    }
    if (cq.matchChomp("~=")) {
        return new Evaluator.AttributeWithValueMatching(key, Pattern.compile(XTokenQueue.trimQuotes(chompEqualValue(cq))));
    }
    throw new Selector.SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, chompEqualValue(cq));
}
 
Example #15
Source File: RankDetailsPresenterImpl.java    From HHComicViewer with Apache License 2.0 4 votes vote down vote up
/**
 * Requests the rank page at {@code url} (appended to the configured site prefix), extracts
 * the comics listed in the returned HTML and reports the outcome to {@code mFragment}.
 * <p>
 * Each {@code div[class=cComicItem]} element becomes one {@code Comic}. Its id is taken
 * from the link's href: the configured prefix is cut off and the text before the first
 * {@code .} is parsed as an integer.
 * </p>
 *
 * @param url path of the rank page, appended to {@code getHHWebVariable().getCsite()}
 */
@Override
public void getRankList(String url) {
    HHApiProvider.getInstance().getWebContentAsyn(HHApplication.getInstance()
                    .getHHWebVariable().getCsite() + url,
            new NormalResponse<byte[]>() {
                @Override
                public void success(NormalRequest request, byte[] data) {
                    try {
                        final String content = new String(data, "utf-8");
                        Document doc = Jsoup.parse(content);
                        Elements comicSrcs = doc.select("div[class=cComicItem]");
                        List<Comic> comics = new ArrayList<>();
                        for (Element comicSrc : comicSrcs) {
                            Comic comic = new Comic();
                            String comicUrl = comicSrc.select("a").first().attr("href");
                            String end = comicUrl.substring(HHApplication.getInstance()
                                    .getHHWebVariable().getPre().length());
                            comic.setCid(Integer.parseInt(end.split("\\.")[0]));
                            comic.setThumbnailUrl(comicSrc.select("img").first().attr("src"));
                            comic.setTitle(comicSrc.select("span[class=cComicTitle]").first().text());
                            // The closing ']' was missing from the next two selectors; jsoup
                            // versions that reject unbalanced brackets throw on such a query.
                            comic.setAuthor(comicSrc.select("span[class=cComicAuthor]").first().text());
                            comic.setComicStatus(comicSrc.select("span[class=cComicRating]").first().text());
                            comics.add(comic);
                        }
                        // NOTE(review): first() on an empty selection and Integer.parseInt on an
                        // unexpected href are not guarded here -- confirm the page layout
                        // guarantees these elements exist.
                        if (mFragment != null) {
                            mFragment.onSuccess(comics);
                        }
                    } catch (UnsupportedEncodingException | Selector.SelectorParseException e) {
                        e.printStackTrace();
                        if (mFragment != null) {
                            mFragment.onException(e);
                        }
                    }
                }

                @Override
                public void fail(int errorCode, String errorMsg) {
                    if (mFragment != null) {
                        mFragment.onFailure(errorCode, errorMsg);
                    }
                }
            });
}
 
Example #16
Source File: CssSelector.java    From jstarcraft-core with Apache License 2.0 4 votes vote down vote up
/**
 * Runs this selector's {@code query} against {@code content} by delegating to
 * {@link Selector#select}.
 *
 * @param content the element passed to {@code Selector.select} as the root of the search
 * @return whatever {@code Selector.select(query, content)} returns
 */
@Override
public Collection<Element> selectContent(Element content) {
    return Selector.select(query, content);
}
 
Example #17
Source File: Element.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
 * execution stops on the first hit.</p>
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first matching element, or <b>{@code null}</b> if there is no match.
 */
public Element selectFirst(String cssQuery) {
    return Selector.selectFirst(cssQuery, this);
}
 
Example #18
Source File: Element.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
 * may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return elements that match the query (empty if none match)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    return Selector.select(cssQuery, this);
}
 
Example #19
Source File: Element.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
 * <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
 * execution stops on the first hit.</p>
 * @param cssQuery a {@link Selector} CSS-like query
 * @return the first matching element, or <b>{@code null}</b> if there is no match.
 */
public Element selectFirst(String cssQuery) {
    return Selector.selectFirst(cssQuery, this);
}
 
Example #20
Source File: Element.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
 * may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return elements that match the query (empty if none match)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    return Selector.select(cssQuery, this);
}
 
Example #21
Source File: Element.java    From astor with GNU General Public License v2.0 2 votes vote down vote up
/**
 * Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
 * may include this element, or any of its children.
 * <p>
 * This method is generally more powerful to use than the DOM-type {@code getElementBy*} methods, because
 * multiple filters can be combined, e.g.:
 * </p>
 * <ul>
 * <li>{@code el.select("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.select("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>
 * See the query syntax documentation in {@link org.jsoup.select.Selector}.
 * </p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return elements that match the query (empty if none match)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 * @see org.jsoup.select.Selector
 */
public Elements select(String cssQuery) {
    return Selector.select(cssQuery, this);
}