Java Code Examples for org.jsoup.select.Elements#add()

The following examples show how to use org.jsoup.select.Elements#add(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Rgaa22Rule11041.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Splits the elements of {@code elementHandler} according to whether they
 * contain data table markup.
 * <p>
 * Elements for which {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY} selects at
 * least one node are kept; the handler is cleaned via {@code clean()} and
 * then given exactly those elements. Every other element is forwarded to
 * {@code elementHandlerWithoutDataTableMarkup} when that handler is not
 * {@code null}.
 *
 * @param elementHandler
 *            handler providing the candidate elements and receiving the
 *            elements that contain data table markup
 * @param elementHandlerWithoutDataTableMarkup
 *            handler receiving the elements without data table markup; may
 *            be {@code null}, in which case those elements are not collected
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler,
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    // Collected separately because elementHandler is iterated here and only
    // rewritten once the loop is over.
    Elements elementsWithMarkup = new Elements();

    for (Element el : elementHandler.get()) {
        if (!el.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty()) {
            elementsWithMarkup.add(el);
        } else if (elementHandlerWithoutDataTableMarkup != null) {
            elementHandlerWithoutDataTableMarkup.add(el);
        }
    }
    elementHandler.clean().addAll(elementsWithMarkup);
}
 
Example 2
Source File: Aw22Rule05081.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Partitions the elements held by {@code elementHandler} on the presence of
 * data table markup.
 * <p>
 * An element counts as having data table markup when
 * {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY} selects something on it. Those
 * elements replace the content of {@code elementHandler}; the remaining
 * ones are added to {@code elementHandlerWithoutDataTableMarkup} if it is
 * not {@code null}.
 *
 * @param elementHandler
 *            handler whose elements are examined; cleaned and refilled with
 *            the elements that have data table markup
 * @param elementHandlerWithoutDataTableMarkup
 *            optional handler (may be {@code null}) for the other elements
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler,
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    final Elements withMarkup = new Elements();
    final boolean collectOthers = elementHandlerWithoutDataTableMarkup != null;

    for (Element candidate : elementHandler.get()) {
        final boolean hasMarkup =
                !candidate.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty();
        if (hasMarkup) {
            withMarkup.add(candidate);
            continue;
        }
        if (collectOthers) {
            elementHandlerWithoutDataTableMarkup.add(candidate);
        }
    }

    elementHandler.clean().addAll(withMarkup);
}
 
Example 3
Source File: Rgaa30Rule050801.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Keeps in {@code elementHandler} the elements that carry data table
 * markup and hands the others over to a second handler.
 * <p>
 * The test is whether selecting {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY}
 * on the element yields any result.
 *
 * @param elementHandler
 *            handler supplying the elements to sort; after the loop it is
 *            cleaned and the elements with data table markup are added back
 * @param elementHandlerWithoutDataTableMarkup
 *            handler that receives the elements without data table markup;
 *            ignored when {@code null}
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler,
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    Elements dataTables = new Elements();

    for (Element table : elementHandler.get()) {
        Elements markup = table.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY);
        if (markup.isEmpty()) {
            if (elementHandlerWithoutDataTableMarkup != null) {
                elementHandlerWithoutDataTableMarkup.add(table);
            }
        } else {
            dataTables.add(table);
        }
    }

    elementHandler.clean().addAll(dataTables);
}
 
Example 4
Source File: JsoupParser.java    From HaoReader with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Filters {@code elements} using a two-part rule.
 * <p>
 * {@code rules[0]} names the kind of lookup ({@code "class"}, {@code "id"},
 * {@code "tag"} or {@code "text"}) and {@code rules[1]} is its argument. An
 * element is kept when the corresponding jsoup lookup on that element
 * returns a result. Any other kind keeps nothing.
 *
 * @param elements the elements to filter
 * @param rules    the rule; when {@code null} or shorter than two entries
 *                 the input is returned unchanged
 * @return a new {@code Elements} with the kept elements, or
 *         {@code elements} itself when no usable rule is given
 */
private Elements filterElements(Elements elements, String[] rules) {
    if (rules == null || rules.length < 2) {
        return elements;
    }

    final Elements kept = new Elements();
    for (Element candidate : elements) {
        final boolean matches;
        switch (rules[0]) {
            case "class":
                matches = !candidate.getElementsByClass(rules[1]).isEmpty();
                break;
            case "id":
                matches = candidate.getElementById(rules[1]) != null;
                break;
            case "tag":
                matches = !candidate.getElementsByTag(rules[1]).isEmpty();
                break;
            case "text":
                matches = !candidate.getElementsContainingOwnText(rules[1]).isEmpty();
                break;
            default:
                matches = false;
                break;
        }
        if (matches) {
            kept.add(candidate);
        }
    }
    return kept;
}
 
Example 5
Source File: AnalyzeByJSoup.java    From a with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Returns the subset of {@code elements} that satisfies a
 * {@code [kind, value]} rule.
 * <p>
 * Supported kinds are {@code "class"}, {@code "id"}, {@code "tag"} and
 * {@code "text"}; each one runs the matching jsoup lookup on the element
 * with {@code value} and keeps the element when something is found. An
 * unsupported kind yields an empty result.
 *
 * @param elements the candidate elements
 * @param rules    {@code rules[0]} is the kind, {@code rules[1]} the value;
 *                 if {@code null} or fewer than two entries, no filtering
 *                 happens
 * @return the filtered elements, or the original {@code elements} when
 *         {@code rules} is unusable
 */
private Elements filterElements(Elements elements, String[] rules) {
    boolean unusableRule = rules == null || rules.length < 2;
    if (unusableRule) {
        return elements;
    }

    String value = rules[1];
    Elements result = new Elements();
    for (Element current : elements) {
        boolean accepted = false;
        switch (rules[0]) {
            case "class":
                accepted = !current.getElementsByClass(value).isEmpty();
                break;
            case "id":
                accepted = current.getElementById(value) != null;
                break;
            case "tag":
                accepted = !current.getElementsByTag(value).isEmpty();
                break;
            case "text":
                accepted = !current.getElementsContainingOwnText(value).isEmpty();
                break;
        }
        if (!accepted) {
            continue;
        }
        result.add(current);
    }
    return result;
}
 
Example 6
Source File: CommonUtil.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Counts the elements with the same tag name as {@code e} that are part of
 * the scope's context.
 * <p>
 * Candidates are taken from
 * {@code e.parent().getElementsByTag(e.tagName())}; a candidate is counted
 * only when {@code scope.context()} contains it.
 *
 * @param e     the reference element; its parent is dereferenced without a
 *              null check, so it must have one
 * @param scope the scope whose context restricts which candidates count
 * @return the number of candidates contained in the scope's context
 */
public static int sameTagElNums(Element e,Scope scope){
    // Only the count is needed, so no intermediate collection is built.
    int count = 0;
    for (Element candidate : e.parent().getElementsByTag(e.tagName())) {
        if (scope.context().contains(candidate)) {
            count++;
        }
    }
    return count;
}
 
Example 7
Source File: JsoupBase64ImageInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the images of {@code doc} that reference {@code image} and for
 * which the {@code BASE64} inline mode is allowed.
 *
 * @param doc   the document to search
 * @param image the image resource whose source URL is injected into
 *              {@code IMG_SELECTOR}
 * @return the selected images accepted by {@code isInlineModeAllowed}
 */
private static Elements getImagesToInline(Document doc, ImageResource image) {
	String selector = MessageFormat.format(IMG_SELECTOR, image.getSrcUrl());
	Elements inlinable = new Elements();
	for (Element candidate : doc.select(selector)) {
		// images whose mode does not match are left untouched
		if (!isInlineModeAllowed(candidate, BASE64)) {
			continue;
		}
		inlinable.add(candidate);
	}
	return inlinable;
}
 
Example 8
Source File: JsoupAttachImageInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the images of {@code doc} that reference {@code image} and for
 * which the {@code ATTACH} inline mode is allowed.
 *
 * @param doc   the document to search
 * @param image the image resource whose source URL is injected into
 *              {@code IMG_SELECTOR}
 * @return the selected images accepted by {@code isInlineModeAllowed}
 */
private static Elements getImagesToAttach(Document doc, ImageResource image) {
	Elements candidates = doc.select(format(IMG_SELECTOR, image.getSrcUrl()));
	Elements attachable = new Elements();
	for (Element candidate : candidates) {
		// images not allowed to be attached are skipped
		boolean allowed = isInlineModeAllowed(candidate, ATTACH);
		if (allowed) {
			attachable.add(candidate);
		}
	}
	return attachable;
}
 
Example 9
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the sibling elements of this element: every child element of the
 * parent except this element itself.
 *
 * @return the siblings; an empty list when this element has no parent node
 *         or no other element shares its parent
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    final List<Element> family = parent().childElementsList();
    final Elements others = new Elements(family.size() - 1);
    for (Element candidate : family) {
        // identity comparison: an element is never its own sibling
        if (candidate == this) {
            continue;
        }
        others.add(candidate);
    }
    return others;
}
 
Example 10
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Collects the elements that share this element's parent, leaving this
 * element out of the result.
 *
 * @return sibling elements, possibly empty; always empty when there is no
 *         parent node
 */
public Elements siblingElements() {
    final boolean orphan = (parentNode == null);
    if (orphan) {
        return new Elements(0);
    }

    List<Element> children = parent().childElementsList();
    // one slot fewer than the child count, since this element is excluded
    int capacity = children.size() - 1;
    Elements result = new Elements(capacity);
    for (Element child : children) {
        if (child != this) {
            result.add(child);
        }
    }
    return result;
}
 
Example 11
Source File: NumTest.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Calls {@code Num} on a context holding a single element whose text is
 * {@code "test .69"} and expects the numeric value {@code 0.69}.
 */
@Test
public void testShort() throws Exception {
    Element node = new Element("V");
    node.appendText("test .69");

    Elements ctx = new Elements();
    ctx.add(node);

    XValue result = new Num().call(Scope.create(ctx));
    logger.info("v = {}", result);
    Assert.assertEquals(0.69, result.asDouble(), 0.00000000000001);
}
 
Example 12
Source File: TextExtractor.java    From storm-crawler with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the text of {@code element}, restricted by the configured
 * inclusion patterns when there are any.
 * <p>
 * With neither inclusion patterns nor excluded tags configured, the text of
 * {@code element} itself is returned. Otherwise the inclusion patterns are
 * tried in order and the first one that selects something wins; if none
 * does, {@code element} is used. The text of each selected element is
 * followed by a newline and the concatenation is trimmed.
 *
 * @param element the element to extract text from
 * @return the extracted text
 */
public String text(Element element) {
    // nothing configured - the whole element is the answer
    boolean nothingConfigured =
            inclusionPatterns.size() == 0 && excludedTags.size() == 0;
    if (nothingConfigured) {
        return _text(element);
    }

    Elements selected = new Elements();
    for (String pattern : inclusionPatterns) {
        selected = element.select(pattern);
        if (!selected.isEmpty()) {
            // first pattern with a hit wins; later patterns are not tried
            break;
        }
    }

    // no pattern matched (or none was defined): fall back to the element
    if (selected.isEmpty()) {
        selected.add(element);
    }

    final StringBuilder out = new StringBuilder();
    for (Element match : selected) {
        out.append(_text(match));
        out.append("\n");
    }
    return out.toString().trim();
}
 
Example 13
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next sibling element (extension axis; syntax
 * {@code following-sibling-one}).
 *
 * @param e the element whose next sibling is wanted
 * @return an {@code Elements} holding the next sibling element, or an empty
 *         one when there is none
 */
public Elements followingSiblingOne(Element e) {
    Elements result = new Elements();
    Element next = e.nextElementSibling();
    if (next != null) {
        result.add(next);
    }
    return result;
}
 
Example 14
Source File: CommonUtil.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Collects every element sibling that follows {@code el}, walking forward
 * with {@code nextElementSibling()} until it returns {@code null}.
 *
 * @param el the starting element (not included in the result)
 * @return the following siblings in the order they were visited, or
 *         {@code null} when there are none
 */
public static Elements followingSibling(Element el){
    Elements collected = new Elements();
    for (Element cursor = el.nextElementSibling();
         cursor != null;
         cursor = cursor.nextElementSibling()) {
        collected.add(cursor);
    }
    // callers get null, not an empty list, when nothing follows
    return collected.isEmpty() ? null : collected;
}
 
Example 15
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the previous sibling element (extension axis; syntax
 * {@code preceding-sibling-one}).
 *
 * @param e the element whose previous sibling is wanted
 * @return an {@code Elements} holding the previous sibling element, or an
 *         empty one when there is none
 */
public Elements precedingSiblingOne(Element e) {
    Elements result = new Elements();
    Element previous = e.previousElementSibling();
    if (previous == null) {
        return result;
    }
    result.add(previous);
    return result;
}
 
Example 16
Source File: Element.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the children of this element's parent, minus this element.
 *
 * @return the sibling elements; empty when there is no parent node or when
 *         this element is the parent's only child element
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    List<Element> all = parent().children();
    Elements siblings = new Elements(all.size() - 1);
    for (int i = 0; i < all.size(); i++) {
        Element candidate = all.get(i);
        // compared by identity so that only this exact node is left out
        if (candidate != this) {
            siblings.add(candidate);
        }
    }
    return siblings;
}
 
Example 17
Source File: CommonUtil.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Collects every element sibling that precedes {@code el}, walking backward
 * with {@code previousElementSibling()} until it returns {@code null}.
 *
 * @param el the starting element (not included in the result)
 * @return the preceding siblings in the order they were visited (nearest
 *         first), or {@code null} when there are none
 */
public static Elements precedingSibling(Element el){
    Elements collected = new Elements();
    for (Element cursor = el.previousElementSibling();
         cursor != null;
         cursor = cursor.previousElementSibling()) {
        collected.add(cursor);
    }
    if (collected.isEmpty()) {
        // callers get null, not an empty list, when nothing precedes
        return null;
    }
    return collected;
}
 
Example 18
Source File: HtmlTreeBuilder.java    From jsoup-learning with MIT License 4 votes vote down vote up
/**
 * Parses {@code inputFragment} as a fragment, optionally in the context of
 * an existing element.
 * <p>
 * When {@code context} is non-null, the tokeniser is put in the state
 * chosen from the context's tag name, a synthetic {@code html} root is
 * appended to the document and pushed on the stack, and the form element is
 * set to the first {@code FormElement} found among the context and its
 * parents.
 *
 * @param inputFragment the fragment text to parse
 * @param context       the context element; may be {@code null}
 * @param baseUri       base URI passed to the parse initialisation and to
 *                      the synthetic root element
 * @param errors        list passed to the parse initialisation
 * @return the child nodes of the synthetic root when {@code context} is
 *         non-null, otherwise the child nodes of the document
 */
List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
    // context may be null
    state = HtmlTreeBuilderState.Initial;
    initialiseParse(inputFragment, baseUri, errors);
    contextElement = context;
    fragmentParsing = true;
    Element root = null;

    if (context != null) {
        if (context.ownerDocument() != null) // quirks setup:
            doc.quirksMode(context.ownerDocument().quirksMode());

        // initialise the tokeniser state:
        String contextTag = context.tagName();
        if (StringUtil.in(contextTag, "title", "textarea"))
            tokeniser.transition(TokeniserState.Rcdata);
        else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
            tokeniser.transition(TokeniserState.Rawtext);
        else if (contextTag.equals("script"))
            tokeniser.transition(TokeniserState.ScriptData);
        // NOTE(review): the next two branches and the default all transition to Data
        else if (contextTag.equals(("noscript")))
            tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
        else if (contextTag.equals("plaintext"))
            tokeniser.transition(TokeniserState.Data);
        else
            tokeniser.transition(TokeniserState.Data); // default

        // synthetic root that receives the parsed fragment; its children are the result
        root = new Element(Tag.valueOf("html"), baseUri);
        doc.appendChild(root);
        stack.push(root);
        resetInsertionMode();

        // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
        // with form correctly
        Elements contextChain = context.parents();
        // the context itself is checked first, ahead of its parents
        contextChain.add(0, context);
        for (Element parent: contextChain) {
            if (parent instanceof FormElement) {
                formElement = (FormElement) parent;
                break;
            }
        }
    }

    runParser();
    if (context != null)
        return root.childNodes();
    else
        return doc.childNodes();
}
 
Example 19
Source File: ElementSelector.java    From spring-boot with Apache License 2.0 3 votes vote down vote up
/**
 * Removes the given tags, together with their content, from the current
 * elements.
 * <p>
 * Each entry of {@code tags} is passed to {@code select}, so it may be any
 * selector, for example:
 * <pre>
 * select("script").remove()
 * select("script, style, .hidden").remove()
 * </pre>
 * After the removal {@code currentElements} still lists elements from every
 * nesting level rather than just the cleaned result; per the original
 * author's note only its first entry is the cleaned result, so
 * {@code currentElements} is reduced to that first entry. When
 * {@code currentElements} is empty there is no first entry and it becomes an
 * empty list instead of a list holding {@code null}.
 *
 * @param tags the selectors to remove; when none are given nothing changes
 * @return this selector, for chaining
 */
public ElementSelector removeTags(String... tags) {
    for (String s : tags) {
        // Original author's note: matching is case-insensitive. remove()
        // changes the elements referenced by currentElements.
        this.currentElements.select(s).remove();
    }

    if (tags.length != 0) { // leave currentElements alone when called without arguments
        Elements temp = new Elements();
        // first() yields null on an empty list; never store that null
        Element first = this.currentElements.first();
        if (first != null) {
            temp.add(first);
        }
        this.currentElements = temp;
    }
    return this;
}
 
Example 20
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 2 votes vote down vote up
/**
 * Returns all ancestors of {@code e} plus {@code e} itself.
 *
 * @param e the element to start from
 * @return the result of {@code e.parents()} with {@code e} appended at the
 *         end
 */
public Elements ancestorOrSelf(Element e) {
    Elements chain = e.parents();
    chain.add(e);
    return chain;
}