Java Code Examples for org.jsoup.select.Elements#add()

The following examples show how to use org.jsoup.select.Elements#add(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: a   File: AnalyzeByJSoup.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Narrows {@code elements} down to those that satisfy a single filter rule.
 *
 * <p>{@code rules[0]} selects the kind of lookup ({@code "class"}, {@code "id"},
 * {@code "tag"} or {@code "text"}) and {@code rules[1]} is the value handed to that
 * lookup. An element is kept when the corresponding jsoup query run on it yields a
 * result. Any other rule kind matches nothing, so the returned list is empty.
 *
 * @param elements candidates to filter
 * @param rules    rule kind followed by rule value; when {@code null} or shorter than
 *                 two entries the input is returned untouched
 * @return a new list with the matching elements, or {@code elements} itself when no
 *         usable rule was supplied
 */
private Elements filterElements(Elements elements, String[] rules) {
    // Without both a kind and a value there is nothing to filter on.
    if (rules == null || rules.length < 2) {
        return elements;
    }

    Elements kept = new Elements();
    for (Element candidate : elements) {
        String kind = rules[0];
        boolean matched;
        if (kind.equals("class")) {
            matched = !candidate.getElementsByClass(rules[1]).isEmpty();
        } else if (kind.equals("id")) {
            matched = candidate.getElementById(rules[1]) != null;
        } else if (kind.equals("tag")) {
            matched = !candidate.getElementsByTag(rules[1]).isEmpty();
        } else if (kind.equals("text")) {
            matched = !candidate.getElementsContainingOwnText(rules[1]).isEmpty();
        } else {
            // Unknown rule kinds never match.
            matched = false;
        }

        if (matched) {
            kept.add(candidate);
        }
    }
    return kept;
}
 
Example 2
/**
 * Filters {@code elements} by one rule expressed as a {@code [kind, value]} pair.
 *
 * <p>Supported kinds are {@code "class"}, {@code "id"}, {@code "tag"} and
 * {@code "text"}; each maps to the matching jsoup lookup on the element. Elements
 * for which that lookup finds nothing are dropped, and so is every element when the
 * kind is not one of the four above.
 *
 * @param elements the elements to examine
 * @param rules    {@code rules[0]} is the rule kind, {@code rules[1]} the value to look up
 * @return {@code elements} unchanged when {@code rules} is {@code null} or has fewer
 *         than two entries; otherwise a new list of the elements that passed
 */
private Elements filterElements(Elements elements, String[] rules) {
    if (rules == null || rules.length < 2) {
        return elements;
    }

    final Elements result = new Elements();
    for (Element current : elements) {
        final boolean accepted;
        switch (rules[0]) {
            case "class":
                accepted = !current.getElementsByClass(rules[1]).isEmpty();
                break;
            case "id":
                accepted = current.getElementById(rules[1]) != null;
                break;
            case "tag":
                accepted = !current.getElementsByTag(rules[1]).isEmpty();
                break;
            case "text":
                accepted = !current.getElementsContainingOwnText(rules[1]).isEmpty();
                break;
            default:
                // Unrecognised rule kind: reject the element.
                accepted = false;
                break;
        }
        if (!accepted) {
            continue;
        }
        result.add(current);
    }
    return result;
}
 
Example 3
/**
 * Splits the elements held by {@code elementHandler} according to whether
 * {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY} selects anything inside them.
 *
 * <p>Elements for which the query finds something are collected and, once the scan
 * is over, added to the result of {@code elementHandler.clean()}. Every other
 * element is passed to {@code elementHandlerWithoutDataTableMarkup} when that
 * handler is provided.
 *
 * @param elementHandler handler supplying the elements to inspect; it receives the
 *                       elements with markup at the end
 * @param elementHandlerWithoutDataTableMarkup receiver for elements without markup;
 *                       may be {@code null}, in which case those elements are dropped
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler, 
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    Elements withMarkup = new Elements();

    for (Element candidate : elementHandler.get()) {
        boolean hasMarkup = !candidate.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty();
        if (hasMarkup) {
            withMarkup.add(candidate);
            continue;
        }
        if (elementHandlerWithoutDataTableMarkup != null) {
            elementHandlerWithoutDataTableMarkup.add(candidate);
        }
    }

    elementHandler.clean().addAll(withMarkup);
}
 
Example 4
/**
 * Keeps in {@code elementHandler} only the elements in which
 * {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY} selects at least one node, and forwards
 * the remaining ones to an optional second handler.
 *
 * @param elementHandler source of the elements; after the scan {@code clean()} is
 *                       called on it and the elements with markup are added to the
 *                       returned handler
 * @param elementHandlerWithoutDataTableMarkup optional sink for the elements in
 *                       which the query selects nothing; ignored when {@code null}
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler, 
            ElementHandler<Element> elementHandlerWithoutDataTableMarkup) {

    final boolean forwardOthers = elementHandlerWithoutDataTableMarkup != null;
    final Elements marked = new Elements();

    for (Element element : elementHandler.get()) {
        Elements markupHits = element.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY);
        if (!markupHits.isEmpty()) {
            marked.add(element);
        } else if (forwardOthers) {
            elementHandlerWithoutDataTableMarkup.add(element);
        }
    }

    elementHandler.clean().addAll(marked);
}
 
Example 5
/**
 * Partitions the elements of {@code elementHandler} on the presence of data-table
 * markup, as detected by {@code DATA_TABLE_MARKUP_CSS_LIKE_QUERY}.
 *
 * <p>Elements where the query selects something are gathered and added to the
 * result of {@code elementHandler.clean()} after the loop; the others go to
 * {@code elementHandlerWithoutDataTableMarkup} if it is not {@code null}.
 *
 * @param elementHandler handler whose elements are inspected and which finally
 *                       receives the elements with markup
 * @param elementHandlerWithoutDataTableMarkup handler receiving the elements without
 *                       markup, or {@code null} to discard them
 */
private void extractTableWithDataTableMarkup(
            ElementHandler<Element> elementHandler, 
            ElementHandler elementHandlerWithoutDataTableMarkup) {

    Elements tablesWithMarkup = new Elements();

    for (Element table : elementHandler.get()) {
        if (table.select(DATA_TABLE_MARKUP_CSS_LIKE_QUERY).isEmpty()) {
            // No markup found: hand over to the secondary handler when there is one.
            if (elementHandlerWithoutDataTableMarkup != null) {
                elementHandlerWithoutDataTableMarkup.add(table);
            }
        } else {
            tablesWithMarkup.add(table);
        }
    }

    elementHandler.clean().addAll(tablesWithMarkup);
}
 
Example 6
Source Project: JsoupXpath   File: CommonUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects every element sibling that precedes {@code el}, nearest sibling first.
 *
 * @param el the element whose preceding siblings are wanted
 * @return the preceding siblings, or {@code null} when there are none
 */
public static Elements precedingSibling(Element el){
    Elements siblings = new Elements();
    for (Element cursor = el.previousElementSibling();
         cursor != null;
         cursor = cursor.previousElementSibling()) {
        siblings.add(cursor);
    }
    // Callers get null, not an empty list, when nothing precedes the element.
    return siblings.isEmpty() ? null : siblings;
}
 
Example 7
Source Project: astor   File: Element.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the other child elements of this element's parent.
 *
 * <p>This element itself is never part of the result. When there is no parent node
 * an empty list is returned.
 *
 * @return sibling elements, excluding this element
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    List<Element> children = parent().children();
    // One slot fewer than the child count, since this element is left out.
    Elements others = new Elements(children.size() - 1);
    for (Element child : children) {
        if (child == this) {
            continue;
        }
        others.add(child);
    }
    return others;
}
 
Example 8
Source Project: CrawlerForReader   File: AxisSelector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the immediately preceding sibling element (extension axis, syntax
 * {@code preceding-sibling-one}).
 *
 * @param e the element whose previous sibling is wanted
 * @return a list holding the previous sibling element, or an empty list when
 *         {@code e} has none
 */
public Elements precedingSiblingOne(Element e) {
    Elements result = new Elements();
    Element previous = e.previousElementSibling();
    if (previous != null) {
        result.add(previous);
    }
    return result;
}
 
Example 9
Source Project: JsoupXpath   File: CommonUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects every element sibling that follows {@code el}, in document order.
 *
 * @param el the element whose following siblings are wanted
 * @return the following siblings, or {@code null} when there are none
 */
public static Elements followingSibling(Element el){
    Elements following = new Elements();
    Element cursor = el.nextElementSibling();
    while (cursor != null) {
        following.add(cursor);
        cursor = cursor.nextElementSibling();
    }
    // An empty result is reported as null rather than as an empty list.
    if (following.isEmpty()) {
        return null;
    }
    return following;
}
 
Example 10
Source Project: CrawlerForReader   File: AxisSelector.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the immediately following sibling element (extension axis, syntax
 * {@code following-sibling-one}).
 *
 * @param e the element whose next sibling is wanted
 * @return a list holding the next sibling element, or an empty list when
 *         {@code e} has none
 */
public Elements followingSiblingOne(Element e) {
    Element next = e.nextElementSibling();
    Elements result = new Elements();
    if (next == null) {
        return result;
    }
    result.add(next);
    return result;
}
 
Example 11
Source Project: storm-crawler   File: TextExtractor.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the text of {@code element}, restricted to the first inclusion pattern
 * that selects something.
 *
 * <p>When neither inclusion patterns nor excluded tags are configured, the text of
 * the whole element is returned directly. Otherwise the inclusion patterns are tried
 * in order and the first one with matches wins; if none matches (or none is
 * defined) the element itself is used. The text of each selected node is followed
 * by a newline and the joined result is trimmed.
 *
 * @param element root element to extract text from
 * @return the trimmed text of the selected nodes
 */
public String text(Element element) {
    // Nothing configured: plain extraction over the whole element.
    if (inclusionPatterns.size() == 0 && excludedTags.size() == 0) {
        return _text(element);
    }

    Elements selected = new Elements();
    for (String cssQuery : inclusionPatterns) {
        selected = element.select(cssQuery);
        if (!selected.isEmpty()) {
            break;
        }
    }

    // Fall back to the whole element when no pattern selected anything.
    if (selected.isEmpty()) {
        selected.add(element);
    }

    final StringBuilder buffer = new StringBuilder();
    for (Element match : selected) {
        buffer.append(_text(match));
        buffer.append("\n");
    }
    return buffer.toString().trim();
}
 
Example 12
Source Project: JsoupXpath   File: NumTest.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Checks that {@code Num} applied to a scope containing a single element with the
 * text {@code "test .69"} yields the number 0.69.
 */
@Test
public void testShort() throws Exception {
    Element source = new Element("V");
    source.appendText("test .69");

    Elements scopeElements = new Elements();
    scopeElements.add(source);

    XValue result = new Num().call(Scope.create(scopeElements));
    logger.info("v = {}",result);
    Assert.assertEquals(0.69,result.asDouble(),0.00000000000001);
}
 
Example 13
Source Project: JsoupXpath   File: CommonUtil.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Counts the elements with the same tag name as {@code e} that are found under
 * {@code e}'s parent and are also contained in the scope's context.
 *
 * <p>The candidates come from {@code e.parent().getElementsByTag(e.tagName())}.
 *
 * @param e     the element whose tag name is matched; its parent is dereferenced
 *              without a null check
 * @param scope scope whose context restricts which candidates are counted
 * @return the number of candidates contained in {@code scope.context()}
 */
public static int sameTagElNums(Element e,Scope scope){
    int matches = 0;
    for (Element sameTag : e.parent().getElementsByTag(e.tagName())) {
        if (scope.context().contains(sameTag)) {
            matches++;
        }
    }
    return matches;
}
 
Example 14
Source Project: astor   File: Element.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Lists the sibling elements of this element.
 *
 * <p>The result contains the parent's child elements except this element. An empty
 * list is returned when this element has no parent node.
 *
 * @return sibling elements; never includes this element
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    List<Element> childElements = parent().childElementsList();
    // Capacity is the child count minus this element.
    Elements result = new Elements(childElements.size() - 1);
    for (Element candidate : childElements) {
        boolean isSelf = candidate == this;
        if (!isSelf) {
            result.add(candidate);
        }
    }
    return result;
}
 
Example 15
Source Project: astor   File: Element.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Gets the elements that share this element's parent, leaving this element out.
 *
 * <p>With no parent node there are no siblings and an empty list is returned.
 *
 * @return the sibling elements
 */
public Elements siblingElements() {
    if (parentNode == null) {
        return new Elements(0);
    }

    List<Element> all = parent().childElementsList();
    Elements withoutSelf = new Elements(all.size() - 1);
    for (Element sibling : all) {
        // Identity comparison: only this exact instance is skipped.
        if (sibling == this) {
            continue;
        }
        withoutSelf.add(sibling);
    }
    return withoutSelf;
}
 
Example 16
Source Project: ogham   File: JsoupAttachImageInliner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the image elements in {@code doc} that reference {@code image} and for
 * which the {@code ATTACH} inline mode is allowed.
 *
 * @param doc   the document to search
 * @param image the image whose source URL is inserted into {@code IMG_SELECTOR}
 * @return the selected image elements accepted by
 *         {@code isInlineModeAllowed(img, ATTACH)}
 */
private static Elements getImagesToAttach(Document doc, ImageResource image) {
	String selector = format(IMG_SELECTOR, image.getSrcUrl());
	Elements attachable = new Elements();
	for (Element candidate : doc.select(selector)) {
		// images not allowed for the attach mode are left out
		if (!isInlineModeAllowed(candidate, ATTACH)) {
			continue;
		}
		attachable.add(candidate);
	}
	return attachable;
}
 
Example 17
Source Project: ogham   File: JsoupBase64ImageInliner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the image elements in {@code doc} that reference {@code image} and for
 * which the {@code BASE64} inline mode is allowed.
 *
 * @param doc   the document to search
 * @param image the image whose source URL is inserted into {@code IMG_SELECTOR}
 * @return the selected image elements accepted by
 *         {@code isInlineModeAllowed(img, BASE64)}
 */
private static Elements getImagesToInline(Document doc, ImageResource image) {
	String selector = MessageFormat.format(IMG_SELECTOR, image.getSrcUrl());
	Elements inlinable = new Elements();
	for (Element candidate : doc.select(selector)) {
		// inlining only applies when the mode matches
		boolean allowed = isInlineModeAllowed(candidate, BASE64);
		if (allowed) {
			inlinable.add(candidate);
		}
	}
	return inlinable;
}
 
Example 18
Source Project: jsoup-learning   File: HtmlTreeBuilder.java    License: MIT License 4 votes vote down vote up
/**
 * Parses an HTML fragment, optionally in the context of an existing element.
 *
 * <p>When {@code context} is given, the tokeniser's initial state is chosen from the
 * context's tag name, a synthetic {@code html} root is appended to the document and
 * pushed on the stack, and the form element is set to the nearest
 * {@code FormElement} among the context and its ancestors.
 *
 * @param inputFragment the fragment markup to parse
 * @param context       element the fragment is parsed within; may be {@code null}
 * @param baseUri       base URI passed to the parse initialisation and to the
 *                      synthetic root element
 * @param errors        error list passed to the parse initialisation
 * @return the child nodes of the synthetic root when a context was given, otherwise
 *         the child nodes of the document
 */
List<Node> parseFragment(String inputFragment, Element context, String baseUri, ParseErrorList errors) {
    // context may be null
    state = HtmlTreeBuilderState.Initial;
    initialiseParse(inputFragment, baseUri, errors);
    contextElement = context;
    fragmentParsing = true;
    Element root = null;

    if (context != null) {
        if (context.ownerDocument() != null) // quirks setup:
            doc.quirksMode(context.ownerDocument().quirksMode());

        // initialise the tokeniser state:
        String contextTag = context.tagName();
        if (StringUtil.in(contextTag, "title", "textarea"))
            tokeniser.transition(TokeniserState.Rcdata);
        else if (StringUtil.in(contextTag, "iframe", "noembed", "noframes", "style", "xmp"))
            tokeniser.transition(TokeniserState.Rawtext);
        else if (contextTag.equals("script"))
            tokeniser.transition(TokeniserState.ScriptData);
        else if (contextTag.equals(("noscript")))
            tokeniser.transition(TokeniserState.Data); // if scripting enabled, rawtext
        else if (contextTag.equals("plaintext"))
            tokeniser.transition(TokeniserState.Data);
        else
            tokeniser.transition(TokeniserState.Data); // default

        // synthetic <html> root that will hold the parsed fragment nodes
        root = new Element(Tag.valueOf("html"), baseUri);
        doc.appendChild(root);
        stack.push(root);
        resetInsertionMode();

        // setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
        // with form correctly
        Elements contextChain = context.parents();
        // put the context itself first so it is checked before its ancestors
        contextChain.add(0, context);
        for (Element parent: contextChain) {
            if (parent instanceof FormElement) {
                formElement = (FormElement) parent;
                break;
            }
        }
    }

    runParser();
    // root is only created in the context branch above, so it is non-null here
    if (context != null)
        return root.childNodes();
    else
        return doc.childNodes();
}
 
Example 19
Source Project: spring-boot   File: ElementSelector.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Removes the elements selected by the given queries, content included, from the
 * current elements.
 *
 * <p>Each argument uses select syntax, e.g. {@code tag1,tag2 ...}. Examples:
 * <pre>
 * select("script").remove().("style").remove().("head").remove()
 * or
 * select("script, style, .hidden").remove()
 * </pre>
 *
 * <p>Author's note (translated): after the removal {@code currentElements} still
 * holds all the tags, level by level, so it has several entries instead of just the
 * result of the removal; only the first entry is that result. For that reason
 * {@code currentElements} is replaced by a list holding only its first element.
 *
 * @param tags select queries for the elements to remove; when empty, nothing changes
 * @return this selector
 */
public ElementSelector removeTags(String... tags) {
    // Without arguments currentElements must stay untouched.
    if (tags.length == 0) {
        return this;
    }

    // Author's note (translated): matching is case-insensitive, either case is removed.
    for (String query : tags) {
        this.currentElements.select(query).remove();   // currentElements is modified here
    }

    // Only the first entry of currentElements is carried over (see the note above).
    Elements firstOnly = new Elements();
    firstOnly.add(this.currentElements.first());
    this.currentElements = firstOnly;
    return this;
}
 
Example 20
Source Project: CrawlerForReader   File: AxisSelector.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Returns all ancestors of the element together with the element itself.
 *
 * <p>The element is appended after the entries returned by {@code e.parents()}.
 *
 * @param e the element to start from
 * @return the list returned by {@code e.parents()} with {@code e} added at the end
 */
public Elements ancestorOrSelf(Element e) {
    Elements chain = e.parents();
    chain.add(e);
    return chain;
}