Java Code Examples for org.jsoup.nodes.Element#childNodes()

The following examples show how to use org.jsoup.nodes.Element#childNodes(). You can vote up the examples you like or vote down the ones you don't, and follow the links above each example to reach the original project or source file. Related API usage is listed in the sidebar.
Example 1
Source File: ElementOperator.java    From xsoup with MIT License 6 votes vote down vote up
/**
 * Extracts text from the direct {@link TextNode} children of {@code element}.
 *
 * <p>When {@code group} is 0, the text of every direct text node is concatenated.
 * Otherwise the text of the {@code group}-th direct text node (1-based) is returned;
 * if there are fewer text nodes than that, the result is an empty string.
 *
 * @param element element whose direct children are inspected
 * @return the selected text, or an empty string when nothing matches
 */
@Override
public String operate(Element element) {
    StringBuilder joined = new StringBuilder();
    int seenTextNodes = 0;
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        TextNode current = (TextNode) child;
        if (group == 0) {
            joined.append(current.text());
            continue;
        }
        // Text nodes are only counted when a specific group was requested.
        seenTextNodes++;
        if (seenTextNodes == group) {
            return current.text();
        }
    }
    return joined.toString();
}
 
Example 2
Source File: OutputFormatter.java    From Xndroid with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Recursively appends the text found beneath {@code e} to {@code accum}.
 *
 * <p>Children for which {@code unlikely(child)} returns true are skipped entirely.
 * A single space is inserted before descending into a block element (when the
 * buffer is non-empty and does not already end in whitespace) or into a
 * {@code br} element.
 *
 * @param e      element whose children are walked
 * @param accum  buffer receiving the text
 * @param indent current recursion depth (incremented on each descent)
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node node : e.childNodes()) {
        if (unlikely(node)) {
            continue;
        }
        if (node instanceof TextNode) {
            accum.append(((TextNode) node).text());
            continue;
        }
        if (!(node instanceof Element)) {
            continue;
        }
        Element nested = (Element) node;
        boolean blockNeedsGap = accum.length() > 0
                && nested.isBlock()
                && !lastCharIsWhitespace(accum);
        if (blockNeedsGap || nested.tagName().equals("br")) {
            accum.append(' ');
        }
        appendTextSkipHidden(nested, accum, indent + 1);
    }
}
 
Example 3
Source File: HtmlRenderer.java    From kafka-connect-couchbase with Apache License 2.0 6 votes vote down vote up
/**
 * Appends a plain-text rendering of {@code node} and its descendants to {@code out}.
 *
 * <p>Text nodes are appended as-is, except that their leading whitespace is trimmed
 * when the buffer is empty or already ends in whitespace. Before the children of a
 * {@code p} or {@code br} element are rendered, the buffer is right-trimmed and,
 * if it is non-empty, {@code PARAGRAPH_SEPARATOR} is appended. Nodes that are
 * neither text nor elements contribute nothing.
 *
 * @param node node to render
 * @param out  buffer receiving the rendered text
 */
private static void renderAsPlaintext(Node node, StringBuilder out) {
  if (node instanceof TextNode) {
    String content = ((TextNode) node).text();
    boolean dropLeadingSpace = out.length() == 0 || endsWithWhitespace(out);
    out.append(dropLeadingSpace ? trimLeft(content) : content);
    return;
  }

  if (!(node instanceof Element)) {
    return;
  }

  Element element = (Element) node;
  String tag = element.tagName();
  boolean startsParagraph = tag.equals("p") || tag.equals("br");
  if (startsParagraph) {
    trimRight(out);
    if (out.length() > 0) {
      out.append(PARAGRAPH_SEPARATOR);
    }
  }

  for (Node descendant : element.childNodes()) {
    renderAsPlaintext(descendant, out);
  }
}
 
Example 4
Source File: ElementOperator.java    From zongtui-webcrawler with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Returns text taken from the direct text-node children of {@code element}.
 *
 * <p>A {@code group} of 0 selects all direct text nodes, concatenated in order.
 * A non-zero {@code group} selects only the text node at that 1-based position;
 * when no such node exists an empty string is returned.
 *
 * @param element element to read direct text nodes from
 * @return the concatenated or selected text
 */
@Override
public String operate(Element element) {
    final boolean collectAll = (group == 0);
    StringBuilder buffer = new StringBuilder();
    int position = 0;
    for (Node candidate : element.childNodes()) {
        if (candidate instanceof TextNode) {
            TextNode piece = (TextNode) candidate;
            if (collectAll) {
                buffer.append(piece.text());
            } else {
                position += 1;
                if (position == group) {
                    return piece.text();
                }
            }
        }
    }
    return buffer.toString();
}
 
Example 5
Source File: DeepTextElementBuilder.java    From Asqatasun with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Builds the text of {@code element} by walking its subtree recursively.
 *
 * <p>If the element carries the {@code ALT_ATTR} attribute, the text produced by
 * {@code altAttrTextBuilder} is added first. Each non-blank direct text node
 * contributes its trimmed text, and each child element contributes the result of a
 * recursive call. Every contribution is preceded by {@code SPACER}, and the final
 * result is trimmed.
 *
 * @param element root of the subtree to read
 * @return the trimmed, assembled text
 */
@Override
public String buildTextFromElement(Element element) {
    StringBuilder assembled = new StringBuilder();
    if (element.hasAttr(ALT_ATTR)) {
        assembled.append(SPACER);
        assembled.append(altAttrTextBuilder.buildTextFromElement(element));
    }
    for (Node node : element.childNodes()) {
        if (node instanceof TextNode) {
            TextNode textNode = (TextNode) node;
            // Blank text nodes contribute neither a spacer nor text.
            if (!textNode.isBlank()) {
                assembled.append(SPACER);
                assembled.append(StringUtils.trim(textNode.text()));
            }
        } else if (node instanceof Element) {
            assembled.append(SPACER);
            assembled.append(buildTextFromElement((Element) node));
        }
    }
    return StringUtils.trim(assembled.toString());
}
 
Example 6
Source File: OutputFormatter.java    From JumpGo with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Walks the children of {@code e} depth-first and appends their text to {@code accum}.
 *
 * <p>Any child for which {@code unlikely(child)} is true is ignored together with
 * its subtree. Before recursing into a child element, one space is appended when
 * either (a) the buffer is non-empty, the element is a block element and the buffer
 * does not end in whitespace, or (b) the element's tag name is {@code br}.
 *
 * @param e      parent element
 * @param accum  output buffer
 * @param indent recursion depth, passed on as {@code indent + 1}
 */
private void appendTextSkipHidden(Element e, StringBuilder accum, int indent) {
    for (Node current : e.childNodes()) {
        if (unlikely(current)) {
            continue;
        }

        if (current instanceof TextNode) {
            String content = ((TextNode) current).text();
            accum.append(content);
        } else if (current instanceof Element) {
            Element childElement = (Element) current;
            boolean separateBlock = accum.length() > 0
                    && childElement.isBlock()
                    && !lastCharIsWhitespace(accum);
            // The br check is only reached when the block rule did not apply.
            boolean insertSpace = separateBlock || childElement.tagName().equals("br");
            if (insertSpace) {
                accum.append(' ');
            }
            appendTextSkipHidden(childElement, accum, indent + 1);
        }
    }
}
 
Example 7
Source File: CssSelector.java    From webmagic with Apache License 2.0 5 votes vote down vote up
/**
 * Concatenates the text of the direct {@link TextNode} children of {@code element}.
 * Text inside nested elements is not included.
 *
 * @param element element whose own text is wanted
 * @return the joined text of its direct text nodes (empty if there are none)
 */
protected String getText(Element element) {
    StringBuilder ownText = new StringBuilder();
    for (Node child : element.childNodes()) {
        if (!(child instanceof TextNode)) {
            continue;
        }
        ownText.append(((TextNode) child).text());
    }
    return ownText.toString();
}
 
Example 8
Source File: HtmlHelper.java    From FairEmail with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Limits the amount of text in document {@code d}, editing the document in place.
 *
 * <p>Every element matched by {@code d.select("*")} is visited and the lengths of
 * its direct text nodes are summed. The text node that makes the running total reach
 * the limit is cut and suffixed with {@code " ..."}; later text nodes of that same
 * element are emptied; elements visited after the limit was reached are removed.
 *
 * @param d        document to truncate (modified in place)
 * @param reformat selects the limit: {@code MAX_FORMAT_TEXT_SIZE} when true,
 *                 {@code MAX_FULL_TEXT_SIZE} otherwise
 * @return true when the accumulated text length reached the limit
 */
static boolean truncate(Document d, boolean reformat) {
    int max = (reformat ? MAX_FORMAT_TEXT_SIZE : MAX_FULL_TEXT_SIZE);

    // Running total of text-node lengths; images is only used for the log line.
    int length = 0;
    int images = 0;
    for (Element elm : d.select("*")) {
        if ("img".equals(elm.tagName()))
            images++;

        // Set once this element's own text has been cut at the limit.
        boolean skip = false;
        for (Node child : elm.childNodes()) {
            if (child instanceof TextNode) {
                TextNode tnode = ((TextNode) child);
                String text = tnode.getWholeText();

                if (length < max) {
                    if (length + text.length() >= max) {
                        // This node crosses the limit: keep only the part that fits.
                        text = text.substring(0, max - length) + " ...";
                        tnode.text(text);
                        skip = true;
                    }
                } else {
                    // Limit already reached: blank the rest of the element that was cut.
                    if (skip)
                        tnode.text("");
                }

                // Blanked nodes still add their original length to the total.
                length += text.length();
            }
        }

        // Elements reached after the limit (and not the one that was cut) are dropped.
        if (length >= max && !skip)
            elm.remove();
    }

    Log.i("Message size=" + length + " images=" + images);

    return (length >= max);
}
 
Example 9
Source File: Evaluator.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Tests whether {@code element} has no children other than comments, XML
 * declarations and document-type nodes.
 *
 * @param root    root of the evaluation (not used here)
 * @param element element to test
 * @return true when every child node is a {@code Comment}, {@code XmlDeclaration}
 *         or {@code DocumentType} (including when there are no children)
 */
@Override
public boolean matches(Element root, Element element) {
    for (Node child : element.childNodes()) {
        boolean ignorable = child instanceof Comment
                || child instanceof XmlDeclaration
                || child instanceof DocumentType;
        if (!ignorable) {
            return false;
        }
    }
    return true;
}
 
Example 10
Source File: Evaluator.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns true when {@code element} contains only comment, XML-declaration or
 * document-type child nodes; an element without children also matches.
 *
 * @param root    evaluation root (unused)
 * @param element candidate element
 * @return false as soon as any other kind of child node is found
 */
@Override
public boolean matches(Element root, Element element) {
    List<Node> children = element.childNodes();
    for (int i = 0; i < children.size(); i++) {
        Node current = children.get(i);
        if (current instanceof Comment) {
            continue;
        }
        if (current instanceof XmlDeclaration) {
            continue;
        }
        if (current instanceof DocumentType) {
            continue;
        }
        return false;
    }
    return true;
}
 
Example 11
Source File: Evaluator.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that every child node of {@code element} is a comment, an XML declaration
 * or a document type. An element with no child nodes passes the check.
 *
 * @param root    evaluation root (unused)
 * @param element element whose children are examined
 * @return true when no other node type occurs among the children
 */
@Override
public boolean matches(Element root, Element element) {
    for (Node node : element.childNodes()) {
        // De Morgan form of "not (comment or declaration or doctype)".
        if (!(node instanceof Comment)
                && !(node instanceof XmlDeclaration)
                && !(node instanceof DocumentType)) {
            return false;
        }
    }
    return true;
}
 
Example 12
Source File: Evaluator.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Decides whether {@code element} counts as empty: its child nodes may only be
 * comments, XML declarations or document-type nodes.
 *
 * @param root    evaluation root (unused)
 * @param element element under test
 * @return true if all child nodes are of those three types, or there are none
 */
@Override
public boolean matches(Element root, Element element) {
    List<Node> family = element.childNodes();
    boolean onlyIgnorable = true;
    for (Node member : family) {
        boolean ignorable = member instanceof Comment
                || member instanceof XmlDeclaration
                || member instanceof DocumentType;
        if (!ignorable) {
            onlyIgnorable = false;
            break;
        }
    }
    return onlyIgnorable;
}
 
Example 13
Source File: CommonParser.java    From ZfsoftCampusAssit with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the term choices from the element with id {@code xqd} in {@code rawHtml}.
 *
 * <p>Every child node carrying a {@code value} attribute has that value added to
 * {@code setting.ownTerms}. The child that also carries a {@code selected}
 * attribute supplies {@code setting.currentTerm}.
 *
 * <p>If the page has no element with id {@code xqd}, {@code setting} is left
 * untouched.
 *
 * @param rawHtml HTML of the page containing the term selector
 * @param setting receives the parsed terms
 */
public void parseCollegeTerms(String rawHtml, Setting setting) {
    Element termSelect = Jsoup.parse(rawHtml).getElementById("xqd");
    if (termSelect == null) {
        // getElementById returns null when the id is absent; avoid the NPE.
        return;
    }
    for (Node termNode : termSelect.childNodes()) {
        if (!termNode.hasAttr("value")) {
            continue;
        }
        String term = termNode.attr("value");
        setting.ownTerms.add(term);
        if (termNode.hasAttr("selected")) {
            // "selected" is a boolean attribute whose own value is "" or "selected";
            // the current term is the option's value, not that marker text.
            setting.currentTerm = term;
        }
    }
}
 
Example 14
Source File: CommonParser.java    From ZfsoftCampusAssit with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the academic-year choices from the element with id {@code xnd} in
 * {@code rawHtml}.
 *
 * <p>Every child node carrying a {@code value} attribute has that value added to
 * {@code setting.ownYears}. The child that also carries a {@code selected}
 * attribute supplies {@code setting.currentYear}.
 *
 * <p>If the page has no element with id {@code xnd}, {@code setting} is left
 * untouched.
 *
 * @param rawHtml HTML of the page containing the year selector
 * @param setting receives the parsed years
 */
public void parseCollegeYears(String rawHtml, Setting setting) {
    Element yearSelect = Jsoup.parse(rawHtml).getElementById("xnd");
    if (yearSelect == null) {
        // getElementById returns null when the id is absent; avoid the NPE.
        return;
    }
    for (Node yearNode : yearSelect.childNodes()) {
        if (!yearNode.hasAttr("value")) {
            continue;
        }
        String year = yearNode.attr("value");
        setting.ownYears.add(year);
        if (yearNode.hasAttr("selected")) {
            // "selected" is a boolean attribute whose own value is "" or "selected";
            // the current year is the option's value, not that marker text.
            setting.currentYear = year;
        }
    }
}
 
Example 15
Source File: CommentParser.java    From Ouroboros with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts the direct children of {@code bodyLine} into one styled character sequence.
 *
 * <p>Text nodes go through {@code parseNormalText}. Child elements are dispatched on
 * their tag name: {@code span}, {@code em}, {@code strong}, {@code u}, {@code s} and
 * {@code a} each use their dedicated parser, and any other tag is handled like normal
 * text using the element's text. Nodes that are neither text nor elements are skipped.
 *
 * @param bodyLine         element holding one line of the comment body
 * @param currentBoard     passed through to {@code parseAnchorText}
 * @param resto            passed through to {@code parseAnchorText}
 * @param fragmentManager  passed through to {@code parseAnchorText}
 * @param infiniteDbHelper passed through to {@code parseAnchorText}
 * @return the concatenation of all parsed pieces, in child order
 */
private CharSequence parseFormatting(Element bodyLine, String currentBoard, String resto, FragmentManager fragmentManager, InfiniteDbHelper infiniteDbHelper){
    CharSequence result = "";
    for (Node node : bodyLine.childNodes()){
        CharSequence piece;
        if (node instanceof TextNode){
            piece = parseNormalText(new SpannableString(((TextNode) node).text()));
        } else if (node instanceof Element){
            Element tagged = (Element) node;
            switch (tagged.tagName()){
                case "span":
                    piece = parseSpanText(tagged);
                    break;
                case "em":
                    piece = parseItalicText(new SpannableString(tagged.text()));
                    break;
                case "strong":
                    piece = parseBoldText(new SpannableString(tagged.text()));
                    break;
                case "u":
                    piece = parseUnderlineText(new SpannableString(tagged.text()));
                    break;
                case "s":
                    piece = parseStrikethroughText(new SpannableString(tagged.text()));
                    break;
                case "a":
                    piece = parseAnchorText(tagged, currentBoard, resto, fragmentManager, infiniteDbHelper);
                    break;
                default:
                    piece = parseNormalText(new SpannableString(tagged.text()));
                    break;
            }
        } else {
            // Other node kinds add nothing to the output.
            continue;
        }
        result = TextUtils.concat(result, piece);
    }
    return result;
}
 
Example 16
Source File: CssSelector.java    From zongtui-webcrawler with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Returns the element's own text: the text of its direct {@link TextNode} children
 * joined in document order. Descendant elements are ignored.
 *
 * @param element element to read
 * @return the joined direct text, possibly empty
 */
protected String getText(Element element) {
    StringBuilder collected = new StringBuilder();
    for (Node candidate : element.childNodes()) {
        boolean isText = candidate instanceof TextNode;
        if (isText) {
            String fragment = ((TextNode) candidate).text();
            collected.append(fragment);
        }
    }
    return collected.toString();
}
 
Example 17
Source File: DebateHTMLParser.java    From argument-reasoning-comprehension-task with Apache License 2.0 5 votes vote down vote up
/**
 * Flattens the direct children of a paragraph element into plain text.
 * <p>
 * Text nodes are copied verbatim, a {@code br} becomes a newline, an {@code a}
 * is replaced by its {@code href} value surrounded by spaces, and any other
 * child element is replaced by its text surrounded by spaces.
 *
 * @param pElement paragraph element
 * @return plain text
 */
public static String paragraphElementToString(Element pElement)
{
    StringBuilder plain = new StringBuilder();
    for (Node node : pElement.childNodes()) {
        if (node instanceof TextNode) {
            plain.append(((TextNode) node).text());
            continue;
        }
        if (!(node instanceof Element)) {
            continue;
        }

        Element inline = (Element) node;
        String tagName = inline.tag().getName();

        if ("br".equals(tagName)) {
            // line break
            plain.append("\n");
            continue;
        }

        // links contribute their target, everything else its text
        String payload = "a".equals(tagName) ? inline.attr("href") : inline.text();
        plain.append(" ").append(payload).append(" ");
    }

    return plain.toString();
}
 
Example 18
Source File: CssSelector.java    From NetDiscovery with Apache License 2.0 5 votes vote down vote up
/**
 * Collects the text held directly by {@code element}, i.e. the text of each child
 * that is a {@link TextNode}, without descending into child elements.
 *
 * @param element element whose direct text nodes are read
 * @return their text concatenated in order; empty when there are no text nodes
 */
protected String getText(Element element) {
    StringBuilder directText = new StringBuilder();
    for (Node item : element.childNodes()) {
        if (item instanceof TextNode) {
            directText.append(((TextNode) item).text());
        }
    }
    return directText.toString();
}
 
Example 19
Source File: TestJsoup.java    From frameworkAggregate with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the category page and builds one {@code FlowerCategory} per child of
 * the first element with class {@code catelist} that itself has child nodes.
 *
 * <p>Within each such child, an {@code a} node supplies the URL ({@code href}) and,
 * when it has a second child node, the image path (that node's {@code src}); an
 * {@code h2} node supplies the name ({@code title}).
 *
 * @return the parsed categories; empty when the page has no {@code catelist}
 *         element or the download fails
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Element cates = doc.getElementsByClass("catelist").first();
		if (cates == null) {
			// first() yields null when nothing matched; nothing to parse.
			return categories;
		}
		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs == null || childs.isEmpty()) {
				continue;
			}
			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				if ("a".equals(child.nodeName())) {
					category.setUrl(child.attr("href"));
					// Only read the second child when it exists, so a changed
					// page layout does not raise IndexOutOfBoundsException.
					if (child.childNodeSize() > 1) {
						category.setImgPath(child.childNode(1).attr("src"));
					}
				} else if ("h2".equals(child.nodeName())) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		// Best effort: report the failure and return what was collected.
		e.printStackTrace();
	}

	return categories;
}
 
Example 20
Source File: MyJsoup.java    From frameworkAggregate with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the category overview page and turns each populated child of the first
 * {@code catelist} element into a {@code FlowerCategory}.
 *
 * <p>For every such child, an {@code a} node provides the URL ({@code href}) and,
 * if it has at least two child nodes, the image path ({@code src} of the second
 * one); an {@code h2} node provides the name ({@code title}).
 *
 * @return the parsed categories; empty when the page has no {@code catelist}
 *         element or the download fails
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Elements catelist = doc.getElementsByClass("catelist");
		Element cates = catelist.first();
		if (cates == null) {
			// No element with that class: first() returned null.
			return categories;
		}
		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs == null || childs.isEmpty()) {
				continue;
			}
			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				String nodeName = child.nodeName();
				if ("a".equals(nodeName)) {
					category.setUrl(child.attr("href"));
					// Guard the index access; an anchor with fewer than two
					// children would otherwise throw IndexOutOfBoundsException.
					if (child.childNodeSize() > 1) {
						category.setImgPath(child.childNode(1).attr("src"));
					}
				} else if ("h2".equals(nodeName)) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		// Best effort: report the failure and return what was collected.
		e.printStackTrace();
	}

	return categories;
}