Java Code Examples for org.jsoup.nodes.Node#childNodeSize()

The following examples show how to use org.jsoup.nodes.Node#childNodeSize(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: scava   File: HtmlParser.java    License: Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Walks {@code nodeList} depth-first and appends the text of every childless
 * node named {@code "#text"} to {@code textList}.
 *
 * <p>Before a text is stored, every match of the {@code newline} matcher source
 * (declared outside this snippet) is removed from it; texts that end up empty
 * are skipped.
 *
 * @param nodeList the nodes to inspect; nodes with children are descended into
 * @param textList receives the cleaned, non-empty texts in traversal order
 */
private static void readNodes(List<Node> nodeList, List<String> textList)
{
	for(Node current : nodeList)
	{
		// Inner node: only its descendants can contribute text.
		if(current.childNodeSize()>0)
		{
			readNodes(current.childNodes(), textList);
			continue;
		}
		// Childless node that is not a text node: nothing to collect.
		if(!current.nodeName().equals("#text"))
		{
			continue;
		}
		String rawText=((TextNode) current).getWholeText();
		String cleaned=newline.matcher(rawText).replaceAll("");
		if(!cleaned.isEmpty())
		{
			textList.add(cleaned);
		}
	}
}
 
Example 2
Source Project: scava   File: HtmlParser.java    License: Eclipse Public License 2.0 6 votes vote down vote up
/**
 * Walks {@code nodeList} depth-first and records, for every childless node
 * named {@code "#text"}, an entry pairing a tag name with the node's whole text.
 *
 * <p>The tag recorded for a text node is the {@code tag} argument of the call
 * that iterates over it; recursive calls pass the parent node's name. A tag
 * equal to {@code "body"} (ignoring case) is recorded as {@code "p"}.
 *
 * @param nodeList    the nodes to inspect; nodes with children are descended into
 * @param textListMap receives one (tag, text) entry per text node, in traversal order
 * @param tag         the tag name to associate with text nodes found directly in {@code nodeList}
 */
private static void readNodesWithTags(List<Node> nodeList, List<Map.Entry<String,String>> textListMap, String tag)
{
	for(Node current : nodeList)
	{
		// Inner node: its own name becomes the tag for its direct text children.
		if(current.childNodeSize()>0)
		{
			readNodesWithTags(current.childNodes(), textListMap, current.nodeName());
			continue;
		}
		if(!current.nodeName().equals("#text"))
		{
			continue;
		}
		// Text sitting directly under the body is reported as a paragraph.
		String recordedTag=tag.equalsIgnoreCase("body") ? "p" : tag;
		String wholeText=((TextNode) current).getWholeText();
		textListMap.add(new AbstractMap.SimpleEntry<String,String>(recordedTag, wholeText));
	}
}
 
Example 3
/**
 * Performs an iterative (non-recursive) depth-first walk over {@code root}
 * and everything below it, reporting to the {@code visitor} referenced by
 * this method (declared outside this snippet).
 *
 * <p>{@code visitor.head} is invoked when a node is first reached and
 * {@code visitor.tail} once the walk leaves it; both receive the node's depth
 * relative to {@code root}, which is at depth 0.
 *
 * @param root the node at which the walk starts and ends
 */
public void traverse(Node root) {
    Node current = root;
    int level = 0;

    while (current != null) {
        visitor.head(current, level);

        // Descend to the first child whenever there is one.
        if (current.childNodeSize() > 0) {
            current = current.childNode(0);
            level++;
            continue;
        }

        // No children: close this node and every ancestor that has no further
        // sibling, never climbing above the starting depth.
        while (current.nextSibling() == null && level > 0) {
            visitor.tail(current, level);
            current = current.parentNode();
            level--;
        }
        visitor.tail(current, level);

        // Back at the starting node: the walk is complete.
        if (current == root) {
            break;
        }
        current = current.nextSibling();
    }
}
 
Example 4
/**
 * Walks {@code root} and all of its descendants depth-first, without recursion.
 *
 * <p>For each node, {@code visitor.head} is called on arrival and
 * {@code visitor.tail} on departure, each with the node's depth relative to
 * {@code root} (which is depth 0).
 *
 * @param visitor receives the head/tail callbacks
 * @param root    the node at which the walk starts and ends
 */
public static void traverse(NodeVisitor visitor, Node root) {
    int level = 0;

    // The cursor is advanced inside the body, so the loop has no update clause.
    for (Node cursor = root; cursor != null; ) {
        visitor.head(cursor, level);

        if (cursor.childNodeSize() > 0) {
            // Go one level down to the first child.
            cursor = cursor.childNode(0);
            level++;
            continue;
        }

        // Childless node: unwind through ancestors that have no next sibling,
        // stopping at the starting depth.
        while (cursor.nextSibling() == null && level > 0) {
            visitor.tail(cursor, level);
            cursor = cursor.parentNode();
            level--;
        }
        visitor.tail(cursor, level);

        if (cursor == root) {
            break;
        }
        cursor = cursor.nextSibling();
    }
}
 
Example 5
/**
 * Depth-first, loop-based traversal of {@code root} and its descendants.
 *
 * <p>{@code visitor.head} fires when a node is entered and
 * {@code visitor.tail} when it is left; the depth passed to both is counted
 * from {@code root}, which is at depth 0.
 *
 * @param visitor receives the head/tail callbacks
 * @param root    the node at which the traversal starts and ends
 */
public static void traverse(NodeVisitor visitor, Node root) {
    Node current = root;
    int depth = 0;
    boolean finished = false;

    while (!finished && current != null) {
        visitor.head(current, depth);

        if (current.childNodeSize() <= 0) {
            // Leaf: close it, plus every ancestor that has run out of siblings,
            // without rising above the starting depth.
            while (current.nextSibling() == null && depth > 0) {
                visitor.tail(current, depth);
                current = current.parentNode();
                depth--;
            }
            visitor.tail(current, depth);

            // Closing the starting node ends the traversal; otherwise move on
            // to the next sibling.
            finished = (current == root);
            if (!finished) {
                current = current.nextSibling();
            }
        } else {
            // Inner node: continue with its first child.
            current = current.childNode(0);
            depth++;
        }
    }
}
 
Example 6
Source Project: jsoup-learning   File: NodeTraversor.java    License: MIT License 6 votes vote down vote up
/**
 * Traverses {@code root} and all nodes beneath it depth-first using a loop
 * rather than recursion, notifying the {@code visitor} referenced by this
 * method (declared outside this snippet).
 *
 * <p>Each node triggers {@code visitor.head} when reached and
 * {@code visitor.tail} when left, together with its depth below {@code root}
 * (the root itself is depth 0).
 *
 * @param root the node at which the traversal starts and ends
 */
public void traverse(Node root) {
    Node position = root;
    int level = 0;

    while (position != null) {
        visitor.head(position, level);

        // A node with children is followed by its first child.
        if (position.childNodeSize() > 0) {
            position = position.childNode(0);
            level++;
            continue;
        }

        // Otherwise climb back up while there is no sibling to move to,
        // but never above the level the traversal started on.
        while (position.nextSibling() == null && level > 0) {
            visitor.tail(position, level);
            position = position.parent();
            level--;
        }
        visitor.tail(position, level);

        // The starting node has been closed: done.
        if (position == root) {
            break;
        }
        position = position.nextSibling();
    }
}
 
Example 7
/**
 * Collects childless nodes: such a node is passed to {@code mergeToResult}
 * and appended to {@code nodes} (both declared outside this snippet).
 *
 * <p>Nodes that have children are ignored, as are {@code TextNode}s whose
 * outer HTML is blank according to {@code StringUtil.isBlank}.
 *
 * @param node  the node being visited
 * @param depth the node's depth; not used by this method
 */
@Override
public void head(Node node, int depth)
{
    // Only childless nodes are of interest.
    if (node.childNodeSize() != 0) {
        return;
    }

    // Skip text nodes that render as blank.
    boolean blankText = node instanceof TextNode && StringUtil.isBlank(node.outerHtml());
    if (blankText) {
        return;
    }

    mergeToResult(node);
    nodes.add(node);
}
 
Example 8
Source Project: baleen   File: DocumentToJCasConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Recursively walks the HTML node tree rooted at {@code root}, optionally
 * appending text to the builder and attaching annotations for elements.
 *
 * <p>For each node: the builder's current offset is recorded, the node's
 * mapped text is added (when {@code captureText} is set and the text is
 * non-empty), annotations are produced if the node is an {@link Element},
 * the children are walked at {@code depth + 1}, and finally any annotations
 * are added spanning from the recorded offset to the builder's offset after
 * the children were processed.
 *
 * @param builder the builder that accumulates text and annotations
 * @param root the node to process; {@code null} is ignored
 * @param depth the depth passed along with this node's annotations
 * @param captureText whether mapped node text is appended to the builder
 */
private void walk(
    final JCasBuilder builder, final Node root, final int depth, final boolean captureText) {
  if (root == null) {
    return;
  }

  final int startOffset = builder.getCurrentOffset();

  // Emit this node's own text before descending, so that children's text follows it.
  if (captureText) {
    final String nodeText = mapToText(root);
    if (!Strings.isNullOrEmpty(nodeText)) {
      builder.addText(nodeText);
    }
  }

  // Only elements are mapped to annotations; other node types yield none.
  final List<Annotation> mapped =
      root instanceof Element
          ? mapElementToAnnotations(builder.getJCas(), (Element) root)
          : null;

  // Known limitation (noted by the original author): with multiple mappers the depth is
  // wrong, since all of a node's annotations are stored at the same depth (though they
  // also share the same begin/end and element).

  // Recurse into the children, one level deeper.
  if (root.childNodeSize() > 0) {
    for (final Node child : root.childNodes()) {
      walk(builder, child, depth + 1, captureText);
    }
  }

  // The end offset is read after the children so the span covers their text as well.
  final int endOffset = builder.getCurrentOffset();
  if (mapped == null || mapped.isEmpty()) {
    return;
  }
  builder.addAnnotations(mapped, startOffset, endOffset, depth);
}
 
Example 9
Source Project: flow   File: JsoupUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Strips every {@code Comment} node found anywhere below {@code node}.
 *
 * <p>Direct children that are comments are removed; all other children are
 * processed recursively in the same way.
 *
 * @param node
 *            a Jsoup node whose subtree should be cleared of comments
 */
static void removeCommentsRecursively(Node node) {
    // The index only advances when a child is kept: after a removal the same
    // index is inspected again (presumably the next sibling has moved into it)
    // and the child count is re-read on every pass.
    for (int index = 0; index < node.childNodeSize(); ) {
        Node candidate = node.childNode(index);
        if (!(candidate instanceof Comment)) {
            removeCommentsRecursively(candidate);
            index++;
            continue;
        }
        candidate.remove();
    }
}