Java Code Examples for org.jsoup.nodes.Node#childNodes()

The following examples show how to use org.jsoup.nodes.Node#childNodes(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: flow   File: ElementUtil.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds a {@link com.vaadin.flow.dom.Element} tree that mirrors the given
 * JSoup {@link org.jsoup.nodes.Node} and, recursively, its children.
 * <p>
 * A {@link org.jsoup.nodes.TextNode} becomes a text element and an
 * {@link org.jsoup.nodes.Element} becomes an element with the same tag name,
 * attributes and (convertible) children. Any other kind of node is reported
 * through the logger and yields an empty optional; such nodes are simply
 * left out when they occur as children.
 *
 * @param node
 *            JSoup node to convert
 * @return the converted element, or an empty optional if the node type is
 *         not supported
 */
public static Optional<Element> fromJsoup(Node node) {
    // Text nodes are leaves: convert and return straight away.
    if (node instanceof TextNode) {
        TextNode textNode = (TextNode) node;
        return Optional.of(Element.createText(textNode.text()));
    }

    // Anything that is neither text nor an element cannot be represented.
    if (!(node instanceof org.jsoup.nodes.Element)) {
        LoggerFactory.getLogger(ElementUtil.class).error(
                "Could not convert a {}, '{}' into {}!",
                Node.class.getName(), node, Element.class.getName());
        return Optional.empty();
    }

    org.jsoup.nodes.Element source = (org.jsoup.nodes.Element) node;
    Element converted = new Element(source.tagName());

    // Copy every attribute over unchanged.
    node.attributes().asList().forEach(attribute -> converted
            .setAttribute(attribute.getKey(), attribute.getValue()));

    // Convert the children in document order, dropping unsupported ones.
    for (Node child : node.childNodes()) {
        fromJsoup(child).ifPresent(converted::appendChild);
    }

    return Optional.of(converted);
}
 
Example 2
Source Project: frameworkAggregate   File: MyJsoup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads http://www.aihuhua.com/baike/ and extracts one
 * {@link FlowerCategory} per non-empty child of the first element carrying
 * the CSS class {@code catelist}.
 * <p>
 * Within each such child, an {@code a} node supplies the category URL (its
 * {@code href}) and the image path (the {@code src} of its second child
 * node), and an {@code h2} node supplies the category name (its
 * {@code title}).
 *
 * @return the categories found; empty if the page could not be fetched or
 *         contains no {@code catelist} element
 */
private static List<FlowerCategory> getCategoryList() {

		List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

		try {
			Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
			Element cates = doc.getElementsByClass("catelist").first();
			if (cates == null) {
				// first() yields null when nothing matched (e.g. the page
				// layout changed); report "no categories" instead of an NPE.
				return categories;
			}
			for (Node node : cates.childNodes()) {
				List<Node> childs = node.childNodes();
				if (childs == null || childs.isEmpty()) {
					// Leaf nodes (such as whitespace text) carry no category.
					continue;
				}
				FlowerCategory category = new FlowerCategory();
				for (Node child : childs) {
					String childName = child.nodeName();
					if ("a".equals(childName)) {
						category.setUrl(child.attr("href"));
						// The image is read from the anchor's second child
						// node; skip it when the anchor has fewer children
						// rather than failing with an index error.
						if (child.childNodeSize() > 1) {
							category.setImgPath(child.childNode(1).attr("src"));
						}
					} else if ("h2".equals(childName)) {
						category.setName(child.attr("title"));
					}
				}
				categories.add(category);
			}
		} catch (IOException e) {
			// Best effort: a failed download results in an empty list.
			e.printStackTrace();
		}

		return categories;
	}
 
Example 3
Source Project: frameworkAggregate   File: TestJsoup.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the flower encyclopedia index at http://www.aihuhua.com/baike/
 * and turns the children of its first {@code catelist} element into
 * {@link FlowerCategory} objects.
 * <p>
 * For every child that itself has child nodes, a category is created: the
 * {@code href} of an {@code a} node becomes the URL, the {@code src} of that
 * anchor's second child node becomes the image path, and the {@code title}
 * of an {@code h2} node becomes the name.
 *
 * @return the extracted categories; an empty list when the download fails
 *         or the page has no element with class {@code catelist}
 */
private static List<FlowerCategory> getCategoryList() {

		List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

		try {
			Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
			Elements catelist = doc.getElementsByClass("catelist");
			Element cates = catelist.first();
			// first() returns null for an empty match set, so only walk the
			// container when it is actually present.
			List<Node> childNodes = (cates == null) ? new ArrayList<Node>() : cates.childNodes();
			for (int i = 0; i < childNodes.size(); i++) {
				Node node = childNodes.get(i);
				List<Node> childs = node.childNodes();
				if (childs != null && childs.size() > 0) {
					FlowerCategory category = new FlowerCategory();
					for (int j = 0; j < childs.size(); j++) {
						Node child = childs.get(j);
						if ("a".equals(child.nodeName())) {
							category.setUrl(child.attr("href"));
							// Guard the fixed index: an anchor with fewer than
							// two child nodes has no image to read.
							if (child.childNodeSize() > 1) {
								category.setImgPath(child.childNode(1).attr("src"));
							}
						} else if ("h2".equals(child.nodeName())) {
							category.setName(child.attr("title"));
						}
					}
					categories.add(category);
				}
			}
		} catch (IOException e) {
			// Best effort: on a network failure the (empty) list is returned.
			e.printStackTrace();
		}

		return categories;
	}
 
Example 4
Source Project: emotional_analysis   File: IpProxy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the page at {@code url} and reads proxy IP/port pairs out of a
 * fixed position in its DOM.
 * <p>
 * The rows are taken from
 * {@code body.childNode(11).childNode(3).childNode(5).childNode(1)}; every
 * second child from index 2 up to index 30 (at most 15 rows) is treated as a
 * row whose child 3 holds the IP and child 5 holds the port. Rows that are
 * missing, or that have too few cells, are skipped.
 * <p>
 * NOTE(review): the request carries a hard-coded session cookie and CSRF
 * token, and Referer/Origin/Host headers for music.163.com regardless of
 * {@code url} - these look like they should be externalized; confirm with
 * the caller.
 *
 * @param url
 *            address of the proxy list page
 * @return the proxies that could be read, in page order
 * @throws Exception
 *             if the request fails, the expected DOM path does not exist, or
 *             a port is not a valid integer
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	List<IpEntity> ipList = new ArrayList<>();
	Response execute = Jsoup.connect(url)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "max-age=60").header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com").header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie",
					"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
			.method(Method.GET).ignoreContentType(true)
			// Timeout is in milliseconds: roughly 24 days, i.e. effectively unbounded.
			.timeout(2099999999).execute();
	Document pageJson = execute.parse();
	Element body = pageJson.body();
	List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();
	// Original note: put the first 10 proxy IPs into the list. The loop
	// actually visits indices 2, 4, ..., 30, i.e. up to 15 rows; presumably
	// the skipped odd indices are whitespace text nodes - TODO confirm.
	// The size check stops early on a short table instead of throwing.
	for(int i = 2;i <= 30 && i < childNodes.size();i += 2){
		Node node = childNodes.get(i);
		List<Node> nodes = node.childNodes();
		if (nodes.size() <= 5
				|| nodes.get(3).childNodeSize() == 0
				|| nodes.get(5).childNodeSize() == 0) {
			// Not a complete data row: the IP or port cell is missing.
			continue;
		}
		// toString() renders the node as HTML, which may be padded with
		// whitespace; trim so the IP is clean and the port parses.
		String ip = nodes.get(3).childNode(0).toString().trim();
		int port = Integer.parseInt(nodes.get(5).childNode(0).toString().trim());
		IpEntity ipEntity = new IpEntity();
		ipEntity.setIp(ip);
		ipEntity.setPort(port);
		ipList.add(ipEntity);
	}
	return ipList;
}
 
Example 5
Source Project: M2Doc   File: M2DocHTMLParser.java    License: Eclipse Public License 1.0 5 votes vote down vote up
/**
 * Converts an HTML table node into an {@link MTable} and appends it to the
 * given parent.
 * <p>
 * Each {@code tr} child becomes a row and each {@code th}/{@code td} child of
 * a row becomes a cell; all other children are ignored. Header cells are
 * centered and their content is rendered with a copy of the context that has
 * the bold modifier set.
 * 
 * @param parent
 *            the parent {@link MList}
 * @param context
 *            the current {@link Context}
 * @param node
 *            the table {@link Node};
 */
private void insertTable(MList parent, Context context, Node node) {
    final MTable table = new MTableImpl();
    parent.add(table);

    for (Node rowNode : node.childNodes()) {
        if (!"tr".equals(rowNode.nodeName())) {
            continue;
        }
        final MRow row = new MRowImpl();
        table.getRows().add(row);

        for (Node cellNode : rowNode.childNodes()) {
            final String cellTag = cellNode.nodeName();
            final boolean isHeader = "th".equals(cellTag);
            if (!isHeader && !"td".equals(cellTag)) {
                continue;
            }

            final MList contents = new MListImpl();
            final MCell cell = new MCellImpl(contents, null);
            final Context cellContext;
            if (isHeader) {
                // Header cells: centered, bold, and isolated from the
                // caller's context by working on a copy.
                cell.setHAlignment(HAlignment.CENTER);
                cellContext = context.copy();
                setModifiers(cellContext, MStyle.FONT_BOLD);
            } else {
                cellContext = context;
            }
            row.getCells().add(cell);

            // Render the cell's own content into its list.
            for (Node contentNode : cellNode.childNodes()) {
                walkNodeTree(contents, cellContext, contentNode);
            }
        }
    }
}
 
Example 6
Source Project: SnowGraph   File: StackOverflowParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Collects the code snippets found below the given node.
 * <p>
 * Children named {@code p} or {@code li} are skipped together with their
 * subtrees. For a {@code pre} child, every direct {@code code} child
 * contributes one {@link CodeInfo} built from its HTML-unescaped text. Any
 * other child is searched recursively.
 *
 * @param node the node whose descendants are searched
 * @return the snippets in document order; never {@code null}
 */
private static List<CodeInfo> parseHTMLNodeToParagraphs(Node node) {
	List<CodeInfo> snippets = new ArrayList<>();
	for (Node child : node.childNodes()) {
		String tag = child.nodeName();
		if (tag.equals("p") || tag.equals("li")) {
			continue;
		}
		if (!tag.equals("pre")) {
			// Not a code container itself: look deeper.
			snippets.addAll(parseHTMLNodeToParagraphs(child));
			continue;
		}
		for (Node preChild : child.childNodes()) {
			if (preChild.nodeName().equals("code")) {
				String rawText = ((Element) preChild).text();
				snippets.add(new CodeInfo(StringEscapeUtils.unescapeHtml4(rawText)));
			}
		}
	}
	return snippets;
}
 
Example 7
Source Project: baleen   File: DocumentToJCasConverter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Recursively visits {@code root} and its descendants, appending text to the builder and
 * registering annotations that span the text contributed by each element's subtree.
 *
 * <p>For every node the builder offset is sampled before and after the node (including its
 * children) has been processed; annotations mapped from an element are added with that range
 * and the node's depth.
 *
 * @param builder the builder receiving text and annotations
 * @param root the node to process; {@code null} is ignored
 * @param depth the depth of {@code root}; children are visited with {@code depth + 1}
 * @param captureText whether the text mapped from each node is appended to the builder
 */
private void walk(
    final JCasBuilder builder, final Node root, final int depth, final boolean captureText) {
  if (root == null) {
    return;
  }

  final int begin = builder.getCurrentOffset();

  // Emit this node's own text first so the children's text follows it.
  if (captureText) {
    final String nodeText = mapToText(root);
    if (!Strings.isNullOrEmpty(nodeText)) {
      builder.addText(nodeText);
    }
  }

  // Only elements can produce annotations.
  final List<Annotation> annotations;
  if (root instanceof Element) {
    annotations = mapElementToAnnotations(builder.getJCas(), (Element) root);
  } else {
    annotations = null;
  }

  // BUG: With multiple mappers depth here is wrong! It puts all mappers at the same depth...
  // (though in fairness they are all the same begin-end and same element too)

  // Descend before closing the range so it covers the whole subtree.
  if (root.childNodeSize() > 0) {
    for (final Node child : root.childNodes()) {
      walk(builder, child, depth + 1, captureText);
    }
  }

  final int end = builder.getCurrentOffset();
  if (annotations == null || annotations.isEmpty()) {
    return;
  }
  builder.addAnnotations(annotations, begin, end, depth);
}
 
Example 8
Source Project: quarkus   File: JavaDocParser.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Appends an AsciiDoc rendering of the children of {@code node} to {@code sb}.
 * <p>
 * Each child is dispatched on its node name: known inline and block tags are
 * wrapped in their AsciiDoc markers and their content is rendered
 * recursively, text nodes are appended escaped, and any unrecognized node is
 * rendered as just its content.
 *
 * @param sb
 *            the buffer to append to
 * @param node
 *            the node whose children are rendered
 */
private void appendHtml(StringBuilder sb, Node node) {
    for (Node child : node.childNodes()) {
        switch (child.nodeName()) {
            // --- block-level structure ---
            case PARAGRAPH_NODE:
                sb.append(NEW_LINE);
                appendHtml(sb, child);
                break;
            case ORDERED_LIST_NODE:
            case UN_ORDERED_LIST_NODE:
                // The list itself adds nothing; its items emit the markers.
                appendHtml(sb, child);
                break;
            case LIST_ITEM_NODE:
                // The item marker depends on the kind of the enclosing list.
                final boolean inOrderedList = child.parent().nodeName().equals(ORDERED_LIST_NODE);
                final String itemMarker = inOrderedList
                        ? ORDERED_LIST_ITEM_ASCIDOC_STYLE
                        : UNORDERED_LIST_ITEM_ASCIDOC_STYLE;
                sb.append(NEW_LINE).append(itemMarker);
                appendHtml(sb, child);
                break;

            // --- links ---
            case LINK_NODE:
                final String href = child.attr(HREF_ATTRIBUTE);
                sb.append("link:").append(href);
                // Render the link text separately so it can be trimmed.
                final StringBuilder linkText = new StringBuilder();
                appendHtml(linkText, child);
                sb.append(String.format(LINK_ATTRIBUTE_FORMAT, linkText.toString().trim()));
                break;

            // --- inline formatting with symmetric markers ---
            case CODE_NODE:
                sb.append(BACKTICK);
                appendHtml(sb, child);
                sb.append(BACKTICK);
                break;
            case BOLD_NODE:
            case EMPHASIS_NODE:
                sb.append(STAR);
                appendHtml(sb, child);
                sb.append(STAR);
                break;
            case ITALICS_NODE:
                sb.append(UNDERSCORE);
                appendHtml(sb, child);
                sb.append(UNDERSCORE);
                break;
            case SUB_SCRIPT_NODE:
                sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
                appendHtml(sb, child);
                sb.append(SUB_SCRIPT_ASCIDOC_STYLE);
                break;
            case SUPER_SCRIPT_NODE:
                sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
                appendHtml(sb, child);
                sb.append(SUPER_SCRIPT_ASCIDOC_STYLE);
                break;

            // --- inline formatting written as <style> + HASH ... HASH ---
            case UNDERLINE_NODE:
                sb.append(UNDERLINE_ASCIDOC_STYLE).append(HASH);
                appendHtml(sb, child);
                sb.append(HASH);
                break;
            case SMALL_NODE:
                sb.append(SMALL_ASCIDOC_STYLE).append(HASH);
                appendHtml(sb, child);
                sb.append(HASH);
                break;
            case BIG_NODE:
                sb.append(BIG_ASCIDOC_STYLE).append(HASH);
                appendHtml(sb, child);
                sb.append(HASH);
                break;
            case DEL_NODE:
            case S_NODE:
            case STRIKE_NODE:
                sb.append(LINE_THROUGH_ASCIDOC_STYLE).append(HASH);
                appendHtml(sb, child);
                sb.append(HASH);
                break;

            // --- leaves ---
            case NEW_LINE_NODE:
                sb.append(NEW_LINE);
                break;
            case TEXT_NODE:
                final String text = ((TextNode) child).text();
                appendEscapedAsciiDoc(sb, text);
                break;

            // --- anything else: keep the content, drop the tag ---
            default:
                appendHtml(sb, child);
                break;
        }
    }
}