Java Code Examples for org.jsoup.nodes.Element#children()

The following examples show how to use org.jsoup.nodes.Element#children(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PomParserServiceImpl.java    From artifact-listener with Apache License 2.0 8 votes vote down vote up
/**
 * Converts a set of dependency elements into a sorted list of artifact beans.
 *
 * <p>For each element, the direct children named {@code groupid}, {@code artifactid} and
 * {@code type} supply the corresponding bean properties. A bean is kept only when both its
 * group id and artifact id contain text; its id is then set to {@code groupId:artifactId}.
 *
 * @param elements the dependency elements to read
 * @return the retained beans, sorted with {@link Collections#sort(List)}
 */
private List<ArtifactBean> populate(Elements elements) {
	List<ArtifactBean> artifacts = Lists.newArrayList();

	for (Element dependency : elements) {
		ArtifactBean bean = new ArtifactBean();

		for (Element field : dependency.children()) {
			String tag = field.nodeName();
			if (tag.equals("groupid")) {
				bean.setGroupId(field.text());
			} else if (tag.equals("artifactid")) {
				bean.setArtifactId(field.text());
			} else if (tag.equals("type")) {
				bean.setType(field.text());
			}
		}

		// A dependency without both coordinates cannot be identified; drop it.
		boolean identified = StringUtils.hasText(bean.getGroupId())
				&& StringUtils.hasText(bean.getArtifactId());
		if (!identified) {
			continue;
		}

		bean.setId(bean.getGroupId() + ":" + bean.getArtifactId());
		artifacts.add(bean);
	}

	Collections.sort(artifacts);
	return artifacts;
}
 
Example 2
Source File: JsoupInstanceListExtractor.java    From wandora with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Creates or fetches a topic named after the element's text, adds a type to it, and hands every
 * {@code ul} child of the element's next sibling to {@code parseList}.
 *
 * @param classElement element whose text names the topic
 * @param typeTopic type to add to the topic; {@code wandoraClass} is used when this is null
 * @throws TopicMapException propagated from the topic map operations
 */
private void parseTopic(Element classElement, Topic typeTopic) throws TopicMapException {
    final String label = classElement.text();
    System.out.println(label);

    Topic topic = getOrCreateTopic(tm, null, label);
    topic.addType(typeTopic != null ? typeTopic : wandoraClass);

    // The element following this one may wrap one or more lists.
    Element listWrapper = classElement.nextElementSibling();
    if (listWrapper == null) {
        return;
    }
    for (Element candidate : listWrapper.children()) {
        if (candidate.tagName().equals("ul")) {
            parseList(candidate, topic);
        }
    }
}
 
Example 3
Source File: Book.java    From nju-lib-downloader with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Builds a node from a list-item element.
 *
 * <p>When the item has exactly one child and that child is an {@code a} element, the result of
 * {@code parseA} is returned. Otherwise the children are scanned in order: a {@code span}
 * replaces the current node with the result of {@code parseSpan}, and a {@code ul} adds the
 * result of {@code parseUL} to the current node.
 *
 * @param liElement the list-item element to parse
 * @return the node built from the item's children
 */
protected Node parseLi(Element liElement) {
    Elements kids = liElement.children();

    boolean singleAnchor = kids.size() == 1 && kids.get(0).nodeName().equals("a");
    if (singleAnchor) {
        return parseA(kids.get(0));
    }

    Node result = new Node();
    for (Element kid : kids) {
        String tag = kid.nodeName();
        if (tag.equals("span")) {
            result = parseSpan(kid);
        } else if (tag.equals("ul")) {
            result.addAll(parseUL(kid));
        }
    }
    return result;
}
 
Example 4
Source File: JsoupSuperSubClassListExtractor.java    From wandora with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Creates or fetches a topic named after the element's trimmed text, makes it a subclass of the
 * given class topic, and hands every {@code ul} child of the element's next sibling to
 * {@code parseList}. Does nothing when the trimmed text is empty.
 *
 * @param classElement element whose text names the topic
 * @param classTopic superclass topic; {@code wandoraClass} is used when this is null
 * @throws TopicMapException propagated from the topic map operations
 */
private void parseTopic(Element classElement, Topic classTopic) throws TopicMapException {
    String name = classElement.text().trim();
    if (name.isEmpty()) {
        return;
    }

    Topic topic = getOrCreateTopic(tm, null, name);
    Topic superClass = (classTopic == null) ? wandoraClass : classTopic;
    makeSubclassOf(tm, topic, superClass);

    // The element following this one may wrap one or more lists.
    Element listWrapper = classElement.nextElementSibling();
    if (listWrapper == null || listWrapper.children().isEmpty()) {
        return;
    }
    for (Element candidate : listWrapper.children()) {
        if (candidate.tagName().equals("ul")) {
            parseList(candidate, topic);
        }
    }
}
 
Example 5
Source File: URLDownloadTests.java    From java_in_examples with Apache License 2.0 8 votes vote down vote up
/**
 * Fetches the page at {@code url} with jsoup and prints document metadata, output settings,
 * each direct child of {@code <head>}, and finally the direct children of {@code <body>} via
 * {@code printElements}.
 *
 * @param url address of the page to download
 * @throws Exception propagated from the connection or parsing calls
 */
private static void testHtmlParser(String url) throws Exception {
    Document doc = Jsoup.connect(url)
            .userAgent(USER_AGENT)
            .cookie("auth", "token")
            .timeout(30000)
            .get();

    // Document-level properties.
    Charset documentCharset = doc.charset();
    System.out.println("charset = " + documentCharset);
    System.out.println("location = " + doc.location());
    System.out.println("nodeName = " + doc.nodeName());

    // Output settings of the document.
    Document.OutputSettings settings = doc.outputSettings();
    System.out.println("charset = " + settings.charset());
    System.out.println("indentAmount = " + settings.indentAmount());
    System.out.println("syntax = " + settings.syntax());
    System.out.println("escapeMode = " + settings.escapeMode());
    System.out.println("prettyPrint = " + settings.prettyPrint());
    System.out.println("outline = " + settings.outline());

    System.out.println("title = " + doc.title());
    System.out.println("baseUri = " + doc.baseUri());

    // One line per direct child of <head>: "<tag name> : <element>".
    for (Element headChild : doc.head().children()) {
        System.out.print(headChild.tag().getName() + " : ");
        System.out.println(headChild);
    }

    printElements(doc.body().children());
}
 
Example 6
Source File: Evaluator.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Tests whether {@code element} is the only child of its parent with its tag.
 *
 * @param root unused by this check
 * @param element the element to test
 * @return {@code false} when the element has no parent or its parent is a {@link Document};
 *         otherwise {@code true} exactly when one child of the parent has the element's tag
 */
@Override
public boolean matches(Element root, Element element) {
	final Element parent = element.parent();
	if (parent == null || parent instanceof Document) {
		return false;
	}

	int sameTagCount = 0;
	for (Element sibling : parent.children()) {
		if (sibling.tag().equals(element.tag())) {
			sameTagCount++;
		}
	}
	return sameTagCount == 1;
}
 
Example 7
Source File: PrefixExtractor.java    From superword with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a {@code Prefix} from an element that has exactly three children.
 *
 * <p>The trimmed text of the first child is the prefix and must end with {@code "-"}; the text
 * of the second child is its description. In every other case a default-constructed
 * {@code Prefix} is returned.
 *
 * @param element the element whose children hold the prefix data
 * @return the extracted prefix, or a default-constructed one when the element does not match
 */
public static Prefix extractPrefix(Element element){
    Prefix fallback = new Prefix();

    List<Element> cells = element.children();
    boolean hasThreeCells = cells != null && cells.size() == 3;
    if (hasThreeCells) {
        String label = cells.get(0).text().trim();
        if (label.endsWith("-")) {
            String description = cells.get(1).text();
            return new Prefix(label, description);
        }
    }
    return fallback;
}
 
Example 8
Source File: SHelper.java    From Xndroid with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Renders an element and its descendants as indented text.
 *
 * <p>Each element produces {@code indentation} spaces followed by {@code tagName:ownText} and
 * a newline. Children are rendered recursively one level deeper, each followed by an extra
 * newline.
 *
 * @param root the element to render
 * @param indentation number of leading spaces for this element's line
 * @return the rendered text
 */
private static String printNode(Element root, int indentation) {
    StringBuilder out = new StringBuilder(indentation);

    for (int remaining = indentation; remaining > 0; remaining--) {
        out.append(' ');
    }
    out.append(root.tagName()).append(':').append(root.ownText()).append('\n');

    for (Element child : root.children()) {
        out.append(printNode(child, indentation + 1)).append('\n');
    }
    return out.toString();
}
 
Example 9
Source File: JobConfParserImpl.java    From Eagle with Apache License 2.0 5 votes vote down vote up
/**
 * Reads key/value pairs from the rows of the {@code table[id=conf]} element.
 *
 * <p>For every {@code tr} inside the table's {@code tbody}, the text of the first child is
 * used as the key and the text of the second child as the value. Rows with fewer than two
 * children are skipped, so they no longer abort the whole parse with an
 * {@link IndexOutOfBoundsException}. Later rows overwrite earlier rows that share a key.
 *
 * @param doc the document containing the table
 * @return a map of the extracted pairs; empty when no suitable rows are found
 */
public Map<String, String> parse(Document doc) {
	Map<String, String> configs = new HashMap<String, String>();
	Elements rows = doc.select("table[id=conf]").select("tbody").select("tr");
	for (Element row : rows) {
		Elements cells = row.children();
		if (cells.size() < 2) {
			// Not a key/value row (e.g. a single spanning cell); nothing to extract.
			continue;
		}
		configs.put(cells.get(0).text(), cells.get(1).text());
	}
	return configs;
}
 
Example 10
Source File: JsoupParserIntegrationTest.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Demonstrates the jsoup traversal accessors on the {@code section} elements of {@code doc},
 * then prints every selected section.
 */
@Test
public void examplesTraversing() {
    Elements sections = doc.select("section");

    // Positional access within the selection.
    Element first = sections.first();
    Element last = sections.last();
    Element third = sections.get(2); // zero-based index: 2 is the third match

    // Navigation relative to the first section.
    Elements ancestors = first.parents();
    Element directParent = first.parent();
    Elements childElements = first.children();
    Elements siblingElements = first.siblingElements();

    for (Element section : sections) {
        System.out.println("section: " + section);
    }
}
 
Example 11
Source File: SHelper.java    From JumpGo with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Produces an indented text dump of an element tree.
 *
 * <p>The element's own line is {@code indentation} spaces, then {@code tagName:ownText}, then
 * a newline. Each child is dumped recursively with one more space of indentation and is
 * followed by an additional newline.
 *
 * @param root the element to dump
 * @param indentation number of spaces placed before this element's line
 * @return the text dump
 */
private static String printNode(Element root, int indentation) {
    StringBuilder dump = new StringBuilder(indentation);

    int written = 0;
    while (written < indentation) {
        dump.append(' ');
        written++;
    }

    dump.append(root.tagName());
    dump.append(':');
    dump.append(root.ownText());
    dump.append('\n');

    for (Element descendant : root.children()) {
        String nested = printNode(descendant, indentation + 1);
        dump.append(nested);
        dump.append('\n');
    }
    return dump.toString();
}
 
Example 12
Source File: GetYAnswersPropertiesFromQid.java    From LiveQAServerDemo with MIT License 5 votes vote down vote up
/**
 * Searches an element tree depth-first, testing each element before its children, and returns
 * the first element accepted by the predicate.
 *
 * @param e the element at which the search starts
 * @param f the predicate to satisfy
 * @return the first matching element, or {@code null} when none matches
 */
private static Element findElement(Element e, ElementPredicate f) {
    if (f.check(e)) {
        return e;
    }

    Element match = null;
    for (Element child : e.children()) {
        match = findElement(child, f);
        if (match != null) {
            break;
        }
    }
    return match;
}
 
Example 13
Source File: JsoupInstanceListExtractor.java    From wandora with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Passes every direct child of {@code list} that has no child elements of its own to
 * {@code parseTopic}.
 *
 * @param list the list element to walk
 * @param typeTopic topic forwarded to {@code parseTopic}
 * @throws TopicMapException propagated from {@code parseTopic}
 */
private void parseList(Element list, Topic typeTopic) throws TopicMapException {
    for (Element item : list.children()) {
        boolean hasNestedElements = !item.children().isEmpty();
        if (hasNestedElements) {
            continue;
        }
        parseTopic(item, typeTopic);
    }
}
 
Example 14
Source File: JsoupSuperSubClassListExtractor.java    From wandora with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Walks the direct children of {@code list} and calls {@code parseTopic} for each one that
 * contains no child elements.
 *
 * @param list the list element to walk
 * @param classTopic topic forwarded to {@code parseTopic}
 * @throws TopicMapException propagated from {@code parseTopic}
 */
private void parseList(Element list, Topic classTopic) throws TopicMapException {
    for (Element entry : list.children()) {
        if (entry.children().isEmpty()) {
            parseTopic(entry, classTopic);
        }
    }
}
 
Example 15
Source File: ZeppelinRDisplay.java    From zeppelin with Apache License 2.0 5 votes vote down vote up
/**
 * Rebuilds the HTML of {@code body} from its direct children and wraps it in an
 * {@code RDisplay}.
 *
 * <p>For each child, the outer HTML has the {@code “%html } marker and the closing {@code ”}
 * removed. If the child's inner HTML fully matches {@code pattern}, that matched text is also
 * removed from the outer HTML. The concatenated result has protocol-relative {@code src} and
 * {@code href} values rewritten to {@code http://}, replaces the body's content, and every
 * {@code img} in the body then gets its {@code width} attribute set.
 *
 * @param body the element whose content is rewritten in place
 * @param imageWidth value assigned to the {@code width} attribute of every image
 * @return an HTML display built from the rewritten body with a success code
 */
private static RDisplay htmlDisplay(Element body, String imageWidth) {
  StringBuilder merged = new StringBuilder();
  for (Element element : body.children()) {
    String inner = element.html();
    String outer = element.outerHtml().replace("“%html " , "").replace("”", "");

    Matcher innerMatch = pattern.matcher(inner);
    if (innerMatch.matches()) {
      outer = outer.replace(innerMatch.group(), "");
    }

    merged.append(outer);
  }

  // Give protocol-relative URLs an explicit scheme.
  String content = merged.toString()
    .replaceAll("src=\"//", "src=\"http://")
    .replaceAll("href=\"//", "href=\"http://");

  body.html(content);

  for (Element image : body.getElementsByTag("img")) {
    image.attr("width", imageWidth);
  }

  return new RDisplay(body.html(), Type.HTML, Code.SUCCESS);
}
 
Example 16
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects microformat items from an element tree.
 *
 * <p>An element for which {@code hasRootClass} is true is parsed with {@code parseMicroformat}
 * and added to {@code items}; this method does not descend into it. Any other element is
 * searched recursively through its children.
 *
 * @param elem the element to inspect
 * @param baseUri base URI forwarded to {@code parseMicroformat}
 * @param items list that receives the parsed items
 */
private void parseMicroformats(Element elem, URI baseUri, JsonList items) {
    if (!hasRootClass(elem)) {
        for (Element child : elem.children()) {
            parseMicroformats(child, baseUri, items);
        }
        return;
    }

    JsonDict item = parseMicroformat(elem, baseUri);
    items.add(item);
}
 
Example 17
Source File: Hentai2ReadContent.java    From Hentoid with Apache License 2.0 4 votes vote down vote up
/**
 * Builds a {@code Content} for the given URL from the values held in {@code coverUrl},
 * {@code title}, {@code uniqueId} and {@code properties}.
 *
 * <p>An empty URL yields a content whose status is {@code IGNORED}. Otherwise the property
 * elements are scanned: each {@code b} child sets the current property name (lower-cased and
 * trimmed), and each following {@code a} child is interpreted according to that name.
 *
 * @param url the address of the content page
 * @return the populated content
 */
public Content toContent(@Nonnull String url) {
    Content content = new Content();
    content.setSite(Site.HENTAI2READ);
    if (url.isEmpty()) {
        return content.setStatus(StatusContent.IGNORED);
    }

    content.setUrl(url.replace(Site.HENTAI2READ.getUrl(), ""));
    content.setCoverImageUrl(coverUrl);

    // The last title element wins; a missing title gets a placeholder.
    if (title.isEmpty()) {
        content.setTitle("<no title>");
    } else {
        String lastTitle = title.get(title.size() - 1).text();
        content.setTitle(lastTitle.isEmpty() ? "" : Helper.removeNonPrintableChars(lastTitle));
    }
    content.setUniqueSiteId(uniqueId);

    AttributeMap attributes = new AttributeMap();
    String currentProperty = "";
    for (Element property : properties) {
        for (Element child : property.children()) {
            String tag = child.nodeName();
            if (tag.equals("b")) {
                currentProperty = child.text().toLowerCase().trim();
                continue;
            }
            if (!tag.equals("a")) {
                continue;
            }

            // Map the current property to an attribute type; null means "no attribute to add".
            AttributeType type = null;
            switch (currentProperty) {
                case "page":
                    // The page count is the text before the first " page" occurrence;
                    // substring throws if that marker is absent.
                    String pageText = child.text();
                    String qtyPages = pageText.substring(0, pageText.indexOf(" page"));
                    content.setQtyPages(Integer.parseInt(qtyPages));
                    break;
                case "parody":
                    type = AttributeType.SERIE;
                    break;
                case "artist":
                    type = AttributeType.ARTIST;
                    break;
                case "language":
                    type = AttributeType.LANGUAGE;
                    break;
                case "character":
                    type = AttributeType.CHARACTER;
                    break;
                case "content":
                case "category":
                    type = AttributeType.TAG;
                    break;
                default:
                    // Other properties are not recorded.
                    break;
            }

            if (type != null) {
                ParseHelper.parseAttribute(attributes, type, child, true, Site.HENTAI2READ);
            }
        }
    }
    content.addAttributes(attributes);

    return content;
}
 
Example 18
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Computes the implied {@code name} of a microformat root element.
 *
 * <p>The candidates are tried in this order, and the first that applies is returned:
 * <ol>
 *   <li>the element itself: {@code img}/{@code area} with {@code alt}, then {@code abbr} with
 *       {@code title};</li>
 *   <li>its only child: {@code img}/{@code area} with {@code alt} that is not itself a
 *       microformat root, then {@code abbr} with {@code title};</li>
 *   <li>the only child of its only child, with the same two rules;</li>
 *   <li>the trimmed text content of the element.</li>
 * </ol>
 *
 * <p>Two defects are fixed here. The first check used to return the (empty) {@code alt} of any
 * {@code img}, even one without an {@code alt} attribute, because {@code &&} bound only to the
 * {@code area} test. The grandchild {@code abbr} rule used to test for an attribute named
 * {@code "c"} instead of {@code "title"}, so it never matched.
 *
 * @param elem the microformat root element
 * @return the implied name
 */
private String parseImpliedName(Element elem) {
    // img[alt] or area[alt]: the alt attribute is required for either tag.
    if (("img".equals(elem.tagName()) || "area".equals(elem.tagName())) && elem.hasAttr("alt")) {
        return elem.attr("alt");
    }
    if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) {
        return elem.attr("title");
    }

    Elements children = elem.children();
    if (children.size() == 1) {
        Element child = children.first();
        // else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name
        // else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name
        if (!hasRootClass(child)
                && ("img".equals(child.tagName()) || "area".equals(child.tagName()))
                && child.hasAttr("alt")) {
            return child.attr("alt");
        }
        // else if .h-x>abbr:only-child[title] then use that abbr title for name
        if ("abbr".equals(child.tagName()) && child.hasAttr("title")) {
            return child.attr("title");
        }

        Elements grandChildren = child.children();
        if (grandChildren.size() == 1) {
            Element grandChild = grandChildren.first();
            // else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name
            // else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name
            if (!hasRootClass(grandChild)
                    && ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName()))
                    && grandChild.hasAttr("alt")) {
                return grandChild.attr("alt");
            }
            // else if .h-x>:only-child>abbr:only-child[title] use that abbr title for name
            if ("abbr".equals(grandChild.tagName()) && grandChild.hasAttr("title")) {
                return grandChild.attr("title");
            }
        }
    }

    // else use the textContent of the .h-x for name
    // drop leading & trailing white-space from name
    // NOTE(review): String.trim() only strips characters <= U+0020, so a leading or trailing
    // non-breaking space (U+00A0) is not removed here - confirm whether that matters.
    return elem.text().trim();
}
 
Example 19
Source File: TtsHelper.java    From coolreader with MIT License 4 votes vote down vote up
/**
 * Walks {@code elements} in order and appends a {@code SpeakValue} to {@code queue} for each
 * element that is not skipped.
 *
 * <p>When {@code startId} is non-zero, elements are skipped until one is found whose
 * {@code id} attribute parses to an integer greater than or equal to {@code startId}.
 * Elements whose parent has the class {@code editsection} are always skipped.
 *
 * @param elements the elements to read; NOTE(review): presumably modified by
 *                 {@code removeAllChildren} during the walk - confirm against that helper
 * @param startId numeric id at which processing starts; 0 means start at the first element
 */
private void parseText(Elements elements, int startId) {
	Log.d(TAG, "Start ID:" + startId);
	// Skip mode stays on until the starting element is reached (never on when startId is 0).
	boolean isSkip = true;
	if (startId == 0)
		isSkip = false;

	for (int idx = 0; idx < elements.size(); idx++) {
		Element el = elements.get(idx);
		// While skipping, look for the first element whose numeric id reaches startId.
		if (el.hasAttr("id") && isSkip) {
			try {
				int id = Integer.parseInt(el.attr("id"));
				if (id >= startId)
					isSkip = false;
			} catch (Exception ex) {
				// A non-numeric id is logged and the element stays skipped.
				Log.e(TAG, ex.getMessage());
			}
		}
		if (isSkip)
			continue;
		// NOTE(review): el.parent() is dereferenced without a null check - confirm every
		// element passed in has a parent.
		if (el.parent().hasClass("editsection"))
			continue;
		// Elements whose tag isWhiteSpace() accepts queue a SILENCE entry ahead of their text.
		if (isWhiteSpace(el.tagName())) {
			SpeakValue s = new SpeakValue();
			s.Val = SILENCE;
			s.ID = null;
			queue.add(s);
		}

		SpeakValue val = new SpeakValue();
		// Check whether any direct child is one of the FORMATTING_ELEMENTS tags.
		boolean hasFormattingChild = false;
		for (Element child : el.children()) {
			if (FORMATTING_ELEMENTS.contains(child.tagName().toLowerCase())) {
				hasFormattingChild = true;
				break;
			}
		}

		if (hasFormattingChild) {
			// Use the combined text of the element and its descendants.
			Log.d(TAG, "Got formatting text: " + el.html());
			val.Val = el.text();
			Log.d(TAG, "Use text: " + el.text());
			removeAllChildren(el, elements);
			// NOTE(review): presumably compensates for removeAllChildren() changing the
			// contents of `elements` - confirm against that helper.
			idx--;
		} else {
			// No formatting children: use only the element's own text.
			val.Val = el.ownText();
		}

		// Carry the element id along with the queued value when there is one.
		if (el.hasAttr("id"))
			val.ID = el.attr("id");
		else
			val.ID = null;

		queue.add(val);
	}
}
 
Example 20
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 2 votes vote down vote up
/**
 * Direct child nodes: returns the child elements of the given element.
 *
 * @param e the element whose children are requested
 * @return the result of {@code e.children()}
 */
public Elements child(Element e) {
    return e.children();
}