Java Code Examples for org.jsoup.nodes.Element#child()

The following examples show how to use org.jsoup.nodes.Element#child(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BiQuGeReadUtil.java    From MissZzzReader with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the bookstore's novel categories from the navigation bar.
 *
 * <p>Reads the first {@code <ul>} inside the first element with class {@code nav} and
 * turns the first child element of each list item into a {@code BookType}, taking the
 * name from its {@code title} attribute and the URL from its {@code href} attribute.
 * Only entries whose name contains "小说" and does not contain "排行" are kept.
 *
 * @param html HTML of the page to parse
 * @return the matching categories; empty when the navigation list is missing
 */
public static List<BookType> getBookTypeList(String html){
    List<BookType> bookTypes = new ArrayList<>();
    Document doc = Jsoup.parse(html);

    Elements divs = doc.getElementsByClass("nav");
    if (divs.isEmpty()) {
        return bookTypes;
    }
    Elements uls = divs.get(0).getElementsByTag("ul");
    if (uls.isEmpty()) {
        return bookTypes;
    }
    for (Element li : uls.get(0).children()) {
        // child(0) throws IndexOutOfBoundsException on an item without child elements.
        if (li.children().isEmpty()) {
            continue;
        }
        Element a = li.child(0);
        String typeName = a.attr("title");
        // A name that contains "小说" is necessarily non-empty, so no separate emptiness check is needed.
        if (!typeName.contains("小说") || typeName.contains("排行")) {
            continue;
        }
        BookType bookType = new BookType();
        bookType.setTypeName(typeName);
        bookType.setUrl(a.attr("href"));
        bookTypes.add(bookType);
    }
    return bookTypes;

}
 
Example 2
Source File: TvMaoCrawler.java    From MyTv with Apache License 2.0 6 votes vote down vote up
/**
 * Parses TV station objects out of a channel-listing page.
 * 
 * @param city
 *            city assigned to every station that is found
 * @param html
 *            page HTML to parse
 * @return the stations in the order they were selected from the page
 */
private List<TvStation> parseTvStation(String city, String html) {
	Document page = Jsoup.parse(html);
	// The classification label is read once and shared by every station on the page.
	String classify = page.select("div.chlsnav div.pbar b").get(0).text().trim();
	List<TvStation> stations = new ArrayList<TvStation>();
	for (Element item : page.select("div.chlsnav ul.r li")) {
		Element firstChild = item.child(0);
		TvStation station = new TvStation();
		station.setName(firstChild.text().trim());
		station.setCity(city);
		station.setClassify(classify);
		station.setSequence(SEQUENCE.incrementAndGet());
		// Notify every registered listener before the station is collected.
		for (CrawlEventListener listener : listeners) {
			listener.itemFound(new TvStationFoundEvent(this, station));
		}
		stations.add(station);
	}
	return stations;
}
 
Example 3
Source File: Javadocs.java    From Recaf with MIT License 6 votes vote down vote up
/**
 * @return Class description, or {@code NO_DESCRIPTION} when none can be located.
 *         The result is stored in {@code description} and reused on later calls.
 */
public String getDescription() {
	if (description != null)
		return description;
	try {
		// Descend into <div class="description"><ul><li> and scan its children
		// from the end for a <div class="block">; its text is the description.
		Element item = doc.getElementsByClass("description").get(0).child(0).child(0);
		// NOTE(review): index 0 is never inspected by this loop - confirm that is intended.
		for (int idx = item.children().size() - 1; idx > 0; idx--) {
			Element candidate = item.child(idx);
			boolean isBlockDiv = candidate.tagName().equals("div") && candidate.className().equals("block");
			if (isBlockDiv) {
				description = text(candidate);
				return description;
			}
		}
	} catch(IndexOutOfBoundsException ex) {
		// Expected when the document lacks the structure walked above
	}
	// Description not found
	description = NO_DESCRIPTION;
	return description;
}
 
Example 4
Source File: Javadocs.java    From Recaf with MIT License 5 votes vote down vote up
/**
 * Walks the child elements of a method's documentation list and extracts the
 * return description and the parameter descriptions.
 *
 * @param c
 * 		Element whose children are scanned. A child whose text starts with
 * 		"Returns" or "Parameters" introduces the following value element(s).
 * @param retDesc
 * 		Receives the text of the element following a "Returns" child.
 * @param params
 * 		Receives one entry per {@code <dd>} element following a "Parameters" child.
 */
private void parseMethodDescriptor(Element c, StringBuilder retDesc, List<DocParameter> params) {
	// The element is not modified here, so the child count is loop-invariant.
	final int count = c.children().size();
	for (int i = 0; i < count;) {
		Element cc = c.child(i);
		String key = cc.text();
		if (key.startsWith("Returns")) {
			// Returns should just have one following element.
			// Guard against "Returns" being the last child, where child(i+1) would throw.
			if (i + 1 < count)
				retDesc.append(c.child(i + 1).text());
			i += 2;
		} else if (key.startsWith("Parameters")) {
			// Parameters followed by 0 or more <dd> content elements
			// <dd><code>parameter</code> - description</dd>
			while (i < count - 1) {
				Element value = c.child(i + 1);
				if (!value.tagName().equals("dd"))
					break;
				i++;
				// A <dd> without a child element carries no parameter name; skip it
				// instead of letting child(0) throw.
				if (value.children().isEmpty())
					continue;
				String pname = value.child(0).text();
				String pdesc = value.text();
				// Strip the leading "<name> - " (name plus 3 separator characters).
				if (pdesc.length() > pname.length() + 3)
					pdesc = pdesc.substring(pname.length() + 3);
				params.add(new DocParameter(pname, pdesc));
			}
			i++;
		} else {
			// Unknown documentation element
			i++;
		}
	}
}
 
Example 5
Source File: GalleryDetailParser.java    From EhViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Builds the large preview set from the {@code gdtl} elements found inside the
 * document's {@code gdt} element. Any failure is reported as a
 * {@code ParseException} carrying {@code body}.
 */
private static LargePreviewSet parseLargePreviewSet(Document d, String body) throws ParseException {
    try {
        LargePreviewSet previews = new LargePreviewSet();
        Elements cells = d.getElementById("gdt").getElementsByClass("gdtl");
        if (cells.isEmpty()) {
            throw new ParseException("Can't parse large preview", body);
        }
        for (Element cell : cells) {
            // First child supplies the page link, its first child the thumbnail.
            Element link = cell.child(0);
            String pageUrl = link.attr("href");
            Element image = link.child(0);
            String imageUrl = image.attr("src");
            if (Settings.getFixThumbUrl()) {
                imageUrl = EhUrl.getFixedPreviewThumbUrl(imageUrl);
            }
            // The alt attribute holds a 1-based number; items are stored 0-based.
            int index = Integer.parseInt(image.attr("alt")) - 1;
            previews.addItem(index, imageUrl, pageUrl);
        }
        return previews;
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        throw new ParseException("Can't parse large preview", body);
    }
}
 
Example 6
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void parsesComments() {
    Document doc = Jsoup.parse("<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>");
    Element body = doc.body();

    // The comment must be a sibling of <img>, not nested in it: img is an empty tag.
    Comment commentNode = (Comment) body.childNode(1);
    assertEquals(" <table><tr><td></table> ", commentNode.getData());

    // The paragraph after the comment is the body's second child element.
    Element paragraph = body.child(1);
    TextNode paragraphText = (TextNode) paragraph.childNode(0);
    assertEquals("Hello", paragraphText.getWholeText());
}
 
Example 7
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void parsesSimpleDocument() {
    Document doc = Jsoup.parse("<html><head><title>First!</title></head><body><p>First post! <img src=\"foo.png\" /></p></body></html>");
    // need a better way to verify these:
    Element paragraph = doc.body().child(0);
    assertEquals("p", paragraph.tagName());

    // The image is the paragraph's only child element.
    Element image = paragraph.child(0);
    assertEquals("foo.png", image.attr("src"));
    assertEquals("img", image.tagName());
}
 
Example 8
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void parsesComments() {
    String markup = "<html><head></head><body><img src=foo><!-- <table><tr><td></table> --><p>Hello</p></body></html>";
    Element bodyElement = Jsoup.parse(markup).body();

    // img is an empty tag, so the comment has to follow it as the body's second node.
    Comment parsedComment = (Comment) bodyElement.childNode(1);
    assertEquals(" <table><tr><td></table> ", parsedComment.getData());

    Element para = bodyElement.child(1);
    TextNode hello = (TextNode) para.childNode(0);
    assertEquals("Hello", hello.getWholeText());
}
 
Example 9
Source File: ContestsListFragment.java    From fridge with MIT License 5 votes vote down vote up
/**
 * Rebuilds the contest list from the given HTML and refreshes the list view.
 *
 * <p>Every element matching {@code .dataTable} is scanned; each row of its first
 * {@code <tbody>} becomes a {@code Contest} built from the text of its first four
 * cells (code, name, start date, end date) and the zero-based index of the table it
 * came from. Tables without a {@code <tbody>} and rows with fewer than four cells
 * are skipped.
 *
 * @param html HTML of the page to parse
 */
public void parseContests(String html) {
    Document doc = Jsoup.parse(html);
    Elements tables = doc.select(".dataTable");
    int flag = 0;
    contestArrayList.clear();
    for (Element table : tables) {
        Elements tbody = table.getElementsByTag("tbody");
        // get(0) would throw for a table that has no <tbody>.
        if (!tbody.isEmpty()) {
            for (Element contest : tbody.get(0).children()) {
                // child(3) would throw on a row with fewer than four cells.
                if (contest.children().size() < 4) {
                    continue;
                }
                String cCode = contest.child(0).text();
                String cName = contest.child(1).text();
                String sDate = contest.child(2).text();
                String eDate = contest.child(3).text();
                contestArrayList.add(new Contest(cCode, cName, sDate, eDate, flag));
            }
        }
        // The index advances for every table, including skipped ones.
        flag++;
    }
    contestsListAdapter = new ContestsListAdapter(contestArrayList, getActivity());
    recyclerView.swapAdapter(contestsListAdapter, false);
    if (filter != null)
        contestsListAdapter.setFilter(filter);

    // contestArrayList was already dereferenced by clear() above, so only emptiness matters here.
    if (contestArrayList.isEmpty()) {
        recyclerView.setVisibility(View.GONE);
        contestsPlaceholder.setVisibility(View.VISIBLE);
    } else {
        recyclerView.setVisibility(View.VISIBLE);
        contestsPlaceholder.setVisibility(View.GONE);
    }
}
 
Example 10
Source File: NewsItemBiz.java    From CSDN with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads one page of news and converts every element with class {@code unit}
 * into a {@code NewsItem}.
 *
 * <p>Title and link come from the unit's first {@code <h1>}, the date from the
 * element with class {@code ago} inside the first {@code <h4>}, and the image link
 * and content from the first and second children of the first {@code <dl>}.
 *
 * @param newsType news category, used to build the URL and stored on each item
 * @param curPage page number used to build the URL
 * @return the items found on the page, in document order
 * @throws CommonException declared by this method; not thrown directly by the code visible here
 */
public List<NewsItem> getNewsItems(int newsType, int curPage)
        throws CommonException {
    List<NewsItem> newsItems = new ArrayList<>();
    String url = UrlUtil.getUrl(newsType, curPage);
    String htmlStr = DataUtil.doGet(url);
    Document doc = Jsoup.parse(htmlStr);
    Elements units = doc.getElementsByClass("unit");
    for (Element unit : units) {
        NewsItem item = new NewsItem();
        item.setNewsType(newsType);
        Element h1 = unit.getElementsByTag("h1").get(0);
        Element ha = h1.child(0);
        item.setTitle(h1.text());
        item.setLink(ha.attr("href"));
        Element h4 = unit.getElementsByTag("h4").get(0);
        Element ago = h4.getElementsByClass("ago").get(0);
        item.setDate(ago.text());
        Element dl_ele = unit.getElementsByTag("dl").get(0);
        Element dt_ele = dl_ele.child(0);
        // The image is optional: check for the nested elements explicitly instead of
        // relying on child(0) throwing IndexOutOfBoundsException.
        if (!dt_ele.children().isEmpty()) {
            Element img_ele = dt_ele.child(0);
            if (!img_ele.children().isEmpty()) {
                item.setImgLink(img_ele.child(0).attr("src"));
            }
        }

        Element dd_ele = dl_ele.child(1);
        item.setContent(dd_ele.text());
        newsItems.add(item);
    }

    return newsItems;
}
 
Example 11
Source File: ListItemExtensions.java    From Android-WYSIWYG-Editor with Apache License 2.0 5 votes vote down vote up
/**
 * Renders the child elements of {@code element} as list items. The first child
 * creates the list through {@code insertList}; each later child is appended with
 * {@code addListItem} and then has styles applied.
 *
 * @param isOrdered passed through to the list-item helpers
 * @param element element whose child elements are rendered; nothing happens when it has none
 */
public void RenderList(boolean isOrdered, Element element) {
    if (element.children().size() <= 0) {
        return;
    }
    Element item = element.child(0);
    String itemText = componentsWrapper.getHtmlExtensions().getHtmlSpan(item);
    TableLayout listLayout = componentsWrapper.getListItemExtensions().insertList(editorCore.getParentChildCount(), isOrdered, itemText);
    int index = 1;
    while (index < element.children().size()) {
        item = element.child(index);
        itemText = componentsWrapper.getHtmlExtensions().getHtmlSpan(item);
        View row = componentsWrapper.getListItemExtensions().addListItem(listLayout, isOrdered, itemText);
        componentsWrapper.getListItemExtensions().applyStyles(row, item);
        index++;
    }
}
 
Example 12
Source File: Book.java    From nju-lib-downloader with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Converts a span element into a {@code Node} by parsing its first child element.
 *
 * @param spanElement element whose first child element is handed to {@code parseA}
 * @return the node parsed from the first child, or a new empty {@code Node} when
 *         the element has no child elements
 */
protected Node parseSpan(Element spanElement) {
    // children() never returns null, so a null check cannot detect an empty element;
    // child(0) on an element without children throws IndexOutOfBoundsException.
    if (!spanElement.children().isEmpty()) {
        Element trueNode = spanElement.child(0);
        return parseA(trueNode);
    }
    return new Node();
}
 
Example 13
Source File: Book.java    From nju-lib-downloader with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Collects one {@code Node} per {@code li} child element of the given element.
 *
 * @param element element whose direct child elements are inspected
 * @return nodes produced by {@code parseLi}, in child order; other children are ignored
 */
protected List<Node> parseUL(Element element) {
    List<Node> parsed = new LinkedList<>();
    for (Element item : element.children()) {
        if ("li".equals(item.nodeName())) {
            parsed.add(parseLi(item));
        }
    }
    return parsed;
}
 
Example 14
Source File: BookDownloader.java    From nju-lib-downloader with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Parses the direct child elements of a tree view: {@code tree} children go through
 * {@code parseTree}, {@code node} children through {@code parseNode}.
 *
 * @param element element whose direct child elements are inspected
 * @return the parsed nodes in child order; children with other names are ignored
 */
public List<Node> parseTreeView(Element element) {
    List<Node> parsed = new LinkedList<>();
    for (Element entry : element.children()) {
        if ("tree".equals(entry.nodeName())) {
            parsed.add(parseTree(entry));
        }
        if ("node".equals(entry.nodeName())) {
            parsed.add(parseNode(entry));
        }
    }
    return parsed;
}
 
Example 15
Source File: TianLaiReadUtil.java    From MissZzzReader with Apache License 2.0 5 votes vote down vote up
/**
     * Builds the list of books found in a search-result page.
     *
     * <p>Each child of the first element with class {@code result-list} becomes one
     * {@code Book}: cover image, name, chapter URL, description and the labelled info
     * entries (author, type, update date, newest chapter) are read from it.
     *
     * @param html HTML of the search-result page
     * @return the books found; empty when the page has no {@code result-list} element
     */
    public static ArrayList<Book> getBooksFromSearchHtml(String html) {
        ArrayList<Book> books = new ArrayList<>();
        Document doc = Jsoup.parse(html);
        Elements divs = doc.getElementsByClass("result-list");
        // get(0) would throw IndexOutOfBoundsException when no such element exists.
        if (divs.isEmpty()) {
            return books;
        }
        Element div = divs.get(0);
        for (Element element : div.children()) {
            Book book = new Book();
            Element img = element.child(0).child(0).child(0);
            book.setImgUrl(img.attr("src"));
            Element title = element.getElementsByClass("result-item-title result-game-item-title").get(0);
            book.setName(title.child(0).attr("title"));
            book.setChapterUrl(title.child(0).attr("href"));
            Element desc = element.getElementsByClass("result-game-item-desc").get(0);
            book.setDesc(desc.text());
            Element info = element.getElementsByClass("result-game-item-info").get(0);
            for (Element element1 : info.children()) {
                String infoStr = element1.text();
                if (infoStr.contains("作者:")) {
                    book.setAuthor(infoStr.replace("作者:", "").replace(" ", ""));
                } else if (infoStr.contains("类型:")) {
                    book.setType(infoStr.replace("类型:", "").replace(" ", ""));
                } else if (infoStr.contains("更新时间:")) {
                    book.setUpdateDate(infoStr.replace("更新时间:", "").replace(" ", ""));
                } else if (element1.children().size() > 1) {
                    // Any other entry is treated as the newest-chapter entry; its second
                    // child supplies the link. Entries without a second child are skipped
                    // instead of letting child(1) throw.
                    Element newChapter = element1.child(1);
                    book.setNewestChapterUrl(newChapter.attr("href"));
                    book.setNewestChapterTitle(newChapter.text());
                }
            }
            book.setSource(BookSource.tianlai.toString());
            books.add(book);

        }

        return books;
    }
 
Example 16
Source File: CommentParser.java    From Ouroboros with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Turns a code element into text with a light-gray background: the text of the
 * element's first child, wrapped in a leading and a trailing newline.
 *
 * @param codeElement element whose first child element supplies the text
 * @return the spanned text covering the whole string with a background colour span
 */
private CharSequence parseCodeText(Element codeElement){
    String padded = "\n" + codeElement.child(0).text() + "\n";
    SpannableString highlighted = new SpannableString(padded);
    BackgroundColorSpan grayBackground = new BackgroundColorSpan(Color.LTGRAY);
    highlighted.setSpan(grayBackground, 0, highlighted.length(), Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
    return highlighted;
}
 
Example 17
Source File: Evaluator.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Matches only the root itself; when the root is a {@code Document}, its first
 * child element stands in for it. The comparison is by identity.
 */
@Override
public boolean matches(Element root, Element element) {
	Element effectiveRoot = root;
	if (root instanceof Document) {
		effectiveRoot = root.child(0);
	}
	return effectiveRoot == element;
}
 
Example 18
Source File: Evaluator.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Reports whether {@code element} is the same instance as the root, using the
 * first child element in place of the root when the root is a {@code Document}.
 */
@Override
public boolean matches(Element root, Element element) {
	if (root instanceof Document) {
		return element == root.child(0);
	}
	return element == root;
}
 
Example 19
Source File: Evaluator.java    From jsoup-learning with MIT License 4 votes vote down vote up
/**
 * Identity check against the root: a {@code Document} root is replaced by its
 * first child element before comparing.
 */
@Override
public boolean matches(Element root, Element element) {
	final Element target;
	if (root instanceof Document) {
		target = root.child(0);
	} else {
		target = root;
	}
	return target == element;
}
 
Example 20
Source File: Evaluator.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Returns {@code true} only when {@code element} is the root instance, where the
 * root of a {@code Document} is taken to be its first child element.
 */
@Override
public boolean matches(Element root, Element element) {
	boolean rootIsDocument = root instanceof Document;
	Element rootElement = rootIsDocument ? root.child(0) : root;
	return rootElement == element;
}