Java Code Examples for org.jsoup.nodes.Document#getAllElements()

The following examples show how to use org.jsoup.nodes.Document#getAllElements(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out the related API usage in the sidebar.

Example 1

Source File: PageLoaderEpub.java From a with GNU General Public License v3.0

6 votes

/**
 * Extracts the text of one chapter from the book's resources.
 *
 * <p>The resource addressed by the chapter's URL is decoded with {@code mCharset}, parsed
 * with Jsoup, and the text nodes of every element are visited in document order. Each text
 * is trimmed and passed through {@code StringUtils.formatHtml}. When the document has more
 * than one element, empty texts are skipped and every remaining text becomes its own line,
 * prefixed with two ideographic spaces (U+3000) and separated by CRLF. Otherwise the texts
 * are concatenated as they are.
 *
 * @param chapter chapter whose {@code getDurChapterUrl()} identifies the resource to read
 * @return the assembled chapter text
 * @throws Exception if reading the resource data or decoding it fails
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource chapterResource = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    String html = new String(chapterResource.getData(), mCharset);
    Elements allElements = Jsoup.parse(html).getAllElements();
    // The element list is never modified below, so this check is loop-invariant.
    boolean multipleElements = allElements.size() > 1;

    StringBuilder result = new StringBuilder();
    for (Element node : allElements) {
        for (TextNode textNode : node.textNodes()) {
            String paragraph = StringUtils.formatHtml(textNode.text().trim());
            if (!multipleElements) {
                result.append(paragraph);
                continue;
            }
            if (paragraph.length() == 0) {
                continue;
            }
            if (result.length() > 0) {
                result.append("\r\n");
            }
            result.append("\u3000\u3000").append(paragraph);
        }
    }
    return result.toString();
}

Example 2

Source File: PageLoaderEpub.java From MyBookshelf with GNU General Public License v3.0

6 votes

/**
 * Returns the readable text of the given chapter.
 *
 * <p>Looks up the chapter's resource by its URL, decodes the bytes using {@code mCharset},
 * and walks every element of the Jsoup-parsed document. Each text node is trimmed and run
 * through {@code StringUtils.formatHtml}. If the document contains more than one element,
 * blank texts are dropped and the rest are emitted one per line (CRLF-separated), each
 * indented with two U+3000 spaces; if not, the texts are appended unchanged.
 *
 * @param chapter the chapter to load; its {@code getDurChapterUrl()} is used as the href
 * @return the chapter's text content
 * @throws Exception propagated from reading or decoding the resource
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    final String indent = "\u3000\u3000";
    final String lineBreak = "\r\n";

    Resource res = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    Document parsed = Jsoup.parse(new String(res.getData(), mCharset));
    Elements all = parsed.getAllElements();
    final boolean formatAsParagraphs = all.size() > 1;

    StringBuilder out = new StringBuilder();
    for (Element el : all) {
        List<TextNode> nodes = el.textNodes();
        for (TextNode node : nodes) {
            String cleaned = node.text().trim();
            cleaned = StringUtils.formatHtml(cleaned);
            if (formatAsParagraphs) {
                if (cleaned.length() > 0) {
                    // Only separate from previous output; never start with a line break.
                    out.append(out.length() > 0 ? lineBreak : "");
                    out.append(indent).append(cleaned);
                }
            } else {
                out.append(cleaned);
            }
        }
    }
    return out.toString();
}

Example 3

Source File: UtilsDemo.java From UltimateAndroid with Apache License 2.0

5 votes

/**
 * Parses a small hard-coded HTML snippet with Jsoup and summarizes every element.
 *
 * <p>Logs the document title, element count, child count and location, then logs each
 * element's text while collecting one line per element.
 *
 * @return one line per element containing its tag name, node name, child count, data and
 *         text, separated by three spaces and terminated by {@code \n}
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // The builder never leaves this method, so the unsynchronized StringBuilder is enough.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Chained appends avoid building a throwaway concatenated String per element.
        sb.append(element.tagName()).append("   ")
                .append(element.nodeName()).append("   ")
                .append(element.children().size()).append("   ")
                .append(element.data()).append("   ")
                .append(element.text()).append("\n");
        Logs.d(element.text() + "   ");
    }
    return sb.toString();
}

Example 4

Source File: UtilsDemo.java From UltimateAndroid with Apache License 2.0

5 votes

/**
 * Demonstrates Jsoup parsing on a fixed HTML string and dumps every element it contains.
 *
 * <p>Document-level details (title, element count, child count, location) and each
 * element's text are written to the log; a per-element summary is returned.
 *
 * @return a newline-terminated line for each element with its tag name, node name, number
 *         of children, data and text, the fields separated by three spaces
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    // Local, single-threaded accumulation: StringBuilder replaces the synchronized StringBuffer.
    StringBuilder sb = new StringBuilder();
    Document doc = Jsoup.parse(html1);
    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());
    for (Element element : doc.getAllElements()) {
        // Append field by field instead of concatenating into a temporary String first.
        sb.append(element.tagName()).append("   ");
        sb.append(element.nodeName()).append("   ");
        sb.append(element.children().size()).append("   ");
        sb.append(element.data()).append("   ");
        sb.append(element.text()).append("\n");
        Logs.d(element.text() + "   ");
    }
    return sb.toString();
}

Example 5

Source File: ParseHTML.java From ankus_crawler with Apache License 2.0

4 votes

/**
 * Reads an HTML document from {@code br}, parses it with Jsoup and extracts one value
 * per element definition.
 *
 * <p>A definition whose {@code ElementType} is {@code "CLASS"} selects elements by class
 * name and {@code "TAG"} selects by tag name, using {@code ElementValue} as the name; any
 * other type is skipped. For a selected definition, {@code ElementValueType} decides what
 * is emitted: {@code "html"} yields the selection's string form, {@code "text"} its text;
 * any other value type adds nothing.
 *
 * <p>If reading fails, the stack trace is printed and whatever was read so far is parsed.
 *
 * @param br reader supplying the HTML; it is always closed by this method
 * @param list element definitions to evaluate, in order
 * @return entries of the form {@code ElementName + "\t" + value}, in definition order
 */
public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
	StringBuilder buf = new StringBuilder();
	ArrayList<String> retList = new ArrayList<String>();

	try {
		String line;
		while ((line = br.readLine()) != null) {
			buf.append(line);
			buf.append("\r\n");
		}
	} catch (IOException e) {
		// Best effort: report the failure and continue with the partial input.
		e.printStackTrace();
	} finally {
		// Close on every path so the reader is not leaked when readLine() throws.
		try {
			br.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	Document doc = Jsoup.parse(buf.toString());

	for (ElementDEF eDef : list) {
		// Constant-first equals: a definition with a null field is skipped instead of
		// aborting the whole extraction with a NullPointerException.
		Elements eleList = null;
		if ("CLASS".equals(eDef.ElementType)) {
			eleList = doc.getElementsByClass(eDef.ElementValue);
		} else if ("TAG".equals(eDef.ElementType)) {
			eleList = doc.getElementsByTag(eDef.ElementValue);
		}

		if (eleList == null) {
			continue;
		}

		if ("html".equals(eDef.ElementValueType)) {
			retList.add(eDef.ElementName + "\t" + eleList.toString());
		} else if ("text".equals(eDef.ElementValueType)) {
			retList.add(eDef.ElementName + "\t" + eleList.text());
		}
	}
	return retList;
}