Java Code Examples for org.jsoup.nodes.Document#getAllElements()

The following examples show how to use org.jsoup.nodes.Document#getAllElements(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: PageLoaderEpub.java    From a with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extracts the text of a chapter from its backing book resource.
 *
 * <p>The resource bytes are decoded with {@code mCharset}, parsed as HTML, and the
 * text nodes of every element are collected in document order. When the parsed
 * document holds more than one element, each non-empty text becomes its own line,
 * prefixed with two ideographic spaces (U+3000) and separated by CRLF; otherwise the
 * text is appended verbatim.
 *
 * @param chapter the chapter whose resource href is looked up
 * @return the concatenated chapter text
 * @throws Exception if the resource cannot be read or decoded
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource chapterResource = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    String html = new String(chapterResource.getData(), mCharset);
    Elements allElements = Jsoup.parse(html).getAllElements();
    // The element count never changes while iterating, so decide the layout once.
    final boolean oneLinePerText = allElements.size() > 1;

    StringBuilder result = new StringBuilder();
    for (Element node : allElements) {
        for (TextNode textNode : node.textNodes()) {
            String paragraph = StringUtils.formatHtml(textNode.text().trim());
            if (!oneLinePerText) {
                result.append(paragraph);
                continue;
            }
            if (paragraph.length() == 0) {
                // Whitespace-only nodes would otherwise produce empty indented lines.
                continue;
            }
            if (result.length() > 0) {
                result.append("\r\n");
            }
            result.append("\u3000\u3000").append(paragraph);
        }
    }
    return result.toString();
}
 
Example 2
Source File: PageLoaderEpub.java    From MyBookshelf with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds the plain-text body of {@code chapter} from the HTML stored in the book.
 *
 * <p>Every text node of every parsed element is trimmed and passed through
 * {@code StringUtils.formatHtml}. If the document contains more than one element,
 * empty texts are dropped and the remaining ones are emitted one per line (CRLF
 * separated), each indented by two U+3000 full-width spaces. With a single element
 * the texts are simply concatenated.
 *
 * @param chapter chapter descriptor providing the resource href
 * @return the assembled text, possibly empty
 * @throws Exception propagated from reading or decoding the resource
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource source = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    StringBuilder out = new StringBuilder();
    Document parsed = Jsoup.parse(new String(source.getData(), mCharset));
    Elements nodes = parsed.getAllElements();
    boolean indentParagraphs = nodes.size() > 1;

    for (Element current : nodes) {
        List<TextNode> texts = current.textNodes();
        for (TextNode piece : texts) {
            String cleaned = StringUtils.formatHtml(piece.text().trim());
            if (indentParagraphs && cleaned.length() > 0) {
                // Only put a line break between paragraphs, never before the first one.
                String prefix = out.length() > 0 ? "\r\n\u3000\u3000" : "\u3000\u3000";
                out.append(prefix).append(cleaned);
            } else if (!indentParagraphs) {
                out.append(cleaned);
            }
        }
    }
    return out.toString();
}
 
Example 3
Source File: UtilsDemo.java    From UltimateAndroid with Apache License 2.0 5 votes vote down vote up
/**
 * Parses a small hard-coded HTML snippet with jsoup and summarises every element.
 *
 * <p>Logs the document title, element count, child count and location, then logs the
 * text of each element while building one summary line per element.
 *
 * @return one line per element containing its tag name, node name, child count,
 *         data and text, separated by three spaces
 */
public static String TestJsoup() {
    final String markup = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    Document parsed = Jsoup.parse(markup);

    Logs.d("docs---" + parsed.title() + "   " + parsed.getAllElements().size());
    Logs.d("docs---" + parsed.children().size() + "   " + parsed.location());

    StringBuffer summary = new StringBuffer();
    for (Element node : parsed.getAllElements()) {
        summary.append(node.tagName())
                .append("   ").append(node.nodeName())
                .append("   ").append(node.children().size())
                .append("   ").append(node.data())
                .append("   ").append(node.text())
                .append("\n");
        Logs.d(node.text() + "   ");
    }
    return summary.toString();
}
 
Example 4
Source File: UtilsDemo.java    From UltimateAndroid with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates jsoup parsing on a fixed HTML string.
 *
 * <p>Writes the title, total element count, direct child count and location of the
 * parsed document to the log, followed by the text of each element.
 *
 * @return a newline-terminated line for each element: tag name, node name, number of
 *         children, data and text, joined by three spaces
 */
public static String TestJsoup() {
    String sampleHtml = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    String gap = "   ";
    StringBuffer report = new StringBuffer();
    Document document = Jsoup.parse(sampleHtml);

    String titleLine = "docs---" + document.title() + gap + document.getAllElements().size();
    Logs.d(titleLine);
    String childLine = "docs---" + document.children().size() + gap + document.location();
    Logs.d(childLine);

    for (Element each : document.getAllElements()) {
        String row = each.tagName() + gap + each.nodeName() + gap + each.children().size()
                + gap + each.data() + gap + each.text() + "\n";
        report.append(row);
        Logs.d(each.text() + gap);
    }
    return report.toString();
}
 
Example 5
Source File: ParseHTML.java    From ankus_crawler with Apache License 2.0 4 votes vote down vote up
/**
 * Reads an HTML document from a reader and extracts the values described by a list
 * of element definitions.
 *
 * <p>The whole reader is consumed (lines are re-joined with CRLF), parsed with jsoup,
 * and then each definition is resolved against the document:
 * <ul>
 *   <li>{@code ElementType} {@code "CLASS"} selects by CSS class, {@code "TAG"} selects
 *       by tag name, using {@code ElementValue} as the class or tag; any other type is
 *       skipped.</li>
 *   <li>{@code ElementValueType} {@code "html"} emits the matched elements' HTML,
 *       {@code "text"} emits their combined text; any other value type is skipped.</li>
 * </ul>
 * Each emitted entry has the form {@code ElementName + "\t" + value}.
 *
 * <p>The reader is always closed before this method returns, including when reading
 * fails. A read failure is reported on standard error and whatever was read up to
 * that point is still parsed.
 *
 * @param br   reader positioned at the start of the HTML; closed by this method
 * @param list element definitions to resolve, processed in order
 * @return one tab-separated entry per definition that matched a known type and
 *         value type; never {@code null}
 */
public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
	ArrayList<String> retList = new ArrayList<String>();
	StringBuilder buf = new StringBuilder();
	
	try {
		String line;
		while((line = br.readLine()) != null){
			buf.append(line).append("\r\n");
		}
	} catch (IOException e) {
		// Best effort: keep going with the part of the document that was read.
		e.printStackTrace();
	} finally {
		// Close here rather than inside the try so a failed read cannot leak the reader.
		if(br != null){
			try {
				br.close();
			} catch (IOException e) {
				e.printStackTrace();
			}
		}
	}
	
	Document doc = Jsoup.parse(buf.toString());
	
	for(ElementDEF eDef : list){
		// Constant-first equals: a definition with a missing type is skipped, not an NPE.
		Elements eleList = null;
		if("CLASS".equals(eDef.ElementType)){
			eleList = doc.getElementsByClass(eDef.ElementValue);
		} else if("TAG".equals(eDef.ElementType)){
			eleList = doc.getElementsByTag(eDef.ElementValue);
		}
		
		if(eleList == null){
			continue;
		}
		
		if("html".equals(eDef.ElementValueType)){
			retList.add(eDef.ElementName+"\t"+eleList.toString());
		} else if ("text".equals(eDef.ElementValueType)){
			retList.add(eDef.ElementName+"\t"+eleList.text());
		}
	}
	return retList;
}