Java Code Examples for org.jsoup.nodes.Document#getAllElements()

The following examples show how to use org.jsoup.nodes.Document#getAllElements(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: a   File: PageLoaderEpub.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Extracts the text of a chapter from the EPUB resource it points to.
 *
 * <p>The resource is looked up by the chapter's URL, decoded with {@code mCharset} and parsed
 * with jsoup. Every text node returned by {@code textNodes()} for every element of the document
 * is trimmed and passed through {@code StringUtils.formatHtml}. When the document has more than
 * one element, each non-empty piece is emitted as its own paragraph: prefixed with two
 * ideographic spaces (U+3000) and separated from the previous one by CRLF. Otherwise the pieces
 * are concatenated as-is.
 *
 * @param chapter the chapter whose resource should be read
 * @return the assembled chapter text
 * @throws Exception if reading or decoding the resource fails
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource chapterResource = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    String html = new String(chapterResource.getData(), mCharset);
    Elements allElements = Jsoup.parse(html).getAllElements();
    StringBuilder out = new StringBuilder();

    for (Element current : allElements) {
        for (TextNode node : current.textNodes()) {
            String paragraph = StringUtils.formatHtml(node.text().trim());

            // Single-element document: no paragraph formatting, just concatenate.
            if (allElements.size() <= 1) {
                out.append(paragraph);
                continue;
            }
            // Blank pieces produce no paragraph at all.
            if (paragraph.length() == 0) {
                continue;
            }
            // Separator goes between paragraphs, never before the first one.
            if (out.length() > 0) {
                out.append("\r\n");
            }
            out.append("\u3000\u3000").append(paragraph);
        }
    }
    return out.toString();
}
 
Example 2
/**
 * Returns the readable text of the given chapter.
 *
 * <p>Looks up the EPUB resource by {@code chapter.getDurChapterUrl()}, decodes its bytes using
 * {@code mCharset}, parses the result with jsoup and visits the text nodes of every element.
 * Each text node is trimmed and run through {@code StringUtils.formatHtml}. If the parsed
 * document holds more than one element, non-empty pieces are written as paragraphs indented by
 * two ideographic spaces (U+3000) and joined with CRLF; if not, every piece is appended
 * unchanged.
 *
 * @param chapter chapter descriptor providing the resource URL
 * @return the chapter text built from the resource
 * @throws Exception propagated from resource access or decoding
 */
@Override
protected String getChapterContent(BookChapterBean chapter) throws Exception {
    Resource res = epubBook.getResources().getByHref(chapter.getDurChapterUrl());
    Document parsed = Jsoup.parse(new String(res.getData(), mCharset));
    Elements nodes = parsed.getAllElements();
    // The element collection is not modified below, so this decision is fixed for the whole walk.
    final boolean hasSeveralElements = nodes.size() > 1;

    StringBuilder body = new StringBuilder();
    for (Element node : nodes) {
        for (TextNode textNode : node.textNodes()) {
            String line = StringUtils.formatHtml(textNode.text().trim());
            if (!hasSeveralElements) {
                body.append(line);
            } else if (!line.isEmpty()) {
                if (body.length() > 0) {
                    body.append("\r\n");
                }
                body.append("\u3000\u3000").append(line);
            }
        }
    }
    return body.toString();
}
 
Example 3
Source Project: UltimateAndroid   File: UtilsDemo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses a small hard-coded HTML snippet with jsoup and dumps information about each element.
 *
 * <p>The document title, total element count, number of document children and the document
 * location are written via {@code Logs.d}, as is the text of every element. The returned string
 * holds one line per element with its tag name, node name, child count, data and text, each
 * separated by three spaces.
 *
 * @return the per-element dump, one newline-terminated line per element
 */
public static String TestJsoup() {
    String html1 = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    Document doc = Jsoup.parse(html1);

    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());

    StringBuilder dump = new StringBuilder();
    for (Element element : doc.getAllElements()) {
        dump.append(element.tagName()).append("   ")
                .append(element.nodeName()).append("   ")
                .append(element.children().size()).append("   ")
                .append(element.data()).append("   ")
                .append(element.text()).append("\n");
        Logs.d(element.text() + "   ");
    }
    return dump.toString();
}
 
Example 4
Source Project: UltimateAndroid   File: UtilsDemo.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates basic jsoup parsing on a fixed HTML string.
 *
 * <p>Logs the document title together with the number of elements, then the number of document
 * children together with the document location, and finally the text of each element, all
 * through {@code Logs.d}. For every element a row of the form
 * {@code tagName   nodeName   childCount   data   text} is collected.
 *
 * @return all collected rows, each terminated by a newline
 */
public static String TestJsoup() {
    final String sampleHtml = "<html><head><title>First parse</title></head>"
            + "<body><p>Parsed HTML into a doc.</p></body></html>";
    StringBuilder rows = new StringBuilder();
    Document doc = Jsoup.parse(sampleHtml);

    Logs.d("docs---" + doc.title() + "   " + doc.getAllElements().size());
    Logs.d("docs---" + doc.children().size() + "   " + doc.location());

    for (Element each : doc.getAllElements()) {
        String row = each.tagName() + "   " + each.nodeName() + "   " + each.children().size()
                + "   " + each.data() + "   " + each.text() + "\n";
        rows.append(row);
        Logs.d(each.text() + "   ");
    }
    return rows.toString();
}
 
Example 5
Source Project: ankus_crawler   File: ParseHTML.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Reads an HTML document from a reader and extracts one value per element definition.
 *
 * <p>All lines of {@code br} are read and joined with CRLF, the reader is closed, and the text
 * is parsed with jsoup. For each definition in {@code list} the matching elements are selected
 * by class name when {@code ElementType} is {@code "CLASS"} or by tag name when it is
 * {@code "TAG"}; definitions with any other type are skipped. The selection is rendered as
 * HTML when {@code ElementValueType} is {@code "html"} or as plain text when it is
 * {@code "text"}; other value types add nothing.
 *
 * <p>Reading is best effort: if an {@link IOException} occurs, the stack trace is printed and
 * whatever was read up to that point is parsed.
 *
 * @param br   reader supplying the HTML; it is always closed by this method
 * @param list element definitions to evaluate, processed in order
 * @return one entry per successfully evaluated definition, formatted as
 *         {@code ElementName + "\t" + value}
 */
public ArrayList<String> convertBrToDoc(BufferedReader br, ArrayList<ElementDEF> list){
	StringBuilder buf = new StringBuilder();
	ArrayList<String> retList = new ArrayList<String>();
	
	try {
		String line;
		while((line = br.readLine()) != null){
			buf.append(line).append("\r\n");
		}
	} catch (IOException e) {
		// Best effort: keep whatever was read so far and parse that.
		e.printStackTrace();
	} finally {
		// Close in finally so the reader is released even when readLine() fails.
		try {
			br.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	Document doc = Jsoup.parse(buf.toString());
	
	for(ElementDEF eDef : list){
		// Constant-first equals: a definition with a null type is skipped instead of throwing.
		Elements eleList = null;
		if("CLASS".equals(eDef.ElementType)){
			eleList = doc.getElementsByClass(eDef.ElementValue);
		} else if("TAG".equals(eDef.ElementType)){
			eleList = doc.getElementsByTag(eDef.ElementValue);
		}
		
		if(eleList == null){
			continue;
		}
		
		if("html".equals(eDef.ElementValueType)){
			retList.add(eDef.ElementName+"\t"+eleList.toString());
		} else if ("text".equals(eDef.ElementValueType)){
			retList.add(eDef.ElementName+"\t"+eleList.text());
		}
	}
	return retList;
}