Java Code Examples for org.jsoup.nodes.Element#nextElementSibling()

The following examples show how to use org.jsoup.nodes.Element#nextElementSibling(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JsoupInstanceListExtractor.java    From wandora with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Looks up or creates the topic named by the text of {@code classElement}, adds a type to it,
 * and passes every {@code ul} child of the following sibling element to {@code parseList}.
 *
 * @param classElement element whose text is used as the topic name
 * @param typeTopic    type added to the topic; {@code wandoraClass} is used when {@code null}
 * @throws TopicMapException propagated from the topic map calls made here
 */
private void parseTopic(Element classElement, Topic typeTopic) throws TopicMapException {
    final String label = classElement.text();
    System.out.println(label);

    final Topic topic = getOrCreateTopic(tm, null, label);
    topic.addType(typeTopic != null ? typeTopic : wandoraClass);

    // The element right after the class element may wrap one or more lists (of instances).
    final Element wrapper = classElement.nextElementSibling();
    if (wrapper == null) {
        return;
    }
    for (Element child : wrapper.children()) {
        if ("ul".equals(child.tagName())) {
            parseList(child, topic);
        }
    }
}
 
Example 2
Source File: JsoupSuperSubClassListExtractor.java    From wandora with GNU General Public License v3.0 8 votes vote down vote up
/**
 * Looks up or creates the topic named by the trimmed text of {@code classElement}, makes it a
 * subclass of the given class topic, and passes every {@code ul} child of the following sibling
 * element to {@code parseList}. Does nothing when the trimmed text is empty.
 *
 * @param classElement element whose trimmed text is used as the topic name
 * @param classTopic   superclass topic; {@code wandoraClass} is used when {@code null}
 * @throws TopicMapException propagated from the topic map calls made here
 */
private void parseTopic(Element classElement, Topic classTopic) throws TopicMapException {
    final String label = classElement.text().trim();
    if (label.isEmpty()) {
        return;
    }

    final Topic topic = getOrCreateTopic(tm, null, label);
    final Topic superClass = (classTopic == null) ? wandoraClass : classTopic;
    makeSubclassOf(tm, topic, superClass);

    // The element right after the class element may wrap one or more nested lists.
    final Element wrapper = classElement.nextElementSibling();
    if (wrapper != null) {
        for (Element child : wrapper.children()) {
            if ("ul".equals(child.tagName())) {
                parseList(child, topic);
            }
        }
    }
}
 
Example 3
Source File: Elements.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Gathers element siblings of every element in this collection.
 *
 * @param query optional selector; when non-null only siblings matching it are kept
 * @param next  {@code true} to walk towards following siblings, {@code false} for preceding ones
 * @param all   {@code true} to keep walking until the siblings run out, {@code false} to look
 *              at the immediate sibling only
 * @return the collected siblings, in the order they were visited
 */
private Elements siblings(String query, boolean next, boolean all) {
    final Elements matches = new Elements();
    final Evaluator filter = (query == null) ? null : QueryParser.parse(query);
    for (Element start : this) {
        Element current = start;
        while (true) {
            final Element neighbour = next ? current.nextElementSibling() : current.previousElementSibling();
            if (neighbour == null) {
                break;
            }
            if (filter == null || neighbour.is(filter)) {
                matches.add(neighbour);
            }
            if (!all) {
                break;
            }
            current = neighbour;
        }
    }
    return matches;
}
 
Example 4
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 6 votes vote down vote up
/**
 * Walks the element siblings following {@code h2}, stopping at the next {@code h2} or when the
 * siblings run out, and turns every {@code h3} encountered into a book via {@code processH3}.
 * A sibling that is not itself an {@code h3} is searched for nested {@code h3} elements.
 *
 * @param novel    novel the books belong to; passed through to {@code processH3}
 * @param h2       heading element that precedes the volume list
 * @param language language passed through to {@code processH3}
 * @return the books collected by {@code processH3}, possibly empty
 */
private static ArrayList<BookModel> parseBooksMethod1(NovelCollectionModel novel, Element h2, String language) {
	ArrayList<BookModel> books = new ArrayList<BookModel>();
	int bookOrder = 0;
	Element bookElement = h2.nextElementSibling();
	// Tag names are compared with equals(): == / != only test reference identity and
	// are not guaranteed to hold for equal strings.
	while (bookElement != null && !"h2".equals(bookElement.tagName())) {
		if ("h3".equals(bookElement.tagName())) {
			bookOrder = processH3(novel, books, bookElement, bookOrder, language);
		} else {
			// select() yields an empty collection when nothing matches, so no guard is needed.
			for (Element h3 : bookElement.select("h3")) {
				bookOrder = processH3(novel, books, h3, bookOrder, language);
			}
		}
		bookElement = bookElement.nextElementSibling();
	}
	return books;
}
 
Example 5
Source File: Elements.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Collects the element siblings of each element in this collection.
 *
 * @param query selector used to filter the siblings, or {@code null} to accept all of them
 * @param next  direction of travel: {@code true} for following, {@code false} for preceding
 * @param all   whether to continue past the immediate sibling until none are left
 * @return every accepted sibling, in visiting order
 */
private Elements siblings(String query, boolean next, boolean all) {
    Elements collected = new Elements();
    Evaluator eval = null;
    if (query != null) {
        eval = QueryParser.parse(query);
    }
    for (Element origin : this) {
        Element cursor = next ? origin.nextElementSibling() : origin.previousElementSibling();
        while (cursor != null) {
            if (eval == null || cursor.is(eval)) {
                collected.add(cursor);
            }
            if (!all) {
                break;
            }
            cursor = next ? cursor.nextElementSibling() : cursor.previousElementSibling();
        }
    }
    return collected;
}
 
Example 6
Source File: LoadMessagesTask.java    From SteamGifts with MIT License 6 votes vote down vote up
/**
 * Builds a flat list of message headers, each followed by its comments, from the
 * {@code .comments__entity} elements of the document. Entities without a name link are skipped.
 *
 * @param document parsed page to read the messages from
 * @return headers and their comments, in page order
 */
private List<IEndlessAdaptable> loadMessages(Document document) {
    final List<IEndlessAdaptable> result = new ArrayList<>();
    for (Element entity : document.select(".comments__entity")) {
        final Element nameLink = entity.select(".comments__entity__name a").first();
        if (nameLink == null) {
            continue;
        }
        final MessageHeader header = new MessageHeader(nameLink.text(), nameLink.absUrl("href"));

        // The comments of an entity are read from the element that directly follows it.
        final Element commentsContainer = entity.nextElementSibling();
        if (commentsContainer != null) {
            Utils.loadComments(commentsContainer, header, Comment.Type.COMMENT);
        }

        // Header first, then everything that was attached to it.
        result.add(header);
        result.addAll(header.getComments());
    }
    return result;
}
 
Example 7
Source File: CifnewsPageHandler.java    From cetty with Apache License 2.0 6 votes vote down vote up
/**
 * Assembles a fresh {@code div} from the fetched body: a blockquote built from the summary
 * text, followed by the children of {@code div.article-inner}. Children containing
 * {@code div.fetch-present} are dropped; a child containing {@code p>img} is replaced by a
 * figure, captioned when the image is directly followed by a {@code p} element.
 *
 * @param tempBody elements of the fetched page body
 * @return the newly built article body
 */
@Override
public Element appendBody(Elements tempBody) {
    final Element articleBody = new Element(Tag.valueOf("div"), "");
    buildBlockquote(tempBody.select("div.fetch-read>div.summary").text(), articleBody);

    for (Element block : tempBody.select("div.article-inner>*")) {
        if (!block.select("div.fetch-present").isEmpty()) {
            continue;
        }
        final Element image = block.select("p>img").first();
        if (image == null) {
            articleBody.appendChild(block);
            continue;
        }
        // Build the figure before looking at the sibling, as the original ordering does.
        final Element figure = buildFigure(image);
        final Element captionSource = image.nextElementSibling();
        if (captionSource != null && captionSource.tagName().equals("p")) {
            figure.appendChild(buildFigcaption(captionSource.text()));
        }
        articleBody.appendChild(figure);
    }
    return articleBody;
}
 
Example 8
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Concatenates the text of the leading paragraphs of a page, one per line.
 * Only {@code p} elements whose parent has class {@code mw-parser-output} are considered, and
 * paragraphs whose first child is styled {@code display:none} are skipped. Collection stops
 * after a paragraph whose next sibling has a class starting with {@code toc}, or is neither a
 * {@code p} nor styled {@code display:none}.
 *
 * @param document parsed page
 * @return the collected paragraph texts, each terminated by a newline
 */
private static String extractFirstParagraphs(Document document) {
	StringBuilder text = new StringBuilder();
	for (Element paragraph : document.select("p")) {
		if (!paragraph.parent().hasClass("mw-parser-output")) {
			continue;
		}
		if (!paragraph.children().isEmpty() && paragraph.child(0).attr("style").equals("display:none")) {
			continue;
		}
		text.append(paragraph.text()).append("\n");

		Element following = paragraph.nextElementSibling();
		if (following == null) {
			continue;
		}
		if (following.className().startsWith("toc")) {
			break;
		}
		if (!following.nodeName().equals("p") && !following.attr("style").equals("display:none")) {
			break;
		}
	}
	return text.toString();
}
 
Example 9
Source File: RawWikiCorpusExtractor.java    From ambiverse-nlu with Apache License 2.0 6 votes vote down vote up
/**
 * Returns the text of the first run of content paragraphs in the document, newline separated.
 * A paragraph is taken only if its parent carries class {@code mw-parser-output} and its first
 * child (if any) is not styled {@code display:none}. The run ends once the element after a
 * taken paragraph has a class beginning with {@code toc}, or is something other than a
 * {@code p} that is not styled {@code display:none}.
 *
 * @param document parsed page
 * @return the collected text, with a trailing newline after each paragraph
 */
static String extractFirstParagraphs(Document document) {
	StringBuilder out = new StringBuilder();
	for (Element p : document.select("p")) {
		boolean underParserOutput = p.parent().hasClass("mw-parser-output");
		if (!underParserOutput
				|| (!p.children().isEmpty() && p.child(0).attr("style").equals("display:none"))) {
			continue;
		}
		out.append(p.text());
		out.append("\n");

		Element successor = p.nextElementSibling();
		if (successor != null) {
			boolean startsToc = successor.className().startsWith("toc");
			if (startsToc
					|| (!successor.nodeName().equals("p") && !successor.attr("style").equals("display:none"))) {
				break;
			}
		}
	}
	return out.toString();
}
 
Example 10
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 5 votes vote down vote up
/**
 * Collects all element siblings that come after the given element (the following-sibling axis).
 *
 * @param e element whose following siblings are collected
 * @return the following siblings in the order they are encountered; empty when there are none
 */
public Elements followingSibling(Element e) {
    Elements rs = new Elements();
    for (Element sibling = e.nextElementSibling(); sibling != null; sibling = sibling.nextElementSibling()) {
        rs.add(sibling);
    }
    return rs;
}
 
Example 11
Source File: CommonUtil.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Collects every element sibling that follows {@code el}.
 *
 * @param el element whose following siblings are wanted
 * @return the following siblings in the order they are encountered, or {@code null} when
 *         there are none
 */
public static Elements followingSibling(Element el){
    Elements collected = new Elements();
    for (Element cursor = el.nextElementSibling(); cursor != null; cursor = cursor.nextElementSibling()) {
        collected.add(cursor);
    }
    // Callers receive null, not an empty collection, when nothing follows.
    return collected.isEmpty() ? null : collected;
}
 
Example 12
Source File: FollowingSiblingOneSelector.java    From JsoupXpath with Apache License 2.0 5 votes vote down vote up
/**
 * Maps each context element to its immediately following element sibling, dropping elements
 * that have none.
 *
 * @param context elements to start from
 * @return an {@code XValue} wrapping the collected next siblings, in context order
 */
@Override
public XValue apply(Elements context) {
    final Elements successors = new Elements();
    for (Element el : context) {
        final Element following = el.nextElementSibling();
        if (following != null) {
            successors.add(following);
        }
    }
    return XValue.create(successors);
}
 
Example 13
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 5 votes vote down vote up
/**
 * Creates a book titled after {@code bookElement} and fills it with the chapters found in the
 * {@code li} elements of the siblings that follow, stopping at the next {@code h2} or
 * {@code h3} or when the siblings run out. The book is appended to {@code books}.
 *
 * @param novel       novel the book belongs to; its page is used to build the chapter parent key
 * @param books       list the new book is appended to
 * @param bookElement heading element naming the book
 * @param bookOrder   order value assigned to the new book
 * @param language    language passed through to {@code processLI}
 * @return {@code bookOrder + 1}
 */
public static int processH3(NovelCollectionModel novel, ArrayList<BookModel> books, Element bookElement, int bookOrder, String language) {
	BookModel book = new BookModel();
	book.setTitle(CommonParser.sanitize(bookElement.text(), true));
	book.setOrder(bookOrder);
	ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>();
	String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle();

	// parse the chapters.
	int chapterOrder = 0;
	Element chapterElement = bookElement.nextElementSibling();
	while (chapterElement != null) {
		// Tag names are compared with equals(): == only tests reference identity and is
		// not guaranteed to hold for equal strings.
		String tag = chapterElement.tagName();
		if ("h2".equals(tag) || "h3".equals(tag)) {
			break;
		}
		for (Element chapter : chapterElement.select("li")) {
			PageModel p = processLI(chapter, parent, chapterOrder, language);
			if (p != null) {
				chapterCollection.add(p);
				++chapterOrder;
			}
		}
		chapterElement = chapterElement.nextElementSibling();
	}
	// Assigned once after the walk; the same list instance was previously re-set on every pass.
	book.setChapterCollection(chapterCollection);
	books.add(book);
	return bookOrder + 1;
}
 
Example 14
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 5 votes vote down vote up
/**
 * Handles pages where the chapter list is nested directly in {@code ul}/{@code dl} elements
 * (e.g. Fate/Apocrypha, Gekkou). Walks the element siblings following {@code h2} until the
 * next {@code h2} or the end of the siblings; every {@code ul} or {@code dl} met on the way
 * becomes a book titled after {@code h2}, with its {@code li} elements parsed as chapters.
 *
 * @param novel    novel the books belong to; its page is used to build the chapter parent key
 * @param h2       heading element that names the book(s) and precedes the chapter lists
 * @param language language passed through to {@code processLI}
 * @return the books found, possibly empty
 */
private static ArrayList<BookModel> parseBooksMethod3(NovelCollectionModel novel, Element h2, String language) {
	ArrayList<BookModel> books = new ArrayList<BookModel>();
	int bookOrder = 0;
	for (Element bookElement = h2.nextElementSibling(); bookElement != null; bookElement = bookElement.nextElementSibling()) {
		// Tag names are compared with equals(): == only tests reference identity and is
		// not guaranteed to hold for equal strings.
		String tag = bookElement.tagName();
		if ("h2".equals(tag)) {
			break;
		}
		if (!"ul".equals(tag) && !"dl".equals(tag)) {
			continue;
		}

		BookModel book = new BookModel();
		book.setTitle(CommonParser.sanitize(h2.text(), true));
		book.setOrder(bookOrder);
		ArrayList<PageModel> chapterCollection = new ArrayList<PageModel>();
		String parent = novel.getPage() + Constants.NOVEL_BOOK_DIVIDER + book.getTitle();

		// parse the chapters.
		int chapterOrder = 0;
		for (Element chapter : bookElement.select("li")) {
			PageModel p = processLI(chapter, parent, chapterOrder, language);
			if (p != null) {
				chapterCollection.add(p);
				++chapterOrder;
			}
		}
		book.setChapterCollection(chapterCollection);
		books.add(book);
		++bookOrder;
	}
	return books;
}
 
Example 15
Source File: AxisSelector.java    From CrawlerForReader with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the next sibling element (an extension axis; syntax: following-sibling-one).
 *
 * @param e element whose next sibling is wanted
 * @return a collection holding the next element sibling, or an empty collection when there
 *         is none
 */
public Elements followingSiblingOne(Element e) {
    Elements rs = new Elements();
    Element successor = e.nextElementSibling();
    if (successor != null) {
        rs.add(successor);
    }
    return rs;
}
 
Example 16
Source File: ESchoolParser.java    From substitution-schedule-parser with Mozilla Public License 2.0 4 votes vote down vote up
/**
 * Reads the substitutions of one table into {@code day}. Each {@code th[colspan=10]} cell
 * starts a lesson section: its text supplies the lesson (the number in front of ". Stunde"
 * when present, otherwise the full text), and the rows after the column-header row are
 * converted into substitutions until a row containing a {@code th} or the end of the rows.
 *
 * @param table table element to parse
 * @param day   schedule day that receives the parsed substitutions
 */
private void parseTable(Element table, SubstitutionScheduleDay day) {
    // Compiled once per call rather than once per section header.
    final Pattern lessonPattern = Pattern.compile("(\\d+)\\. Stunde");

    for (Element th : table.select("th[colspan=10]")) {
        final Matcher matcher = lessonPattern.matcher(th.text());
        final String lesson = matcher.find() ? matcher.group(1) : th.text();

        // skip over table headers; a section header that is the last row has no header row
        // after it, in which case there is nothing to read for this section.
        final Element headerRow = th.parent().nextElementSibling();
        Element row = (headerRow != null) ? headerRow.nextElementSibling() : null;
        while (row != null && row.select("th").isEmpty()) {
            Substitution subst = new Substitution();
            subst.setLesson(lesson);

            Elements columns = row.select("td");

            String[] classes = columns.get(0).text().split(", |\\+");
            subst.setClasses(new HashSet<>(Arrays.asList(classes)));

            subst.setPreviousTeacher(getPreviousValue(columns.get(1)));
            subst.setTeacher(getNewValue(columns.get(1)));
            subst.setPreviousSubject(getPreviousValue(columns.get(2)));
            subst.setSubject(getNewValue(columns.get(2)));
            subst.setPreviousRoom(getPreviousValue(columns.get(3)));
            subst.setRoom(getNewValue(columns.get(3)));
            if (columns.get(4).text().isEmpty()) {
                subst.setType("Vertretung");
                subst.setColor(colorProvider.getColor("Vertretung"));
            } else {
                String desc = columns.get(4).text();
                subst.setDesc(desc);
                // Fall back to the default type when the description is not recognized.
                String recognizedType = recognizeType(desc);
                if (recognizedType == null) recognizedType = "Vertretung";
                subst.setType(recognizedType);
                subst.setColor(colorProvider.getColor(recognizedType));
            }

            day.addSubstitution(subst);

            row = row.nextElementSibling();
        }
    }
}
 
Example 17
Source File: JiSho.java    From ankihelper with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Fetches the page at {@code wordUrl + key} and turns every meaning found in its
 * {@code div.concept_light} entries into a {@code Definition}. If the fetch fails with an
 * {@code IOException}, a toast with the stack trace is shown and an empty list is returned.
 *
 * @param key word to look up; appended to {@code wordUrl} as is
 * @return one definition per meaning found, possibly empty
 */
public List<Definition> wordLookup(String key) {
    final Document doc;
    try {
        doc = Jsoup.connect(wordUrl + key)
                .userAgent("Mozilla")
                .timeout(5000)
                .get();
    } catch (IOException ioe) {
        Toast.makeText(MyApplication.getContext(), Log.getStackTraceString(ioe), Toast.LENGTH_SHORT).show();
        return new ArrayList<Definition>();
    }

    final ArrayList<Definition> defList = new ArrayList<>();
    for (Element entry : doc.select("div.concept_light")) {
        // Per-entry fields; each stays empty when the entry lacks the matching element.
        final Element furiganaEl = entry.select("span.furigana").first();
        final String furigana = (furiganaEl == null) ? "" : furiganaEl.text().trim();

        final Element writingEl = entry.select("span.text").first();
        final String writing = (writingEl == null) ? "" : writingEl.text().trim();

        final Element audioEl = entry.select("audio > source").first();
        final String mp3Url = (audioEl == null) ? "" : "[sound:" + audioEl.attr("src") + "]";

        final String audioIndicator = mp3Url.isEmpty() ? "" : "<font color='#227D51' >" + AUDIO_TAG + "</font>";
        final String exportHead = "<b>" + writing + "</b> <font color='grey'>" + furigana + "</font> " + audioIndicator + "<br/>";

        for (Element tag : entry.select("div.meaning-tags")) {
            // The meanings belonging to a tag block are read from the element right after it.
            final Element meanings = tag.nextElementSibling();
            if (meanings == null) {
                continue;
            }
            final String tagPrefix = "<i><font color='grey'>" + tag.text().trim() + "</font></i> ";
            for (Element meaning : meanings.select("div.meaning-definition > span.meaning-meaning")) {
                final String definition = tagPrefix + meaning.text().trim();
                final HashMap<String, String> defMap = new HashMap<>();
                defMap.put(EXP_ELE[0], writing);
                defMap.put(EXP_ELE[1], furigana);
                defMap.put(EXP_ELE[2], mp3Url);
                defMap.put(EXP_ELE[3], definition);
                defList.add(new Definition(defMap, exportHead + definition));
            }
        }
    }
    return defList;
}
 
Example 18
Source File: JsoupDefinitionListExtractor.java    From wandora with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Looks up or creates the topic for a definition name, types it with {@code definitionType},
 * declares it a child of {@code listTopic}, and stores the text of each directly following
 * {@code dd} element as its data.
 *
 * @param name      element holding the name; its text is the topic name
 * @param listTopic topic of the list the name belongs to
 * @throws TopicMapException propagated from the topic map calls made here
 */
private void parseName(Element name, Topic listTopic) throws TopicMapException {
    final Topic nameTopic = getOrCreateTopic(tm, null, name.text());
    nameTopic.addType(definitionType);
    declareChild(listTopic, nameTopic);

    // Walk the run of <dd> siblings right after the name; the first non-<dd> ends it.
    // NOTE(review): every <dd> is stored under the same type/language pair, so presumably
    // the last one wins - confirm against Topic.setData.
    for (Element dd = name.nextElementSibling();
            dd != null && "dd".equals(dd.tagName());
            dd = dd.nextElementSibling()) {
        nameTopic.setData(definitionType, langTopic, dd.text());
    }
}