Java Code Examples for org.jsoup.select.Elements#first()

The following examples show how to use org.jsoup.select.Elements#first(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: JsoupAssociationRowTableExtractor.java    From wandora with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Parses an HTML table into associations: the non-blank {@code th} cells of
 * the first row supply the role topics, and every following row is handed to
 * {@code handlePlayerRow} together with those roles.
 *
 * <p>A failure while handling a single player row is logged and does not stop
 * the remaining rows from being processed.
 *
 * @param table the table element whose {@code tr} rows are processed
 * @throws Exception propagated from {@code getOrCreateTopic} while building the roles
 */
private void parseTable(Element table) throws Exception{
    
    Elements rows = table.select("tr");
    
    // Elements.first() returns null for an empty selection and
    // subList(1, 0) would throw, so a table without rows is skipped.
    if(rows.isEmpty()) return;
    
    Element headerRow = rows.first();
    
    ArrayList<Topic> roles = new ArrayList<Topic>();
    
    for(Element headerCell: headerRow.select("th")){
        String roleValue = headerCell.text().trim();
        // Blank header cells do not define a role.
        if(roleValue.length() == 0) continue;
        
        Topic role = getOrCreateTopic(tm, null, roleValue);
        roles.add(role);
    }
    
    // Everything after the header row is treated as a player row.
    List<Element> playerRows = rows.subList(1,rows.size());
    
    for(Element playerRow: playerRows){
        try {
            handlePlayerRow(playerRow, roles);
        } catch (Exception e) {
            log(e.getMessage());
        }
    }
}
 
Example 2
Source File: BlacklistHelper.java    From hipda with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Posts a blacklist request for {@code username} and reports the outcome.
 *
 * @param formhash value sent as the "formhash" form field
 * @param username value sent as the "user" form field; stored in the local
 *                 blacklist when the response carries no error
 * @return the text of the first {@code div.alert_error} element (with its
 *         links removed) when the response contains one, otherwise an empty string
 * @throws Exception propagated from the HTTP request
 */
public static String addBlacklist2(String formhash, String username) throws Exception {
    ParamsMap form = new ParamsMap();
    form.put("formhash", formhash);
    form.put("user", username);

    String html = OkHttpHelper.getInstance().post(HiUtils.AddBlackUrl, form);
    Elements alerts = Jsoup.parse(html).select("div.alert_error");

    if (alerts.isEmpty()) {
        // No error block in the response: remember the user locally.
        HiSettingsHelper.getInstance().addToBlacklist(username);
        return "";
    }

    Element alert = alerts.first();
    // Drop the anchors so only the message text is returned.
    alert.select("a").remove();
    return alert.text();
}
 
Example 3
Source File: ElementTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Removes every sibling located before the paragraph whose own text matches
 * "XXX" and verifies the markup that remains in the body.
 */
@Test
public void testRemoveBeforeIndex() {
    String html = "<html><body><div><p>before1</p><p>before2</p><p>XXX</p><p>after1</p><p>after2</p></div></body></html>";
    Element body = Jsoup.parse(html, "").select("body").first();
    Element marker = body.select("p:matchesOwn(XXX)").first();

    // Siblings whose index is lower than the marker's own sibling index.
    Elements preceding = marker.parent().getElementsByIndexLessThan(marker.elementSiblingIndex());
    for (Element sibling : preceding) {
        sibling.remove();
    }

    String expected = "<body><div><p>XXX</p><p>after1</p><p>after2</p></div></body>";
    assertEquals(expected, TextUtil.stripNewlines(body.outerHtml()));
}
 
Example 4
Source File: NexusParser.java    From Hentoid with Apache License 2.0 6 votes vote down vote up
/**
 * Opens every reader page of {@code content} and collects the {@code src} of
 * the first image matched by {@code section a img} on each page.
 *
 * <p>Pages that cannot be loaded, or that contain no matching image, are
 * skipped; progress is still advanced for them.
 *
 * @param content the content whose reader pages are visited
 * @return the image URLs found, in page order
 * @throws IOException declared by the overridden method
 */
@Override
protected List<String> parseImages(@NonNull Content content) throws IOException {
    List<String> imageUrls = new ArrayList<>();

    progressStart(content.getQtyPages());

    // Page numbers are 1-based; each reader URL is derived from the template
    // URL by substituting "001" with the formatted page number.
    for (int page = 1; page <= content.getQtyPages(); page++) {
        String pageUrl = content.getReaderUrl().replace("001", Helper.formatIntAsStr(page, 3));
        Document pageDoc = getOnlineDocument(pageUrl);
        if (pageDoc != null) {
            Elements images = pageDoc.select("section a img");
            if (images != null && !images.isEmpty()) {
                imageUrls.add(images.first().attr("src"));
            }
        }
        progressPlus();
    }

    progressComplete();

    return imageUrls;
}
 
Example 5
Source File: JokeBean.java    From Study_Android_Demo with Apache License 2.0 6 votes vote down vote up
/**
 * Builds a joke entry from a parsed HTML element.
 *
 * @param element element expected to contain descendants with the classes
 *                "content" and "contentHerf", and optionally "thumb"
 */
public JokeBean(Element element) {
    // Text: taken from the first element carrying class "content".
    this.content = element.getElementsByClass("content").first().text();

    // Image: optional, only read when at least one "thumb" element exists.
    Elements thumbs = element.getElementsByClass("thumb");
    if (thumbs != null && !thumbs.isEmpty()) {
        // The src attribute of the img inside the first thumb is the image address.
        this.img = thumbs.first().select("img").attr("src");
    }

    // Link: href of the anchor inside the first "contentHerf" element.
    this.contentHerf = element.getElementsByClass("contentHerf").first().select("a").attr("href");
}
 
Example 6
Source File: HentaifoundryRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Loads the page that follows {@code doc}.
 *
 * @param doc the current page
 * @return the document fetched from the "next" link of {@code doc}
 * @throws IOException with message "No more pages" when {@code doc} is the
 *         last page (hidden "next" item, or no "next" link at all), or when
 *         fetching the next page fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    if (!doc.select("li.next.hidden").isEmpty()) {
        // Last page
        throw new IOException("No more pages");
    }
    // Elements.first() yields null when nothing matches; check it explicitly
    // instead of catching NullPointerException, which would also hide
    // unrelated null dereferences inside the HTTP call.
    Element next = doc.select("li.next > a").first();
    if (next == null) {
        throw new IOException("No more pages");
    }
    String nextURL = "https://www.hentai-foundry.com" + next.attr("href");
    return Http.url(nextURL)
            .referrer(url)
            .cookies(cookies)
            .get();
}
 
Example 7
Source File: BakaTsukiParserAlternative.java    From coolreader with MIT License 6 votes vote down vote up
/***
 * Converts a list item into a chapter page model based on its first link.
 * 
 * @param li list item element to inspect
 * @param parent forwarded unchanged to processA
 * @param chapterOrder forwarded unchanged to processA
 * @param language forwarded unchanged to processA
 * @return the model produced by processA, or null when the item contains no
 *         link or its first link points to a "User_talk:" page
 */
private static PageModel processLI(Element li, String parent, int chapterOrder, String language) {
	Elements anchors = li.select("a");
	if (anchors == null || anchors.isEmpty()) {
		return null;
	}

	// TODO: need to handle multiple link in one list item
	Element firstAnchor = anchors.first();

	// skip if User_talk:
	if (firstAnchor.attr("href").contains("User_talk:")) {
		return null;
	}

	return processA(li.text(), parent, chapterOrder, firstAnchor, language);
}
 
Example 8
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Determines the implied (possibly relative) URL of a microformat root element.
 *
 * @param elem the root element to inspect
 * @return the element's own {@code href} when it is an {@code a} or
 *         {@code area} with that attribute; otherwise the {@code href} of its
 *         single {@code a} child, then of its single {@code area} child,
 *         provided that child has no root class; {@code null} if none applies
 */
private String parseImpliedUrlRelative(Element elem) {
    // if a.h-x[href] or area.h-x[href] then use that [href] for url
    String tag = elem.tagName();
    boolean isLinkTag = "a".equals(tag) || "area".equals(tag);
    if (isLinkTag && elem.hasAttr("href")) {
        return elem.attr("href");
    }

    // else if .h-x>a[href]:only-of-type:not[.h-*] then use that [href] for url
    // else if .h-x>area[href]:only-of-type:not[.h-*] then use that [href] for url
    for (String childTag : new String[] {"a", "area"}) {
        Elements sameTagChildren = filterByTag(elem.children(), childTag);
        if (sameTagChildren.size() != 1) {
            continue;
        }
        Element only = sameTagChildren.first();
        if (hasRootClass(only) || !only.hasAttr("href")) {
            continue;
        }
        return only.attr("href");
    }

    return null;
}
 
Example 9
Source File: SelectorFetcher.java    From stevia with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
/**
 * Verifies whether a locator identifies exactly one element in the document
 * that contains {@code e}. Locators starting with "//" are evaluated as XPath
 * through Xsoup; all others are evaluated as jsoup selectors.
 *
 * <p>When the locator is unique and {@code e} has no entry in
 * {@code uniqueLocators} yet, the locator is recorded there.
 *
 * @param e element whose topmost ancestor is used as the search root
 * @param locator selector or XPath expression to verify
 * @return the locator followed by a verdict: " UNIQUE = ...",
 *         " NON-UNIQUE ..." or " NOT FOUND - PROBLEM"
 * @throws Exception declared for callers; not thrown explicitly here
 */
private static String verifyLocator( Element e, String locator) throws Exception {
	Element rootElement = e.parents().last();

	if (!locator.startsWith("//")) {
		Elements selected = rootElement.select(locator);
		if (selected.size() == 1) {
			if (!uniqueLocators.containsKey(e)) {
				uniqueLocators.put(e, locator);
			}
			return locator + " UNIQUE = "+selected.first();
		} else if (selected.size() > 1) {
			return locator + " NON-UNIQUE = "+selected;
		} else {
			return locator +" NOT FOUND - PROBLEM";
		}
	}

	// Every remaining locator starts with "//" and is treated as XPath, so no
	// second startsWith test (and no fall-through return) is needed.
	XElements elements = Xsoup.select(rootElement, locator);
	int matches = elements.getElements().size();
	if (matches > 1) {
		return locator + " NON-UNIQUE!!! ";
	} else if (matches == 0) {
		return locator +" NOT FOUND - PROBLEM";
	}
	if (!uniqueLocators.containsKey(e)) {
		uniqueLocators.put(e, locator);
	}
	return locator + " UNIQUE = "+ elements.getElements().get(0);
}
 
Example 10
Source File: EHentaiParser.java    From Hentoid with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the URL of the image displayed on a page.
 *
 * @param doc the page to inspect
 * @return the {@code src} attribute of the first element matching
 *         {@code img#img}, or an empty string when there is no such element
 */
private String getDisplayedImageUrl(@Nonnull Document doc) {
    Elements matches = doc.select("img#img");
    return matches.isEmpty() ? "" : matches.first().attr("src");
}
 
Example 11
Source File: AbstractSpiderServer.java    From Doctor with Apache License 2.0 5 votes vote down vote up
/**
     * Fetches a detail page (symptoms, complications and similar pages that
     * contain common terms) and extracts its full description text together
     * with the terms linked from it.
     *
     * <p>Each linked term and its href are also appended to the local
     * "/word_link.txt" file.
     *
     * @param href link of the detail page
     * @param word optional path segment; when non-null the URL is built from
     *             {@code index + word} plus the last path segment of {@code href}
     * @return a map holding the description text under {@code ALL} and, when
     *         the description contains links, the list of link texts under {@code WORD}
     * @throws IllegalStateException if the page has neither a
     *         {@code div.spider} nor a {@code div.jb-body} container
     * @throws Exception propagated from fetching the page or writing the term file
     */
    protected Map<String, Object> getBrief(String href, String word) throws Exception {
        String url = (word == null) ? (index + href) : (index + word + href.substring(href.lastIndexOf("/")));
        // Detail page
        Document document = SpiderUtil.getDocument(url);
        Elements select = document.select("div.spider");
        if (select.isEmpty()) {
            // Fall back to the alternative container.
            select = document.select("div.jb-body");
        }
        if (select.isEmpty()) {
            // Neither container exists: first() would be null below, so fail
            // with an explicit message instead of a NullPointerException.
            logger.error("异常:详情页无详情 "+url);
            throw new IllegalStateException("No detail container (div.spider or div.jb-body) found at " + url);
        }
        Element first = select.first();
        // Collect the whole description
        Map<String, Object> map = new HashMap<>();
        map.put(ALL, first.text());
        // Check whether the description links to any terms
        Elements elements = first.getElementsByTag("a");
        if (elements.isEmpty()) {
            // A description without links is a normal case.
            return map;
        }
        // Walk through the linked terms
        List<String> symptomList = new ArrayList<>();
        for (Element element1 : elements) {
            symptomList.add(element1.text());
            // Link of the newly found term
            String href1 = element1.attr("href");
            // Append the new term to the local txt file
            TexUtil.write(element1.text()+"\r\n"+href1+"\r\n",ProjectPath.getRootPath("/word_link.txt"));
        }
        map.put(WORD, symptomList);
        return map;
    }
 
Example 12
Source File: JsoupProcessor.java    From AcgClub with MIT License 5 votes vote down vote up
/**
 * Returns the first element inside {@code container} that matches
 * {@code query}, or {@code null} when nothing matches.
 */
private static Element element(Element container, String query) {
  Elements matches = container.select(query);
  return matches.isEmpty() ? null : matches.first();
}
 
Example 13
Source File: MyJsoup.java    From frameworkAggregate with Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the flower encyclopedia index page and builds one category for
 * every child node (having children of its own) of the first element with
 * class "catelist".
 *
 * <p>Within such a node, an {@code a} child supplies the URL and the image
 * path, and an {@code h2} child supplies the name.
 *
 * @return the categories found; empty when the request fails or the page
 *         contains no "catelist" element
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Element cates = doc.getElementsByClass("catelist").first();
		if (cates == null) {
			// first() returns null for an empty selection; nothing to parse.
			return categories;
		}
		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs == null || childs.isEmpty()) {
				continue;
			}
			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				if ("a".equals(child.nodeName())) {
					category.setUrl(child.attr("href"));
					// The image is read from the second child node of the link.
					category.setImgPath(child.childNode(1).attr("src"));
				} else if ("h2".equals(child.nodeName())) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		e.printStackTrace();
	}

	return categories;
}
 
Example 14
Source File: TestJsoup.java    From frameworkAggregate with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the flower encyclopedia index page and turns the children of its
 * first "catelist" element into categories.
 *
 * <p>Only child nodes that have children of their own produce a category:
 * an {@code a} child provides the URL and image path, an {@code h2} child
 * provides the name.
 *
 * @return the parsed categories; an empty list when the page cannot be
 *         fetched or has no element with class "catelist"
 */
private static List<FlowerCategory> getCategoryList() {

	List<FlowerCategory> categories = new ArrayList<FlowerCategory>();

	try {
		Document doc = Jsoup.connect("http://www.aihuhua.com/baike/").get();
		Elements catelist = doc.getElementsByClass("catelist");
		if (catelist.isEmpty()) {
			// Without this guard first() would return null and the call to
			// childNodes() below would throw a NullPointerException.
			return categories;
		}
		Element cates = catelist.first();
		for (Node node : cates.childNodes()) {
			List<Node> childs = node.childNodes();
			if (childs == null || childs.isEmpty()) {
				continue;
			}
			FlowerCategory category = new FlowerCategory();
			for (Node child : childs) {
				if ("a".equals(child.nodeName())) {
					category.setUrl(child.attr("href"));
					// The image is read from the second child node of the link.
					category.setImgPath(child.childNode(1).attr("src"));
				} else if ("h2".equals(child.nodeName())) {
					category.setName(child.attr("title"));
				}
			}
			categories.add(category);
		}
	} catch (IOException e) {
		e.printStackTrace();
	}

	return categories;
}
 
Example 15
Source File: BaseTask.java    From guanggoo-android with Apache License 2.0 5 votes vote down vote up
/**
 * Checks whether the page shows a user card and, if so, stores the user name
 * and avatar URL found in the first card in {@code AuthInfoManager}.
 *
 * @param doc the page to inspect
 * @return {@code true} when a {@code div.usercard} element is present,
 *         {@code false} otherwise
 */
protected boolean checkAuth(Document doc) {
    Elements cards = doc.select("div.usercard");
    if (cards.isEmpty()) {
        return false;
    }

    Element card = cards.first();

    String username = card.select("div.username").first().text();
    AuthInfoManager.getInstance().setUsername(username);

    String avatarUrl = card.select("img.avatar").first().attr("src");
    AuthInfoManager.getInstance().setAvatar(avatarUrl);

    return true;
}
 
Example 16
Source File: JsoupParserIntegrationTest.java    From tutorials with MIT License 5 votes vote down vote up
/**
 * Demonstrates the jsoup traversal accessors on the {@code section} elements
 * of the test document: first/last/indexed access, parents, children and
 * siblings. Every section is printed at the end.
 */
@Test
public void examplesTraversing() {
    Elements sections = doc.select("section");

    Element firstSection = sections.first();
    Element lastSection = sections.last();
    // Elements is zero-indexed, so the second match is at index 1 (index 2 is the third).
    Element secondSection = sections.get(1);
    Elements allParents = firstSection.parents();
    Element parent = firstSection.parent();
    Elements children = firstSection.children();
    Elements siblings = firstSection.siblingElements();

    sections.forEach(el -> System.out.println("section: " + el));
}
 
Example 17
Source File: ModifySamlResponseStepBuilder.java    From keycloak with Apache License 2.0 4 votes vote down vote up
/**
 * Handles a SAML POST-binding response page: extracts the single
 * SAMLResponse/SAMLRequest form field, decodes it, runs it through the
 * configured transformer and builds the follow-up request.
 *
 * @param currentResponse HTTP response expected to have status OK and an
 *                        HTML body with exactly one form and exactly one
 *                        SAMLResponse or SAMLRequest input
 * @return the request created for the transformed document, or {@code null}
 *         when the transformer returns {@code null}
 * @throws Exception propagated from reading, decoding or transforming the document
 */
private HttpUriRequest handlePostBinding(CloseableHttpResponse currentResponse) throws Exception {
    assertThat(currentResponse, statusCodeIsHC(Status.OK));

    final String htmlBody = EntityUtils.toString(currentResponse.getEntity());
    assertThat(htmlBody, Matchers.containsString("SAML"));
    org.jsoup.nodes.Document theResponsePage = Jsoup.parse(htmlBody);
    Elements samlResponses = theResponsePage.select("input[name=SAMLResponse]");
    Elements samlRequests = theResponsePage.select("input[name=SAMLRequest]");
    Elements forms = theResponsePage.select("form");
    Elements relayStates = theResponsePage.select("input[name=RelayState]");
    int size = samlResponses.size() + samlRequests.size();
    assertThat("Checking uniqueness of SAMLResponse/SAMLRequest input field in the page", size, is(1));
    assertThat("Checking uniqueness of forms in the page", forms, hasSize(1));

    // Exactly one of the two selections is non-empty (asserted above).
    Element respElement = samlResponses.isEmpty() ? samlRequests.first() : samlResponses.first();
    Element form = forms.first();

    String base64EncodedSamlDoc = respElement.val();
    String samlDoc;
    // try-with-resources closes the stream even when reading it fails.
    try (InputStream decoded = PostBindingUtil.base64DecodeAsStream(base64EncodedSamlDoc)) {
        samlDoc = IOUtils.toString(decoded, GeneralConstants.SAML_CHARSET);
    }

    String transformed = getTransformer().transform(samlDoc);
    if (transformed == null) {
        return null;
    }

    // An explicitly configured attribute/URI wins over what the page declares.
    final String attributeName = this.targetAttribute != null
      ? this.targetAttribute
      : respElement.attr("name");
    List<NameValuePair> parameters = new LinkedList<>();

    if (! relayStates.isEmpty()) {
        parameters.add(new BasicNameValuePair(GeneralConstants.RELAY_STATE, relayStates.first().val()));
    }
    URI locationUri = this.targetUri != null
      ? this.targetUri
      : URI.create(form.attr("action"));

    return createRequest(locationUri, attributeName, transformed, parameters);
}
 
Example 18
Source File: EHentaiParser.java    From Hentoid with Apache License 2.0 4 votes vote down vote up
/**
 * Builds the list of image files of a gallery by walking the gallery pages
 * and then every picture page.
 *
 * <p>The instance is registered on the default EventBus for the duration of
 * the call and unregistered in the {@code finally} block.
 *
 * <p>When image URLs are collected, the first entry of the result is the
 * cover; each following entry is a displayed image, optionally carrying a
 * "backupUrl" download parameter derived from the page's "#loadfail" link.
 *
 * @param content the gallery to parse
 * @return the image files found; empty when the gallery page could not be
 *         loaded or contains no {@code table.ptt a} element
 * @throws LimitReachedException when a displayed image URL contains "/509.gif"
 * @throws PreparationInterruptedException when {@code processHalted} is set
 *         by the time parsing finishes
 * @throws Exception any other failure raised while fetching or parsing
 */
public List<ImageFile> parseImageList(@NonNull Content content) throws Exception {
    EventBus.getDefault().register(this);

    try {
        List<ImageFile> result = new ArrayList<>();
        boolean useHentoidAgent = Site.EHENTAI.canKnowHentoidAgent();
        // Reused for every picture page; its "backupUrl" entry is overwritten
        // and the map is serialized again for each image.
        Map<String, String> downloadParams = new HashMap<>();
        int order = 1;

        /*
         * 1- Detect the number of pages of the gallery
         *
         * 2- Browse the gallery and fetch the URL for every page (since all of them have a different temporary key...)
         *
         * 3- Open all pages and grab the URL of the displayed image
         */

        // 1- Detect the number of pages of the gallery
        Element e;
        List<Pair<String, String>> headers = new ArrayList<>();
        headers.add(new Pair<>(HttpHelper.HEADER_COOKIE_KEY, "nw=1")); // nw=1 (always) avoids the Offensive Content popup (equivalent to clicking the "Never warn me again" link)
        Document doc = getOnlineDocument(content.getGalleryUrl(), headers, useHentoidAgent);
        if (doc != null) {
            Elements elements = doc.select("table.ptt a");
            if (null == elements || elements.isEmpty()) return result;

            // With a single pager link that link is used; otherwise the
            // second-to-last link is read as the page count.
            // NOTE(review): presumably the last link is a "next" arrow — confirm against the site markup.
            int tabId = (1 == elements.size()) ? 0 : elements.size() - 2;
            int nbGalleryPages = Integer.parseInt(elements.get(tabId).text());

            progress.start(nbGalleryPages + content.getQtyPages());

            // 2- Browse the gallery and fetch the URL for every page (since all of them have a different temporary key...)
            List<String> pageUrls = new ArrayList<>();

            // The first gallery page is already loaded in doc
            fetchPageUrls(doc, pageUrls);

            if (nbGalleryPages > 1) {
                // Remaining gallery pages are addressed with a zero-based "p" parameter, starting at 1
                for (int i = 1; i < nbGalleryPages && !processHalted; i++) {
                    doc = getOnlineDocument(content.getGalleryUrl() + "/?p=" + i, headers, useHentoidAgent);
                    if (doc != null) fetchPageUrls(doc, pageUrls);
                    progress.advance();
                }
            }

            // 3- Open all pages and
            //    - grab the URL of the displayed image
            //    - grab the alternate URL of the "Click here if the image fails loading" link
            result.add(ImageFile.newCover(content.getCoverImageUrl(), StatusContent.SAVED));
            ImageFile img;
            for (String pageUrl : pageUrls) {
                if (processHalted) break;
                doc = getOnlineDocument(pageUrl, headers, useHentoidAgent);
                if (doc != null) {
                    // Displayed image
                    // NOTE(review): the whole URL is lower-cased before being stored, not only
                    // for the 509.gif test — confirm the image URLs are case-insensitive.
                    String imageUrl = getDisplayedImageUrl(doc).toLowerCase();
                    if (!imageUrl.isEmpty()) {
                        // If we have the 509.gif picture, it means the bandwidth limit for e-h has been reached
                        if (imageUrl.contains("/509.gif"))
                            throw new LimitReachedException("E-hentai download points regenerate over time or can be bought on e-hentai if you're in a hurry");
                        img = ParseHelper.urlToImageFile(imageUrl, order++, pageUrls.size(), StatusContent.SAVED);
                        result.add(img);

                        // "Click here if the image fails loading" link
                        elements = doc.select("#loadfail");
                        if (!elements.isEmpty()) {
                            e = elements.first();
                            String arg = e.attr("onclick");
                            // Get the argument between 's
                            // NOTE(review): if the onclick value does not contain two single quotes,
                            // quoteEnd is -1 and substring() throws StringIndexOutOfBoundsException.
                            int quoteBegin = arg.indexOf('\'');
                            int quoteEnd = arg.indexOf('\'', quoteBegin + 1);
                            arg = arg.substring(quoteBegin + 1, quoteEnd);
                            // Get the query URL
                            if (pageUrl.contains("?")) pageUrl += "&";
                            else pageUrl += "?";
                            pageUrl += "nl=" + arg;
                            // Get the final URL
                            if (URLUtil.isValidUrl(pageUrl)) {
                                downloadParams.put("backupUrl", pageUrl);
                                String downloadParamsStr = JsonHelper.serializeToJson(downloadParams, JsonHelper.MAP_STRINGS);
                                img.setDownloadParams(downloadParamsStr);
                            }
                        }
                    }
                }
                progress.advance();
            }
        }
        progress.complete();

        // If the process has been halted manually, the result is incomplete and should not be returned as is
        if (processHalted) throw new PreparationInterruptedException();
        return result;
    } finally {
        EventBus.getDefault().unregister(this);
    }
}
 
Example 19
Source File: Mf2Parser.java    From indigenous-android with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Determines the implied name of a microformat root element.
 *
 * <p>The first rule that applies wins: the element's own {@code alt}
 * (img/area) or {@code title} (abbr); the same attributes on an only child;
 * the same attributes on an only grandchild; finally the trimmed text content
 * of the element.
 *
 * @param elem the root element to inspect
 * @return the implied name; may be empty
 */
private String parseImpliedName(Element elem) {
    // if img.h-x[alt] or area.h-x[alt] then use that alt for name.
    // The tag test is grouped so that the alt check applies to both tags
    // ("&&" binds tighter than "||").
    if (("img".equals(elem.tagName()) || "area".equals(elem.tagName())) && elem.hasAttr("alt")) {
        return elem.attr("alt");
    }
    // else if abbr.h-x[title] then use that title for name
    if ("abbr".equals(elem.tagName()) && elem.hasAttr("title")) {
        return elem.attr("title");
    }

    Elements children = elem.children();
    if (children.size() == 1) {
        Element child = children.first();
        // else if .h-x>img:only-child[alt]:not[.h-*] then use that img alt for name
        // else if .h-x>area:only-child[alt]:not[.h-*] then use that area alt for name
        if (!hasRootClass(child)
                && ("img".equals(child.tagName()) || "area".equals(child.tagName()))
                && child.hasAttr("alt")) {
            return child.attr("alt");
        }
        // else if .h-x>abbr:only-child[title] then use that abbr title for name
        if ("abbr".equals(child.tagName()) && child.hasAttr("title")) {
            return child.attr("title");
        }

        Elements grandChildren = child.children();
        if (grandChildren.size() == 1) {
            Element grandChild = grandChildren.first();
            // else if .h-x>:only-child>img:only-child[alt]:not[.h-*] then use that img alt for name
            // else if .h-x>:only-child>area:only-child[alt]:not[.h-*] then use that area alt for name
            if (!hasRootClass(grandChild)
                    && ("img".equals(grandChild.tagName()) || "area".equals(grandChild.tagName()))
                    && grandChild.hasAttr("alt")) {
                return grandChild.attr("alt");
            }
            // else if .h-x>:only-child>abbr:only-child[title] use that abbr title for name
            // (the attribute that is checked must be the one that is returned)
            if ("abbr".equals(grandChild.tagName()) && grandChild.hasAttr("title")) {
                return grandChild.attr("title");
            }
        }
    }

    // else use the textContent of the .h-x for name
    // drop leading & trailing white-space from name
    // NOTE(review): String.trim() only strips characters up to U+0020, so a
    // leading/trailing nbsp (U+00A0) is not removed here.
    return elem.text().trim();
}
 
Example 20
Source File: HiParser.java    From hipda with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Parses a search result page into a list bean.
 *
 * <p>The search id and the highest page number are read from the pager; each
 * {@code tbody} that has both a subject link and an author link becomes one
 * list item.
 *
 * @param doc the search result page, may be {@code null}
 * @return the parsed list, or {@code null} when {@code doc} is {@code null}
 */
private static SimpleListBean parseSearch(Document doc) {
    if (doc == null) {
        return null;
    }

    SimpleListBean result = new SimpleListBean();

    //if this is the last page, page number is in <strong>
    Elements pageLinks = doc.select("div.pages_btns div.pages a");
    pageLinks.addAll(doc.select("div.pages_btns div.pages strong"));

    int maxPage = 1;
    if (!pageLinks.isEmpty()) {
        String searchIdUrl = pageLinks.first().attr("href");
        result.setSearchId(Utils.getMiddleString(searchIdUrl, "searchid=", "&"));
        for (Element pageLink : pageLinks) {
            maxPage = Math.max(maxPage, Utils.getIntFromString(pageLink.text()));
        }
    }
    result.setMaxPage(maxPage);

    for (Element tbody : doc.select("tbody")) {
        SimpleListItemBean item = new SimpleListItemBean();

        // Rows without a subject link are not result entries.
        Elements subjects = tbody.select("tr th.subject a");
        if (subjects.isEmpty()) {
            continue;
        }
        Element titleLink = subjects.first();
        item.setTid(Utils.getMiddleString(titleLink.attr("href"), "tid=", "&"));
        item.setTitle(titleLink.text());

        // Rows without an author link are skipped as well.
        Elements authors = tbody.select("tr td.author cite a");
        if (authors.isEmpty()) {
            continue;
        }
        Element authorLink = authors.first();
        item.setAuthor(authorLink.text());

        String spaceUrl = authorLink.attr("href");
        if (!TextUtils.isEmpty(spaceUrl)) {
            item.setAvatarUrl(HiUtils.getAvatarUrlByUid(Utils.getMiddleString(spaceUrl, "uid=", "&")));
        }

        Elements times = tbody.select("tr td.author em");
        if (!times.isEmpty()) {
            item.setTime(times.first().text());
        }

        Elements forums = tbody.select("tr td.forum");
        if (!forums.isEmpty()) {
            item.setForum(forums.first().text());
        }

        result.add(item);
    }

    return result;
}