Java Code Examples for org.jsoup.select.Elements#size()

The following examples show how to use org.jsoup.select.Elements#size() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: AppsGamesCatalogApi.java    From 4pdaClient-plus with Apache License 2.0 6 votes vote down vote up
private static void loadCatalog(IHttpClient client, AppGameCatalog catalog, ArrayList<AppGameCatalog> res) throws IOException {

            String pageBody = client.performGet(GAMES_CATALOG_URL).getResponseBody();

            Pattern pattern = Pattern.compile("<div class=\"[^\"]*post_body[^\"]*\">(.*?)(?:<!--Begin Msg Number|<!-- TABLE FOOTER)",
                    Pattern.CASE_INSENSITIVE);

            Matcher m = pattern.matcher(pageBody);
            if (!m.find()) return;

            Document doc = Jsoup.parse(m.group(1));


            for (Element categoryElement : doc.select("ol[type=1]>li")) {
                Elements elements = categoryElement.select("a");
                if (elements.size() == 0) continue;

                Element element = elements.get(0);
                Uri uri = Uri.parse(element.attr("href"));
                String title = element.text();
                AppGameCatalog category = new AppGameCatalog(uri.getQueryParameter("p"), title)
                        .setLevel(AppGameCatalog.LEVEL_CATEGORY).setGames();
                category.setParent(catalog);
                res.add(category);
            }
        }
 
Example 2
Source File: FormatUtil.java    From wlmedia with Apache License 2.0 6 votes vote down vote up
public static String formatLiveMenu(String html)
{
    Document document = Jsoup.parse(html);
    Elements menus = document.getElementsByClass("tab-list-syb").select("li");
    StringBuffer buffer = new StringBuffer();
    buffer.append("[");
    int size = menus.size();
    for(int i = 0; i < size; i++)
    {
        String menu = menus.get(i).text();
        buffer.append("{\"name\":\"");
        buffer.append(menu);
        buffer.append("\",\"id\":");
        buffer.append(menus.get(i).attr("data-player"));
        if(i == size - 1)
        {
            buffer.append("}");
        }
        else
        {
            buffer.append("},");
        }
    }
    buffer.append("]");
    return buffer.toString();
}
 
Example 3
Source File: WebPage.java    From zap-extensions with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts meta nodes from web page HTML document
 *
 * @param url
 * @throws IOException
 */
@SuppressWarnings("null")
private void getMetaNodes(URL url) throws IOException {

    // Document doc = getHTML(url);// this required another connexion
    Elements metas = HTMLDoc.select("meta");

    for (int i = 0; i < metas.size(); i++) {
        Element meta = metas.get(i);

        if (meta.hasAttr("name") && meta.hasAttr("content")) {
            // System.out.println("meta = "+metas.get(i)+"");
            this.metas.put(meta.attr("name"), meta.attr("content"));
        }
        // System.out.println("-----------------------");
    }
}
 
Example 4
Source File: Yande.java    From PicKing with Apache License 2.0 6 votes vote down vote up
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> data = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "utf-8"));
    Elements elements = document.select("#post-list-posts li div.inner a");
    for (Element element : elements) {
        AlbumInfo temp = new AlbumInfo();
        temp.setAlbumUrl(baseUrl + element.attr("href"));
        Elements elements1 = element.select("img");
        if (elements1.size() > 0)
            temp.setPicUrl(elements1.get(0).attr("src"));
        data.add(temp);
    }

    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, data);
    return resultMap;
}
 
Example 5
Source File: Nanrentu.java    From PicKing with Apache License 2.0 6 votes vote down vote up
@Override
public Map<ContentsActivity.parameter, Object> getContent(String baseUrl, String currentUrl, byte[] result, Map<ContentsActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<AlbumInfo> urls = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "gb2312"));
    Elements elements = document.select(".partacpic li a");
    for (Element element : elements) {
        AlbumInfo temp = new AlbumInfo();
        temp.setAlbumUrl(baseUrl + element.attr("href"));
        Elements elements1 = element.select("img");
        if (elements1.size() > 0) {
            temp.setPicUrl(baseUrl + elements1.get(0).attr("src"));
        }
        urls.add(temp);
    }
    resultMap.put(ContentsActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(ContentsActivity.parameter.RESULT, urls);
    return resultMap;
}
 
Example 6
Source File: GalleryDetailParser.java    From EhViewer with Apache License 2.0 6 votes vote down vote up
@NonNull
public static GalleryTagGroup[] parseTagGroups(Elements trs) {
    try {
        List<GalleryTagGroup> list = new ArrayList<>(trs.size());
        for (int i = 0, n = trs.size(); i < n; i++) {
            GalleryTagGroup group = parseTagGroup(trs.get(i));
            if (null != group) {
                list.add(group);
            }
        }
        return list.toArray(new GalleryTagGroup[list.size()]);
    } catch (Throwable e) {
        ExceptionUtils.throwIfFatal(e);
        e.printStackTrace();
        return EMPTY_GALLERY_TAG_GROUP_ARRAY;
    }
}
 
Example 7
Source File: Aitaotu.java    From PicKing with Apache License 2.0 6 votes vote down vote up
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    List<PicInfo> urls = new ArrayList<>();
    Document document = Jsoup.parse(new String(result, "utf-8"));
    Elements title = document.select("#photos h1");
    String sTitle = "";
    if (title.size() > 0)
        sTitle = title.get(0).text();

    Elements time = document.select(".tsmaincont-desc span");
    String sTime = "";
    if (time.size() > 0)
        sTime = time.get(0).text();

    Elements elements = document.select("#big-pic img");
    for (Element element : elements) {
        urls.add(new PicInfo(element.attr("src")).setTitle(sTitle).setTime(sTime));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, urls);
    return resultMap;
}
 
Example 8
Source File: CityParser.java    From zuihou-admin-boot with Apache License 2.0 5 votes vote down vote up
/**
 * 村庄数据
 *
 * @param url
 * @return
 */
public List<Area> parseVillagetr(String fullName, String url) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("villagetr");

    List<Area> counties = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements tds = tr.getElementsByTag("td");
        if (tds == null || tds.size() != 3) {
            continue;
        }
        String villagetrCode = tds.get(0).text();
        String villagetrName = tds.get(2).text();

        Area villagetrArea = Area.builder().code(villagetrCode)
                .label(villagetrName)
                .fullName(fullName + villagetrName)
                .sortValue(sort++)
                .source(url).build();
        StaticLog.info("				村级数据:  {}  ", villagetrArea);

        counties.add(villagetrArea);
    }
    return counties;
}
 
Example 9
Source File: MiniTokyo.java    From PicKing with Apache License 2.0 5 votes vote down vote up
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document document = Jsoup.parse(new String(result, "utf-8"));
    Elements elements = document.select(".pagination a:containsOwn(Next »)");
    if (elements.size() > 0) {
        return baseUrl + elements.get(0).attr("href");
    }
    return "";
}
 
Example 10
Source File: Aitaotu.java    From PicKing with Apache License 2.0 5 votes vote down vote up
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document document = Jsoup.parse(new String(result, "utf-8"));
    Elements elements = document.select("#nl a");
    if (elements.size() > 0)
        return baseUrl + elements.get(0).attr("href");
    return "";
}
 
Example 11
Source File: Apic.java    From PicKing with Apache License 2.0 5 votes vote down vote up
@Override
public String getContentNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    Document document = Jsoup.parse(new String(result, "utf-8"));
    Elements elements = document.select("#page .next");
    if (elements.size() > 0)
        return elements.get(0).attr("href");
    return "";
}
 
Example 12
Source File: hindu.java    From Gazetti_Newspaper_Reader with MIT License 5 votes vote down vote up
private String getImageURL(Element bodyElement) {
    Elements mainImageElement = bodyElement.select(ConfigService.getInstance().getTheHinduImageFirst());
    Elements carouselElements = bodyElement.select(ConfigService.getInstance().getTheHinduImageSecond());

    if (mainImageElement.size() != 0) {
        mImageURL = mainImageElement.first().attr("src");
    } else if (carouselElements.size() != 0) {
        Elements carouselImage = carouselElements.select("div#pic").first().select("img");
        mImageURL = carouselImage.attr("src");
    }

    return mImageURL;

}
 
Example 13
Source File: doc.java    From templatespider with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) {
		
		Document doc = Jsoup.parse(FileUtil.read("G:/MyEclipseWorkSpace/templaete/template/about.html", FileUtil.UTF8));
		Element body = doc.getElementsByTag("body").first();
//		System.out.println(body);
		
		Elements e = body.children();
		for (int i = 0; i < e.size(); i++) {
			System.out.println("---------------"+e.get(i).tagName());
			System.out.println(e.get(i));
		}
		
	}
 
Example 14
Source File: CodeView.java    From imsdk-android with MIT License 5 votes vote down vote up
private void showCodeHtml(Elements codes) {
    if (codes != null) {
        for (int i = 0; i < codes.size(); i++) {
            String raw = codes.get(i).text();
            codes.get(i).html("<pre><code>" + raw + "</code></pre>");
        }
    }
}
 
Example 15
Source File: ElementTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void testPrependNewHtml() {
    Document doc = Jsoup.parse("<div id=1><p>Hello</p></div>");
    Element div = doc.getElementById("1");
    div.prepend("<p>there</p><p>now</p>");
    assertEquals("<p>there</p><p>now</p><p>Hello</p>", TextUtil.stripNewlines(div.html()));

    // check sibling index (reindexChildren):
    Elements ps = doc.select("p");
    for (int i = 0; i < ps.size(); i++) {
        assertEquals(i, ps.get(i).siblingIndex);
    }
}
 
Example 16
Source File: DataUtil.java    From Focus with GNU General Public License v3.0 5 votes vote down vote up
public static String getFeedItemImageUrl(FeedItem feedItem){
    String content = PostUtil.getContent(feedItem);
    if (content!=null && !content.equals("")){
        Document doc = Jsoup.parse(content);
        if (doc != null) {
            Elements images = doc.select("img");
            if (images.size() > 0) {
                return images.get(0).attr("src");
            }
        }
    }
    return null;
}
 
Example 17
Source File: ContentExtractor.java    From ScriptSpider with Apache License 2.0 5 votes vote down vote up
private static String GetDocContent(Document doc) {
    Elements divs = doc.body().getElementsByTag("div");
    int max = -1;
    String content = null;
    for (int i = 0; i < divs.size(); i++) {
        Element div = (Element) divs.get(i);
        String divContent = GetDivContent(div);
        if (divContent.length() > max) {
            max = divContent.length();
            content = divContent;
        }
    }
    return content;
}
 
Example 18
Source File: ManChaptersFragment.java    From Man-Man with GNU General Public License v3.0 4 votes vote down vote up
@Override
public Loader<List<ManSectionItem>> onCreateLoader(int id, @NonNull final Bundle args) {
    return new AbstractNetworkAsyncLoader<List<ManSectionItem>>(getActivity()) {

        @Override
        protected void onStartLoading() {
            if(args.containsKey(CHAPTER_INDEX)) {
                super.onStartLoading();
            }
        }

        /**
         * Loads package page from network asynchronously
         *
         * @return list of commands with their descriptions and urls
         * or null on error/no input provided
         */
        @Nullable
        @Override
        public List<ManSectionItem> loadInBackground() {
            // retrieve package content
            String index = args.getString(CHAPTER_INDEX);
            String url = args.getString(CHAPTER_PACKAGE);
            if(!isStarted()) // task was cancelled
                return Collections.emptyList();

            OkHttpClient client = new OkHttpClient();
            Request request = new Request.Builder().url(url).build();
            try {
                Response response = client.newCall(request).execute();
                if (response.isSuccessful()) {
                    String result = response.body().string();
                    Document root = Jsoup.parse(result, CHAPTER_COMMANDS_PREFIX);
                    Elements rows = root.select(String.format("caption:has(a[href=/%s/]) ~ tbody > tr", index));
                    List<ManSectionItem> manPages = new ArrayList<>(rows.size());
                    for (Element row : rows) {
                        manPages.add(sectionItemFromRow(index, row));
                    }
                    return manPages;
                }
            } catch (IOException e) {
                Log.e(MM_TAG, "Exception while parsing package page " + url, e);
                return Collections.emptyList();
            }

            return Collections.emptyList();
        }

        @Override
        public void deliverResult(List<ManSectionItem> data) {
            mProgress.hide();
            super.deliverResult(data);
        }
    };
}
 
Example 19
Source File: HiParser.java    From hipda with GNU General Public License v2.0 4 votes vote down vote up
private static SimpleListBean parseMyPost(Document doc) {
    if (doc == null) {
        return null;
    }

    Elements tableES = doc.select("table.datatable");
    if (tableES.size() == 0) {
        return null;
    }

    SimpleListBean list = new SimpleListBean();

    int last_page = 1;
    //if this is the last page, page number is in <strong>
    Elements pagesES = doc.select("div.pages_btns div.pages a");
    pagesES.addAll(doc.select("div.pages_btns div.pages strong"));
    if (pagesES.size() > 0) {
        for (Node n : pagesES) {
            int tmp = Utils.getIntFromString(((Element) n).text());
            if (tmp > last_page) {
                last_page = tmp;
            }
        }
    }
    list.setMaxPage(last_page);

    Elements trES = tableES.first().select("tr");

    SimpleListItemBean item = null;
    //first tr is title, skip
    for (int i = 1; i < trES.size(); ++i) {
        Element trE = trES.get(i);

        // odd have title, even have reply text;
        item = new SimpleListItemBean();

        // thread
        Elements thES = trE.select("th");
        if (thES.size() == 0) {
            continue;
        }
        Elements linkES = thES.first().select("a");
        if (linkES.size() != 1) {
            continue;
        }
        String tid = linkES.first().attr("href");
        if (!tid.contains("viewthread.php?tid=")) {
            continue;
        }
        tid = Utils.getMiddleString(tid, "viewthread.php?tid=", "&");
        String title = linkES.first().text();

        // time
        Elements lastpostES = trE.select("td.lastpost");
        if (lastpostES.size() == 0) {
            continue;
        }
        String time = lastpostES.first().text();

        item.setTid(tid);
        item.setTitle(title);
        item.setTime(time);

        Elements forumES = trE.select("td.forum");
        if (forumES.size() > 0) {
            item.setForum(forumES.first().text());
        }

        list.add(item);
    }
    return list;
}
 
Example 20
Source File: BootstrapHandlerDependenciesTest.java    From flow with Apache License 2.0 4 votes vote down vote up
@Test
public void flowDependenciesShouldBeImportedBeforeUserDependenciesWithCorrectAttributes() {
    Consumer<Document> uiPageTestingMethod = page -> {
        boolean foundClientEngine = false;
        int flowDependencyMaxIndex = Integer.MAX_VALUE;
        int userDependencyMinIndex = Integer.MAX_VALUE;

        Elements children = page.head().children();
        for (int i = 0; i < children.size(); i++) {
            Element element = children.get(i);
            String elementString = element.toString();
            if (foundClientEngine) {
                if (userDependencyMinIndex > i) {
                    userDependencyMinIndex = i;
                }
                if (elementString.contains("dndConnector.js")) {
                    continue;
                }
                assertThat(
                        "Expected to have here dependencies added with Flow public api",
                        elementString,
                        either(containsString("eager"))
                                .or(containsString("lazy"))
                                .or(containsString("inline")));
            } else {
                flowDependencyMaxIndex = i;
                // skip element with uidl that contains lazy dependencies
                if (!elementString.contains(BOOTSTRAP_SCRIPT_CONTENTS)) {
                    assertThat(
                            "Flow dependencies should not contain user dependencies",
                            elementString,
                            both(not(containsString("eager")))
                                    .and(not(containsString("lazy")))
                                    .and(not(containsString("inline"))));
                    if (elementString.contains(
                            BootstrapHandler.clientEngineFile.get())) {
                        foundClientEngine = true;
                    }
                } else {
                    assertThat(
                            "uidl should not contain eager and inline dependencies",
                            elementString,
                            both(not(containsString("eager")))
                                    .and(not(containsString("inline"))));
                }
            }

            assertThat(String.format(
                    "All javascript dependencies should be loaded without 'async' attribute. Dependency with url %s has this attribute",
                    element.attr("src")), element.attr("async"), is(""));
        }

        assertThat(
                "Flow dependencies should be imported before user dependencies",
                flowDependencyMaxIndex,
                is(lessThan(userDependencyMinIndex)));

    };

    testUis(uiPageTestingMethod, new UIAnnotated_LoadingOrderTest(),
            new UIWithMethods_LoadingOrderTest());
}