Java Code Examples for org.jsoup.nodes.Document#getElementsByClass()

The following examples show how to use org.jsoup.nodes.Document#getElementsByClass(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BangumiCrawlerService.java    From Pixiv-Illustration-Collection-Backend with Apache License 2.0 6 votes vote down vote up
/**
 * Crawls the Bangumi anime browser listing and collects the numeric subject ids
 * found in the {@code href} of each cover link.
 *
 * @param pageNum number of listing pages to request; pages are requested with the
 *                indices {@code 0 .. pageNum - 1}
 * @return the subject ids in the order they were encountered
 * @throws IOException          if sending a request fails
 * @throws InterruptedException if the thread is interrupted while waiting for a response
 */
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException {
    List<Integer> idList = new ArrayList<>(24);
    // Walk the listing pages and collect every subject id.
    for (int currentIndex = 0; currentIndex < pageNum; currentIndex++) {
        System.out.println("开始爬取第" + currentIndex + "页");
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build();
        String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body();
        // Extract the cover links with jsoup.
        Document doc = Jsoup.parse(body);
        Elements elements = doc.getElementsByClass("subjectCover cover ll");
        elements.forEach(e -> {
            // Keep only the digits of the link target. The previous code appended "\n"
            // before parsing, which made Integer.parseInt throw NumberFormatException.
            String digits = e.attr("href").replaceAll("\\D", "");
            // A link without any digits carries no subject id, so it is skipped.
            if (!digits.isEmpty()) {
                idList.add(Integer.parseInt(digits));
            }
        });
    }
    return idList;
}
 
Example 2
Source File: NewService.java    From Pixiv-Illustration-Collection-Backend with Apache License 2.0 6 votes vote down vote up
/**
 * Downloads the ACG17 news category page, converts every "item-list" element into an
 * {@code ACGNew} and passes the resulting list to {@code process}.
 *
 * @throws IOException          if sending the request fails
 * @throws InterruptedException if the thread is interrupted while waiting for the response
 */
private void pullACG17News() throws IOException, InterruptedException {
    URI newsUri = URI.create("http://acg17.com/category/news/");
    HttpRequest request = HttpRequest.newBuilder().uri(newsUri).GET().build();
    String html = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body();
    Elements items = Jsoup.parse(html).getElementsByClass("item-list");
    List<ACGNew> newsList = items.stream()
            .map(this::toAcg17News)
            .collect(Collectors.toList());
    process(newsList, "class", "entry");
}

/**
 * Builds one {@code ACGNew} from a single "item-list" element.
 *
 * @param item the list entry to convert
 * @return the news record described by the entry
 */
private ACGNew toAcg17News(Element item) {
    // The cover URL is embedded in the inline style as url(...).
    String styleAttr = item.getElementsByClass("attachment-tie-medium size-tie-medium wp-post-image")
            .get(0)
            .attr("style");
    int urlStart = styleAttr.indexOf("url(") + 4;
    int urlEnd = styleAttr.indexOf(")");
    String coverUrl = styleAttr.substring(urlStart, urlEnd);

    Element titleLink = item.getElementsByClass("post-box-title").get(0).child(0);

    // Rewrite the 年/月/日 date markers into the yyyy-MM-dd form LocalDate.parse expects.
    String rawDate = item.getElementsByClass("tie-date").get(0).text();
    LocalDate publishedOn = LocalDate.parse(rawDate.replaceAll("[年月]", "-").replace("日", ""));

    String summary = item.getElementsByClass("entry").get(0).child(0).text();
    String headline = titleLink.text();
    String refererUrl = titleLink.attr("href");
    return new ACGNew(headline, summary, NewsCrawlerConstant.ACG17, coverUrl, refererUrl, publishedOn,
            NewsCrawlerConstant.ACG17);
}
 
Example 3
Source File: CityStats.java    From zuihou-admin-boot with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches the page at {@code url}, reads every row with class "villagetr" and stores
 * the resulting village areas as the children of {@code countyArea}.
 * Rows that do not contain exactly three {@code td} cells are ignored.
 *
 * @param url        page to download and parse; also recorded as the source of each area
 * @param countyArea area whose full name prefixes each village name and whose children are replaced
 */
public static void parseVillagetr(String url, Area countyArea) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> villages = new LinkedList<Area>();
    int order = 1;
    for (Element row : page.getElementsByClass("villagetr")) {
        Elements cells = row.getElementsByTag("td");
        boolean hasThreeCells = cells != null && cells.size() == 3;
        if (hasThreeCells) {
            // First cell holds the code, third cell the name; the middle cell is not used.
            String code = cells.get(0).text();
            String name = cells.get(2).text();

            Area village = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(order++)
                    .level(new RemoteData<>("VILLAGETR"))
                    .fullName(countyArea.getFullName() + name)
                    .build();
            StaticLog.info("		村级数据:  {}  ", village);

            villages.add(village);
        }
    }
    countyArea.setChildren(villages);
}
 
Example 4
Source File: CityParser.java    From zuihou-admin-boot with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches the page at {@code url} and builds one city area per row with class "citytr",
 * attaching the county data returned by {@code parseCounty} as each city's children.
 *
 * @param provinceName name of the enclosing province, used as the prefix of each full name
 * @param url          page to download and parse; also recorded as the source of each area
 * @return the parsed city areas in page order
 */
private List<Area> parseCity(String provinceName, String url) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("citytr");

    List<Area> cities = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        // A row needs a code link and a name link; skip malformed rows instead of
        // failing the whole parse with an IndexOutOfBoundsException.
        if (links.size() < 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        String cityCode = links.get(0).text();
        String cityName = links.get(1).text();

        Area cityArea = Area.builder()
                .label(cityName).code(cityCode).source(url).sortValue(sort++)
                .level(new RemoteData<>("CITY"))
                .fullName(provinceName + cityName)
                .build();
        // The first link's href is relative; it is resolved against COMMON_URL.
        cityArea.setChildren(parseCounty(provinceName + cityName, COMMON_URL + href));
        StaticLog.info("	市级数据:  {}  ", cityArea);

        cities.add(cityArea);
    }
    return cities;
}
 
Example 5
Source File: NewDevDbApi.java    From 4pdaClient-plus with Apache License 2.0 6 votes vote down vote up
/**
 * Requests {@code devicesTypeUrl + "all"} and turns each {@code li} inside the
 * "word-list" elements into a brand catalog entry.
 *
 * @param client         HTTP client used to perform the GET request
 * @param devicesTypeUrl base URL of the device type; "all" is appended to it
 * @return one {@code DevCatalog} of type {@code DEVICE_BRAND} per list item
 * @throws Throwable whatever the HTTP call or parsing raises
 */
public static ArrayList<DevCatalog> parseBrands(IHttpClient client, String devicesTypeUrl) throws Throwable {
    String html = client.performGet(devicesTypeUrl + "all").getResponseBody();
    Elements brandItems = Jsoup.parse(html).getElementsByClass("word-list").select("li");

    ArrayList<DevCatalog> brands = new ArrayList<>();
    for (Element item : brandItems) {
        // The link comes from the item's anchor, the name from the item's whole text.
        DevCatalog brand = new DevCatalog(item.getElementsByTag("a").attr("href"), item.text());
        brand.setType(DevCatalog.DEVICE_BRAND);
        brands.add(brand);
    }
    return brands;
}
 
Example 6
Source File: HTTPStudy.java    From newblog with Apache License 2.0 5 votes vote down vote up
/**
 * Runs a search by substituting {@code keyword} into {@code baseURL}, then requests the
 * link of every result whose displayed URL contains "www.wenzhihuai.com".
 *
 * @param keyword search term that replaces the literal text "keyword" in {@code baseURL}
 * @throws Exception declared broadly; presumably raised by the HTTP helper calls
 */
public static void baidu(String keyword) throws Exception {
    // String.replace substitutes literally. replaceAll treated '$' and '\' in the keyword
    // as replacement syntax and could throw or silently drop characters.
    // NOTE(review): the keyword is inserted without URL-encoding — confirm callers encode it.
    String content = HttpHelper.getInstance().get(baseURL.replace("keyword", keyword));
    Document jsoup = Jsoup.parse(content);
    Elements elements = jsoup.getElementsByClass("result");
    for (Element element : elements) {
        String str = element.select(".c-showurl").text();
        if (str.contains("www.wenzhihuai.com")) {
            // Follow the result's title link and log the address that was requested.
            String wenzhihuai = element.select(".t").select("a").attr("href");
            HttpHelper.getInstance().get(wenzhihuai);
            logger.info("百度->温志怀URL:" + wenzhihuai);
        }
    }
}
 
Example 7
Source File: JsoupUtils.java    From EhViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the first element of {@code doc} that carries the given class.
 *
 * @param doc       document to search
 * @param className class name to look for
 * @return the first matching element, or {@code null} when nothing matches
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements matches = doc.getElementsByClass(className);
    if (matches == null || matches.isEmpty()) {
        return null;
    }
    return matches.get(0);
}
 
Example 8
Source File: SearchArticleVideoViewBinder.java    From Toutiao with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the video id and poster URL from the first "tt-video-box" element of an
 * HTML fragment. Despite its name, this method parses HTML, not JSON.
 *
 * @param content HTML text to parse
 * @return a map holding "id" and/or "imageUrl" when the corresponding attribute is
 *         non-empty; an empty map when no "tt-video-box" element exists
 */
private Map<String, String> parseJson(String content) {
    Map<String, String> map = new HashMap<>();
    Document doc = Jsoup.parse(content);
    Elements elements = doc.getElementsByClass("tt-video-box");
    // Without a video box there is nothing to extract; previously get(0) threw
    // IndexOutOfBoundsException in that case.
    if (elements.isEmpty()) {
        return map;
    }
    String id = elements.get(0).attr("tt-videoid");
    String imageUrl = elements.get(0).attr("tt-poster");
    if (!TextUtils.isEmpty(id)) {
        map.put("id", id);
    }
    if (!TextUtils.isEmpty(imageUrl)) {
        map.put("imageUrl", imageUrl);
    }
    return map;
}
 
Example 9
Source File: JsoupUtils.java    From MHViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Finds the first element in {@code doc} with the given class name.
 *
 * @param doc       document to search
 * @param className class name to match
 * @return the first match, or {@code null} if there is none
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements found = doc.getElementsByClass(className);
    boolean hasMatch = found != null && !found.isEmpty();
    return hasMatch ? found.get(0) : null;
}
 
Example 10
Source File: MoikrugStrategy.java    From JavaRushTasks with MIT License 5 votes vote down vote up
/**
 * Collects vacancies page by page until a page contains no "job" elements or a page
 * cannot be loaded.
 *
 * @param searchString search text forwarded to {@code getDocument}
 * @return the vacancies gathered from all pages that loaded successfully
 */
@Override
public List<Vacancy> getVacancies(String searchString)
{
    List<Vacancy> result = new ArrayList<>();
    int pageNum = 0;
    while (true)
    {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            // Stop paging on failure. Previously the loop continued with a null document
            // (NullPointerException) or with the previous page's document, which re-added
            // the same vacancies forever.
            break;
        }
        if (doc == null) break;

        Elements vacancies = doc.getElementsByClass("job");
        if (vacancies.isEmpty()) break;
        for (Element element : vacancies)
        {
            if (element == null) continue;

            Vacancy vac = new Vacancy();
            vac.setTitle(element.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(element.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + element.select("a[class=job_icon]").attr("href"));
            // Passed through unchanged: the old "empty ? \"\" : value" ternaries were no-ops.
            vac.setSalary(element.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(element.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
        pageNum++;
    }
    return result;
}
 
Example 11
Source File: WikiCorpusTask.java    From ambiverse-nlu with Apache License 2.0 5 votes vote down vote up
/**
 * Reads the {@code href} of the first child of the first element whose class is
 * {@code "interwiki-" + language}.
 *
 * @param document document to search
 * @param language language suffix appended to "interwiki-"
 * @return the link target, or {@code null} when no such element exists
 */
private String retrieveLinkInLanguage(Document document, String language) {
	Elements interwikiLinks = document.getElementsByClass("interwiki-" + language);
	boolean linkFound = interwikiLinks != null && !interwikiLinks.isEmpty();
	return linkFound ? interwikiLinks.first().child(0).attr("href") : null;
}
 
Example 12
Source File: CityStats.java    From zuihou-admin-boot with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the page at {@code url}, builds one town area per row with class "towntr",
 * loads each town's villages, and stores the towns as the children of {@code countyArea}.
 * Rows that do not contain exactly two links are ignored.
 *
 * @param url        page to download and parse; also recorded as the source of each area
 * @param countyArea area whose full name prefixes each town name and whose children are replaced
 */
public static void parseTowntr(String url, Area countyArea) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("towntr");

    List<Area> counties = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        if (links == null || links.size() != 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        // Only the first nine characters of the link text are kept as the town code.
        String towntrCode = links.get(0).text().substring(0, 9);
        String towntrName = links.get(1).text();

        Area towntrArea = Area.builder().label(towntrName).code(towntrCode).source(url)
                .sortValue(sort++).level(new RemoteData<>("TOWNTR")).fullName(countyArea.getFullName() + towntrName)
                .build();

        StaticLog.info("		乡镇级数据:  {}  ", towntrArea);

        // Attach the villages to this town. Passing countyArea here stored them on the
        // county, where the final setChildren(counties) below overwrote them.
        parseVillagetr(COMMON_URL + href.substring(2, 5) + "/" + href.substring(5, 7) + "/" + href,
                towntrArea);

        counties.add(towntrArea);
    }
    countyArea.setChildren(counties);
}
 
Example 13
Source File: ConfluenceServerRCE.java    From TrackRay with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Interactive loop: reads a path from {@code getInput()}, formats it into
 * {@code readFilePayload}, passes the payload to {@code attack} and prints the
 * HTML-escaped "wiki-content" of the response. The loop ends when the input is "exit".
 *
 * @return always the empty string
 */
@Override
public Object start() {

    println("请输入要读取的文件 如/etc/passwd,输入exit退出");

    while (true) {
        String input = getInput();
        if (input.equals("exit")) {
            break;
        }

        // Drop one leading slash; the "file:///" prefix already supplies it.
        String relativePath = input.startsWith("/") ? input.substring(1) : input;
        String payload = String.format(readFilePayload, "file:///" + relativePath);
        Document response = attack(payload);
        if (response == null) {
            continue;
        }

        Elements wikiContent = response.getElementsByClass("wiki-content");
        if (wikiContent == null || !wikiContent.hasText()) {
            continue;
        }

        String text = wikiContent.html();
        println("=========================");
        sendColorMsg(Message.RED(HtmlUtils.htmlEscape(text)));
        println("=========================");
    }

    return "";
}
 
Example 14
Source File: CityStats.java    From zuihou-admin-cloud with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the page at {@code url}, builds one province area per link found inside the
 * rows with class "provincetr", runs {@code parseCity} for each province, and finally
 * logs all provinces as pretty-printed JSON.
 *
 * @param url page to download and parse; also recorded as the source of each area
 */
public static void parseProvince(String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> provinces = new LinkedList<Area>();
    int order = 1;
    // Rows with class 'provincetr' hold the province links.
    for (Element row : page.getElementsByClass("provincetr")) {
        // Each descendant that has an href attribute is one province entry.
        for (Element link : row.getElementsByAttribute("href")) {
            String name = link.text();
            String href = link.attr("href");
            // The first two characters of the href are used as the province code.
            String code = href.substring(0, 2);

            StaticLog.info("provinceName: {} , provinceCode: {} .", name, code);

            Area province = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(order++)
                    .fullName(name)
                    .level(new RemoteData<>("PROVINCE"))
                    .build();

            StaticLog.info("省级数据:  {}  ", province);

            parseCity(COMMON_URL + href, province);
            provinces.add(province);
        }
    }
    StaticLog.info(JSONUtil.toJsonPrettyStr(provinces));
}
 
Example 15
Source File: CityParser.java    From zuihou-admin-cloud with Apache License 2.0 5 votes vote down vote up
/**
     * Parses township-level data: one area per row with class "towntr" on the page at
     * {@code url}. Rows that do not contain exactly two links are ignored. Village-level
     * data is not loaded here.
     *
     * @param fullName full name of the enclosing area, used as the prefix of each town's full name
     * @param url      page to download and parse; also recorded as the source of each area
     * @return the parsed town areas in page order
     */
    public List<Area> parseTowntr(String fullName, String url) {
        Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

        List<Area> towns = new LinkedList<Area>();
        int order = 1;
        for (Element row : page.getElementsByClass("towntr")) {
            Elements anchors = row.getElementsByTag("a");
            boolean hasCodeAndName = anchors != null && anchors.size() == 2;
            if (hasCodeAndName) {
                // First link text is the code, second link text is the name.
                String code = anchors.get(0).text();
                String name = anchors.get(1).text();

                Area town = Area.builder()
                        .label(name)
                        .code(code)
                        .source(url)
                        .fullName(fullName + name)
                        .level(new RemoteData<>("TOWNTR"))
                        .sortValue(order++)
                        .build();

                StaticLog.info("			乡镇级数据:  {}  ", town);

                towns.add(town);
            }
        }
        return towns;
    }
 
Example 16
Source File: Character.java    From KaellyBot with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Builds a {@code Character} from the page at {@code url} by reading elements
 * identified by their "ak-*" CSS classes.
 *
 * NOTE(review): the mandatory lookups call {@code first()} / {@code last()} without an
 * emptiness check, so a page missing one of those elements presumably fails with a
 * NullPointerException — confirm against jsoup's {@code Elements.first()} contract.
 *
 * @param url address of the page to load; also passed through to the {@code Character}
 * @param lg  language used to look up the labels that are stripped from the level and
 *            kolizeum texts
 * @return the character assembled from the page
 * @throws IOException declared by the signature; presumably raised while loading the document
 */
public static Character getCharacter(String url, Language lg) throws IOException {
    Document doc = JSoupManager.getDocument(url);
    // Big skin: taken from the style attribute of the first "ak-entitylook" element,
    // cut from "https://" up to (excluding) the first ")".
    String bigSkinURL = doc.getElementsByClass("ak-entitylook").first().attr("style");
    bigSkinURL = bigSkinURL.substring(bigSkinURL.indexOf("https://"), bigSkinURL.indexOf(")"));
    // Little skin: same extraction, applied to the string form of the last "ak-entitylook" element.
    String littleSkinURL = doc.getElementsByClass("ak-entitylook").last().toString();
    littleSkinURL = littleSkinURL.substring(littleSkinURL.indexOf("https://"), littleSkinURL.indexOf(")"));
    String pseudo = doc.getElementsByClass("ak-return-link").first().text();
    // The translated "level" label is removed so only the remaining text is kept.
    String level = doc.getElementsByClass("ak-directories-level").first().text()
            .replace(Translator.getLabel(lg, "whois.extract.level"), "").trim();
    String classe = doc.getElementsByClass("ak-directories-breed").first().text();
    String server = doc.getElementsByClass("ak-directories-server-name").first().text();
    // Score is rendered as "<score text> (<progress bar text>)".
    String score = doc.getElementsByClass("ak-score-text").first().text() + " ("
            + doc.getElementsByClass("ak-progress-bar-text").first().text() + ")";

    // Optional
    String guildName = null;
    String guildUrl = null;
    String alliName = null;
    String alliUrl = null;

    Elements elem = doc.getElementsByClass("ak-infos-guildname");

    // The alliance is only looked up when a guild element is present.
    if (!elem.isEmpty()) {
        guildName = elem.first().text();
        guildUrl = elem.first().select("a").attr("abs:href");

        elem = doc.getElementsByClass("ak-infos-alliancename");

        if (!elem.isEmpty()) {
            alliName = elem.first().text();
            alliUrl = elem.first().select("a").attr("abs:href");
        }
    }

    StringBuilder ladderXP = new StringBuilder();
    StringBuilder ladderKoli = new StringBuilder();
    StringBuilder ladderSuccess = new StringBuilder();

    // Ladder data is only read when the ladder table container exists.
    elem = doc.getElementsByClass("ak-container ak-table ak-responsivetable");
    if (!elem.isEmpty()) {
        ladderXP.append(doc.getElementsByClass("ak-total-xp").first().text()).append("\n");

        // Kolizeum entries whose text ends with "-1" are skipped.
        for(Element cote : doc.getElementsByClass("ak-total-kolizeum"))
            if (! cote.text().endsWith("-1"))
                ladderKoli.append(cote.text().replace(Translator.getLabel(lg, "whois.extract.koli"), "").trim()).append("\n");

        Elements trs = elem.first().getElementsByTag("tbody").first().getElementsByTag("tr");
        for (Element tr : trs) {
            // The first cell is the row label; it is read and then removed from the row,
            // so the first()/get(1)/last() lookups below refer to the remaining cells.
            String ladderText = tr.getElementsByTag("td").first().text() + " : ";
            tr.getElementsByTag("td").first().remove();
            // A cell whose text is "-" is treated as "no value" and skipped.
            if (!tr.getElementsByTag("td").first().text().equals("-"))
                ladderXP.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").first().text())).append("\n");
            if (!tr.getElementsByTag("td").get(1).text().equals("-"))
                ladderKoli.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").get(1).text())).append("\n");
            if (!tr.getElementsByTag("td").last().text().equals("-"))
                ladderSuccess.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").last().text())).append("\n");
        }
    }

    return new Character(pseudo, level, classe, server, score,
            guildName, guildUrl, alliName, alliUrl, littleSkinURL, bigSkinURL, url,
            ladderXP.toString(), ladderKoli.toString(), ladderSuccess.toString());
}
 
Example 17
Source File: OneKeyWifi.java    From zhangshangwuda with Apache License 2.0 4 votes vote down vote up
/**
 * Returns the combined text of all elements with class "msg" in the given HTML.
 *
 * @param html HTML text to parse
 * @return the text of the matching elements
 */
public static String getErrorMessage(String html) {
	return Jsoup.parse(html).getElementsByClass("msg").text();
}
 
Example 18
Source File: addPingLun.java    From xmpp with Apache License 2.0 4 votes vote down vote up
/**
 * Reads "index.html" from the working directory, prints every "comment_item" element
 * (time, user name, avatar source, body) and saves each one through
 * {@code News_pinglunDaoImpl}. Items are processed from the last element to the first.
 *
 * @param args not used
 */
public static void main(String[] args) {
	File in = new File("index.html");
	News_pinglunDaoImpl ndi=new News_pinglunDaoImpl();
	try {

		Document doc = Jsoup.parse(in, "UTF-8", "");

		Elements e1 = doc.getElementsByClass("comment_item");

		// Iterate backwards over the comment items.
		for (int i = e1.size()-1; i>=0; i--) {
			String ptime=e1.get(i).getElementsByClass("ptime").text();
			// NOTE(review): the pattern below is mis-encoded in this copy of the source;
			// presumably it strips a fixed label from the time text — recover the original.
			ptime=ptime.replaceAll("����", "");
			System.out.println(ptime
					+ "\t"
					+e1.get(i).getElementsByClass("username")
					.text()
					+ "\t"
					+ (e1.get(i).getElementsByTag("img").attr("src"))
					+ "\t"
					+ e1.get(i).getElementsByClass("comment_body").text());
			
			// NOTE(review): the original comment here was mis-encoded; presumably this is
			// the id of the news item the comments belong to — confirm. The value is hard-coded.
			int id = 30;
			// The user field stores "<user name>;<avatar src>".
			String user = e1.get(i).getElementsByClass("username").text()+ ";"
			+ (e1.get(i).getElementsByTag("img").attr("src"));
			String plocation = "";
			String pcontent = e1.get(i).getElementsByClass("comment_body").text();
			String zan = "0";
			News_pinglun news = new News_pinglun(id, user, plocation, ptime,
					pcontent, zan);
			// The result of save is checked but nothing is done in either case.
			if (ndi.save(news)) {
				

			}

		}

	} catch (IOException e) {

		// TODO Auto-generated catch block

		e.printStackTrace();

	}

}
 
Example 19
Source File: CustomVRaptorIntegration.java    From mamute with Apache License 2.0 4 votes vote down vote up
/**
 * Parses {@code html} and returns every element that carries {@code cssClass}.
 *
 * @param html     HTML text to parse
 * @param cssClass class name to look for
 * @return the matching elements
 */
protected Elements getElementsByClass(String html, String cssClass) {
	return Jsoup.parse(html).getElementsByClass(cssClass);
}
 
Example 20
Source File: TCGPlayerDeckSniffer.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Reads the deck search result pages 1 to {@code MAX_PAGE} and converts every row with
 * class "gradeA" into a {@code RetrievableDeck}.
 *
 * @return the decks found on all pages, in page order
 * @throws IOException declared by the signature; presumably raised while reading a page
 */
@Override
public List<RetrievableDeck> getDeckList() throws IOException {
	String url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT);
	logger.debug("get List deck at " + url);
	List<RetrievableDeck> decks = new ArrayList<>();
	int lastPage = getInt(MAX_PAGE);

	// Class marker found in the first cell -> mana symbol, in output order.
	String[][] manaMarkers = {
			{"white-mana", "{W}"},
			{"blue-mana", "{U}"},
			{"black-mana", "{B}"},
			{"red-mana", "{R}"},
			{"green-mana", "{G}"},
	};

	for (int page = 1; page <= lastPage; page++) {
		url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT) + "&page=" + page;
		Document resultPage = Jsoup.parse(IncapsulaParser.readUrl(url));

		for (Element row : resultPage.getElementsByClass("gradeA")) {
			RetrievableDeck deck = new RetrievableDeck();
			List<Element> cells = row.getElementsByTag(MTGConstants.HTML_TAG_TD);

			// Colours are detected from the markup of the first cell.
			String manaCellHtml = cells.get(0).toString();
			StringBuilder colors = new StringBuilder();
			for (String[] marker : manaMarkers) {
				if (manaCellHtml.contains(marker[0])) {
					colors.append(marker[1]);
				}
			}

			Element nameCell = cells.get(1);
			String deckName = nameCell.text();
			String deckLink = getString(URL) + nameCell.getElementsByTag("a").attr("href");
			String author = cells.get(2).text();
			String description = cells.get(3).text();

			deck.setColor(colors.toString());
			deck.setAuthor(author);
			deck.setName(deckName);
			deck.setDescription(description);

			// A link that is not a valid URI leaves the deck without a URL.
			try {
				deck.setUrl(new URI(deckLink));
			} catch (URISyntaxException e) {
				deck.setUrl(null);
			}

			decks.add(deck);
		}
	}

	return decks;
}