Java Code Examples for org.jsoup.nodes.Document#getElementsByClass()

The following examples show how to use org.jsoup.nodes.Document#getElementsByClass(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: zuihou-admin-boot   File: CityStats.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the village-level rows (elements with class {@code villagetr}) of the page at
 * {@code url} and stores them as the children of {@code countyArea}.
 *
 * <p>Rows that do not contain exactly three {@code td} cells are skipped. The text of the
 * first cell is used as the code and the text of the third cell as the name.
 *
 * @param url        page to download and parse; also recorded as each area's source
 * @param countyArea parent area: its full name prefixes every village name and its children
 *                   are replaced with the parsed list
 */
public static void parseVillagetr(String url, Area countyArea) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> villages = new LinkedList<Area>();
    int order = 1;
    for (Element row : page.getElementsByClass("villagetr")) {
        Elements cells = row.getElementsByTag("td");
        boolean hasThreeCells = cells != null && cells.size() == 3;
        if (!hasThreeCells) {
            continue;
        }
        String code = cells.get(0).text();
        String name = cells.get(2).text();

        Area village = Area.builder()
                .code(code)
                .label(name)
                .source(url)
                .sortValue(order++)
                .level(new RemoteData<>("VILLAGETR"))
                .fullName(countyArea.getFullName() + name)
                .build();
        StaticLog.info("		村级数据:  {}  ", village);

        villages.add(village);
    }
    countyArea.setChildren(villages);
}
 
Example 2
Source Project: zuihou-admin-boot   File: CityParser.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Downloads the city listing page of a province and turns every {@code citytr} row into an
 * {@link Area}, attaching the county data returned by {@code parseCounty} as its children.
 *
 * <p>A row is expected to contain at least two links: the first carries the city code (its
 * {@code href} is the relative address of the county page), the second the city name. Rows
 * with fewer than two links are skipped rather than failing with an
 * {@link IndexOutOfBoundsException}.
 *
 * @param provinceName name of the enclosing province, used as the prefix of each full name
 * @param url          page to download and parse; also recorded as each area's source
 * @return the parsed cities in page order
 */
private List<Area> parseCity(String provinceName, String url) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("citytr");

    List<Area> cities = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        if (links.size() < 2) {
            // No code/name pair in this row, nothing to read.
            continue;
        }
        Element codeLink = links.get(0);
        String href = codeLink.attr("href");
        String cityCode = codeLink.text();
        String cityName = links.get(1).text();
        String cityFullName = provinceName + cityName;

        Area cityArea = Area.builder()
                .label(cityName).code(cityCode).source(url).sortValue(sort++)
                .level(new RemoteData<>("CITY"))
                .fullName(cityFullName)
                .build();
        cityArea.setChildren(parseCounty(cityFullName, COMMON_URL + href));
        StaticLog.info("	市级数据:  {}  ", cityArea);

        cities.add(cityArea);
    }
    return cities;
}
 
Example 3
Source Project: 4pdaClient-plus   File: NewDevDbApi.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Fetches the {@code "all"} page below {@code devicesTypeUrl} and extracts one
 * {@code DevCatalog} of type {@code DEVICE_BRAND} per {@code li} found inside the elements
 * with class {@code word-list}.
 *
 * @param client         HTTP client used to download the page
 * @param devicesTypeUrl base URL of the device type; {@code "all"} is appended to it
 * @return the brands in page order; each entry holds the link's {@code href} and the item text
 * @throws Throwable whatever the HTTP client raises
 */
public static ArrayList<DevCatalog> parseBrands(IHttpClient client, String devicesTypeUrl) throws Throwable {
    Document page = Jsoup.parse(client.performGet(devicesTypeUrl + "all").getResponseBody());
    ArrayList<DevCatalog> brands = new ArrayList<>();

    for (Element item : page.getElementsByClass("word-list").select("li")) {
        String link = item.getElementsByTag("a").attr("href");
        String title = item.text();

        DevCatalog brand = new DevCatalog(link, title);
        brand.setType(DevCatalog.DEVICE_BRAND);
        brands.add(brand);
    }
    return brands;
}
 
Example 4
/**
 * Downloads the ACG17 news category page and passes the extracted articles to
 * {@code process}.
 *
 * <p>Every element with class {@code item-list} becomes one {@code ACGNew}: the cover URL is
 * cut out of the thumbnail's inline {@code style} attribute, title and link come from the
 * first child of the {@code post-box-title} element, the date from {@code tie-date} and the
 * introduction from the first child of {@code entry}.
 *
 * @throws IOException          if sending the HTTP request fails
 * @throws InterruptedException if the HTTP request is interrupted
 */
private void pullACG17News() throws IOException, InterruptedException {
    HttpRequest newsRequest = HttpRequest.newBuilder()
            .uri(URI.create("http://acg17.com/category/news/"))
            .GET()
            .build();
    String html = httpClient.send(newsRequest, HttpResponse.BodyHandlers.ofString()).body();
    Elements items = Jsoup.parse(html).getElementsByClass("item-list");

    List<ACGNew> news = items.stream().map(item -> {
        // The cover is whatever sits between "url(" and the first ")" of the style attribute.
        String css = item.getElementsByClass("attachment-tie-medium size-tie-medium wp-post-image")
                .get(0)
                .attr("style");
        int coverStart = css.indexOf("url(") + 4;
        int coverEnd = css.indexOf(")");
        String cover = css.substring(coverStart, coverEnd);

        Element titleLink = item.getElementsByClass("post-box-title").get(0).child(0);

        // Replace the 年/月 markers with '-' and drop 日 before handing the text to LocalDate.
        String dateText = item.getElementsByClass("tie-date").get(0).text()
                .replaceAll("[年月]", "-")
                .replace("日", "");
        LocalDate createDate = LocalDate.parse(dateText);

        String intro = item.getElementsByClass("entry").get(0).child(0).text();
        String title = titleLink.text();
        String refererUrl = titleLink.attr("href");
        return new ACGNew(title, intro, NewsCrawlerConstant.ACG17, cover, refererUrl, createDate,
                NewsCrawlerConstant.ACG17);
    }).collect(Collectors.toList());

    process(news, "class", "entry");
}
 
Example 5
/**
 * Collects the numeric subject ids linked from the first {@code pageNum} pages of the
 * bangumi.tv anime browser (pages are requested with indices {@code 0 .. pageNum - 1}).
 *
 * <p>For every element matched by {@code getElementsByClass("subjectCover cover ll")} the
 * digits of its {@code href} are parsed as the id. Links without any digit are skipped.
 *
 * @param pageNum number of pages to crawl
 * @return the ids in the order they were encountered
 * @throws IOException          if sending an HTTP request fails
 * @throws InterruptedException if an HTTP request is interrupted
 */
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException {
    List<Integer> idList = new ArrayList<>(24);
    for (int currentIndex = 0; currentIndex < pageNum; currentIndex++) {
        System.out.println("开始爬取第" + currentIndex + "页");
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build();
        String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body();

        Document doc = Jsoup.parse(body);
        Elements elements = doc.getElementsByClass("subjectCover cover ll");
        elements.forEach(e -> {
            String digits = e.attr("href").replaceAll("\\D", "");
            // Integer.parseInt rejects any non-digit character, including a trailing
            // newline, so only the bare digits are parsed; links without digits are ignored.
            if (!digits.isEmpty()) {
                idList.add(Integer.parseInt(digits));
            }
        });
    }
    return idList;
}
 
Example 6
Source Project: MHViewer   File: JsoupUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the first element of {@code doc} that carries the given class.
 *
 * @param doc       document to search
 * @param className class name passed to {@code getElementsByClass}
 * @return the first match, or {@code null} when nothing matches
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements matches = doc.getElementsByClass(className);
    if (matches == null || matches.isEmpty()) {
        return null;
    }
    return matches.get(0);
}
 
Example 7
Source Project: zuihou-admin-cloud   File: CityStats.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Downloads the province index page at {@code url}, builds an {@link Area} for every link
 * found inside elements with class {@code provincetr}, descends into each province's city
 * page via {@code parseCity}, and finally logs all provinces as pretty-printed JSON.
 *
 * @param url page to download and parse; also recorded as each province's source
 */
public static void parseProvince(String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> provinces = new LinkedList<Area>();
    int order = 1;
    // Elements with class 'provincetr' hold the province links.
    for (Element row : page.getElementsByClass("provincetr")) {
        // Every descendant carrying an href attribute is treated as one province.
        for (Element link : row.getElementsByAttribute("href")) {
            String name = link.text();
            String href = link.attr("href");
            // The first two characters of the link target are used as the province code.
            String code = href.substring(0, 2);

            StaticLog.info("provinceName: {} , provinceCode: {} .", name, code);

            Area province = Area.builder()
                    .code(code)
                    .label(name)
                    .source(url)
                    .sortValue(order++)
                    .fullName(name)
                    .level(new RemoteData<>("PROVINCE"))
                    .build();

            StaticLog.info("省级数据:  {}  ", province);

            parseCity(COMMON_URL + href, province);
            provinces.add(province);
        }
    }
    StaticLog.info(JSONUtil.toJsonPrettyStr(provinces));
}
 
Example 8
Source Project: zuihou-admin-boot   File: CityStats.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses the town-level rows (elements with class {@code towntr}) of the page at {@code url},
 * loads the villages of every town, and stores the towns as the children of
 * {@code countyArea}.
 *
 * <p>Rows that do not contain exactly two links are skipped. The first link supplies the
 * town code (first nine characters of its text) and the relative address of the village
 * page; the second link supplies the town name.
 *
 * <p>Villages are attached to the town they belong to. Attaching them to
 * {@code countyArea} would be pointless because the county's children are replaced with the
 * town list at the end of this method.
 *
 * @param url        page to download and parse; also recorded as each area's source
 * @param countyArea parent area: its full name prefixes every town name and its children are
 *                   replaced with the parsed list
 */
public static void parseTowntr(String url, Area countyArea) {
    String htmlStr = HttpUtil.get(url, CHARSET);
    Document document = Jsoup.parse(htmlStr);
    Elements trs = document.getElementsByClass("towntr");

    List<Area> towns = new LinkedList<Area>();
    int sort = 1;
    for (Element tr : trs) {
        Elements links = tr.getElementsByTag("a");
        if (links == null || links.size() != 2) {
            continue;
        }
        String href = links.get(0).attr("href");
        String towntrCode = links.get(0).text().substring(0, 9);
        String towntrName = links.get(1).text();

        Area towntrArea = Area.builder().label(towntrName).code(towntrCode).source(url)
                .sortValue(sort++).level(new RemoteData<>("TOWNTR")).fullName(countyArea.getFullName() + towntrName)
                .build();

        StaticLog.info("		乡镇级数据:  {}  ", towntrArea);

        // The village page lives under <href[2,5)>/<href[5,7)>/<href> below the common root.
        String villageUrl = COMMON_URL + href.substring(2, 5) + "/" + href.substring(5, 7) + "/" + href;
        parseVillagetr(villageUrl, towntrArea);

        towns.add(towntrArea);
    }
    countyArea.setChildren(towns);
}
 
Example 9
Source Project: ambiverse-nlu   File: WikiCorpusTask.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Looks up the link stored under the class {@code "interwiki-" + language} in the document.
 *
 * @param document document to search
 * @param language language suffix appended to {@code "interwiki-"}
 * @return the {@code href} of the first child of the first matching element, or
 *         {@code null} when no element carries that class
 */
private String retrieveLinkInLanguage(Document document, String language) {
	Elements interwikiEntries = document.getElementsByClass("interwiki-" + language);
	boolean found = interwikiEntries != null && !interwikiEntries.isEmpty();
	if (!found) {
		return null;
	}
	return interwikiEntries.first().child(0).attr("href");
}
 
Example 10
Source Project: JavaRushTasks   File: MoikrugStrategy.java    License: MIT License 5 votes vote down vote up
/**
 * Collects the vacancies matching {@code searchString}, page by page, until a page contains
 * no element with class {@code job}.
 *
 * <p>Paging also stops when a page cannot be loaded. The vacancies gathered so far are
 * returned in that case; continuing would either dereference a missing document or process
 * the previous page again and never terminate.
 *
 * @param searchString query passed to {@code getDocument}
 * @return the vacancies found, in page order; never {@code null}
 */
@Override
public List<Vacancy> getVacancies(String searchString)
{
    List<Vacancy> result = new ArrayList<>();
    int pageNum = 0;
    while (true)
    {
        Document doc;
        try {
            doc = getDocument(searchString, pageNum);
        } catch (IOException e) {
            e.printStackTrace();
            break;
        }
        if (doc == null) break;

        Elements vacancies = doc.getElementsByClass("job");
        if (vacancies.isEmpty()) break;

        for (Element element : vacancies)
        {
            Vacancy vac = new Vacancy();
            vac.setTitle(element.getElementsByAttributeValue("class", "title").text());
            vac.setCompanyName(element.getElementsByAttributeValue("class", "company_name").text());
            vac.setSiteName(URL_FORMAT);
            vac.setUrl("https://moikrug.ru" + element.select("a[class=job_icon]").attr("href"));
            // text() already yields "" when nothing matches, so no extra emptiness handling.
            vac.setSalary(element.getElementsByAttributeValue("class", "salary").text());
            vac.setCity(element.getElementsByAttributeValue("class", "location").text());
            result.add(vac);
        }
        pageNum++;
    }
    return result;
}
 
Example 11
/**
 * Extracts the video id and poster URL from the first element with class
 * {@code tt-video-box} in the given HTML.
 *
 * @param content HTML to parse
 * @return a map holding {@code "id"} (attribute {@code tt-videoid}) and {@code "imageUrl"}
 *         (attribute {@code tt-poster}); a key is omitted when its value is empty, and the
 *         map is empty when the content has no {@code tt-video-box} element
 */
private Map<String, String> parseJson(String content) {
    Map<String, String> map = new HashMap<>();
    Elements elements = Jsoup.parse(content).getElementsByClass("tt-video-box");
    if (elements.isEmpty()) {
        // Nothing to read; get(0) would throw IndexOutOfBoundsException here.
        return map;
    }
    String id = elements.get(0).attr("tt-videoid");
    String imageUrl = elements.get(0).attr("tt-poster");
    if (!TextUtils.isEmpty(id)) {
        map.put("id", id);
    }
    if (!TextUtils.isEmpty(imageUrl)) {
        map.put("imageUrl", imageUrl);
    }
    return map;
}
 
Example 12
Source Project: EhViewer   File: JsoupUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Finds the first element of {@code doc} with the given class name.
 *
 * @param doc       document to search
 * @param className class name passed to {@code getElementsByClass}
 * @return the first matching element, or {@code null} if there is none
 */
@Nullable
public static Element getElementByClass(Document doc, String className) {
    Elements found = doc.getElementsByClass(className);
    boolean hasMatch = found != null && !found.isEmpty();
    return hasMatch ? found.get(0) : null;
}
 
Example 13
Source Project: newblog   File: HTTPStudy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs a search for {@code keyword} and requests every result whose displayed URL contains
 * {@code www.wenzhihuai.com}.
 *
 * <p>The search URL is built by substituting the literal text {@code keyword} in
 * {@code baseURL}. The substitution is literal: {@code String.replaceAll} would interpret
 * {@code $} and {@code \} inside the keyword as replacement metacharacters.
 *
 * @param keyword search term inserted into the search URL as is
 * @throws Exception whatever the HTTP helper raises
 */
public static void baidu(String keyword) throws Exception {
    String content = HttpHelper.getInstance().get(baseURL.replace("keyword", keyword));
    Document jsoup = Jsoup.parse(content);
    Elements elements = jsoup.getElementsByClass("result");
    for (Element element : elements) {
        String str = element.select(".c-showurl").text();
        if (str.contains("www.wenzhihuai.com")) {
            String wenzhihuai = element.select(".t").select("a").attr("href");
            HttpHelper.getInstance().get(wenzhihuai);
            logger.info("百度->温志怀URL:" + wenzhihuai);
        }
    }
}
 
Example 14
/**
 * Interactive loop: reads a file path from {@code getInput()}, formats
 * {@code readFilePayload} with the corresponding {@code file:///} URL, submits it through
 * {@code attack} and prints the HTML-escaped content of the {@code wiki-content} elements
 * of the response. Entering {@code exit} ends the loop.
 *
 * @return always the empty string
 */
@Override
public Object start() {
    println("请输入要读取的文件 如/etc/passwd,输入exit退出");

    while (true) {
        String path = getInput();
        if (path.equals("exit")) {
            break;
        }

        // "file:///" already supplies the leading slash.
        if (path.startsWith("/")) {
            path = path.substring(1);
        }
        String payload = String.format(readFilePayload, "file:///".concat(path));
        Document response = attack(payload);
        if (response == null) {
            continue;
        }

        Elements wiki = response.getElementsByClass("wiki-content");
        if (wiki == null || !wiki.hasText()) {
            continue;
        }

        String text = wiki.html();
        println("=========================");
        sendColorMsg(Message.RED(HtmlUtils.htmlEscape(text)));
        println("=========================");
    }

    return "";
}
 
Example 15
Source Project: zuihou-admin-cloud   File: CityParser.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Parses town-level data: every {@code towntr} row of the page at {@code url} that contains
 * exactly two links becomes an {@link Area}.
 *
 * <p>The first link supplies the town code, the second the town name. Village-level data is
 * not loaded by this method.
 *
 * @param fullName full name of the parent area, used as the prefix of each town's full name
 * @param url      page to download and parse; also recorded as each area's source
 * @return the parsed towns in page order
 */
public List<Area> parseTowntr(String fullName, String url) {
    Document page = Jsoup.parse(HttpUtil.get(url, CHARSET));

    List<Area> towns = new LinkedList<Area>();
    int order = 1;
    for (Element row : page.getElementsByClass("towntr")) {
        Elements anchors = row.getElementsByTag("a");
        boolean hasCodeAndName = anchors != null && anchors.size() == 2;
        if (!hasCodeAndName) {
            continue;
        }
        // Read although unused here: it is the relative address of the village page.
        String href = anchors.get(0).attr("href");
        String code = anchors.get(0).text();
        String name = anchors.get(1).text();

        Area town = Area.builder()
                .label(name)
                .code(code)
                .source(url)
                .fullName(fullName + name)
                .level(new RemoteData<>("TOWNTR"))
                .sortValue(order++)
                .build();

        StaticLog.info("			乡镇级数据:  {}  ", town);

        towns.add(town);
    }
    return towns;
}
 
Example 16
/**
 * Reads the deck search result pages {@code 1 .. MAX_PAGE} for the configured format and
 * converts every row with class {@code gradeA} into a {@code RetrievableDeck}.
 *
 * <p>Per row, the first cell is scanned for the five mana markers to build the colour string,
 * the second cell supplies the deck name and link, the third the author and the fourth the
 * description. A link that is not a valid URI leaves the deck's URL set to {@code null}.
 *
 * @return the decks in page order
 * @throws IOException if a page cannot be read
 */
@Override
public List<RetrievableDeck> getDeckList() throws IOException {
	String url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT);
	logger.debug("get List deck at " + url);
	List<RetrievableDeck> decks = new ArrayList<>();
	int lastPage = getInt(MAX_PAGE);

	// Marker found in the first cell -> mana symbol, in the order they are appended.
	String[][] manaMarkers = {
			{"white-mana", "{W}"},
			{"blue-mana", "{U}"},
			{"black-mana", "{B}"},
			{"red-mana", "{R}"},
			{"green-mana", "{G}"},
	};

	for (int page = 1; page <= lastPage; page++) {
		url = getString(URL) + "/magic/deck/search?format=" + getString(FORMAT) + "&page=" + page;
		Document d = Jsoup.parse(IncapsulaParser.readUrl(url));

		for (Element row : d.getElementsByClass("gradeA")) {
			RetrievableDeck deck = new RetrievableDeck();

			String manaMarkup = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(0).toString();
			StringBuilder colors = new StringBuilder();
			for (String[] marker : manaMarkers) {
				if (manaMarkup.contains(marker[0])) {
					colors.append(marker[1]);
				}
			}

			Element nameCell = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(1);
			String deckName = nameCell.text();
			String link = getString(URL) + nameCell.getElementsByTag("a").attr("href");
			String deckPlayer = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(2).text();
			String deckDesc = row.getElementsByTag(MTGConstants.HTML_TAG_TD).get(3).text();

			deck.setColor(colors.toString());
			deck.setAuthor(deckPlayer);
			deck.setName(deckName);
			deck.setDescription(deckDesc);

			try {
				deck.setUrl(new URI(link));
			} catch (URISyntaxException e) {
				deck.setUrl(null);
			}

			decks.add(deck);
		}
	}

	return decks;
}
 
Example 17
Source Project: mamute   File: CustomVRaptorIntegration.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Parses {@code html} and returns every element carrying {@code cssClass}.
 *
 * @param html     markup to parse
 * @param cssClass class name to look for
 * @return the matching elements
 */
protected Elements getElementsByClass(String html, String cssClass) {
	return Jsoup.parse(html).getElementsByClass(cssClass);
}
 
Example 18
Source Project: zhangshangwuda   File: OneKeyWifi.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Extracts the combined text of all elements with class {@code msg} from the given HTML.
 *
 * @param html markup to parse
 * @return the text of the {@code msg} elements
 */
public static String getErrorMessage(String html) {
	Document page = Jsoup.parse(html);
	return page.getElementsByClass("msg").text();
}
 
Example 19
Source Project: xmpp   File: addPingLun.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Imports the comments found in the local file {@code index.html}: every element with class
 * {@code comment_item} is printed to standard output and saved through
 * {@code News_pinglunDaoImpl}, walking the comments from last to first.
 *
 * @param args unused
 */
public static void main(String[] args) {
	File source = new File("index.html");
	News_pinglunDaoImpl dao = new News_pinglunDaoImpl();
	try {
		Document doc = Jsoup.parse(source, "UTF-8", "");
		Elements comments = doc.getElementsByClass("comment_item");

		for (int idx = comments.size() - 1; idx >= 0; idx--) {
			// NOTE(review): the literal removed from the timestamp looks mis-encoded in this
			// copy of the file; confirm the intended text against the original source.
			String ptime = comments.get(idx).getElementsByClass("ptime").text().replaceAll("����", "");
			String userName = comments.get(idx).getElementsByClass("username").text();
			String avatar = comments.get(idx).getElementsByTag("img").attr("src");
			String pcontent = comments.get(idx).getElementsByClass("comment_body").text();

			System.out.println(ptime + "\t" + userName + "\t" + avatar + "\t" + pcontent);

			// Every comment is stored under this fixed id.
			int id = 30;
			String user = userName + ";" + avatar;
			String plocation = "";
			String zan = "0";
			News_pinglun news = new News_pinglun(id, user, plocation, ptime, pcontent, zan);
			// The outcome of the save is not acted upon.
			dao.save(news);
		}
	} catch (IOException e) {
		e.printStackTrace();
	}
}
 
Example 20
/**
 * Builds a {@code Character} from the profile page at {@code url}.
 *
 * <p>Mandatory fields (skins, pseudo, level, class, server, score) are read from the first
 * (or last) element carrying the relevant {@code ak-*} class. Guild and alliance are optional;
 * the alliance is only looked up when a guild element exists. Ladder texts are filled only
 * when an element matched by {@code "ak-container ak-table ak-responsivetable"} is found.
 *
 * <p>NOTE(review): the mandatory lookups dereference {@code first()}/{@code last()} without a
 * check; presumably a page missing one of those elements fails with a
 * {@link NullPointerException} - confirm against the jsoup version in use.
 *
 * @param url address of the profile page; also passed through to the resulting object
 * @param lg  language used to resolve the labels that are stripped from the level and
 *            kolizeum texts
 * @return the character described by the page
 * @throws IOException declared for {@code JSoupManager.getDocument}
 */
public static Character getCharacter(String url, Language lg) throws IOException {
    Document doc = JSoupManager.getDocument(url);
    // Big skin: taken from the style attribute of the first "ak-entitylook" element,
    // keeping the text from "https://" up to the first ")".
    String bigSkinURL = doc.getElementsByClass("ak-entitylook").first().attr("style");
    bigSkinURL = bigSkinURL.substring(bigSkinURL.indexOf("https://"), bigSkinURL.indexOf(")"));
    // Little skin: same extraction, but applied to the full markup (toString()) of the
    // last "ak-entitylook" element rather than to its style attribute.
    String littleSkinURL = doc.getElementsByClass("ak-entitylook").last().toString();
    littleSkinURL = littleSkinURL.substring(littleSkinURL.indexOf("https://"), littleSkinURL.indexOf(")"));
    String pseudo = doc.getElementsByClass("ak-return-link").first().text();
    // The translated "level" label is removed so that only the remaining text is kept.
    String level = doc.getElementsByClass("ak-directories-level").first().text()
            .replace(Translator.getLabel(lg, "whois.extract.level"), "").trim();
    String classe = doc.getElementsByClass("ak-directories-breed").first().text();
    String server = doc.getElementsByClass("ak-directories-server-name").first().text();
    // Score is rendered as "<score text> (<progress bar text>)".
    String score = doc.getElementsByClass("ak-score-text").first().text() + " ("
            + doc.getElementsByClass("ak-progress-bar-text").first().text() + ")";

    // Optional fields: stay null when the page has no guild / alliance element.
    String guildName = null;
    String guildUrl = null;
    String alliName = null;
    String alliUrl = null;

    Elements elem = doc.getElementsByClass("ak-infos-guildname");

    if (!elem.isEmpty()) {
        guildName = elem.first().text();
        guildUrl = elem.first().select("a").attr("abs:href");

        // The alliance is only searched for when a guild was found.
        elem = doc.getElementsByClass("ak-infos-alliancename");

        if (!elem.isEmpty()) {
            alliName = elem.first().text();
            alliUrl = elem.first().select("a").attr("abs:href");
        }
    }

    StringBuilder ladderXP = new StringBuilder();
    StringBuilder ladderKoli = new StringBuilder();
    StringBuilder ladderSuccess = new StringBuilder();

    // Ladder section: the three builders stay empty when no ladder table is found.
    elem = doc.getElementsByClass("ak-container ak-table ak-responsivetable");
    if (!elem.isEmpty()) {
        ladderXP.append(doc.getElementsByClass("ak-total-xp").first().text()).append("\n");

        // Kolizeum entries whose text ends with "-1" are left out.
        for(Element cote : doc.getElementsByClass("ak-total-kolizeum"))
            if (! cote.text().endsWith("-1"))
                ladderKoli.append(cote.text().replace(Translator.getLabel(lg, "whois.extract.koli"), "").trim()).append("\n");

        Elements trs = elem.first().getElementsByTag("tbody").first().getElementsByTag("tr");
        for (Element tr : trs) {
            // The first cell is the row label; it is removed from the DOM right after being
            // read, so the first()/get(1)/last() lookups below address the remaining cells.
            String ladderText = tr.getElementsByTag("td").first().text() + " : ";
            tr.getElementsByTag("td").first().remove();
            // A cell containing "-" contributes nothing to its ladder.
            if (!tr.getElementsByTag("td").first().text().equals("-"))
                ladderXP.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").first().text())).append("\n");
            if (!tr.getElementsByTag("td").get(1).text().equals("-"))
                ladderKoli.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").get(1).text())).append("\n");
            if (!tr.getElementsByTag("td").last().text().equals("-"))
                ladderSuccess.append(ladderText).append(EmojiManager.getEmojiForLadder(tr.getElementsByTag("td").last().text())).append("\n");
        }
    }

    return new Character(pseudo, level, classe, server, score,
            guildName, guildUrl, alliName, alliUrl, littleSkinURL, bigSkinURL, url,
            ladderXP.toString(), ladderKoli.toString(), ladderSuccess.toString());
}