Java Code Examples for org.jsoup.select.Elements#forEach()

The following examples show how to use org.jsoup.select.Elements#forEach() . You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: BangumiCrawlerService.java    From Pixiv-Illustration-Collection-Backend with Apache License 2.0 6 votes vote down vote up
private List<Integer> querySubjectId(Integer pageNum) throws IOException, InterruptedException {
    List<Integer> idList = new ArrayList<>(24);
    int currentIndex = 0;
    //开始查找id并添加到文件
    for (; currentIndex < pageNum; currentIndex++) {
        System.out.println("开始爬取第" + currentIndex + "页");
        HttpRequest request = HttpRequest.newBuilder()
                .uri(URI.create("https://bangumi.tv/anime/browser/?sort=date&page=" + currentIndex)).GET().build();
        String body = httpClient.send(request, HttpResponse.BodyHandlers.ofString()).body();
        //jsoup提取文本
        Document doc = Jsoup.parse(body);
        Elements elements = doc.getElementsByClass("subjectCover cover ll");
        elements.forEach(e -> {
            idList.add(Integer.parseInt(e.attr("href").replaceAll("\\D", "") + "\n"));
        });
    }
    return idList;
}
 
Example 2
Source File: ZIMuKuCommon.java    From SubTitleSearcher with Apache License 2.0 5 votes vote down vote up
/**
 * 获取下载网址列表
 * @return
 */
public static JSONArray getDetailList(String url) {
	String result = httpGet(baseUrl+url);
	//System.out.println(result);
	Document doc = Jsoup.parse(result);
	Elements matchList = doc.select("#subtb tbody tr");
	if(matchList.size() == 0)return new JSONArray();
	//System.out.println(matchList.html());
	JSONArray resList = new JSONArray();
	for(int i  = 0 ; i < matchList.size(); i++) {
		Element row = matchList.get(i);
		JSONObject resRow = new JSONObject();
		resRow.put("url", row.selectFirst("a").attr("href"));
		resRow.put("title", row.selectFirst("a").attr("title"));
		resRow.put("ext", row.selectFirst(".label-info").text());
		Elements authorInfos = row.select(".gray");
		StringBuffer authorInfo = new StringBuffer();
		authorInfos.forEach(element ->{
			authorInfo.append(element.text() + ",");
		});
		if(authorInfo.length() > 0) {
			resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1));
		}else {
			resRow.put("authorInfo", "");
		}
		
		resRow.put("lang", row.selectFirst("img").attr("alt"));
		resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", ""));
		resRow.put("downCount", row.select("td").get(3).text());
		resList.add(resRow);
	}
	return resList;
}
 
Example 3
Source File: Scraper.java    From rxjava2-lab with Apache License 2.0 5 votes vote down vote up
public static void main(String[] args) throws IOException {
    Document doc = Jsoup.connect("https://www.superherodb.com/characters/").get();
    System.out.println("Scraping " + doc.title());
    Elements links = doc.select("a[title]");

    Map<String, String> names = new LinkedHashMap<>();
    links.forEach(element -> {
        String name = element.attr("title");
        String href = element.attr("href");
        if (name != null && !name.trim().isEmpty()  && ! isExcluded(name)) {
            names.put(name, href);
        }
    });

    System.out.println(names.size() + " superheros and villains found");

    Vertx vertx = Vertx.vertx();
    WebClient client = WebClient.create(vertx);
    
    AtomicInteger counter = new AtomicInteger();

    Flowable.fromIterable(names.entrySet())
        .flatMapSingle(entry -> scrap(client, entry.getKey(), "https://www.superherodb.com" + entry.getValue()))
        .doOnNext(superStuff -> System.out.println("Retrieved " + superStuff + " (" + counter.incrementAndGet() + " / " +
            names.size() + ")"))
        .toList()
        .flatMapCompletable(list -> vertx.fileSystem()
            .rxWriteFile("src/main/resources/characters.json", new Buffer(Json.encodeToBuffer(list)))
        )
        .subscribe(
            () -> System.out.println("Written " + names.size() + " super heroes and villains"),
            Throwable::printStackTrace
        );
}
 
Example 4
Source File: ZIMuKuCommon.java    From SubTitleSearcher with Apache License 2.0 5 votes vote down vote up
/**
 * 获取下载网址列表
 * @return
 */
public static JSONArray getDetailList(String url) {
	String result = httpGet(baseUrl+url);
	//System.out.println(result);
	Document doc = Jsoup.parse(result);
	Elements matchList = doc.select("#subtb tbody tr");
	if(matchList.size() == 0)return new JSONArray();
	//System.out.println(matchList.html());
	JSONArray resList = new JSONArray();
	for(int i  = 0 ; i < matchList.size(); i++) {
		Element row = matchList.get(i);
		JSONObject resRow = new JSONObject();
		resRow.put("url", row.selectFirst("a").attr("href"));
		resRow.put("title", row.selectFirst("a").attr("title"));
		resRow.put("ext", row.selectFirst(".label-info").text());
		Elements authorInfos = row.select(".gray");
		StringBuffer authorInfo = new StringBuffer();
		authorInfos.forEach(element ->{
			authorInfo.append(element.text() + ",");
		});
		if(authorInfo.length() > 0) {
			resRow.put("authorInfo", authorInfo.toString().substring(0, authorInfo.length()-1));
		}else {
			resRow.put("authorInfo", "");
		}
		
		resRow.put("lang", row.selectFirst("img").attr("alt"));
		resRow.put("rate", row.selectFirst(".rating-star").attr("title").replace("字幕质量:", ""));
		resRow.put("downCount", row.select("td").get(3).text());
		resList.add(resRow);
	}
	return resList;
}
 
Example 5
Source File: ApsvTimerTask.java    From AlipayOrdersSupervisor-GUI with MIT License 5 votes vote down vote up
private ArrayList<ApsvOrder> findOrders(String html) {
    //logger.info("Html: {}", html);
    ArrayList<ApsvOrder> orders = new ArrayList<>();

    Document doc = Jsoup.parse(html);

    Element ordersForm = doc.getElementById("J-submit-form");
    if (ordersForm == null) {
        logger.error("Cannot find order list form, maybe cookie expires");
        // 标记task status为异常
        // TODO 弹窗提醒cookie异常
        RunTasksModel.getInstance().MarkTaskException(task.id);
        return orders;
    }

    Elements tableBody = doc.select("#tradeRecordsIndex>tbody");
    Elements orderRows = tableBody.select("tr");

    orderRows.forEach(row -> {
        Elements timeNodes = row.select("td.time p");
        String[] orderNoData = row.select("td.tradeNo p").text().split("\\|");
        ApsvOrder order = new ApsvOrder(){
            {
                taskId = task.id;
                time = timeNodes.get(0).text() + " " + timeNodes.get(timeNodes.size() - 1).text();
                description = row.select(".memo-info").text();
                memo = row.select("td.memo p").text();
                tradeNo = orderNoData.length > 1 ? orderNoData[1].split(":")[1] : orderNoData[0].split(":")[1];
                username = Unicode.unicodeToString(row.select("td.other p").text());
                amount = Float.parseFloat(row.select("td.amount span").text().replaceAll("\\s+", ""));
                status = row.select("td.status p").text();
            }
        };
        order.sig = Order.Sign(order, task.pushSecret);
        orders.add(order);
    });
    return orders;
}
 
Example 6
Source File: QuietSpeculationDashboard.java    From MtgDesktopCompanion with GNU General Public License v3.0 5 votes vote down vote up
@Override
protected EditionsShakers getOnlineShakesForEdition(MagicEdition ed) throws IOException {
	String uri = "https://www.quietspeculation.com/tradertools/prices/sets/"+ed.getSet().replace(" ", "%20");
	EditionsShakers ret = new EditionsShakers();
		ret.setEdition(ed);
		ret.setDate(new Date());
		ret.setProviderName(getName());
	
	
	Document content = URLTools.extractHtml(uri);
	
	Elements trs = content.getElementById("SetCards").select("tr[id]");
	trs.forEach(tr->{
		CardShake cs = new CardShake();
			cs.setName(tr.getElementsByTag("td").get(0).text());
			
			try {
			cs.setPrice(Double.parseDouble(tr.getElementsByTag("td").get(5).text().replaceAll("\\$", "")));
			}
			catch(Exception ex)
			{
				cs.setPrice(0.0);
			}
			
			
			cs.setEd(ed.getSet());
			cs.setDateUpdate(new Date());
			cs.setCurrency(Currency.getInstance("USD"));
			ret.addShake(cs);
			notify(cs);
	});
	return ret;
}
 
Example 7
Source File: ChannelFireballPricer.java    From MtgDesktopCompanion with GNU General Public License v3.0 5 votes vote down vote up
@Override
public List<MagicPrice> getLocalePrice(MagicEdition me, MagicCard card) throws IOException {
	ArrayList<MagicPrice> list = new ArrayList<>();

	Document root = URLTools.extractHtml(baseUrl+"/products/search?query="+ URLTools.encode(card.getName()));
	
	Elements lis = root.select("ul.products li div.meta");
	
	
	lis.forEach(li->{
		
		if(!li.getElementsByTag("form").text().contains("Wishlist") && li.getElementsByTag("a").first().text().toLowerCase().startsWith(card.getName().toLowerCase())) 
		{
		
		MagicPrice p = new MagicPrice();
				   p.setCountry("USA");
				   p.setCurrency("USD");
				   p.setSite(getName());
				   p.setUrl(baseUrl+li.getElementsByTag("a").first().attr("href"));
				   p.setSeller(li.getElementsByTag("a").get(1).text());
				   p.setValue(UITools.parseDouble(li.select("span[itemprop].price").first().text().replaceAll("\\$","").trim()));
				   p.setFoil(li.getElementsByTag("a").first().text().contains("- Foil"));
				   list.add(p);
		}
	});

	logger.info(getName() + " found " + list.size() + " item(s)");

	return list;
}
 
Example 8
Source File: BootstrapHandlerTest.java    From flow with Apache License 2.0 5 votes vote down vote up
@Test
public void getBootstrapPage_jsModulesDoNotContainDeferAttribute()
        throws ServiceException {
    List<DependencyFilter> filters = (List<DependencyFilter>) service
            .getDependencyFilters();
    filters.add((list, context) -> {
        list.clear(); // remove everything
        return list;
    });
    filters.add((list, context) -> {
        list.add(new Dependency(Dependency.Type.JS_MODULE, "//module.js",
                LoadMode.EAGER));
        return list;
    });

    initUI(testUI);

    BootstrapContext bootstrapContext = new BootstrapContext(request, null,
            session, testUI, this::contextRootRelativePath);
    Document page = pageBuilder.getBootstrapPage(bootstrapContext);

    Elements scripts = page.head().getElementsByTag("script");
    scripts.forEach(s -> System.err.println(s.outerHtml()));

    Element element = scripts.stream()
            .filter(elem -> elem.attr("src").equals("//module.js"))
            .findFirst().get();
    Assert.assertFalse(element.hasAttr("defer"));

    Element bundle = scripts.stream()
            .filter(el -> el.attr("src")
                    .equals("./VAADIN/build/vaadin-bundle-1111.cache.js"))
            .findFirst().get();
    Assert.assertFalse(bundle.hasAttr("defer"));
}
 
Example 9
Source File: JsoupParserIntegrationTest.java    From tutorials with MIT License 5 votes vote down vote up
@Test
public void examplesTraversing() {
    Elements sections = doc.select("section");

    Element firstSection = sections.first();
    Element lastSection = sections.last();
    Element secondSection = sections.get(2);
    Elements allParents = firstSection.parents();
    Element parent = firstSection.parent();
    Elements children = firstSection.children();
    Elements siblings = firstSection.siblingElements();

    sections.forEach(el -> System.out.println("section: " + el));
}
 
Example 10
Source File: SubHDCommon.java    From SubTitleSearcher with Apache License 2.0 4 votes vote down vote up
/**
 * 获取下载网址列表
 * @return
 */
public static JSONArray getDetailList(String url) {
	String result = HtHttpUtil.http.get(baseUrl+url, HtHttpUtil.http.default_charset, HtHttpUtil.http._ua, baseUrl+url);
	Document doc = Jsoup.parse(result);
	Elements matchList = doc.select(".d_table tr");
	//System.out.println(matchList.html());
	JSONArray detailList = new JSONArray();
	for (Element matchRow : matchList) {
		if(matchRow.select(".dt_edition").size() == 0)continue;
		String html = matchRow.html();
		String htmlLower = html.toLowerCase();
		String downUrl = matchRow.select(".dt_down a").attr("href");
		String title = matchRow.select(".dt_edition a").text().trim();
		int downCount = Integer.valueOf(RegexUtil.getMatchStr(matchRow.select(".dt_count").text(), "([\\d]+)"));
		String ext = "";
		for(String extName : AppConfig.subExtNames) {
			//if(StrUtil.isNotEmpty(RegexUtil.getMatchStr(html, "(>"+extName+"<)", Pattern.CASE_INSENSITIVE))) {
			if(htmlLower.contains(">"+extName+"<")) {
				ext += extName;
				ext += ",";
			}
		}
		if(ext.endsWith(",")) {
			ext=ext.substring(0, ext.length()-1);
		}else {
			ext="其它";
		}
		
		String lang = "";
		String[] langList = new String[] {"双语", "简体", "繁体", "英文"};
		for(String langName : langList) {
			if(htmlLower.contains(">"+langName+"<")) {
				lang += langName;
				lang += ",";
			}
		}
		if(lang.endsWith(",")) {
			lang=lang.substring(0, lang.length()-1);
		}else {
			lang="其它";
		}
		
		Elements labels = matchRow.select(".label");
		StringBuffer labelInfo = new StringBuffer();
		labels.forEach(element ->{
			labelInfo.append(element.text() + ",");
		});
		if(labelInfo.length() > 0) {
			labelInfo.delete(labelInfo.length()-1, labelInfo.length());
		}
		String zimuzu = matchRow.select("a.gray").text();
		
		JSONObject dataRow = new JSONObject();
		dataRow.put("url", downUrl);
		dataRow.put("title", title);
		dataRow.put("ext", ext);
		dataRow.put("lang",lang);
		dataRow.put("rate", "-");
		dataRow.put("downCount", downCount);
		dataRow.put("labelInfo", labelInfo);
		dataRow.put("zimuzu", zimuzu);
		detailList.add(dataRow);
	}
	return detailList;
}
 
Example 11
Source File: SubHDCommon.java    From SubTitleSearcher with Apache License 2.0 4 votes vote down vote up
/**
 * 获取下载网址列表
 * @return
 */
public static JSONArray getDetailList(String url) {
	String result = HtHttpUtil.http.get(baseUrl+url, HtHttpUtil.http.default_charset, HtHttpUtil.http._ua, baseUrl+url);
	Document doc = Jsoup.parse(result);
	Elements matchList = doc.select(".d_table tr");
	//System.out.println(matchList.html());
	JSONArray detailList = new JSONArray();
	for (Element matchRow : matchList) {
		if(matchRow.select(".dt_edition").size() == 0)continue;
		String html = matchRow.html();
		String htmlLower = html.toLowerCase();
		String downUrl = matchRow.select(".dt_down a").attr("href");
		String title = matchRow.select(".dt_edition a").text().trim();
		int downCount = Integer.valueOf(RegexUtil.getMatchStr(matchRow.select(".dt_count").text(), "([\\d]+)"));
		String ext = "";
		for(String extName : AppConfig.subExtNames) {
			//if(StrUtil.isNotEmpty(RegexUtil.getMatchStr(html, "(>"+extName+"<)", Pattern.CASE_INSENSITIVE))) {
			if(htmlLower.contains(">"+extName+"<")) {
				ext += extName;
				ext += ",";
			}
		}
		if(ext.endsWith(",")) {
			ext=ext.substring(0, ext.length()-1);
		}else {
			ext="其它";
		}
		
		String lang = "";
		String[] langList = new String[] {"双语", "简体", "繁体", "英文"};
		for(String langName : langList) {
			if(htmlLower.contains(">"+langName+"<")) {
				lang += langName;
				lang += ",";
			}
		}
		if(lang.endsWith(",")) {
			lang=lang.substring(0, lang.length()-1);
		}else {
			lang="其它";
		}
		
		Elements labels = matchRow.select(".label");
		StringBuffer labelInfo = new StringBuffer();
		labels.forEach(element ->{
			labelInfo.append(element.text() + ",");
		});
		if(labelInfo.length() > 0) {
			labelInfo.delete(labelInfo.length()-1, labelInfo.length());
		}
		String zimuzu = matchRow.select("a.gray").text();
		
		JSONObject dataRow = new JSONObject();
		dataRow.put("url", downUrl);
		dataRow.put("title", title);
		dataRow.put("ext", ext);
		dataRow.put("lang",lang);
		dataRow.put("rate", "-");
		dataRow.put("downCount", downCount);
		dataRow.put("labelInfo", labelInfo);
		dataRow.put("zimuzu", zimuzu);
		detailList.add(dataRow);
	}
	return detailList;
}
 
Example 12
Source File: EchoMTGDashBoard.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
@Override
protected EditionsShakers getOnlineShakesForEdition(MagicEdition ed) throws IOException {
	
	EditionsShakers variations = new EditionsShakers();
	variations.setDate(new Date());
	variations.setEdition(ed);
	variations.setProviderName(getName());
	
	Document d = RequestBuilder.build().method(METHOD.GET).setClient(client)
	 .url(EchoMTGExport.BASE_URL+"/set/"+ed.getId().toUpperCase()+"/"+ed.getSet().replace(" ", "-").toLowerCase()+"/")
	 .addHeader(URLTools.HOST, WEBSITE)
	 .addHeader(URLTools.REFERER, EchoMTGExport.BASE_URL)
	 .toHtml();
	
	
	Elements trs = d.select("table#set-table tr");
	trs.remove(trs.first());
	trs.remove(trs.last());
	
	trs.forEach(tr->{
		
		Elements tds = tr.getElementsByTag("td");
		CardShake cs = new CardShake();
				  cs.setEd(ed.getId());
				  cs.setName(tds.get(2).getElementsByTag("a").first().text());
			  
				  double price =Double.parseDouble(tds.get(4).getElementsByTag("a").first().attr("data-price"));
				  double lastWeekPrice = price;
				  
				  if(!tds.get(3).text().isEmpty())
				  {
					  double pc = Double.parseDouble(tds.get(3).text().replace("%",""))/100;
					  lastWeekPrice = price - (lastWeekPrice*pc);
				  }
				  cs.init(price, price, lastWeekPrice);
				  
				  
				  
				  
				  cs.setCurrency(getCurrency());
		variations.addShake(cs);
	});
	return variations;
}
 
Example 13
Source File: EuropeanGrader.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
@Override
public Grading loadGrading(String identifier) throws IOException {
	
	String url=getWebSite()+"/en/card-verifier.html";
	
	
	Document d = RequestBuilder.build().method(METHOD.GET)
									   .setClient(URLTools.newClient())
									   .url(url)
									   .addContent("certificate",identifier).toHtml();
	
	Elements trs = d.select("table.center tr");
	
	if(trs.isEmpty())
		return null;
	
	
	Grading grad = new Grading();
			grad.setGraderName(getName());
			grad.setNumberID(identifier);
			grad.setUrlInfo(url+"?certificate="+identifier);
			
	logger.debug("Found " + trs.text());
	
	trs.forEach(tr->{
		
		if(tr.text().startsWith("Centring"))
			grad.setCentering(Double.parseDouble(tr.text().replace("Centring grade : ","").replace(',', '.').trim()));
		
		if(tr.text().startsWith("Corner"))
			grad.setCorners(Double.parseDouble(tr.text().replace("Corner grade : ","").replace(',', '.').trim()));
		
		if(tr.text().startsWith("Edges"))
			grad.setEdges(Double.parseDouble(tr.text().replace("Edges grade : ","").replace(',', '.').trim()));
		
		if(tr.text().startsWith("Surface"))
			grad.setSurface(Double.parseDouble(tr.text().replace("Surface grade : ","").replace(',', '.').trim()));
		
		if(tr.text().startsWith("Final"))
			grad.setGradeNote(Double.parseDouble(tr.text().replace("Final grade : ","").replace(',', '.').trim()));
		
		if(tr.text().startsWith("Grading date"))
		{
			try {
				grad.setGradeDate(new SimpleDateFormat("dd/MM/yyyy").parse(tr.text().replace("Grading date : ","").replace(',', '.').trim()));
			} catch (ParseException e) {
				logger.error(e);
			}
		}
			
		
	});
	return grad;
}
 
Example 14
Source File: BeckettGrader.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
@Override
public Grading loadGrading(String identifier) throws IOException {
	
	URLToolsClient c = URLTools.newClient();
	
	String urlLogin = getWebSite()+"/login?utm_content=bkthp&utm_term=login";
	String urlCheking = getWebSite()+"/grading/card-lookup";
	
	Document d = RequestBuilder.build().url(urlLogin).setClient(c).method(METHOD.GET).toHtml();
	String token = d.select("input[name='login_token']").first().attr("value");
	
	
		d=RequestBuilder.build().url(urlLogin).setClient(c).method(METHOD.POST)
					  .addContent("redirect_url", getWebSite()+"/account")
					  .addContent("login_token", token)
					  .addContent("email",getString("EMAIL"))
					  .addContent("password", getString("PASS"))
					  .toHtml();
		
	boolean	connected = !d.getElementsByTag("title").html().equalsIgnoreCase("Member Login");

		
	if(!connected)
		throw new IOException("Error when login to website");
	
	
		d=RequestBuilder.build().url(urlCheking).setClient(c).method(METHOD.GET)
				.addContent("item_type", "BGS")
				.addContent("item_id", identifier)
				 .toHtml();
		
		Element table = d.select("table.cardDetail").first();
		
		if(table==null)
			return null;
			
			
			
		Elements trs=table.select("tr");
		Grading grad = new Grading();
		grad.setGraderName(getName());
		grad.setNumberID(identifier);
		grad.setUrlInfo(getWebSite()+"?item_id="+identifier);
		
		trs.forEach(tr->{
			if(tr.text().startsWith("Centering"))
				grad.setCentering(Double.parseDouble(tr.text().replace("Centering Grade : ","").trim()));
			
			if(tr.text().startsWith("Corner"))
				grad.setCorners(Double.parseDouble(tr.text().replace("Corner Grade : ","").trim()));
			
			if(tr.text().startsWith("Edges"))
				grad.setEdges(Double.parseDouble(tr.text().replace("Edges Grade : ","").trim()));
			
			if(tr.text().startsWith("Surfaces"))
				grad.setSurface(Double.parseDouble(tr.text().replace("Surfaces Grade : ","").trim()));

			if(tr.text().startsWith("Final"))
				grad.setGradeNote(Double.parseDouble(tr.text().replace("Final Grade : ","").trim()));
			
			if(tr.text().startsWith("Date"))
			{
				try {
					grad.setGradeDate(new SimpleDateFormat("EEEEE, MMMMM dd, yyyy",Locale.US).parse(tr.text().replace("Date Graded : ","").trim()));
				}
				catch(ParseException e)
				{
					logger.error(e);
				}
			}
			
			
		});
	return grad;
}