Java Code Examples for org.jsoup.select.Elements#get()

The following examples show how to use org.jsoup.select.Elements#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ResourceQuote.java    From templatespider with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites the resource references of every element with the given tag so that they
 * become absolute paths.
 *
 * @param doc the parsed page; its elements are updated in place
 * @param tagName name of the tag to process, e.g. img or script
 * @param tagProperty attribute of that tag holding the resource reference, e.g. src
 * @return the same Document, after replacement
 */
public Document tagReplace(Document doc, String tagName, String tagProperty){
	for (Element element : doc.getElementsByTag(tagName)) {
		String reference = element.attr(tagProperty);
		// Very short values and values that already contain the base URI are left untouched.
		if (reference.length() <= 3 || reference.contains(baseUri)) {
			continue;
		}
		String resolved = hierarchyReplace(this.baseUri, reference);
		boolean changed = !reference.equals(resolved);
		boolean hasSchemeSeparator = reference.contains("://");
		if (changed && !hasSchemeSeparator) {
			// Only rewrite when resolution produced a different value and the original is not absolute.
			element.attr(tagProperty, resolved);
		}
	}
	return doc;
}
 
Example 2
Source File: SearchThread.java    From tv-search with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
/**
 * Requests {@code Const.REQUESTURL + keyWord} and stores the markup of the first
 * {@code div.main_content} element in {@code data}. {@code Const.data} is refreshed from
 * {@code data} on every exit path.
 */
@Override
public void run() {
	try {
		if (keyWord == null) {
			return;
		}
		String requestUrl = Const.REQUESTURL + keyWord;
		Document doc = Jsoup.connect(requestUrl).get();
		// first() yields null when nothing matches, so an empty result is handled
		// without relying on an IndexOutOfBoundsException being caught below.
		org.jsoup.nodes.Element content = doc.select("div.main_content").first();
		if (content != null) {
			data = content.toString();
		}
	} catch (Exception e) {
		e.printStackTrace();
	} finally {
		// Also runs for the early return above.
		Const.data = data;
	}
}
 
Example 3
Source File: TestPutHTMLElement.java    From localization_nifi with Apache License 2.0 6 votes vote down vote up
/**
 * Runs the processor with the "body" selector in prepend mode and checks that the first
 * paragraph directly under body in the success output holds the inserted text.
 */
@Test
public void testAddNewElementToRoot() throws Exception {
    final String insertedMarkup = "<p>modified value</p>";
    testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "body");
    testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.PREPEND_ELEMENT);
    testRunner.setProperty(PutHTMLElement.PUT_VALUE, insertedMarkup);

    testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
    testRunner.run();

    // One flow file each on success and original; none on invalid-HTML or not-found.
    testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
    testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0);
    testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
    testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);

    List<MockFlowFile> successFiles = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS);
    assertTrue(successFiles.size() == 1);
    String html = new String(testRunner.getContentAsByteArray(successFiles.get(0)));

    // The flow file holds the entire HTML document, so parse it again to pick out the target element.
    Element firstParagraph = Jsoup.parse(html).select("body > p").get(0);

    String expectedText = insertedMarkup.replace("<p>", "").replace("</p>", "");
    assertTrue(StringUtils.equals(expectedText, firstParagraph.html()));
}
 
Example 4
Source File: SearchUtils.java    From emotional_analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Gets the name of a song by requesting its page on music.163.com.
 * <p>
 * The name is taken from the first child node of the first element with class
 * {@code f-ff2}. When that element is missing, or has no child nodes, a fixed
 * "not found" message is returned instead.
 *
 * @param songId id of the song, appended to the request URL
 * @return the string form of the first child node, or the fallback message
 * @throws Exception if the request or the parsing of the response fails
 */
public static String getSongNameById(long songId) throws Exception{
	Response execute = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache").timeout(2000000000)
			.execute();
	Document parse = execute.parse();
	Element titleElement = parse.getElementsByClass("f-ff2").first();
	// childNode(0) throws on an element without child nodes, so treat that case
	// the same way as a missing element.
	if (titleElement == null || titleElement.childNodeSize() == 0) {
		return "ES中歌曲在网易云音乐中找不到";
	}
	return titleElement.childNode(0).toString();
}
 
Example 5
Source File: IPSpider.java    From HttpProxy with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Downloads the page at {@code api + index}, reads the rows of its first table and
 * appends one IPBean per usable data row to {@code ipList}.
 * <p>
 * The first row is skipped as a header. Rows with fewer than six cells or with a
 * non-numeric port are skipped instead of aborting the whole page.
 *
 * @param api base address of the listing
 * @param index page index appended to {@code api}
 * @return {@code ipList}, including the entries added by this call
 */
private List<IPBean> crawl(String api, int index){
    String html = HttpUtils.getResponseContent(api + index);
    System.out.println(html);

    Document document = Jsoup.parse(html);
    Element table = document.selectFirst("table");
    if (table == null) {
        // No table on the page: nothing to add.
        return ipList;
    }

    Elements rows = table.select("tr");
    for (int i = 1; i < rows.size(); i++){
        Elements cells = rows.get(i).children();
        if (cells.size() < 6) {
            continue;
        }

        int port;
        try {
            port = Integer.parseInt(cells.get(2).text().trim());
        } catch (NumberFormatException ignored) {
            // Malformed port cell: skip this row only.
            continue;
        }
        String ip = cells.get(1).text();
        String typeStr = cells.get(5).text().trim();

        // Anything that is not HTTP is treated as HTTPS.
        int type = "HTTP".equalsIgnoreCase(typeStr) ? IPBean.TYPE_HTTP : IPBean.TYPE_HTTPS;

        ipList.add(new IPBean(ip, port, type));
    }
    return ipList;
}
 
Example 6
Source File: ZIMuKuCommon.java    From SubTitleSearcher with Apache License 2.0 5 votes vote down vote up
/**
 * Gets the list of download entries from the page at {@code baseUrl + url}.
 * <p>
 * One JSON object is produced per row matched by {@code #subtb tbody tr}.
 *
 * @param url path appended to {@code baseUrl}
 * @return the collected rows; an empty array when no row matches
 */
public static JSONArray getDetailList(String url) {
	Document page = Jsoup.parse(httpGet(baseUrl+url));
	Elements rows = page.select("#subtb tbody tr");
	if (rows.isEmpty()) {
		return new JSONArray();
	}

	JSONArray details = new JSONArray();
	for (Element row : rows) {
		JSONObject detail = new JSONObject();
		detail.put("url", row.selectFirst("a").attr("href"));
		detail.put("title", row.selectFirst("a").attr("title"));
		detail.put("ext", row.selectFirst(".label-info").text());

		// Comma-join the text of every ".gray" element: append a separator after
		// each one, then drop the trailing separator.
		StringBuilder authors = new StringBuilder();
		for (Element author : row.select(".gray")) {
			authors.append(author.text()).append(',');
		}
		if (authors.length() > 0) {
			authors.setLength(authors.length() - 1);
		}
		detail.put("authorInfo", authors.toString());

		detail.put("lang", row.selectFirst("img").attr("alt"));
		String rating = row.selectFirst(".rating-star").attr("title");
		detail.put("rate", rating.replace("字幕质量:", ""));
		detail.put("downCount", row.select("td").get(3).text());
		details.add(detail);
	}
	return details;
}
 
Example 7
Source File: Handian.java    From ankihelper with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Looks up {@code key} at {@code wordUrl + key} and wraps the first element matching
 * {@code div.cdnr, div.tagContent} as a Definition.
 *
 * @param key the word to look up
 * @return a list with at most one Definition; empty when nothing matches or the
 *         request fails with an IOException
 */
public List<Definition> wordLookup(String key) {
    try {
        Request request = new Request.Builder()
                .url(wordUrl + key)
                .header("User-Agent", Constant.UA)
                .build();
        String rawHtml = MyApplication.getOkHttpClient().newCall(request).execute().body().string();
        Elements entries = Jsoup.parse(rawHtml).select("div.cdnr, div.tagContent");

        ArrayList<Definition> results = new ArrayList<>();
        if (entries.isEmpty()) {
            return results;
        }

        // Root-relative image sources are pointed at the site host and escaped
        // ampersands are unescaped.
        String markup = entries.get(0).toString()
                .replace("<img src=\"/", "<img src=\"http://www.zdic.net/")
                .replace("&amp;", "&");

        HashMap<String, String> fields = new HashMap<>();
        fields.put(EXP_ELE[0], key);
        fields.put(EXP_ELE[1], markup);
        results.add(new Definition(fields, markup));
        return results;
    } catch (IOException ioe) {
        Log.d("time out", Log.getStackTraceString(ioe));
        return new ArrayList<Definition>();
    }
}
 
Example 8
Source File: JsoupUtil.java    From materialup with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the upvoters listed in a response.
 * <p>
 * Elements matching {@code .post__upvoters .post__upvoter} are visited from last to
 * first, so the result is in reverse document order. Entries whose link has no
 * {@code href} are skipped.
 *
 * @param mu response holding the status and the HTML content
 * @return the users found; empty when the status is not OK or nothing matches
 */
public static List<User> getUpvoters(MuResponse mu) {
    List<User> users = new ArrayList<>();
    if (!OK.equalsIgnoreCase(mu.status)) {
        return users;
    }

    // select() never returns null; an empty result simply skips the loop.
    final Elements upvoters = Jsoup.parse(mu.content).select(".post__upvoters .post__upvoter");
    for (int i = upvoters.size() - 1; i >= 0; i--) {
        Element upvoter = upvoters.get(i);
        String path = attr(upvoter.select("a").first(), "href");
        if (TextUtils.isEmpty(path)) {
            continue;
        }
        String avatar = attr(upvoter.select("img").first(), "src");

        User user = new User();
        user.setAvatarUrl(avatar);
        user.setPath(path);
        users.add(user);
    }

    return users;
}
 
Example 9
Source File: YoupornRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Collects the {@code src} of the first {@code video} element on the page.
 *
 * @param doc the parsed page
 * @return a list with that single value, or an empty list when the page has no
 *         {@code video} element
 */
@Override
public List<String> getURLsFromPage(Document doc) {
    List<String> results = new ArrayList<>();
    // first() is null on an empty selection; get(0) would throw instead.
    Element video = doc.select("video").first();
    if (video != null) {
        results.add(video.attr("src"));
    }
    return results;
}
 
Example 10
Source File: JsoupUtils.java    From EhViewer with Apache License 2.0 5 votes vote down vote up
/**
 * Returns the first entry of {@code element.getElementsByTag(tagName)}, or
 * {@code null} when there is no such entry.
 */
@Nullable
public static Element getElementByTag(Element element, String tagName) {
    Elements matches = element.getElementsByTag(tagName);
    boolean noMatch = matches == null || matches.size() == 0;
    return noMatch ? null : matches.get(0);
}
 
Example 11
Source File: FileReader.java    From calcite with Apache License 2.0 5 votes vote down vote up
/**
 * Resolves the table element addressed by {@code selector}.
 * <p>
 * Without a configured index the selector must match exactly one element. With an
 * index, the element at that position of the selection is used.
 *
 * @param doc the parsed document
 * @param selector selector identifying the candidate element(s)
 * @return the selected {@code table} element
 * @throws FileReaderException if the selection does not have exactly one element
 *         (no index), the index is outside the selection, or the chosen element
 *         is not a table
 */
private Element getSelectedTable(Document doc, String selector)
    throws FileReaderException {
  // get selected elements
  Elements list = doc.select(selector);

  // get the element
  Element el;

  if (this.index == null) {
    if (list.size() != 1) {
      throw new FileReaderException("" + list.size()
          + " HTML element(s) selected");
    }

    el = list.first();
  } else {
    // Report a bad index through the declared exception type rather than
    // letting an IndexOutOfBoundsException escape from get().
    int position = this.index;
    if (position < 0 || position >= list.size()) {
      throw new FileReaderException("index " + position + " is out of range: "
          + list.size() + " HTML element(s) selected");
    }
    el = list.get(position);
  }

  // verify element is a table
  if (el.tag().getName().equals("table")) {
    return el;
  } else {
    throw new FileReaderException("selected (" + selector + ") element is a "
        + el.tag().getName() + ", not a table");
  }
}
 
Example 12
Source File: ZIMuKuCommon.java    From SubTitleSearcher with Apache License 2.0 5 votes vote down vote up
/**
 * Gets the list of download entries from the page at {@code baseUrl + url}.
 * <p>
 * One JSON object is produced per row matched by {@code #subtb tbody tr}.
 *
 * @param url path appended to {@code baseUrl}
 * @return the collected rows; an empty array when no row matches
 */
public static JSONArray getDetailList(String url) {
	Document page = Jsoup.parse(httpGet(baseUrl+url));
	Elements rows = page.select("#subtb tbody tr");
	if (rows.isEmpty()) {
		return new JSONArray();
	}

	JSONArray details = new JSONArray();
	for (Element row : rows) {
		JSONObject detail = new JSONObject();
		detail.put("url", row.selectFirst("a").attr("href"));
		detail.put("title", row.selectFirst("a").attr("title"));
		detail.put("ext", row.selectFirst(".label-info").text());

		// Comma-join the text of every ".gray" element: append a separator after
		// each one, then drop the trailing separator.
		StringBuilder authors = new StringBuilder();
		for (Element author : row.select(".gray")) {
			authors.append(author.text()).append(',');
		}
		if (authors.length() > 0) {
			authors.setLength(authors.length() - 1);
		}
		detail.put("authorInfo", authors.toString());

		detail.put("lang", row.selectFirst("img").attr("alt"));
		String rating = row.selectFirst(".rating-star").attr("title");
		detail.put("rate", rating.replace("字幕质量:", ""));
		detail.put("downCount", row.select("td").get(3).text());
		details.add(detail);
	}
	return details;
}
 
Example 13
Source File: WordToHtmlRenditionProviderTest.java    From spring-content with Apache License 2.0 5 votes vote down vote up
/**
 * Converts the sample .docx resource to "text/html" and checks that the result parses
 * to a document with exactly one HTML element.
 */
@Test
public void testConvert() throws Exception {
	// Both the source stream and the converted stream are closed when the test ends.
	try (InputStream source = this.getClass().getResourceAsStream("/sample-docx.docx");
			InputStream converted = service.convert(source, "text/html")) {

		Document doc = Jsoup.parse(converted, "UTF8", "http://example.com");
		Elements htmls = doc.getElementsByTag("HTML");
		assertThat(htmls.size(), is(1));
		Element html = htmls.get(0);
		assertThat(html, is(not(nullValue())));
	}
}
 
Example 14
Source File: TestPutHTMLElement.java    From localization_nifi with Apache License 2.0 5 votes vote down vote up
/**
 * Runs the processor with the "#put" selector in append mode and checks the inner HTML
 * of the "#put" element in the success output.
 */
@Test
public void testAppendPElementToDiv() throws Exception {
    final String appendedMarkup = "<p>modified value</p>";
    testRunner.setProperty(PutHTMLElement.CSS_SELECTOR, "#put");
    testRunner.setProperty(PutHTMLElement.PUT_LOCATION_TYPE, PutHTMLElement.APPEND_ELEMENT);
    testRunner.setProperty(PutHTMLElement.PUT_VALUE, appendedMarkup);

    testRunner.enqueue(new File("src/test/resources/Weather.html").toPath());
    testRunner.run();

    // One flow file each on success and original; none on invalid-HTML or not-found.
    testRunner.assertTransferCount(PutHTMLElement.REL_SUCCESS, 1);
    testRunner.assertTransferCount(PutHTMLElement.REL_INVALID_HTML, 0);
    testRunner.assertTransferCount(PutHTMLElement.REL_ORIGINAL, 1);
    testRunner.assertTransferCount(PutHTMLElement.REL_NOT_FOUND, 0);

    List<MockFlowFile> successFiles = testRunner.getFlowFilesForRelationship(PutHTMLElement.REL_SUCCESS);
    assertTrue(successFiles.size() == 1);
    String html = new String(testRunner.getContentAsByteArray(successFiles.get(0)));

    // The flow file holds the entire HTML document, so parse it again to pick out the target element.
    Element target = Jsoup.parse(html).select("#put").get(0);

    String expectedHtml = "<a href=\"httpd://localhost\"></a> \n" +
            "<p>modified value</p>";
    assertTrue(StringUtils.equals(expectedHtml, target.html()));
}
 
Example 15
Source File: MagicVillePricer.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Looks up the prices offered for a card.
 * <p>
 * The card name is posted to the search page. A {@code ref=...} fragment is cut out of
 * the response and used to build the sale page URL. Prices are read from every second
 * row, starting at row 3, of the third {@code table[width=98%]} on that page.
 *
 * @param me the edition (not read by this method)
 * @param card the card whose name is searched
 * @return the prices found, truncated to {@code MAX} entries when {@code MAX} is
 *         greater than -1; empty when the reference or the seller table is missing
 * @throws IOException if a request fails
 */
public List<MagicPrice> getLocalePrice(MagicEdition me, MagicCard card) throws IOException {
	List<MagicPrice> list = new ArrayList<>();

	String res = httpclient.doPost(getString(WEBSITE)+"/fr/resultats.php?zbob=1", httpclient.buildMap().put("recherche_titre", card.getName()).build(), null);
	if(res.length()>100)
	{
		logger.error("too much result");
		return list;
	}

	// Locate the reference fragment; the closing quote is searched for after the key
	// so an earlier occurrence cannot produce an invalid range.
	String key = "ref=";
	int start = res.indexOf(key);
	int end = start < 0 ? -1 : res.indexOf("\";", start);
	if (start < 0 || end < 0) {
		logger.error(getName() + " no card reference found in response");
		return list;
	}
	String code = res.substring(start, end);
	String url = getString(WEBSITE)+"/fr/register/show_card_sale?"+code;

	logger.info(getName() + " looking for prices " + url);

	Document doc =URLTools.extractHtml(url);

	// The seller listing is the third matching table.
	Elements tables = doc.select("table[width=98%]");
	if (tables.size() < 3) {
		logger.info(getName() + " no sellers");
		return list;
	}
	Element table = tables.get(2);

	Elements rows = table.select(MTGConstants.HTML_TAG_TR);

	for (int i = 3; i < rows.size(); i = i + 2) {
		Elements cols = rows.get(i).getElementsByTag(MTGConstants.HTML_TAG_TD);
		if (cols.size() < 5) {
			// Not a seller row: the cells read below are not all present.
			continue;
		}

		String price = cols.get(4).text();
		if (price.isEmpty()) {
			continue;
		}
		// Drop the trailing character of the price cell before parsing.
		price = price.substring(0, price.length() - 1);

		MagicPrice mp = new MagicPrice();
		mp.setValue(Double.parseDouble(price));
		mp.setCurrency("EUR");
		mp.setSeller(cols.get(0).text());
		mp.setSite(getName());
		mp.setUrl(url);
		mp.setQuality(cols.get(2).text());
		mp.setLanguage(cols.get(1).getElementsByTag("span").text());
		mp.setCountry("France");
		mp.setFoil(mp.getLanguage().toLowerCase().contains("foil"));

		list.add(mp);
	}

	logger.info(getName() + " found " + list.size() + " item(s) return " + getString(MAX) + " items");

	int max = getInt(MAX);
	if (list.size() > max && max > -1)
		return list.subList(0, max);

	return list;
}
 
Example 16
Source File: HiParser.java    From hipda with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Builds a SimpleListBean from a favorites page.
 * <p>
 * The max page is the largest number found in the {@code div.pages} links and strong
 * elements (at least 1). One item is added per {@code table.datatable tbody tr} row
 * that has a {@code th} containing a link.
 *
 * @param doc the parsed page, may be {@code null}
 * @return the list, or {@code null} when {@code doc} is {@code null}
 */
private static SimpleListBean parseFavorites(Document doc) {
    if (doc == null) {
        return null;
    }

    SimpleListBean list = new SimpleListBean();

    // On the last page the page number is in <strong> rather than a link, so both are scanned.
    Elements pageMarkers = doc.select("div.pages a");
    pageMarkers.addAll(doc.select("div.pages strong"));
    int maxPage = 1;
    for (Element marker : pageMarkers) {
        maxPage = Math.max(maxPage, Utils.getIntFromString(marker.text()));
    }
    list.setMaxPage(maxPage);

    for (Element row : doc.select("table.datatable tbody tr")) {
        SimpleListItemBean item = new SimpleListItemBean();

        Element subject = row.select("th").first();
        if (subject == null) {
            continue;
        }
        item.setTitle(subject.text());

        Element subjectLink = subject.select("a").first();
        if (subjectLink == null) {
            continue;
        }
        item.setTid(Utils.getMiddleString(subjectLink.attr("href"), "tid=", "&"));

        Element lastPost = row.select("td.lastpost").first();
        if (lastPost != null) {
            item.setTime(lastPost.text().trim());
        }

        Element forum = row.select("td.forum").first();
        if (forum != null) {
            item.setForum(forum.text().trim());
        }

        list.add(item);
    }

    return list;
}
 
Example 17
Source File: MagicBazarShopper.java    From MtgDesktopCompanion with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Converts the rows of an order page into order entries.
 * <p>
 * The first {@code div.table div.tr} row is skipped. Every other row with a non-empty
 * {@code div.td.name} text becomes one entry. Rows with class {@code filterElement}
 * are cards; for the others the item type is derived from the description text.
 * Each entry is passed to {@code notify} before being added to the result.
 *
 * @param doc the parsed order page
 * @param id transaction id stored on every entry
 * @param date transaction date stored on every entry
 * @return the entries built from the page; empty when there are no rows
 */
private List<OrderEntry> parse(Document doc, String id, Date date) {
	List<OrderEntry> entries = new ArrayList<>();
	Elements table = doc.select("div.table div.tr");
	Currency euro = Currency.getInstance("EUR");

	// Start at index 1 to skip the first row. Removing it from the selection would
	// throw on an empty selection (and, in recent jsoup versions, also detach it
	// from the document).
	for(int i=1;i<table.size();i++)
	{
		Element e = table.get(i);
		String name = e.select("div.td.name").text();
		if(name.isEmpty())
		{
			continue;
		}
		boolean iscard=e.hasClass("filterElement");

		OrderEntry entrie = new OrderEntry();
		entrie.setIdTransation(id);
		entrie.setSource(getName());
		entrie.setCurrency(euro);
		entrie.setSeller(getName());
		entrie.setTypeTransaction(TYPE_TRANSACTION.BUY);
		entrie.setTransactionDate(date);
		entrie.setDescription(name);
		if(iscard)
		{
			entrie.setType(TYPE_ITEM.CARD);
			entrie.setDescription(e.select("div.td.name.name_mobile").text());
			entrie.setItemPrice(UITools.parseDouble(e.attr("attribute_price")));
			String set = e.select("div.td.ext img").attr("title");
			try {
				entrie.setEdition(MTGControler.getInstance().getEnabled(MTGCardsProvider.class).getSetByName(set));
			}
			catch(Exception ex)
			{
				// Best effort: the entry is kept without an edition.
				logger.error(set + " is not found", ex);
			}
		}
		else
		{
			// Strip the "&nbsp;<currency symbol>" suffix (literal match) before parsing.
			String price =e.select("div.new_price").html().replace("&nbsp;"+euro.getSymbol(), "").trim();
			entrie.setItemPrice(UITools.parseDouble(price));

			String description = entrie.getDescription();
			String lowered = description.toLowerCase();
			if(description.contains("Set")||lowered.contains("collection"))
				entrie.setType(TYPE_ITEM.FULLSET);
			else if(lowered.contains("booster"))
				entrie.setType(TYPE_ITEM.BOOSTER);
			else if(lowered.startsWith("boite de") || description.contains("Display") )
				entrie.setType(TYPE_ITEM.BOX);
			else
				entrie.setType(TYPE_ITEM.LOTS);
		}
		notify(entrie);
		entries.add(entrie);
	}

	return entries;
}
 
Example 18
Source File: JsoupText.java    From MD with Apache License 2.0 4 votes vote down vote up
/**
 * Loads the page at {@code urls} and prints the {@code value} of every option in the
 * first select of the first element with class {@code page}.
 * <p>
 * Prints nothing when the page has no such element or no select inside it.
 *
 * @param str command-line arguments (unused)
 * @throws IOException if the page cannot be fetched
 */
public static void main(String[] str) throws IOException {
    Document doc = Jsoup.parse(new URL(urls), 5000);

    // Page numbers: first() returns null on an empty selection, so each step is checked.
    Element pager = doc.getElementsByClass("page").first();
    if (pager == null) {
        return;
    }
    Element pageSelect = pager.getElementsByTag("select").first();
    if (pageSelect == null) {
        return;
    }

    for (Element option : pageSelect.getElementsByTag("option")) {
        System.out.println(option.attr("value"));
    }
}
 
Example 19
Source File: ParseProxy.java    From v9porn with MIT License 4 votes vote down vote up
/**
 * Parses a proxy listing page into proxy models.
 * <p>
 * Rows are read from the element with id {@code ip_list}; the first row is the header
 * and is skipped. For page 1 the total page count is read from the second-to-last
 * link of the first {@code pagination} element when it is a number; otherwise the
 * total stays at 1.
 *
 * @param html the page markup
 * @param page the page number being parsed; only page 1 updates the total page count
 * @return the result carrying the parsed proxies (an empty list when the table is
 *         missing) and the total page count
 */
public static BaseResult<List<ProxyModel>> parseXiCiDaiLi(String html, int page) {
    BaseResult<List<ProxyModel>> baseResult = new BaseResult<>();
    baseResult.setTotalPage(1);
    Document doc = Jsoup.parse(html);

    List<ProxyModel> proxyModelList = new ArrayList<>();
    Element ipList = doc.getElementById("ip_list");
    if (ipList != null) {
        Elements trs = ipList.select("tr");
        // Row 0 is the header, so start at 1.
        for (int i = 1; i < trs.size(); i++) {
            Elements tds = trs.get(i).select("td");
            ProxyModel proxyModel = new ProxyModel();
            for (int j = 0; j < tds.size(); j++) {
                Element td = tds.get(j);
                switch (j) {
                    case 1:
                        // ip
                        proxyModel.setProxyIp(td.text());
                        break;
                    case 2:
                        // port
                        proxyModel.setProxyPort(td.text());
                        break;
                    case 4:
                        // anonymity level
                        proxyModel.setAnonymous(td.text());
                        break;
                    case 5:
                        // type: http, https, anything else is treated as socks
                        String type = td.text();
                        if ("http".equalsIgnoreCase(type)) {
                            proxyModel.setType(ProxyModel.TYPE_HTTP);
                        } else if ("https".equalsIgnoreCase(type)) {
                            proxyModel.setType(ProxyModel.TYPE_HTTPS);
                        } else {
                            proxyModel.setType(ProxyModel.TYPE_SOCKS);
                        }
                        break;
                    case 7:
                        // connection time, held in the title of the first div
                        Element timing = td.select("div").first();
                        if (timing != null) {
                            proxyModel.setResponseTime(timing.attr("title"));
                        }
                        break;
                    default:
                        // Columns 0 (country), 3 (city), 6 (speed), 8 (alive time)
                        // and 9 (verification time) are not used.
                        break;
                }
            }
            proxyModelList.add(proxyModel);
        }
    }
    baseResult.setData(proxyModelList);

    if (page == 1) {
        Element pagination = doc.getElementsByClass("pagination").first();
        if (pagination != null) {
            Elements elements = pagination.select("a");
            if (elements.size() > 3) {
                String totalPageStr = elements.get(elements.size() - 2).text();
                Logger.t(TAG).d(totalPageStr);
                // isDigitsOnly accepts the empty string, which parseInt rejects.
                if (!totalPageStr.isEmpty() && TextUtils.isDigitsOnly(totalPageStr)) {
                    baseResult.setTotalPage(Integer.parseInt(totalPageStr));
                }
            }
        }
    }
    return baseResult;
}
 
Example 20
Source File: FreeSSRCrawlerServiceImpl.java    From ShadowSocks-Share with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the ss entries out of the page content.
 * <p>
 * Every {@code div.text-center} block with at least five {@code h4} elements is read
 * as server, port, password, method and account status, in that order. Only blocks
 * whose status text contains the "normal" marker produce an entity. A failure in one
 * block is logged and does not stop the remaining blocks.
 */
@Override
protected Set<ShadowSocksDetailsEntity> parse(Document document) {
	Elements blocks = document.select("div.text-center");
	Set<ShadowSocksDetailsEntity> result = new HashSet<>(blocks.size());

	for (int i = 0; i < blocks.size(); i++) {
		try {
			// The h4 elements carry the ss details.
			Elements fields = blocks.get(i).select("h4");
			if (fields.size() < 5) {
				continue;
			}

			String server = StringUtils.remove(fields.get(0).text(), "服务器地址:");
			Assert.hasLength(server, "server 不能为空");

			int serverPort = NumberUtils.toInt(StringUtils.remove(fields.get(1).text(), "端口:"));

			String password = StringUtils.remove(fields.get(2).text(), "密码:");
			Assert.hasLength(password, "password 不能为空");

			String method = StringUtils.remove(fields.get(3).text(), "加密方式:");
			Assert.hasLength(method, "method 不能为空");

			// Account status: only entries marked as normal are kept.
			String status = fields.get(4).text();
			if (!status.contains("正常")) {
				continue;
			}

			ShadowSocksDetailsEntity ss = new ShadowSocksDetailsEntity(server, serverPort, password, method, SS_PROTOCOL, SS_OBFS);
			ss.setValid(false);
			ss.setValidTime(new Date());
			ss.setTitle(document.title());
			ss.setRemarks(TARGET_URL);
			ss.setGroup("ShadowSocks-Share");

			// Network check: mark the entry valid only when it is reachable.
			if (isReachable(ss)) {
				ss.setValid(true);
			}

			// The entry is stored whether or not it is usable.
			result.add(ss);

			log.debug("*************** 第 {} 条 ***************{}{}", i + 1, System.lineSeparator(), ss);
		} catch (Exception e) {
			log.error(e.getMessage(), e);
		}
	}
	return result;
}