Java Code Examples for org.jsoup.Jsoup

The following are the top-voted examples showing how to use org.jsoup.Jsoup. These examples are extracted from open source projects. You can vote up the examples you like, and your votes will be used in our system to generate better examples.
Example 1
Project: bboxapi-voicemail   File: VoiceMailApi.java   View source code 7 votes vote down vote up
/**
 * Requests {@code VOICEMAIL_SERVICE_URI} with the cookies from {@code loadCookies()}
 * and copies the {@code value} attribute of the named inputs (pseudo, voip_num,
 * login, email, uid) into a new {@link UserInfo}.
 *
 * @return the populated user info; its setters are not called when the request
 *         fails with an {@link IOException}
 */
private UserInfo getUserInfo() {
    final UserInfo info = new UserInfo();

    try {
        final Document page = Jsoup.connect(VOICEMAIL_SERVICE_URI)
                .cookies(loadCookies())
                .execute()
                .parse();

        info.setPseudo(firstValueOrEmpty(page.select("input[name=pseudo]")));
        info.setPhoneNumber(firstValueOrEmpty(page.select("input[name=voip_num]")));
        info.setLogin(firstValueOrEmpty(page.select("input[name=login]")));
        info.setEmail(firstValueOrEmpty(page.select("input[name=email]")));
        info.setUid(firstValueOrEmpty(page.select("input[name=uid]")));
    } catch (IOException e) {
        e.printStackTrace();
    }

    return info;
}

/**
 * Returns the {@code value} attribute of the first matched element, or an
 * empty string when nothing matched.
 */
private String firstValueOrEmpty(Elements matches) {
    if (matches.size() > 0) {
        return matches.get(0).attr("value");
    }
    return "";
}
 
Example 2
Project: PicKing   File: Aitaotu.java   View source code 6 votes vote down vote up
/**
 * Parses the downloaded page bytes as UTF-8 HTML and stores one {@link PicInfo}
 * per {@code #big-pic img} element in {@code resultMap}, each carrying the text
 * of the first {@code #photos h1} and the first {@code .tsmaincont-desc span}
 * (empty strings when absent).
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map that receives the results; also the return value
 * @return {@code resultMap}
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    final Document page = Jsoup.parse(new String(result, "utf-8"));

    final Elements headings = page.select("#photos h1");
    final String heading = headings.isEmpty() ? "" : headings.get(0).text();

    final Elements dates = page.select(".tsmaincont-desc span");
    final String date = dates.isEmpty() ? "" : dates.get(0).text();

    final List<PicInfo> pictures = new ArrayList<>();
    for (Element image : page.select("#big-pic img")) {
        final PicInfo picture = new PicInfo(image.attr("src"));
        pictures.add(picture.setTitle(heading).setTime(date));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
Example 3
Project: Shadbot   File: AudioEventListener.java   View source code 6 votes vote down vote up
/**
 * Handles a track failure: increments {@code errorCount}, extracts plain text
 * from the exception message, notifies the channel while the counter is at
 * most 3, logs the failure, and ends the guild music session when the
 * scheduler has no next track.
 */
@Override
public void onTrackException(AudioPlayer player, AudioTrack track, FriendlyException err) {
	errorCount++;

	final String withoutLinkText = StringUtils.remove(err.getMessage(), "Watch on YouTube");
	final String errMessage = Jsoup.parse(withoutLinkText).text().trim();

	if(errorCount <= 3) {
		final String apology = String.format(Emoji.RED_CROSS + " Sorry, %s. I'll try to play the next available song.",
				errMessage.toLowerCase());
		BotUtils.sendMessage(apology, guildMusic.getChannel());

		// The third error is still reported above, then followed by the "ignoring" notice.
		if(errorCount == 3) {
			BotUtils.sendMessage(Emoji.RED_FLAG + " Too many errors in a row, I will ignore them until finding a music that can be played.",
					guildMusic.getChannel());
			LogUtils.infof("{Guild ID: %d} Too many errors in a row. They will be ignored until music can be played.",
					guildMusic.getChannel().getGuild().getLongID());
		}
	}

	final String ignoredTag = errorCount > 3 ? "(Ignored) " : "";
	LogUtils.infof("{Guild ID: %d} %sTrack exception: %s",
			guildMusic.getChannel().getGuild().getLongID(), ignoredTag, errMessage);

	final boolean startedNext = guildMusic.getScheduler().nextTrack();
	if(!startedNext) {
		guildMusic.end();
	}
}
 
Example 4
Project: Slide-RSS   File: ReorderFeeds.java   View source code 6 votes vote down vote up
/**
 * Downloads the page named by the first argument (prefixing {@code http://}
 * when it does not start with "http") and looks for an RSS {@code <link>}.
 *
 * @param strings the first element is the site address
 * @return the absolute {@code href} of the first
 *         {@code link[type=application/rss+xml]} element, or {@code null} when
 *         none exists or the download fails
 */
@Override
protected String doInBackground(String... strings) {
    final String site = strings[0];
    try {
        final String address;
        if (site.startsWith("http")) {
            address = site;
        } else {
            address = "http://" + site;
        }

        final Elements feedLinks = Jsoup.connect(address).get().select("link[type=application/rss+xml]");
        if (feedLinks.isEmpty()) {
            return null;
        }
        return feedLinks.get(0).attr("abs:href");
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
 
Example 5
Project: doubanbook   File: TestSpider.java   View source code 6 votes vote down vote up
/**
 * Downloads and parses the page at {@code url}, sending browser-like request
 * headers.
 *
 * @param url address of the page to fetch
 * @return the parsed document, or {@code null} when the request or parsing
 *         fails (the failure is reported on standard error)
 */
public Document getDocument(String url) {
	try {
		return Jsoup.connect(url)
				// userAgent() sets the User-Agent header itself, so only the value belongs here
				// (the previous value started with a literal "User-Agent:" prefix).
				.userAgent(
						"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36")
				.header("accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
				.header("scheme", "https").header("version", "HTTP/1.1")
				.header("accept-encoding", "gzip, deflate, sdch").header("accept-language", "zh-CN,zh;q=0.8")
				// .header("cookie", "bid=\"Q5KWZL7y8g7\";")
				.header("cache-control", "max-age=0").get();
	} catch (Exception e) {
		// Still best effort (callers get null), but the cause is no longer silently discarded.
		System.err.println("Failed to fetch " + url + ": " + e);
		return null;
	}
}
 
Example 6
Project: Android-Scrapper   File: EspnScoreboardParser.java   View source code 6 votes vote down vote up
/**
 * Loads the scoreboard pages of group {@code i} for day offsets -1, 0 and +1
 * into {@code documentDefault}, {@code document} and {@code documentTomorrow}.
 * A failed request prints its stack trace and leaves the remaining fields
 * unassigned.
 *
 * @param i group number inserted into the scoreboard URL
 */
private void init(int i) {
    try {
        this.documentDefault = Jsoup.connect(groupScoreboardUrl(i, -1))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
        this.document = Jsoup.connect(groupScoreboardUrl(i, 0))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
        this.documentTomorrow = Jsoup.connect(groupScoreboardUrl(i, 1))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Builds the scoreboard URL for a group and a day offset passed to {@code DateUtils.getDatePlus}. */
private String groupScoreboardUrl(int group, int dayOffset) {
    return league.getBaseScoreUrl() + "/scoreboard/_/group/" + group + "/date/" + DateUtils.getDatePlus("yyyyMMdd", dayOffset);
}
 
Example 7
Project: PicKing   File: Aitaotu.java   View source code 6 votes vote down vote up
/**
 * Decodes {@code result} as UTF-8 HTML and records every {@code #big-pic img}
 * source as a {@link PicInfo} tagged with the page title (first
 * {@code #photos h1}) and time (first {@code .tsmaincont-desc span}); both
 * default to an empty string when the element is missing.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map that receives the results; also the return value
 * @return {@code resultMap}
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    final String html = new String(result, "utf-8");
    final Document page = Jsoup.parse(html);

    String pageTitle = "";
    final Elements titleMatches = page.select("#photos h1");
    if (!titleMatches.isEmpty()) {
        pageTitle = titleMatches.get(0).text();
    }

    String pageTime = "";
    final Elements timeMatches = page.select(".tsmaincont-desc span");
    if (!timeMatches.isEmpty()) {
        pageTime = timeMatches.get(0).text();
    }

    final List<PicInfo> found = new ArrayList<>();
    final Elements images = page.select("#big-pic img");
    for (int index = 0; index < images.size(); index++) {
        final String source = images.get(index).attr("src");
        found.add(new PicInfo(source).setTitle(pageTitle).setTime(pageTime));
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, found);
    return resultMap;
}
 
Example 8
Project: ovh-java-sdk   File: ApiOvhUtils.java   View source code 6 votes vote down vote up
/**
 * Creates a new OVH application by posting to https://eu.api.ovh.com/createApp/
 * and logs the Application Key and Application Secret found in the response.
 *
 * @param nic      value sent as the {@code nic} form field
 * @param password value sent as the {@code password} form field
 * @throws IOException if the POST request fails
 */
public void createApplication(String nic, String password) throws IOException {
	final String responseHtml = Jsoup.connect("https://eu.api.ovh.com/createApp/")
			.data("nic", nic)
			.data("password", password)
			.data("applicationName", "One Shoot Token")
			.data("applicationDescription", "One Shoot Token")
			.post()
			.toString();

	final Matcher fields = Pattern.compile(" Application (\\w+)<pre><name>([^<]+)</name></pre>").matcher(responseHtml);
	String appKey = null;
	String appSecret = null;
	while (fields.find()) {
		final String label = fields.group(1);
		final String value = fields.group(2);
		if ("Key".equals(label)) {
			appKey = value;
		} else if ("Secret".equals(label)) {
			appSecret = value;
		}
	}
	log.warn("Key:{} Secret:{}", appKey, appSecret);
}
 
Example 9
Project: ProxyPool   File: Ip66ProxyListPageParser.java   View source code 6 votes vote down vote up
/**
 * Parses an HTML proxy-list page into {@link Proxy} entries.
 *
 * <p>Each row matched by {@code table tr:gt(1)} contributes one proxy built from
 * the text of its first cell (IP) and second cell (port). Rows that lack either
 * cell, or whose port is not an integer, are skipped instead of aborting the
 * whole parse.
 *
 * @param content page HTML; {@code null} or empty yields an empty list
 * @return the proxies found, never {@code null}
 */
@Override
public List<Proxy> parse(String content) {
    List<Proxy> proxyList = new ArrayList<>();
    if (content == null || content.isEmpty()) {
        return proxyList;
    }

    Document document = Jsoup.parse(content);
    for (Element row : document.select("table tr:gt(1)")) {
        Element ipCell = row.select("td:eq(0)").first();
        Element portCell = row.select("td:eq(1)").first();
        if (ipCell == null || portCell == null) {
            // Row without the expected cells (for example a header or spacer row).
            continue;
        }

        Integer port;
        try {
            port = Integer.valueOf(portCell.text().trim());
        } catch (NumberFormatException ignored) {
            // Non-numeric port text: not a usable proxy row.
            continue;
        }
        proxyList.add(new Proxy(ipCell.text(), port, Constant.TIME_INTERVAL));
    }
    return proxyList;
}
 
Example 10
Project: ZhihuQuestionsSpider   File: ParseRegularUtil.java   View source code 6 votes vote down vote up
/**
 * Handles one JSON page of topics: every entry of the {@code msg} array is an
 * HTML fragment whose first {@code div.item a[target]} link is queued as a
 * {@code /newest} GET request; afterwards a POST request for the next page
 * (offset + 20) is queued. An empty {@code msg} array marks the result as
 * skipped and queues nothing.
 *
 * @param page   fetched page whose content is the JSON document
 * @param result receives the follow-up requests
 */
public static void parseZhihuTopics1(Page page, Result result) {
    final JSONArray topics = JSON.parseObject(page.getContent()).getJSONArray("msg");
    if(topics.size()==0) {
        result.setSkip(true);
        return;
    }

    for (int index = 0; index < topics.size(); index++) {
        final Document fragment = Jsoup.parseBodyFragment(topics.getString(index));
        final Element item = fragment.body().select("div.item").first();
        final Element link = item.select("a[target]").first();
        final String newestUrl = "https://www.zhihu.com" + link.attr("href")+"/newest";
        result.addRequest(new Request(newestUrl, HttpMethod.GET));
    }

    final Request nextPage = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
    final JSONObject params = new JSONObject();
    params.put("topic_id", page.getRequest().getAddch("topic_id"));
    params.put("offset", Integer.valueOf(((Integer) page.getRequest().getAddch("offset")) + 20));
    params.put("hash_id", "22e50cd21ed9df7085ff76d62175e986");
    nextPage.addParame("method", "next")
            .addParame("params", params.toJSONString())
            .addAttach("offset", Integer.valueOf(((Integer) page.getRequest().getAddch("offset")) + 20))
            .addAttach("topic_id", page.getRequest().getAddch("topic_id"));
    result.addRequest(nextPage);
}
 
Example 11
Project: PicKing   File: Mntu92.java   View source code 6 votes vote down vote up
/**
 * Decodes {@code result} as UTF-8 HTML and builds a single {@link PicInfo}:
 * its URL is {@code baseUrl} plus the {@code src} of a {@code #bigpic img}
 * element, its title the text of {@code #entry h1}, and its tags the texts of
 * the {@code .postinfo a} links. Title and tags are only set when matched.
 *
 * @param baseUrl    prefix for the image source
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map that receives the results; also the return value
 * @return {@code resultMap}
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    final Document page = Jsoup.parse(new String(result, "utf-8"));
    final PicInfo info = new PicInfo();

    // Each match overwrites the URL, so the last "#bigpic img" element wins.
    for (Element image : page.select("#bigpic img")) {
        info.setPicUrl(baseUrl + image.attr("src"));
    }

    final Elements heading = page.select("#entry h1");
    if (!heading.isEmpty()) {
        info.setTitle(heading.text());
    }

    final Elements tagLinks = page.select(".postinfo a");
    if (!tagLinks.isEmpty()) {
        final List<String> tagNames = new ArrayList<>();
        for (Element tagLink : tagLinks) {
            tagNames.add(tagLink.text());
        }
        info.setTags(tagNames);
    }

    final List<PicInfo> pictures = new ArrayList<>();
    pictures.add(info);

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
Example 12
Project: BackOffice   File: TestDataController.java   View source code 6 votes vote down vote up
/**
 * Reads the HTML stored at {@code body.storage.value} of the JSON content and
 * converts its table rows into maps keyed by dKey, dValue, dComment, dHltValue
 * and dDevValue (the first five cells whose {@code colspan} differs from
 * {@code countColumns}). Rows with no such cells, or with an empty first cell,
 * are skipped; every row's text is logged at debug level.
 *
 * @param content response content whose string form is a JSON document
 * @return one map per accepted row, in document order
 */
private List<Map<String, String>> getParsedData(Content content){
    final JSONObject root = new JSONObject(content.toString());
    final JSONObject bodyNode = (JSONObject) root.get("body");
    final JSONObject storageNode = (JSONObject) bodyNode.get("storage");
    final Elements rows = Jsoup.parse(storageNode.get("value").toString()).select("tr");
    LOG.info("Парсим данные страницы");

    final String[] columnKeys = {"dKey", "dValue", "dComment", "dHltValue", "dDevValue"};
    final List<Map<String,String >> parsed = new ArrayList<>();
    for (Element row : rows){
        final Elements cells = row.select("td[colspan!="+countColumns+"]");
        final boolean hasKeyCell = cells.size()!=0 && !cells.get(0).text().equals("");
        if (hasKeyCell){
            final HashMap<String,String> entry = new HashMap<>();
            for (int column = 0; column < columnKeys.length; column++) {
                entry.put(columnKeys[column], cells.get(column).text());
            }
            parsed.add(entry);
        }
        LOG.debug(row.text());
    }
    return parsed;
}
 
Example 13
Project: android-apps   File: JsoupUtil.java   View source code 6 votes vote down vote up
/**
 * Starts a new thread that downloads {@code url} with the PC user agent and
 * hands the document body to {@code callback}. An {@link IOException} only
 * prints its stack trace; the callback is not invoked in that case.
 *
 * @param url      page to download
 * @param callback receiver of the parsed body element
 */
public static void fetchBody(final String url, final Callback callback) {
  final Runnable download = new Runnable() {
    @Override
    public void run() {
      try {
        Document page = Jsoup.connect(url).userAgent(FormatUtil.USER_AGENT_PC).get();
        callback.output(page.body());
      } catch (IOException e) {
        e.printStackTrace();
      }
    }
  };
  new Thread(download).start();
}
 
Example 14
Project: MyAnimeViewer   File: Parser.java   View source code 6 votes vote down vote up
/**
 * Extracts the video file URL from a SafeUpload embed page.
 *
 * <p>The URL is read from the {@code 'file': '...'} entry of the second-to-last
 * {@code div + script} element in the page body.
 *
 * @param url embed page address, e.g.
 *            http://www.safeupload.org/getembed/f93fb4096e0875979215c0307dd53ff5
 * @return the video URL, or an empty string when the page cannot be fetched or
 *         does not contain the expected script
 */
public String getSafeUploadVideo(String url) {
    try {
        Document docdata = Jsoup.connect(url).userAgent(mUserAgent)
                .referrer("http://www.google.com")
                .timeout(Parser.getParseTimeOut()).get();
        Elements scripts = docdata.select("body").select("div + script");
        // The second-to-last script is needed; with fewer than two matches the
        // index would be negative and get() would throw.
        if (scripts.size() < 2) {
            return "";
        }

        String script = scripts.get(scripts.size() - 2).html();
        if (TextUtils.isEmpty(script)) {
            return "";
        }

        final Matcher matcher = Pattern.compile("'file': '(.+?)'").matcher(script);
        // group(1) is only valid after a successful find().
        return matcher.find() ? matcher.group(1) : "";
    } catch (IOException e) {
        WriteLog.appendLog(Log.getStackTraceString(e));
        return "";
    }
}
 
Example 15
Project: 9AnimeAndroid   File: NineAnimeApi.java   View source code 6 votes vote down vote up
/**
 * Searches 9anime for {@code query} and collects the results of every result
 * page.
 *
 * <p>The page count is read from {@code span.total} when a {@code div.paging}
 * block is present; if that element is missing or not numeric, only the first
 * page is used.
 *
 * @param query free-text keyword; it is URL-encoded before being sent
 * @return the results of all pages, in page order
 * @throws IOException if a page cannot be downloaded
 */
public List<Anime> search(String query) throws IOException, InterruptedException, ExecutionException, JSONException {
    // URL-encode the keyword (spaces still become '+') so that characters such
    // as '&' or '#' cannot corrupt the query string.
    String url = "https://9anime.to/search?keyword=" + java.net.URLEncoder.encode(query, "UTF-8");
    Document doc = Jsoup.connect(url).get();
    int totalPages = 1;

    List<Anime> animes = new ArrayList<>();

    if (doc.select("div.paging").size() > 0 && !doc.select("span.total").isEmpty()) {
        try {
            totalPages = Math.max(Integer.parseInt(doc.select("span.total").first().text().trim()), totalPages);
        } catch (NumberFormatException ignored) {
            // Unparseable page count: fall back to the single page already loaded.
        }
    }

    animes.addAll(parseSearchPage(doc));

    for (int page = 2; page <= totalPages; page++) {
        animes.addAll(parseSearchPage(Jsoup.connect(url + "&page=" + page).get()));
    }

    return animes;
}
 
Example 16
Project: GoSCELE   File: BaseProvider.java   View source code 6 votes vote down vote up
/**
 * Executes the configured request, stores the response cookies, and evaluates
 * each non-empty selector in {@code params} against the parsed response.
 *
 * <p>After every selector the progress is published as the percentage of
 * {@code params} handled so far.
 *
 * @param params CSS selectors; empty entries produce no result but still count
 *               towards progress
 * @return the selections in parameter order; empty when the request fails
 */
@Override
@Deprecated
protected List<Elements> doInBackground(String... params) {
    List<Elements> results = new ArrayList<>();
    int idx = 0;
    try {
        Connection.Response response = Jsoup.connect(url())
                .data(data())
                .method(method())
                .cookies(cookies())
                .execute();
        cookies = response.cookies();
        for (String param : params) {
            if (!TextUtils.isEmpty(param)) {
                results.add(response.parse().select(param));
            }
            // Multiply before dividing: the previous integer division
            // (idx / params.length) was 0 until the very last parameter.
            publishProgress(++idx * 100 / params.length);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    return results;
}
 
Example 17
Project: PicKing   File: Aitaotu.java   View source code 6 votes vote down vote up
/**
 * Decodes {@code result} as UTF-8 HTML and stores one {@link PicInfo} per
 * {@code #big-pic img} element in {@code resultMap}. Every entry carries the
 * text of the first {@code #photos h1} (empty when absent) and the same list
 * instance holding the texts of all {@code .fbl a} links.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl stored under {@code CURRENT_URL}
 * @param result     raw page bytes
 * @param resultMap  map that receives the results; also the return value
 * @return {@code resultMap}
 */
@Override
public Map<DetailActivity.parameter, Object> getDetailContent(String baseUrl, String currentUrl, byte[] result, Map<DetailActivity.parameter, Object> resultMap) throws UnsupportedEncodingException {
    final Document page = Jsoup.parse(new String(result, "utf-8"));
    final Elements images = page.select("#big-pic img");

    final Elements headings = page.select("#photos h1");
    final String heading;
    if (headings.isEmpty()) {
        heading = "";
    } else {
        heading = headings.get(0).text();
    }

    // Iterating an empty selection adds nothing, so no size check is needed.
    final List<String> tagNames = new ArrayList<>();
    for (Element tagLink : page.select(".fbl a")) {
        tagNames.add(tagLink.text());
    }

    final List<PicInfo> pictures = new ArrayList<>();
    for (Element image : images) {
        final PicInfo picture = new PicInfo()
                .setTags(tagNames)
                .setTitle(heading)
                .setPicUrl(image.attr("src"));
        pictures.add(picture);
    }

    resultMap.put(DetailActivity.parameter.CURRENT_URL, currentUrl);
    resultMap.put(DetailActivity.parameter.RESULT, pictures);
    return resultMap;
}
 
Example 18
Project: quiz_helper   File: Mnemonic.java   View source code 6 votes vote down vote up
/**
 * Looks up {@code key} at {@code wordUrl + key} and wraps the text of every
 * {@code .span9} element in a {@link Definition}.
 *
 * @param key word appended to the lookup URL
 * @return the definitions found; an empty list when the request fails (the
 *         stack trace is then shown in a toast)
 */
public List<Definition> wordLookup(String key) {
    final List<Definition> definitions = new ArrayList<>();
    try {
        final Document page = Jsoup.connect(wordUrl + key)
                .userAgent("Mozilla")
                .timeout(5000)
                .get();
        for(Element memo : page.select(".span9")){
            final String memoText = memo.text();
            final HashMap<String, String> fields = new HashMap<>();
            fields.put(EXP_ELE[0], memoText);
            definitions.add(new Definition(fields, memoText));
        }
    } catch (IOException ioe) {
        // Only the download can raise IOException, so the list is still empty here.
        Toast.makeText(MyApplication.getContext(), Log.getStackTraceString(ioe), Toast.LENGTH_SHORT).show();
    }
    return definitions;
}
 
Example 19
Project: EducationalAdministrationSystem   File: HttpUtils.java   View source code 6 votes vote down vote up
/**
 * Sends a GET request to {@code Constant.CoursePraise.COURSE_URL} with the
 * given cookies and the course {@code id} and {@code name} as request data.
 *
 * @param cookiesMap cookies attached to the request
 * @param id         value of the {@code id} parameter
 * @param name       value of the {@code name} parameter
 * @return the response, or {@code null} when the request fails with an
 *         {@link IOException}
 */
public static Response getCourseInfo(Map<String, String> cookiesMap, String id, String name) {
    try {
        Connection request = Jsoup.connect(Constant.CoursePraise.COURSE_URL);
        request.ignoreContentType(true);
        for (Map.Entry<String, String> cookie : cookiesMap.entrySet()) {
            request = request.cookie(cookie.getKey(), cookie.getValue());
        }

        return request.method(Connection.Method.GET)
                .data("id", id)
                .data("name", name)
                .data("pId", "")
                .data("level", "0")
                .data(Constant.CoursePraise.COURSE_URL_OTHER_PARAM, "zTreeAsyncTest")
                .data(Constant.CoursePraise.COURSE_URL_, "1507812989512")
                .timeout(10000)
                .execute();
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}
 
Example 20
Project: Android-Scrapper   File: EspnScoreboardParser.java   View source code 6 votes vote down vote up
/**
 * Loads the group-50 scoreboard pages for day offsets -1, 0 and +1 into
 * {@code documentDefault}, {@code document} and {@code documentTomorrow}.
 * A failed request prints its stack trace and leaves the remaining fields
 * unassigned.
 */
private void init() {
    try {
        this.documentDefault = Jsoup.connect(defaultGroupScoreboardUrl(-1))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
        this.document = Jsoup.connect(defaultGroupScoreboardUrl(0))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
        this.documentTomorrow = Jsoup.connect(defaultGroupScoreboardUrl(1))
                .timeout(60 * 1000)
                .maxBodySize(0)
                .get();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Builds the group-50 scoreboard URL for a day offset passed to {@code DateUtils.getDatePlus}. */
private String defaultGroupScoreboardUrl(int dayOffset) {
    return league.getBaseScoreUrl() + "/scoreboard/_/group/50/" + "date/" + DateUtils.getDatePlus("yyyyMMdd", dayOffset);
}
 
Example 21
Project: ZhihuQuestionsSpider   File: ParseRegularUtil.java   View source code 6 votes vote down vote up
/**
 * Reads the topic category list from the page HTML and queues one POST request
 * (offset 0) per {@code li} entry, using the entry's {@code data-id} attribute
 * as the numeric topic id.
 *
 * @param page   fetched page whose content is the HTML document
 * @param result receives one request per list entry
 */
public static void parseZhihuTopics(Page page, Result result){
    final Document doc = Jsoup.parse(page.getContent());
    final Element wrapper = doc.body().select("div.zg-wrap.zu-main.clearfix").first();
    final Element categoryList = wrapper.select("ul.zm-topic-cat-main.clearfix").first();

    for(Element item : categoryList.getElementsByTag("li")){
        final String topicId = item.attr("data-id");
        final Request request = new Request("https://www.zhihu.com/node/TopicsPlazzaListV2", HttpMethod.POST);
        final JSONObject params = new JSONObject();
        final int topicNumber = Integer.parseInt(topicId);
        params.put("topic_id",topicNumber);
        params.put("offset",0);
        params.put("hash_id","22e50cd21ed9df7085ff76d62175e986");
        request.addParame("method","next")
                .addParame("params",params.toJSONString())
                .addAttach("offset",Integer.valueOf(0))
                .addAttach("topic_id",topicNumber);
        result.addRequest(request);
    }
}
 
Example 22
Project: Linda-AI   File: Act.java   View source code 6 votes vote down vote up
/**
 * Looks up the term enclosed in single quotes inside {@code dico} on the
 * Italian Wikipedia and passes the text of the page's paragraphs to the GUI.
 *
 * <p>If {@code dico} does not contain two single quotes, the user is reminded
 * to quote the term. This is now checked up front: previously the
 * {@code substring} call ran outside the {@code try}, so its exception
 * bypassed the handler meant to produce that reminder.
 *
 * @param dico user sentence containing the quoted search term
 */
static void Wikipedia(String dico) {
    final int openQuote = dico.indexOf("'");
    final int closeQuote = dico.lastIndexOf("'");
    // Equal indexes mean zero quotes (-1 == -1) or exactly one quote.
    if (closeQuote <= openQuote) {
        new GUI().giveResponse("Ricorda che, perché io cerchi informazioni riguardo a qualcosa, occorre che tu la definisca fra due virgolette!");
        return;
    }

    String cercowikipedia = dico.substring(openQuote + 1, closeQuote);
    try {
        Document significatowikipedia = Jsoup.connect("https://it.wikipedia.org/wiki/" + cercowikipedia.replace(" ", "_")).userAgent("Mozilla").get();
        String divs = significatowikipedia.select("p").text();
        if (!divs.equals("")) {
            new GUI().giveResponse("La ricerca di " + cercowikipedia + " su wikipedia ha restituito il seguente risultato:" + '\n' + divs);
        } else {
            new GUI().giveResponse("Mi dispiace, non ho trovato informazioni su " + cercowikipedia + " su Wikipedia...");
        }
    } catch (HttpStatusException e) {
        new GUI().giveResponse("Mi dispiace, Wikipedia sembra non avere una voce per '" + cercowikipedia +"'...");
    } catch (java.io.IOException f) {
        f.printStackTrace();
    }
}
 
Example 23
Project: UnicesuLabs   File: PaginaLab.java   View source code 6 votes vote down vote up
/**
 * Parses {@code html} and registers one {@code Lab} in {@code Lab.array} for
 * every {@code table[class=tableReserva]} found inside each
 * {@code table[class=bloco]}.
 *
 * <p>The block name is the HTML of the first header cell in the block's first
 * row; the lab name is the HTML of the cells in the lab table's first row.
 * NOTE(review): {@code aula(int)} presumably reads the current {@code labo} —
 * confirm against its definition.
 */
public static void getPaginaLabs(){
    Document doc = Jsoup.parse(html.toString());
    Elements blocos = doc.select("table[class=bloco]");

    for (Element bloco : blocos) {
        String nBloco = bloco.select("tr").get(0).select("th").get(0).html();

        // The attribute selector was missing its closing ']', which is not a valid query.
        Elements labs = bloco.select("table[class=tableReserva]");
        for (Element lab : labs) {
            labo = lab;
            String nLab = labo.select("tr").get(0).select("td").html();
            Lab.array.add(new Lab(nBloco, nLab, aula(1), aula(2)));
            Log.d("nLAB","-->"+nBloco +" - "+ nLab +" - "+ aula(1) +" - "+ aula(2));
        }
    }
}
 
Example 24
Project: NTPaprEng   File: AdvSearchedWebPage.java   View source code 6 votes vote down vote up
/**
 * Collects the links of this search-result page: the sibling links (only when
 * this is the first page) followed by the paper links of the current page.
 *
 * @return all extracted links
 */
@Override
public List<? extends Link> extractAll() {
    System.out.println("Links parsing: url=" + getUrl() + " type=AdvSearched");

    final Document page = Jsoup.parse(getText());

    // Collection of all links.
    final List<Link> collected = new ArrayList<>();
    if (isFirstPage()) {
        collected.addAll(getSiblingLinks(page));
    }

    // Paper links of the current page.
    collected.addAll(getPaperLinks(parsePaperLinks(page)));

    return collected;
}
 
Example 25
Project: HouseSearch   File: HouseController.java   View source code 6 votes vote down vote up
/**
 * Computes the total number of result pages and returns it to the front end.
 *
 * <p>The listing count is read from the {@code em} element inside
 * {@code .listsum}; pages hold 20 entries each. As a side effect
 * {@code newUrl} is set to the listing base URL derived from the response URL.
 *
 * @param cityCode city code used as the host prefix
 * @param minPrice lowest price
 * @param maxPrice highest price
 * @param area     text appended verbatim to the query
 * @param subway   text appended verbatim to the query
 * @return the page count, or 0 when the request fails or no numeric listing
 *         count is found
 */
@ResponseBody
@RequestMapping(value = "/GetTotalPages", method = RequestMethod.POST)
public int GetTotalPages(String cityCode, int minPrice, int maxPrice, String area, String subway) {
    // Build the URL.
    String oldUrl = "http://" + cityCode + ".58.com";
    Connection conn = Jsoup.connect(oldUrl);
    int pages = 0;
    try {
        Response response = conn.method(Method.GET).execute();
        newUrl = response.url().toString() + "/pinpaigongyu/pn/";
        String nowUrl = newUrl + "1/?minprice=" + minPrice + "_" + maxPrice + area + subway;
        Document doc = Jsoup.connect(nowUrl).get();
        String countText = doc.getElementsByClass("listsum").select("em").text().trim();
        int listsum = Integer.parseInt(countText);
        pages = listsum % 20 == 0 ? listsum / 20 : listsum / 20 + 1;  // round up to whole pages
    } catch (IOException ignored) {
        // Network failure: report zero pages, as before.
    } catch (NumberFormatException ignored) {
        // Listing count missing or not numeric: report zero pages instead of
        // letting the request fail with an unhandled exception.
    }
    return pages;
}
 
Example 26
Project: Babler   File: YouTubeCaptionsScraper.java   View source code 6 votes vote down vote up
/**
 * Fetches the captions/transcript of a video and saves the concatenated
 * caption text.
 *
 * @param videoID id of the video whose captions are fetched
 * @param lang    language the captions should be in; converted with
 *                {@code LanguageCode.convertIso2toIso1} before use
 * @throws IOException if the HTTP request fails
 */
public void getAndSaveTranscript(String videoID, String lang) throws IOException {

    lang = LanguageCode.convertIso2toIso1(lang);

    String url = captionEndPoint+"lang="+lang+"&v="+videoID;
    GetMethod get = new GetMethod(url);
    String xmlData;
    try {
        this.client.executeMethod(get);
        xmlData = get.getResponseBodyAsString();
    } finally {
        // Hand the connection back to the client's connection manager even on failure.
        get.releaseConnection();
    }

    //parse XML
    Document doc = Jsoup.parse(xmlData, "", Parser.xmlParser());
    // StringBuilder avoids re-copying the accumulated text for every caption.
    StringBuilder allCaps = new StringBuilder();
    for (Element e : doc.select("text")) {
        allCaps.append(e.text());
    }

    FileSaver file = new FileSaver(allCaps.toString(), lang, "youtube_caps", url, videoID);
    file.save(logDb);

}
 
Example 27
Project: NewPipeExtractor   File: YoutubePlaylistExtractor.java   View source code 6 votes vote down vote up
/**
 * Downloads the JSON behind {@code nextStreamsUrl}, wraps its
 * {@code content_html} in a table, parses it into {@code nextStreamsAjax}, and
 * updates {@code nextStreamsUrl} from {@code load_more_widget_html} (empty
 * string when that widget HTML is empty).
 *
 * @param downloader used to fetch the JSON document
 * @throws ParsingException if the downloaded data is not valid JSON
 */
private void setupNextStreamsAjax(Downloader downloader) throws IOException, ReCaptchaException, ParsingException {
    final String rawJson = downloader.download(nextStreamsUrl);
    try {
        final JsonObject ajaxData = JsonParser.object().from(rawJson);

        final String wrappedRows = "<table><tbody id=\"pl-load-more-destination\">" + ajaxData.getString("content_html") + "</tbody></table>";
        nextStreamsAjax = Jsoup.parse(wrappedRows, nextStreamsUrl);

        final String moreWidgetHtml = ajaxData.getString("load_more_widget_html");
        nextStreamsUrl = moreWidgetHtml.isEmpty()
                ? ""
                : getNextStreamsUrlFrom(Jsoup.parse(moreWidgetHtml, nextStreamsUrl));
    } catch (JsonParserException e) {
        throw new ParsingException("Could not parse json data for next streams", e);
    }
}
 
Example 28
Project: NewPipeExtractor   File: SoundcloudPlaylistUrlIdHandler.java   View source code 6 votes vote down vote up
/**
 * Validates {@code complexUrl} against {@code URL_PATTERN}, downloads the page
 * and returns the {@code content} of its first {@code meta[property=og:url]}
 * element.
 *
 * @param complexUrl URL to clean
 * @return the value of the page's {@code og:url} meta tag
 * @throws ParsingException wrapping any exception raised while downloading or
 *                          reading the page
 */
@Override
public String cleanUrl(String complexUrl) throws ParsingException {
    Utils.checkUrl(URL_PATTERN, complexUrl);

    try {
        final String html = NewPipe.getDownloader().download(complexUrl);
        final Element canonical = Jsoup.parse(html).select("meta[property=og:url]").first();
        return canonical.attr("content");
    } catch (Exception e) {
        throw new ParsingException(e.getMessage(), e);
    }
}
 
Example 29
Project: newblog   File: LibraryUtil.java   View source code 6 votes vote down vote up
/**
 * Converts the rows of the first {@code tbody} inside the first
 * {@code table.jieyue-table} of {@code getHTML()} into {@link Myreading}
 * beans: cell 1 is the title, cell 2 the author and cell 3 the book index.
 *
 * @return one bean per table row, in document order
 */
public static List<Myreading> htmltoJavaBean() {
    final Element table = Jsoup.parse(getHTML()).select("table.jieyue-table").get(0);
    final Element tableBody = table.select("tbody").get(0);

    final List<Myreading> records = new ArrayList<>();
    for (Element row : tableBody.select("tr")) {
        final Elements cells = row.select("td");
        final Myreading record = new Myreading();
        record.setTitle(cells.get(1).text());
        record.setAuthor(cells.get(2).text());
        record.setBookindex(cells.get(3).text());
        records.add(record);
        logger.info("借阅记录抓取成功" + record.getTitle());
    }
    return records;
}
 
Example 30
Project: LushX   File: JsoupUtils.java   View source code 5 votes vote down vote up
/**
 * Downloads {@code url} with the PC user agent, the configured timeout and
 * content-type checking disabled.
 *
 * @param url page to download
 * @return the parsed document
 * @throws LushXException when the request fails with an {@link IOException};
 *                        the failure is logged first
 */
public static Document getDocWithPC(String url) {
    final Document page;
    try {
        page = Jsoup.connect(url)
                .userAgent(UA_PC)
                .timeout(TIME_OUT)
                .ignoreContentType(true)
                .get();
    } catch (IOException e) {
        final String message = "网址请求失败:" + url;
        logger.error(message);
        throw new LushXException(message);
    }
    return page;
}
 
Example 31
Project: sipsoup   File: JsoupParseTest.java   View source code 5 votes vote down vote up
/**
 * Prints the result of three Jsoup calls: parsing the plain text "test",
 * validating it against an empty whitelist, and parsing {@code null}.
 * NOTE(review): the last call presumably explores how the parser reacts to a
 * null input — confirm the intent.
 */
public static void main(String[] args) {
    System.out.println(Jsoup.parse("test"));

    // No use.
    System.out.println(Jsoup.isValid("test", Whitelist.none()));

    System.out.println(Jsoup.parse(null));
}
 
Example 32
Project: matrix-appservice-email   File: GmailClientFormatter.java   View source code 5 votes vote down vote up
/**
 * Extracts the first {@code div[dir='ltr']} of the e-mail body, removes its
 * trailing {@code <br>} children and returns the remaining HTML cleaned with
 * the basic whitelist.
 *
 * @param content HTML of the e-mail
 * @return the cleaned HTML, or an empty string when no such div exists
 */
@Override
protected String formatHtml(String content) {
    final Element contentDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (contentDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Strip trailing <br> children one at a time until the last child is something else.
    for (;;) {
        if (contentDiv.children().size() == 0) {
            break;
        }
        final Element lastChild = contentDiv.children().last();
        if (!lastChild.is("br")) {
            break;
        }
        lastChild.remove();
    }

    return Jsoup.clean(contentDiv.html(), Whitelist.basic());
}
 
Example 33
Project: FacetExtract   File: test111111111.java   View source code 5 votes vote down vote up
/**
 * Downloads the Wikipedia article on array data structures and prints its
 * text. If the download fails, the stack trace is printed and nothing else
 * happens (previously the null document was dereferenced, causing a
 * NullPointerException).
 */
public static void main(String[] args) {
    try {
        Document doc = Jsoup.connect("https://en.wikipedia.org/wiki/Array_data_structure").get();
        System.out.println(doc.text());
    } catch (IOException e) {
        e.printStackTrace();
    }
}
 
Example 34
Project: crawling-framework   File: CyberscoopExtractorTest.java   View source code 5 votes vote down vote up
/**
 * Checks that extracting the "cyberscoop1" fixture with the source that has no
 * title selector yields exactly one title match, {@code META:og:title}.
 */
@Test
public void testTitleExtraction000() throws Exception {
    final String articleUrl = "https://www.cyberscoop.com/u-s-oil-gas-companies-still-trying-catch-cybersecurity-experts-say/";
    final String articleHtml = loadArticle("cyberscoop1");

    // The parse result itself is not inspected.
    Jsoup.parse(articleHtml, articleUrl);

    final HttpArticleParseResult extracted = ArticleExtractor.extractArticleWithDetails(
            articleHtml, articleUrl, cyberscoopSourceWithoutTitleSelector(), null);
    assertEquals(1, extracted.getTitleMatches().size());
    assertEquals("META:og:title", extracted.getTitleMatches().get(0));
}
 
Example 35
Project: CNode-OAuth-Login-Android   File: CNodeOAuthLoginView.java   View source code 5 votes vote down vote up
/**
 * Shows the loading layout, then fetches https://cnodejs.org/setting on the
 * executor with the given cookie and reads the access token from the page.
 * The outcome is posted to {@code handler}: on success the finish layout is
 * shown and {@code loginCallback} (if set) is notified; on an
 * {@link IOException} the error layout is shown.
 *
 * @param cookie value of the {@code Cookie} request header
 */
private void startGetAccessTokenAsyncTask(final String cookie) {
    showLoadingLayout();

    final Runnable reportFailure = new Runnable() {

        @Override
        public void run() {
            showErrorLayout();
        }

    };

    executorService.execute(new Runnable() {

        @Override
        public void run() {
            final String accessToken;
            try {
                Document settingsPage = Jsoup.connect("https://cnodejs.org/setting").header("Cookie", cookie).get();
                accessToken = settingsPage.getElementById("content").getElementsByClass("panel").get(2).child(1).child(0).text().replace("字符串:", "").replace(" ", "").trim();
            } catch (IOException e) {
                handler.post(reportFailure);
                return;
            }

            handler.post(new Runnable() {

                @Override
                public void run() {
                    showFinishLayout();
                    if (loginCallback != null) {
                        loginCallback.onLoginSuccess(accessToken);
                    }
                }

            });
        }

    });
}
 
Example 36
Project: UpdogFarmer   File: SteamWebHandler.java   View source code 5 votes vote down vote up
/**
 * Casts a vote for a randomly chosen nomination on the Steam Awards page.
 *
 * @return {@code true} when the vote was posted; {@code false} when the page
 *         has no vote container, the container has no nominations, or a
 *         request fails
 */
public boolean autoVote() {
    final String url = STEAM_STORE + "SteamAwards/?l=english";
    try {
        final Document doc = Jsoup.connect(url)
                .referrer(url)
                .followRedirects(true)
                .cookies(generateWebCookies())
                .get();
        final Element container = doc.select("div.vote_nominations").first();
        if (container == null) {
            return false;
        }
        final String voteId = container.attr("data-voteid");
        final Elements voteNominations = container.select("div.vote_nomination");
        // select() returns an empty collection rather than null, and
        // Random.nextInt(0) would throw IllegalArgumentException.
        if (voteNominations.isEmpty()) {
            return false;
        }
        final Element choice = voteNominations.get(new Random().nextInt(voteNominations.size()));
        final String appId = choice.attr("data-vote-appid");
        // The response document is not needed; only the POST matters.
        Jsoup.connect(STEAM_STORE + "salevote")
                .referrer(STEAM_STORE)
                .cookies(generateWebCookies())
                .data("sessionid", sessionId)
                .data("voteid", voteId)
                .data("appid", appId)
                .post();
        return true;
    } catch (IOException e) {
        e.printStackTrace();
    }
    return false;
}
 
Example 37
Project: case-html-data-gather   File: HTMLDataGather.java   View source code 5 votes vote down vote up
/**
 * Downloads {@code url} with the configured timeout and converts the document
 * with {@code parse}.
 *
 * @param url page to download
 * @return the parsed model, never {@code null}
 * @throws RuntimeException when {@code parse} returns {@code null}
 */
public HanZiDM from(String url) throws Exception {
	final HanZiDM parsed = this.parse(Jsoup.connect(url).timeout(timeout).get());
	if(parsed!=null){
		return parsed;
	}
	throw new RuntimeException();
}
 
Example 38
Project: PicKing   File: Yesky.java   View source code 5 votes vote down vote up
/**
 * Decodes {@code result} as GB2312 HTML and returns the {@code href} of the
 * first {@code .l_effect_img_mid a} link. An empty string is returned when the
 * last {@code .l_effect_bottom li a} link already points at
 * {@code currentUrl}, or when no such link exists.
 *
 * @param baseUrl    not used by this implementation
 * @param currentUrl URL of the page being parsed
 * @param result     raw page bytes
 * @return the next URL or an empty string
 */
@Override
public String getDetailNext(String baseUrl, String currentUrl, byte[] result) throws UnsupportedEncodingException {
    final Document page = Jsoup.parse(new String(result, "gb2312"));

    final Elements bottomLinks = page.select(".l_effect_bottom li a");
    final boolean lastLinkIsCurrent = !bottomLinks.isEmpty()
            && bottomLinks.get(bottomLinks.size() - 1).attr("href").equals(currentUrl);
    if (lastLinkIsCurrent) {
        return "";
    }

    final Elements middleLinks = page.select(".l_effect_img_mid a");
    return middleLinks.isEmpty() ? "" : middleLinks.get(0).attr("href");
}
 
Example 39
Project: NFLFantasyAnalyzer   File: WebScraper.java   View source code 5 votes vote down vote up
/**
 * Downloads the page at the given address and stores the parsed result in the
 * {@code recAndPlayerURL} field.
 *
 * @param recAndPlayerURL address of the page to download
 * @throws IOException wrapping the original failure with a descriptive message
 */
public void initializeRecordAndTopPlayerURL(String recAndPlayerURL) throws IOException{
	try {
		this.recAndPlayerURL = Jsoup
				.connect(recAndPlayerURL)
				.get();
	} catch(IOException cause) {
		throw new IOException("Error initializing RecordAndTopPlayerURL", cause);
	}
}
 
Example 40
Project: matrix-appservice-email   File: ThunderbirdClientFormatter.java   View source code 5 votes vote down vote up
/**
 * Removes cited blockquotes and {@code div.moz-cite-prefix} blocks from the
 * e-mail body, drops trailing {@code <br>} children, and returns the remaining
 * HTML cleaned with the basic whitelist.
 *
 * @param content HTML of the e-mail
 * @return the cleaned HTML
 */
@Override
protected String formatHtml(String content) {
    final Element mailBody = Jsoup.parse(content).body();
    mailBody.select("blockquote[cite]").remove();
    mailBody.select("div.moz-cite-prefix").remove();

    // Strip trailing <br> children one at a time until the last child is something else.
    for (;;) {
        if (mailBody.children().size() == 0) {
            break;
        }
        final Element lastChild = mailBody.children().last();
        if (!lastChild.is("br")) {
            break;
        }
        lastChild.remove();
    }

    return Jsoup.clean(mailBody.html(), Whitelist.basic());
}