Java Code Examples for org.jsoup.Connection.Response#parse()

The following examples show how to use org.jsoup.Connection.Response#parse(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: ripme   File: DeviantartRipper.java    License: MIT License 6 votes vote down vote up
/**
 * Advances the paging offset by 24 and returns the page at the new offset.
 *
 * <p>The page is first requested through the shared connection; if that
 * response contains any element with class {@code message}, it is treated as
 * the end of the listing. Otherwise the same URL is fetched again through
 * {@code Http} and that document is returned.
 *
 * @param doc the current page (not read by this method)
 * @return the document for the next page
 * @throws IOException with message "No more pages" when the probe page
 *                     contains a {@code message} element, or when a request fails
 */
@Override
public Document getNextPage(Document doc) throws IOException {
	this.offset += 24;
	this.conn.url(urlWithParams(this.offset)).cookies(getDACookie());
	Response probe = this.conn.execute();
	Document probeDoc = probe.parse();
	Elements messages = probeDoc.getElementsByClass("message");
	LOGGER.info("Current Offset: " + this.offset);

	// Any "message" element on the probe page marks the last page.
	int messageCount = messages.size();
	if (messageCount > 0) {
		LOGGER.info("Messages amount: " + messageCount + " - Next Page does not exists");
		throw new IOException("No more pages");
	}

	return Http.url(urlWithParams(this.offset))
			.referrer(referer)
			.userAgent(userAgent)
			.cookies(getDACookie())
			.get();
}
 
Example 2
Source Project: emotional_analysis   File: SongTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses the singer and the album out of a fixed song page and prints them.
 *
 * <p>Both values are read from elements with class {@code s-fc7}: the first
 * child node of the element at index 1 is printed as the singer, and the first
 * child node of the element at index 2 as the album.
 *
 * @throws Exception if the request fails or the expected elements are missing
 */
@Test
public void test4() throws Exception{
	Document page = Jsoup.connect("http://music.163.com/song?id=63650")
			.ignoreContentType(true)
			.execute()
			.parse();
	Elements candidates = page.getElementsByClass("s-fc7");
	// Singer
	String singer = candidates.get(1).childNode(0).toString();
	// Album
	String album = candidates.get(2).childNode(0).toString();
	System.out.println(singer+"--------"+album);
}
 
Example 3
Source Project: emotional_analysis   File: SearchUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Gets the singer of the given song from the music.163.com mobile song page.
 *
 * <p>The singer is taken from the first child node of the second element
 * (index 1) with class {@code s-fc7}.
 *
 * @param songId id of the song, appended to the song page URL
 * @return the string form of the singer node, or an empty string when the page
 *         has fewer than two {@code s-fc7} elements or the second one has no
 *         child nodes
 * @throws Exception if the HTTP request or the parsing fails
 */
public static String getSingerById(long songId) throws Exception{
	String singer = "";
	Response execute = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache").timeout(2000000000)
			.execute();
	Document parse = execute.parse();
	Elements elements = parse.getElementsByClass("s-fc7");
	// Index 1 is read below, so at least two matches are required.
	if(elements.size() > 1){
		Element singerElement = elements.get(1);
		// childNode(0) needs at least one child node to exist.
		if(singerElement.childNodeSize() > 0){
			Node singerChildNode = singerElement.childNode(0);
			singer = singerChildNode.toString();
		}
	}
	return singer;
}
 
Example 4
Source Project: emotional_analysis   File: SearchUtils.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Gets the name of the given song from the music.163.com mobile song page.
 *
 * <p>The name is the string form of the first child node of the first element
 * with class {@code f-ff2}.
 *
 * @param songId id of the song, appended to the song page URL
 * @return the song name, or a fixed "not found" message when the page has no
 *         {@code f-ff2} element
 * @throws Exception if the HTTP request or the parsing fails
 */
public static String getSongNameById(long songId) throws Exception{
	Document page = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache").timeout(2000000000)
			.execute()
			.parse();
	Elements titleElements = page.getElementsByClass("f-ff2");
	if(titleElements.isEmpty()){
		// No title element on the page: report the song as not found.
		return "ES中歌曲在网易云音乐中找不到";
	}
	return titleElements.get(0).childNode(0).toString();
}
 
Example 5
Source Project: ripme   File: EroShareRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Fetches and parses the first page of the album.
 *
 * <p>The ripper URL is requested with every occurrence of
 * {@code eroshare.com} rewritten to {@code eroshae.com}.
 *
 * @return the parsed first page
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    String rewrittenUrl = this.url.toExternalForm().replace("eroshare.com", "eroshae.com");
    return Http.url(rewrittenUrl)
               .ignoreContentType()
               .response()
               .parse();
}
 
Example 6
Source Project: ripme   File: EroShareRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Collects the media URLs found on the given album page.
 *
 * <p>Pictures are {@code <img>} elements with class {@code album-image}
 * (their {@code src} attribute). Videos are {@code <video>} elements with
 * class {@code album-video} (the {@code src} of their first {@code <source>}
 * descendant). A video without any {@code <source>} is skipped.
 *
 * @param url the album page to fetch
 * @return the picture URLs followed by the video URLs, in document order
 * @throws IOException if the request fails or a {@code src} value is not a
 *                     valid URL
 */
public static List<URL> getURLs(URL url) throws IOException{

    Response resp = Http.url(url)
                        .ignoreContentType()
                        .response();

    Document doc = resp.parse();

    List<URL> URLs = new ArrayList<>();
    //Pictures
    for (Element img : doc.getElementsByTag("img")) {
        if (img.hasClass("album-image")) {
            URLs.add(new URL(img.attr("src")));
        }
    }
    //Videos
    for (Element vid : doc.getElementsByTag("video")) {
        if (!vid.hasClass("album-video")) {
            continue;
        }
        // first() returns null when the video has no <source>; skip it
        // instead of failing the whole listing with a NullPointerException.
        Element firstSource = vid.getElementsByTag("source").first();
        if (firstSource == null) {
            continue;
        }
        URLs.add(new URL(firstSource.attr("src")));
    }

    return URLs;
}
 
Example 7
Source Project: ripme   File: EightmusesRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Returns the album's first page, fetching it only when it is not cached yet.
 *
 * <p>On a fetch, the response cookies are merged into {@code cookies} and the
 * parsed document is stored in {@code albumDoc} for later calls.
 *
 * @return the cached or freshly fetched first page
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    if (albumDoc != null) {
        return albumDoc;
    }
    Response firstResponse = Http.url(url).response();
    cookies.putAll(firstResponse.cookies());
    albumDoc = firstResponse.parse();
    return albumDoc;
}
 
Example 8
Source Project: ripme   File: FuraffinityRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Fetches an image page and extracts its description as plain text.
 *
 * <p>The description is the first {@code td} with {@code class=alt1} and
 * {@code width="70%"}. Line breaks are kept by marking {@code <br>} and
 * {@code <p>} with a literal {@code \n} placeholder, which is replaced by the
 * platform line separator before all markup is stripped.
 *
 * @param page URL of the image page
 * @return the page's {@code og:title} content, a newline, then the cleaned
 *         description; or {@code null} when the page could not be fetched or
 *         has no description
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the response exactly once and reuse the document: jsoup only
        // supports repeated parse() calls on a response after bufferUp().
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Insert literal "\n" placeholders so line breaks survive the clean below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
Example 9
Source Project: ripme   File: SankakuComplexRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Returns the first page, requesting it only if {@code albumDoc} is still unset.
 *
 * <p>When a request is made, its cookies are added to {@code cookies} and the
 * parsed result is kept in {@code albumDoc}.
 *
 * @return the first page document
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    if (albumDoc != null) {
        return albumDoc;
    }
    Response pageResponse = Http.url(url).response();
    cookies.putAll(pageResponse.cookies());
    albumDoc = pageResponse.parse();
    return albumDoc;
}
 
Example 10
Source Project: ripme   File: EromeRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Requests the ripper URL, ignoring the response content type, and parses the
 * response body.
 *
 * @return the parsed first page
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    return Http.url(this.url)
               .ignoreContentType()
               .response()
               .parse();
}
 
Example 11
Source Project: ripme   File: ZizkiRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Lazily loads the album's first page.
 *
 * <p>The first call fetches the page, merges the response cookies into
 * {@code cookies} and caches the document in {@code albumDoc}; later calls
 * return the cached document.
 *
 * @return the first page document
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    if (albumDoc != null) {
        return albumDoc;
    }
    Response initialResponse = Http.url(url).response();
    cookies.putAll(initialResponse.cookies());
    albumDoc = initialResponse.parse();
    return albumDoc;
}
 
Example 12
Source Project: ripme   File: FuskatorRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Fetches the first page and remembers the cookies it came with.
 *
 * <p>{@code cookies} is replaced by the response's cookie map (not merged),
 * then the response body is parsed and returned.
 *
 * @return the parsed first page
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    Response firstResponse = Http.url(url).response();
    cookies = firstResponse.cookies();
    Document firstPage = firstResponse.parse();
    return firstPage;
}
 
Example 13
Source Project: MMDownloader   File: Downloader.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fetches an archive page with Jsoup and returns its HTML.
 *
 * @param eachArchiveAddress address of the archive that holds the actual comic
 * @return the HTML of the page on success
 * @throws RuntimeException if the page contains no {@code div.gallery-template}
 * @throws Exception if the request or the parsing fails
 */
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
	print.info("고속 연결 시도중...\n");

	// The password is passed along with the very first request.
	// NOTE(review): the original comment described this as a POST with the
	// password in the body, but no request method is set here - confirm.
	Response response = Jsoup.connect(eachArchiveAddress)
			.userAgent(UserAgent.getUserAgent())
			.header("charset", "utf-8")
			.header("Accept-Encoding", "gzip") // gzip added on 2017-11-26
			.timeout(MAX_WAIT_TIME) // timeout
			.data("pass", PASSWORD)    // as of 2018-04-29 MaruMaru uses reCaptcha, so this has no effect
			.followRedirects(true)
			.execute();

	// Keep the HTML that was received.
	Document preDoc = response.parse();

	// <div class="gallery-template"> is where the comic lives; without it the
	// fetch is considered failed.
	if (preDoc.select("div.gallery-template").isEmpty()) {
		throw new RuntimeException("Jsoup Parsing Failed: No tag found");
	}

	// The parsed page has content, so the fetch succeeded.
	String pageSource = preDoc.toString();

	print.info("고속 연결 성공!\n");
	return pageSource; // the HTML is returned on success
}
 
Example 14
Source Project: emotional_analysis   File: IpProxy.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Scrapes proxy IP/port pairs from the page at the given URL.
 *
 * <p>The rows are located by a fixed child-node path below {@code <body>};
 * every second node from index 2 up to index 30 is read, taking the IP from
 * its child node 3 and the port from its child node 5. If the page has fewer
 * rows than that, only the rows that exist are read.
 *
 * <p>NOTE(review): the request carries a hard-coded {@code Cookie} header that
 * looks like a captured browser session (session id, CSRF token). It should be
 * moved out of source code; it is left unchanged here to preserve behavior.
 *
 * @param url page listing the proxies
 * @return the proxies found, at most 15 entries
 * @throws Exception if the request fails or the page layout differs from the
 *                   expected one
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	List<IpEntity> ipList = new ArrayList<>();
	Response execute = Jsoup.connect(url)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "max-age=60").header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6").header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com").header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie",
					"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
			.method(Method.GET).ignoreContentType(true)
			.timeout(2099999999).execute();
	Document pageJson = execute.parse();
	Element body = pageJson.body();
	// Fixed path to the node whose children are the proxy rows; tied to the
	// exact layout of the scraped page.
	List<Node> childNodes = body.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();
	// Put the leading proxy IPs into the list.
	// NOTE(review): the original comment said "top 10", but indices 2..30 in
	// steps of 2 visit up to 15 rows - confirm which is intended.
	// Stop at the last existing node so a shorter page yields fewer entries
	// instead of an IndexOutOfBoundsException.
	int lastIndex = Math.min(30, childNodes.size() - 1);
	for(int i = 2;i <= lastIndex;i += 2){
		IpEntity ipEntity = new IpEntity();
		Node node = childNodes.get(i);
		List<Node> nodes = node.childNodes();
		String ip = nodes.get(3).childNode(0).toString();
		int port = Integer.parseInt(nodes.get(5).childNode(0).toString());
		ipEntity.setIp(ip);
		ipEntity.setPort(port);
		ipList.add(ipEntity);
	}
	return ipList;
}
 
Example 15
Source Project: emotional_analysis   File: GetSongName.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Fetches the mobile song page for song id 91445 and prints the song name.
 *
 * @throws Exception if the request fails or no {@code f-ff2} element exists
 */
@Test
public void test1() throws Exception{
	Document page = Jsoup.connect("http://music.163.com/m/song?id=" + 91445)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache").timeout(2000000000)
			.execute()
			.parse();
	// Get the song name: first child node of the first "f-ff2" element.
	String songName = page.getElementsByClass("f-ff2").get(0).childNode(0).toString();
	System.out.println(songName);
}
 
Example 16
/**
 * Resolves a card from its Blizzard id by scraping the card's stats page.
 *
 * <p>The id is upper-cased and inserted into the metastats.net card URL; the
 * text of the page's first {@code <h3>} is then passed to
 * {@code getCardByName}.
 *
 * @param id Blizzard card id; upper-cased before use
 * @return the card returned by {@code getCardByName} for the scraped name
 * @throws IOException if the request fails or the page has no {@code <h3>}
 */
public static Card getCardByBlizzardId(String id) throws IOException{
	// Locale.ROOT keeps the URL stable on JVMs whose default locale has
	// special casing rules (e.g. Turkish dotted/dotless i).
	String cardUrl = "http://metastats.net/cardstats/" + id.toUpperCase(java.util.Locale.ROOT) + "/";
	Response response= Jsoup.connect(cardUrl)
	           .ignoreContentType(true)
	           .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0")
	           .referrer("http://www.google.com")
	           .timeout(12000)
	           .followRedirects(true)
	           .execute();
	Document doc = response.parse();
	// Fail with the declared exception type instead of an unchecked
	// IndexOutOfBoundsException when the page has no heading.
	if (doc.getElementsByTag("h3").isEmpty()) {
		throw new IOException("No <h3> card name found at " + cardUrl);
	}
	return getCardByName(doc.getElementsByTag("h3").get(0).text());
}
 
Example 17
Source Project: coolreader   File: DownloadPageTask.java    License: MIT License 5 votes vote down vote up
/**
 * Downloads and parses the page at the first given URL.
 *
 * @param arg0 URLs to download; only the first one is used
 * @return a result wrapping the parsed document, or wrapping the exception
 *         if anything in the download or parsing failed
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
	try {
		String target = arg0[0].toString();
		Log.d("DownloadPageTask", "Downloading: " + target);
		Response response = Jsoup.connect(target).timeout(7000).execute();
		Log.d("DownloadPageTask", "Complete: " + target);
		Document page = response.parse();
		return new AsyncTaskResult<Document>(page);
	} catch (Exception e) {
		// Every failure is handed back to the caller inside the result.
		return new AsyncTaskResult<Document>(e);
	}
}
 
Example 18
Source Project: ripme   File: ErotivRipper.java    License: MIT License 4 votes vote down vote up
/**
 * Fetches the ripper URL, ignoring the response content type, and returns the
 * parsed document.
 *
 * @return the parsed first page
 * @throws IOException if the request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    return Http.url(this.url)
               .ignoreContentType()
               .response()
               .parse();
}
 
Example 19
Source Project: ripme   File: DeviantartRipper.java    License: MIT License 4 votes vote down vote up
/**
 * Stores logged in Cookies. Needed for art pieces only visible to logged in
 * users.
 *
 * <p>Previously stored cookies are loaded from the configuration first. A new
 * login is performed only when no cookies are available or
 * {@code checkLogin()} rejects them: the login page is loaded to read the
 * {@code validate_token} / {@code validate_key} form inputs, the credentials
 * are posted, the redirect is followed manually, and the cookies collected
 * along the way are stored via {@code updateCookie}.
 *
 * @throws IOException when failed to load webpage, when the login page does
 *                     not contain the expected form or inputs, when the login
 *                     request is not redirected, or failed to read/write
 *                     cookies in file (used when running multiple instances of
 *                     RipMe)
 */
private void login() throws IOException {

	String customUsername = Utils.getConfigString("DeviantartCustomLoginUsername", this.username);
	String customPassword = Utils.getConfigString("DeviantartCustomLoginPassword", this.password);
	try {
		String dACookies = Utils.getConfigString(utilsKey, null);
		updateCookie(dACookies != null ? deserialize(dACookies) : null);
	} catch (ClassNotFoundException e) {
		// Stored cookies are unusable; the check below decides whether to log in again.
		LOGGER.error("Failed to deserialize stored DeviantArt cookies", e);
	}
	if (getDACookie() == null || !checkLogin()) {
		LOGGER.info("Do Login now");
		// Do login now

		Map<String, String> tmpCookies = new HashMap<String, String>();

		// Load login page
		Response res = Http.url("https://www.deviantart.com/users/login").connection().method(Method.GET)
				.referrer(referer).userAgent(userAgent).execute();

		tmpCookies.putAll(res.cookies());

		// Find tokens
		Document doc = res.parse();

		Element form = doc.getElementById("login");
		if (form == null) {
			throw new IOException("Login form not found on https://www.deviantart.com/users/login");
		}
		String token = requiredLoginInput(form, "validate_token");
		String key = requiredLoginInput(form, "validate_key");
		// NOTE(review): this writes the validation token and key to the log.
		LOGGER.info("Token: " + token + " & Key: " + key);

		// Build Login Data
		HashMap<String, String> loginData = new HashMap<String, String>();
		loginData.put("challenge", "");
		loginData.put("username", customUsername);
		loginData.put("password", customPassword);
		loginData.put("remember_me", "1");
		loginData.put("validate_token", token);
		loginData.put("validate_key", key);

		// Log in using data. Handle redirect
		res = Http.url("https://www.deviantart.com/users/login").connection().referrer(referer).userAgent(userAgent)
				.method(Method.POST).data(loginData).cookies(tmpCookies).followRedirects(false).execute();

		tmpCookies.putAll(res.cookies());

		// Redirects are not followed automatically, so the target comes from
		// the "location" header; without it there is nothing to follow.
		String redirectTarget = res.header("location");
		if (redirectTarget == null) {
			throw new IOException("Login request was not redirected (HTTP status " + res.statusCode() + ")");
		}

		res = Http.url(redirectTarget).connection().referrer(referer).userAgent(userAgent)
				.method(Method.GET).cookies(tmpCookies).followRedirects(false).execute();

		// Store cookies
		tmpCookies.putAll(res.cookies());

		updateCookie(tmpCookies);


	} else {
		LOGGER.info("No new Login needed");
	}

	// NOTE(review): this writes the session cookies to the log.
	LOGGER.info("DA Cookies: " + getDACookie());
}

/**
 * Reads the value of a named input of the login form.
 *
 * @param form the login form element
 * @param name the input's {@code name} attribute
 * @return the input's {@code value} attribute
 * @throws IOException if the form has no input with that name
 */
private static String requiredLoginInput(Element form, String name) throws IOException {
	Element input = form.select("input[name=\"" + name + "\"]").first();
	if (input == null) {
		throw new IOException("Login form is missing input '" + name + "'");
	}
	return input.attr("value");
}
 
Example 20
Source Project: ripme   File: HentaifoundryRipper.java    License: MIT License 4 votes vote down vote up
/**
 * Passes the site's age gate, applies the content filters and returns the
 * first page of the ripper URL.
 *
 * <p>Three requests are made, each merging its response cookies into
 * {@code cookies}: the {@code enterAgree} landing page, a POST of the filter
 * form (only when a CSRF token was found on the landing page), and finally the
 * ripper URL itself.
 *
 * @return the parsed page at the ripper URL
 * @throws IOException if any request or the parsing fails
 */
@Override
public Document getFirstPage() throws IOException {
    Response resp;
    Document doc;

    resp = Http.url("https://www.hentai-foundry.com/?enterAgree=1&size=1500")
            .referrer("https://www.hentai-foundry.com/")
            .cookies(cookies)
            .response();
    // The only cookie that seems to matter in getting around the age wall is the phpsession cookie
    cookies.putAll(resp.cookies());

    doc = resp.parse();
    // Elements.attr yields "" when no matching input carries a value, so a
    // missing token reaches the else branch below instead of throwing a
    // NullPointerException on first().
    String csrf_token = doc.select("input[name=YII_CSRF_TOKEN]").attr("value");
    if (!csrf_token.isEmpty()) {
        Map<String,String> data = new HashMap<>();
        data.put("YII_CSRF_TOKEN"  , csrf_token);
        data.put("rating_nudity"   , "3");
        data.put("rating_violence" , "3");
        data.put("rating_profanity", "3");
        data.put("rating_racism"   , "3");
        data.put("rating_sex"      , "3");
        data.put("rating_spoilers" , "3");
        data.put("rating_yaoi"     , "1");
        data.put("rating_yuri"     , "1");
        data.put("rating_teen"     , "1");
        data.put("rating_guro"     , "1");
        data.put("rating_furry"    , "1");
        data.put("rating_beast"    , "1");
        data.put("rating_male"     , "1");
        data.put("rating_female"   , "1");
        data.put("rating_futa"     , "1");
        data.put("rating_other"    , "1");
        data.put("rating_scat"     , "1");
        data.put("rating_incest"   , "1");
        data.put("rating_rape"     , "1");
        data.put("filter_media"    , "A");
        data.put("filter_order"    , Utils.getConfigString("hentai-foundry.filter_order","date_old"));
        data.put("filter_type"     , "0");

        resp = Http.url("https://www.hentai-foundry.com/site/filters")
                   .referrer("https://www.hentai-foundry.com/")
                   .cookies(cookies)
                   .data(data)
                   .method(Method.POST)
                   .response();
        cookies.putAll(resp.cookies());
    }
    else {
        LOGGER.info("unable to find csrf_token and set filter");
    }

    resp = Http.url(url)
            .referrer("https://www.hentai-foundry.com/")
            .cookies(cookies)
            .response();
    cookies.putAll(resp.cookies());
    return resp.parse();
}