org.jsoup.Connection.Response Java Examples

The following examples show how to use org.jsoup.Connection.Response. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: DeviantartRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Checks whether the stored DeviantArt cookies are still valid/usable.
 * <p>
 * Two conditions are verified: the cookies must carry {@code agegate_state=1},
 * and a request to the login page sent with these cookies (following redirects)
 * must end on a URL that is neither the login page itself nor the
 * wrong-password page.
 *
 * @return {@code true} when both checks pass; {@code false} otherwise,
 *         including when the request fails with an {@link IOException}.
 */
private boolean checkLogin() {
	if (!getDACookie().containsKey("agegate_state")) {
		LOGGER.info("No agegate key");
		return false;
	}
	// agegate == 1 -> all is fine, NSFW is visible. Constant-first equals keeps a
	// null cookie value from throwing.
	if (!"1".equals(getDACookie().get("agegate_state"))) {
		LOGGER.info("Wrong agegate value");
		return false;
	}

	final String loginUrl = "https://www.deviantart.com/users/login";
	final String wrongPasswordPrefix = "https://www.deviantart.com/users/wrong-password";
	try {
		// Log only the cookie names: the values are session secrets.
		LOGGER.info("Login with Cookies: " + getDACookie().keySet());
		Response res = Http.url(loginUrl).connection().followRedirects(true)
				.cookies(getDACookie()).referrer(this.referer).userAgent(this.userAgent).execute();
		String finalUrl = res.url().toExternalForm();
		if (!finalUrl.equals(loginUrl) && !finalUrl.startsWith(wrongPasswordPrefix)) {
			LOGGER.info("Cookies are valid: " + res.url());
			return true;
		} else {
			LOGGER.info("Cookies invalid. Wrong URL: " + res.url() + "  " + res.statusCode());
			return false;
		}
	} catch (IOException e) {
		// Route the failure through the logger instead of stderr.
		LOGGER.error("Could not verify login cookies", e);
		return false;
	}
}
 
Example #2
Source File: DeviantartRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Counts the download, issues a request for the gallery page at the current
 * offset, and then starts a new thread to find download link + filename +
 * filetype for the given URL.
 *
 * @param url   The URL to an image site.
 * @param index Position of the URL; not used by this implementation.
 */
@Override
protected void downloadURL(URL url, int index) {
	this.downloadCount += 1;
	LOGGER.info("Downloading URL Number " + this.downloadCount);
	LOGGER.info("Deviant Art URL: " + url.toExternalForm());
	try {
		// The response is intentionally discarded (the cookie update is disabled).
		// NOTE(review): presumably this request keeps the session alive - confirm it is still needed.
		Http.url(urlWithParams(this.offset)).cookies(getDACookie()).referrer(referer)
				.userAgent(userAgent).response();
	} catch (IOException e) {
		// A failed request must not stop the download thread from being queued.
		LOGGER.error("Request for offset " + this.offset + " failed", e);
	}

	// Start Thread and add to pool.
	DeviantartImageThread t = new DeviantartImageThread(url);
	deviantartThreadPool.addThread(t);

}
 
Example #3
Source File: DeviantartRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Advances the offset by 24 and returns the Document for the page at the new offset.
 * <p>
 * The page is first requested through the shared connection; if it contains
 * any element with class {@code message}, it is treated as the end of the
 * gallery. Otherwise the page is requested again through {@code Http} and
 * that Document is returned.
 *
 * @param doc the current page (not read by this method)
 * @return the Document of the next page
 * @throws IOException when the request fails or there are no more pages
 */
@Override
public Document getNextPage(Document doc) throws IOException {
	this.offset += 24;

	// Probe the next page through the shared connection.
	this.conn.url(urlWithParams(this.offset)).cookies(getDACookie());
	Response probe = this.conn.execute();
	Elements messages = probe.parse().getElementsByClass("message");
	LOGGER.info("Current Offset: " + this.offset);

	// If a message exists, this was the last page.
	if (!messages.isEmpty()) {
		LOGGER.info("Messages amount: " + messages.size() + " - Next Page does not exists");
		throw new IOException("No more pages");
	}

	return Http.url(urlWithParams(this.offset))
			.referrer(referer)
			.userAgent(userAgent)
			.cookies(getDACookie())
			.get();
}
 
Example #4
Source File: TwodgalleriesRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Logs in to 2dgalleries and stores the resulting cookies in {@code cookies}.
 * <p>
 * The gallery page is loaded first to obtain cookies and the {@code ctoken}
 * form value, which are then sent with the login POST.
 *
 * @throws IOException when either request fails
 */
private void login() throws IOException {
    // First request: session cookies plus the hidden ctoken form field.
    Response landing = Http.url(this.url).response();
    cookies = landing.cookies();
    String ctoken = landing.parse().select("form > input[name=ctoken]").first().attr("value");

    // The credentials are stored base64-encoded ("ripme" / "ripper").
    String username = new String(Base64.decode("cmlwbWU="));
    String password = new String(Base64.decode("cmlwcGVy"));

    Map<String,String> form = new HashMap<>();
    form.put("user[login]", username);
    form.put("user[password]", password);
    form.put("rememberme", "1");
    form.put("ctoken", ctoken);

    // Second request: the actual login; keep whatever cookies it returns.
    Response loginResp = Http.url("http://en.2dgalleries.com/account/login")
            .referrer("http://en.2dgalleries.com/")
            .cookies(cookies)
            .data(form)
            .method(Method.POST)
            .response();
    cookies = loginResp.cookies();
}
 
Example #5
Source File: ArtStationRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Requests the given URL with browser-like headers and parses the body as JSON.
 * HTTP errors and unexpected content types are not rejected by the
 * connection; the status code is checked here instead.
 *
 * @param url address to fetch
 * @return the response body parsed as a {@code JSONObject}
 * @throws IOException when the request fails or the status is not 2xx
 */
private JSONObject getJson(URL url) throws IOException {
    Connection con = Http.url(url).method(Method.GET).connection()
            .ignoreHttpErrors(true)
            .ignoreContentType(true)
            .userAgent(
                    "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11")
            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
            .header("Accept-Language", "en-US,en;q=0.5")
            .header("Upgrade-Insecure-Requests", "1");

    Response res = con.execute();
    int status = res.statusCode();
    boolean success = status >= 200 && status < 300;
    if (!success) {
        throw new IOException("Error fetching json. Status code:" + status);
    }
    return new JSONObject(res.body());
}
 
Example #6
Source File: SearchUtils.java    From emotional_analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Gets the name of a song.
 * <p>
 * Loads {@code http://music.163.com/m/song?id=<songId>} and returns the string
 * form of the first child node of the first element with class {@code f-ff2}.
 *
 * @param songId id appended to the song URL
 * @return the song name, or a fixed Chinese "not found" message when the page
 *         has no {@code f-ff2} element
 * @throws Exception when the request or parsing fails
 */
public static String getSongNameById(long songId) throws Exception{
	Response response = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache")
			.timeout(2000000000)
			.execute();
	Elements nameElements = response.parse().getElementsByClass("f-ff2");
	if (nameElements.isEmpty()) {
		// Fallback text returned when no title element is present.
		return "ES中歌曲在网易云音乐中找不到";
	}
	return nameElements.get(0).childNode(0).toString();
}
 
Example #7
Source File: SearchUtils.java    From emotional_analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Gets the singer of a song.
 * <p>
 * Loads {@code http://music.163.com/m/song?id=<songId>} and reads the singer
 * from the second element (index 1) with class {@code s-fc7}.
 *
 * @param songId id appended to the song URL
 * @return the string form of that element's first child node, or an empty
 *         string when the page has fewer than two {@code s-fc7} elements or
 *         the element has no child nodes
 * @throws Exception when the request or parsing fails
 */
public static String getSingerById(long songId) throws Exception{
	String singer = "";
	Response execute = Jsoup.connect("http://music.163.com/m/song?id=" + songId)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache").timeout(2000000000)
			.execute();
	Document parse = execute.parse();
	Elements elements = parse.getElementsByClass("s-fc7");
	// Index 1 is read below, so at least two matches are required; checking
	// only for a non-empty result would throw when exactly one element matched.
	if(elements.size() > 1){
		Element singerElement = elements.get(1);
		// childNode(0) throws on an element without children.
		if(singerElement.childNodeSize() > 0){
			Node singerChildNode = singerElement.childNode(0);
			singer = singerChildNode.toString();
		}
	}
	return singer;
}
 
Example #8
Source File: SongTest.java    From emotional_analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the singer and the album out of a song page and prints both.
 *
 * @throws Exception when the request or parsing fails
 */
@Test
public void test4() throws Exception{
	Elements links = Jsoup.connect("http://music.163.com/song?id=63650")
			.ignoreContentType(true)
			.execute()
			.parse()
			.getElementsByClass("s-fc7");
	// Among the s-fc7 elements, index 1 is read as the singer and index 2 as the album.
	String singer = links.get(1).childNode(0).toString();
	String album = links.get(2).childNode(0).toString();
	System.out.println(singer+"--------"+album);
}
 
Example #9
Source File: PersonalTest.java    From emotional_analysis with Apache License 2.0 6 votes vote down vote up
/**
 * Fetches a user's playlist page through an HTTP proxy and prints the raw response body.
 *
 * @throws Exception when encryption or the request fails
 */
@Test
public void test2() throws Exception{
	System.setProperty("http.maxRedirects", "5000");
	// Original note: "proxySet" may be left unset as long as the proxy host and port are correct.
	System.getProperties().setProperty("proxySet", "true");
	System.getProperties().setProperty("http.proxyHost", "139.224.80.139");
	System.getProperties().setProperty("http.proxyPort", "3128");

	// Random 16-character key; the payload is AES-encrypted with the fixed key
	// first, then with the random key.
	String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16);
	String payload = "{\"uid\":2763211,\"offset\":0,\"limit\":50};";
	String innerCipher = EncryptUtils.aesEncrypt(payload, "0CoJUm6Qyw8W8jud");
	String encText = EncryptUtils.aesEncrypt(innerCipher, secKey);
	String encSecKey = EncryptUtils.rsaEncrypt(secKey);

	Response reply = Jsoup.connect("http://music.163.com/weapi/user/playlist")
			.data("params", encText)
			.data("encSecKey", encSecKey)
			.method(Method.POST)
			.ignoreContentType(true)
			.execute();
	System.out.println(reply.body());
}
 
Example #10
Source File: CommonThread.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Pages through the comments of the song identified by {@code songId} and
 * prints every comment.
 * <p>
 * After each successful page, {@code pageSize} is updated to
 * {@code total / 10}, which also bounds the loop. A failing page is reported
 * on stderr and skipped so the remaining pages are still attempted.
 */
@Override
public void run() {
	// ObjectMapper is reusable; build it once instead of once per page.
	ObjectMapper objectMapper = new ObjectMapper();

	for(int i = 0;i <= pageSize;i++){
		try{
			String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16);//limit
			// NOTE(review): "limit" grows with the page index ((i+1)*10); a fixed page
			// size of 10 may have been intended - confirm against the API.
			String encText = EncryptUtils.aesEncrypt(EncryptUtils.aesEncrypt("{\"offset\":"+ i * 10 +",\"limit\":"+(i+1) * 10+"};","0CoJUm6Qyw8W8jud"), secKey);
			String encSecKey = EncryptUtils.rsaEncrypt(secKey);
			Response execute = Jsoup.connect("http://music.163.com/weapi/v1/resource/comments/R_SO_4_"+songId)
					.data("params",encText)
					.data("encSecKey",encSecKey)
					.method(Method.POST).ignoreContentType(true).timeout(2000000000).execute();
			String string = execute.body();
			System.out.println(string);
			// Hand Jackson the String directly: a getBytes() round trip goes through the
			// platform default charset and can mangle non-ASCII comment text.
			CommentBean readValue = objectMapper.readValue(string, CommentBean.class);
			long total = readValue.getTotal();
			pageSize = total / 10;
			List<Comments> comments = readValue.getComments();
			for (Comments comments2 : comments) {
				String content = comments2.getContent();
				long time = comments2.getTime();
				User user = comments2.getUser();
				String avatarUrl = user.getAvatarUrl();
				String nickname = user.getNickname();
				long userId = user.getUserId();
				//========================================= data persistence ==========================
				System.out.println("昵称:"+nickname+"评论内容为:"+content+"评论时间为:"+time+"头像地址"+avatarUrl+"用户的ID"+userId);
			}
		}catch(Exception e){
			// Best effort: keep going with the next page, but do not hide the failure.
			System.err.println("Failed to fetch comments page " + i + " for song " + songId + ": " + e);
		}
	}
}
 
Example #11
Source File: Downloader.java    From MMDownloader with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches and parses an archive page with Jsoup.
 *
 * @param eachArchiveAddress address of the archive that holds the actual comic
 * @return the HTML of the parsed page on success
 * @throws Exception when the request fails; a {@code RuntimeException} is
 *         thrown when the page has no {@code div.gallery-template}
 */
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
	print.info("고속 연결 시도중...\n");

	// The password is sent along with the very first request.
	// NOTE(review): the original note says "POST", but no method is set on the connection here - confirm.
	Response response = Jsoup.connect(eachArchiveAddress)
			.userAgent(UserAgent.getUserAgent())
			.header("charset", "utf-8")
			.header("Accept-Encoding", "gzip") // gzip added 2017-11-26
			.timeout(MAX_WAIT_TIME)
			.data("pass", PASSWORD) // as of 2018-04-29 the site uses reCaptcha, so this has no effect
			.followRedirects(true)
			.execute();

	// Keep the parsed HTML that was received.
	Document preDoc = response.parse();

	// <div class="gallery-template"> is where the comic lives; without it parsing has failed.
	boolean galleryMissing = preDoc.select("div.gallery-template").isEmpty();
	if (galleryMissing) {
		throw new RuntimeException("Jsoup Parsing Failed: No tag found");
	}

	String pageSource = preDoc.toString();
	print.info("고속 연결 성공!\n");
	return pageSource;
}
 
Example #12
Source File: IpProxy.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Scrapes proxy IP/port pairs from the table on the given page.
 * <p>
 * The table rows are located through fixed child-node positions in the parsed
 * body, so this breaks if the page layout changes.
 *
 * @param url page that lists the proxies
 * @return the entries read from child nodes 2, 4, ..., 30 of the table body
 *         (15 entries; the original comment spoke of the first 10)
 * @throws Exception when the request fails or the page does not have the expected structure
 */
public static List<IpEntity> getProxyIp(String url) throws Exception{
	// NOTE(review): the Cookie header below embeds what look like real session tokens - confirm and rotate.
	Response response = Jsoup.connect(url)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "max-age=60")
			.header("Accept", "*/*")
			.header("Accept-Language", "zh-CN,zh;q=0.8,en;q=0.6")
			.header("Connection", "keep-alive")
			.header("Referer", "http://music.163.com/song?id=186016")
			.header("Origin", "http://music.163.com")
			.header("Host", "music.163.com")
			.header("Content-Type", "application/x-www-form-urlencoded")
			.header("Cookie",
					"UM_distinctid=15e9863cf14335-0a09f939cd2af9-6d1b137c-100200-15e9863cf157f1; vjuids=414b87eb3.15e9863cfc1.0.ec99d6f660d09; _ntes_nnid=4543481cc76ab2fd3110ecaafd5f1288,1505795231854; _ntes_nuid=4543481cc76ab2fd3110ecaafd5f1288; __s_=1; __gads=ID=6cbc4ab41878c6b9:T=1505795247:S=ALNI_MbCe-bAY4kZyMbVKlS4T2BSuY75kw; usertrack=c+xxC1nMphjBCzKpBPJjAg==; NTES_CMT_USER_INFO=100899097%7Cm187****4250%7C%7Cfalse%7CbTE4NzAzNDE0MjUwQDE2My5jb20%3D; [email protected]|1507178162|2|mail163|00&99|CA&1506163335&mail163#hun&430800#10#0#0|187250&1|163|[email protected]; vinfo_n_f_l_n3=8ba0369be425c0d2.1.7.1505795231863.1507950353704.1508150387844; vjlast=1505795232.1508150167.11; Province=0450; City=0454; _ga=GA1.2.1044198758.1506584097; _gid=GA1.2.763458995.1508907342; JSESSIONID-WYYY=Zm%2FnBG6%2B1vb%2BfJp%5CJP8nIyBZQfABmnAiIqMM8fgXABoqI0PdVq%2FpCsSPDROY1APPaZnFgh14pR2pV9E0Vdv2DaO%2BKkifMncYvxRVlOKMEGzq9dTcC%2F0PI07KWacWqGpwO88GviAmX%2BVuDkIVNBEquDrJ4QKhTZ2dzyGD%2Bd2T%2BbiztinJ%3A1508946396692; _iuqxldmzr_=32; playerid=20572717; MUSIC_U=39d0b2b5e15675f10fd5d9c05e8a5d593c61fcb81368d4431bab029c28eff977d4a57de2f409f533b482feaf99a1b61e80836282123441c67df96e4bf32a71bc38be3a5b629323e7bf122d59fa1ed6a2; __remember_me=true; __csrf=2032a8f34f1f92412a49ba3d6f68b2db; __utma=94650624.1044198758.1506584097.1508939111.1508942690.40; __utmb=94650624.20.10.1508942690; __utmc=94650624; __utmz=94650624.1508394258.18.4.utmcsr=xujin.org|utmccn=(referral)|utmcmd=referral|utmcct=/")
			.method(Method.GET)
			.ignoreContentType(true)
			.timeout(2099999999)
			.execute();

	// Walk down to the table body through fixed child positions.
	List<Node> rows = response.parse().body()
			.childNode(11).childNode(3).childNode(5).childNode(1).childNodes();

	List<IpEntity> proxies = new ArrayList<>();
	// Every second child node from index 2 through 30 is read as a data row.
	for (int row = 2; row <= 30; row += 2) {
		IpEntity proxy = new IpEntity();
		List<Node> cells = rows.get(row).childNodes();
		String ip = cells.get(3).childNode(0).toString();
		int port = Integer.parseInt(cells.get(5).childNode(0).toString());
		proxy.setIp(ip);
		proxy.setPort(port);
		proxies.add(proxy);
	}
	return proxies;
}
 
Example #13
Source File: FollowingTest.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Requests the followers list once for one user, then pages through the
 * followers of a second user in steps of 100 and imports every non-empty
 * response until the offset reaches 1000.
 *
 * @throws Exception when encryption, a request, or the import fails
 */
@Test
	public void test3() throws Exception {
		final String endpoint = "http://music.163.com/weapi/user/getfolloweds";
		final String fixedKey = "0CoJUm6Qyw8W8jud";

		String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16);
		String encSecKey = EncryptUtils.rsaEncrypt(secKey);

		// Initial request; its response is not inspected.
		String firstParams = EncryptUtils.aesEncrypt(
				EncryptUtils.aesEncrypt("{\"userId\":63362967,\"offset\":0,\"limit\":100};", fixedKey),
				secKey);
		Jsoup.connect(endpoint).data("params", firstParams)
				.data("encSecKey", encSecKey).method(Method.POST).ignoreContentType(true).execute();

		for (int offset = 0; ; offset += 100) {
			String payload = "{\"userId\":92271210,\"offset\":" + offset + ",\"limit\":100};";
			String pageParams = EncryptUtils.aesEncrypt(EncryptUtils.aesEncrypt(payload, fixedKey), secKey);
			Response page = Jsoup.connect(endpoint).data("params", pageParams)
					.data("encSecKey", encSecKey).method(Method.POST).ignoreContentType(true).execute();
			String body = page.body();
			// Stop on an empty body, or once offset 1000 has been requested.
			if (body.equals("") || offset == 1000) {
				break;
			}
			System.out.println(body);
			new FileSourceUtils().importData(body, "follow");
		}
	}
 
Example #14
Source File: FollowerTest.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Requests the follows list of user 380377129 and prints the raw response body.
 *
 * @throws Exception when encryption or the request fails
 */
@Test
public void test3() throws Exception{
	String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16);
	// NOTE(review): the payload carries the "offset" key twice - confirm whether another key was intended.
	String payload = "{\"offset\":0,\"offset\":0,\"limit\":50};";
	String innerCipher = EncryptUtils.aesEncrypt(payload, "0CoJUm6Qyw8W8jud");
	String encText = EncryptUtils.aesEncrypt(innerCipher, secKey);
	String encSecKey = EncryptUtils.rsaEncrypt(secKey);

	Response reply = Jsoup.connect("http://music.163.com/weapi/user/getfollows/380377129")
			.data("params", encText)
			.data("encSecKey", encSecKey)
			.method(Method.POST)
			.ignoreContentType(true)
			.execute();
	System.out.println(reply.body());
}
 
Example #15
Source File: PersonalTest.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Requests the personal activity feed ("events") of user 2763211 and prints
 * the raw response body.
 *
 * @throws Exception when encryption or the request fails
 */
@Test
public void test3() throws Exception{
	String secKey = new BigInteger(100, new SecureRandom()).toString(32).substring(0, 16);
	// Encrypt with the fixed key first, then with the random key.
	String payload = "{\"uid\":2763211,\"offset\":0,\"limit\":50};";
	String innerCipher = EncryptUtils.aesEncrypt(payload, "0CoJUm6Qyw8W8jud");
	String encText = EncryptUtils.aesEncrypt(innerCipher, secKey);
	String encSecKey = EncryptUtils.rsaEncrypt(secKey);

	Response reply = Jsoup.connect("http://music.163.com/weapi/event/get/2763211")
			.data("params", encText)
			.data("encSecKey", encSecKey)
			.method(Method.POST)
			.ignoreContentType(true)
			.execute();
	System.out.println(reply.body());
}
 
Example #16
Source File: FileServiceImpl.java    From albert with MIT License 5 votes vote down vote up
/**
 * Downloads the resource behind {@code href} and returns its raw bytes.
 *
 * @param href address handed to {@code getConnect}
 * @return the response body as bytes
 * @throws RuntimeException wrapping the {@link IOException} when the request fails
 */
public byte[] getTorrent(String href) {
	final Response torrentResponse;
	try {
		// Content type is ignored so non-HTML payloads are accepted.
		torrentResponse = getConnect(href).ignoreContentType(true).execute();
	} catch (IOException ioe) {
		throw new RuntimeException(ioe);
	}
	return torrentResponse.bodyAsBytes();
}
 
Example #17
Source File: GetSongName.java    From emotional_analysis with Apache License 2.0 5 votes vote down vote up
/**
 * Loads the page of song 91445 and prints the song name, read from the first
 * child node of the first element with class {@code f-ff2}.
 *
 * @throws Exception when the request or parsing fails
 */
@Test
public void test1() throws Exception{
	Response response = Jsoup.connect("http://music.163.com/m/song?id=" + 91445)
			.header("User-Agent",
					"Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36")
			.header("Cache-Control", "no-cache")
			.timeout(2000000000)
			.execute();
	Elements titleElements = response.parse().getElementsByClass("f-ff2");
	// Get the song name.
	String songName = titleElements.get(0).childNode(0).toString();
	System.out.println(songName);
}
 
Example #18
Source File: AmazonLoginApater.java    From crawler-jsoup-maven with Apache License 2.0 5 votes vote down vote up
/**
 * Posts login form data to the JD passport page and prints the cookies
 * returned with the response. Any failure is printed as a stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {
    try {
        // Form parameters and their values, as found in the login form.
        Map<String, String> formFields = new HashMap<String, String>();
        formFields.put("email", "[email protected]");
        formFields.put("password", "bluetata");

        Response loginResponse = Jsoup.connect("https://passport.jd.com/new/login.aspx")
                .userAgent(USER_AGENT)
                .timeout(TIMEOUT_UNIT * TIMEOUT_TIMES)
                .data(formFields)
                .method(Method.POST)
                .followRedirects(true)
                .execute();
        System.out.println("Fetched login page");

        // Cookies from the response; they would be posted to the action URL next.
        Map<String, String> loginCookies = loginResponse.cookies();
        System.out.println(loginCookies);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
 
Example #19
Source File: TestApiOfConnect.java    From crawler-jsoup-maven with Apache License 2.0 5 votes vote down vote up
/**
 * Sends a JSON request body with browser-like headers via POST to
 * bluetata.com. The response is obtained but not inspected.
 *
 * @param args unused
 * @throws IOException when the request fails
 */
public static void main(String[] args) throws IOException {
    String jsonBody = "{\"name\":\"ACTIVATE\",\"value\":\"E0010\"}";

    // Configure the connection step by step; every setter returns the same Connection.
    Connection connection = Jsoup.connect("http://bluetata.com/");
    connection.userAgent("Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"); // User-Agent of Chrome 55
    connection.referrer("http://bluetata.com/");
    connection.header("Content-Type", "application/json; charset=UTF-8"); // marked as a key point in the original
    connection.header("Accept", "text/plain, */*; q=0.01");
    connection.header("Accept-Encoding", "gzip,deflate,sdch");
    connection.header("Accept-Language", "es-ES,es;q=0.8");
    connection.header("Connection", "keep-alive");
    connection.header("X-Requested-With", "XMLHttpRequest");
    connection.requestBody(jsonBody);
    connection.maxBodySize(100);
    connection.timeout(1000 * 10);
    connection.method(Connection.Method.POST);

    Response response = connection.execute();
}
 
Example #20
Source File: AccResHead2ParseHTML.java    From crawler-jsoup-maven with Apache License 2.0 5 votes vote down vote up
/**
 * Determines the charset of a page from its response header and then parses
 * the page with that charset.
 * <p>
 * A first request is made only to read the charset reported by the response
 * (falling back to UTF-8 when none is reported). The page is then downloaded
 * again as a raw stream and parsed with the chosen charset.
 *
 * @param args unused
 * @throws Exception when a request or parsing fails
 */
public static void main(String[] args) throws Exception {

        // String _url = "http://61.161.221.4:8088/mainservlet?actionType=INDEX";
        String _url = "https://rate.taobao.com/feedRateList.htm?auctionNumId=552463737787&userNumId=1703495879&currentPageNum=1&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvqvvRvPpvUpCkvvvvvjiPPLcyljlbRsqwsjnEPmPpsjt8RLMvtjiER2q9ljnvRsyCvvBvpvvv9phv2n1w3xqBzYswM20S79wCvvNwzHi4z0CNiQhvChCvCCptvpvhphvvvvyCvh1vVxOvITlz8eQEfaAK53n5WDKt5BwsWD6rfuVHR4hSoAZnD704deDHEcqhaXTAVAIanixreTt%2BCclWQRp4e0Q4b64B9CkaU6UsxI2hKphv8hCvvvvvvhCvphvZJ9vvpu1vpC9CvvC216CvHjIvvhPjphvZK9vvpYJivpvUphvh3cUYvR7EvpvVp6WUCEIXvphvCyCCvvvvvvGCvvpvvvvv3QhvChCCvvvtvpvhphvvv86CvvDvppWpJpCv7OQ%3D&_ksTS=1519956772500_2207&callback=jsonp_tbcrate_reviews_list";
        String defaultCharset = "UTF-8";

        // according to response header to get parsed page char-set.
        Connection connection = Jsoup.connect(_url)
                .userAgent("Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36") // User-Agent of Chrome 55
                .referrer("http://blog.csdn.net/")
                .header("Content-Type", "application/json; charset=GBK")
                .header("Accept", "text/plain, */*; q=0.01")
                .header("Accept-Encoding", "gzip,deflate,sdch")
                .header("Accept-Language", "es-ES,es;q=0.8")
                .header("Connection", "keep-alive")
                .header("X-Requested-With", "XMLHttpRequest")
                .maxBodySize(100)
                .timeout(1000 * 10)
                .method(Connection.Method.POST);

        Response response = connection.execute();
        String charset = response.charset();
        System.out.println("charset:" + charset);

        if (null != charset && !charset.isEmpty()) {
            defaultCharset = charset;
        }

        // Close the raw stream explicitly; the original never released it.
        java.io.InputStream pageStream = new URL(_url).openStream();
        Document doc;
        try {
            doc = Jsoup.parse(pageStream, defaultCharset, _url);
        } finally {
            pageStream.close();
        }
        System.out.println(doc);

        System.out.println("----------:" + JsoupUtil.getStaticCharset(doc));
    }
 
Example #21
Source File: WebConnector.java    From JavaSkype with MIT License 5 votes vote down vote up
/**
 * Sends a request to the web API and returns the raw response.
 * HTTP errors and content types are ignored by the connection, so the caller
 * has to inspect the response itself.
 *
 * @param method          HTTP method to use
 * @param apiPath         full URL, or a path appended to {@code SERVER_HOSTNAME}
 * @param absoluteApiPath {@code true} when {@code apiPath} is already a full URL
 * @param keyval          request data as alternating keys and values
 * @return the response of the executed request
 * @throws IOException when the request fails
 */
private Response sendRequest(Method method, String apiPath, boolean absoluteApiPath, String... keyval) throws IOException {
  String url;
  if (absoluteApiPath) {
    url = apiPath;
  } else {
    url = SERVER_HOSTNAME + apiPath;
  }

  Connection conn = Jsoup.connect(url)
      .maxBodySize(100 * 1024 * 1024)
      .timeout(10000)
      .method(method)
      .ignoreContentType(true)
      .ignoreHttpErrors(true);
  logger.finest("Sending " + method + " request at " + url);

  // Attach the token when one is available; otherwise only note its absence.
  if (skypeToken == null) {
    logger.fine("No token sent for the request at: " + url);
  } else {
    conn.header("X-Skypetoken", skypeToken);
  }

  conn.data(keyval);
  return conn.execute();
}
 
Example #22
Source File: CardCatalogue.java    From metastone with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Looks up a card by its Blizzard id.
 * <p>
 * The upper-cased id is used to request the card's stats page on
 * metastats.net; the text of the first {@code <h3>} on that page is then
 * passed to {@code getCardByName}.
 *
 * @param id Blizzard card id (case-insensitive)
 * @return the result of {@code getCardByName} for the heading text
 * @throws IOException when the request fails or the page contains no {@code <h3>}
 */
public static Card getCardByBlizzardId(String id) throws IOException{
	// Locale.ROOT keeps the upper-casing independent of the default locale
	// (e.g. the Turkish dotless i would otherwise change the id).
	String cardId = id.toUpperCase(java.util.Locale.ROOT);
	Response response= Jsoup.connect("http://metastats.net/cardstats/" + cardId + "/")
	           .ignoreContentType(true)
	           .userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:25.0) Gecko/20100101 Firefox/25.0")  
	           .referrer("http://www.google.com")   
	           .timeout(12000) 
	           .followRedirects(true)
	           .execute();
	Document doc = response.parse();
	org.jsoup.select.Elements headings = doc.getElementsByTag("h3");
	// Fail with a descriptive error instead of an IndexOutOfBoundsException.
	if (headings.isEmpty()) {
		throw new IOException("No <h3> heading found on stats page for card id " + cardId);
	}
	return getCardByName(headings.get(0).text());
}
 
Example #23
Source File: DownloadPageTask.java    From coolreader with MIT License 5 votes vote down vote up
/**
 * Downloads and parses the first URL passed in.
 *
 * @param arg0 URLs to load; only the first one is used
 * @return a result wrapping the parsed Document, or wrapping the exception
 *         when anything goes wrong
 */
@Override
protected AsyncTaskResult<Document> doInBackground(URL... arg0) {
	try {
		String pageUrl = arg0[0].toString();
		Log.d("DownloadPageTask", "Downloading: " + pageUrl);
		Response response = Jsoup.connect(pageUrl).timeout(7000).execute();
		// "Complete" refers to the download; parsing happens afterwards.
		Log.d("DownloadPageTask", "Complete: " + pageUrl);
		Document parsed = response.parse();
		return new AsyncTaskResult<Document>(parsed);
	} catch (Exception e) {
		return new AsyncTaskResult<Document>(e);
	}
}
 
Example #24
Source File: MovieServiceImpl.java    From albert with MIT License 5 votes vote down vote up
/**
 * Downloads the resource behind {@code href} and returns its raw bytes.
 *
 * @param href address handed to {@code getConnect}
 * @return the response body as bytes
 * @throws RuntimeException wrapping the {@link IOException} when the request fails
 */
@Override
public byte[] getTorrent(String href) {
	try {
		// Content type is ignored so non-HTML payloads are accepted.
		Response download = getConnect(href).ignoreContentType(true).execute();
		return download.bodyAsBytes();
	} catch (IOException cause) {
		throw new RuntimeException(cause);
	}
}
 
Example #25
Source File: EroShareRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Collects the media URLs of an album page.
 * <p>
 * Pictures are {@code <img>} elements with class {@code album-image}; videos
 * are {@code <video>} elements with class {@code album-video}, whose address
 * is taken from their first {@code <source>} child.
 *
 * @param url address of the album page
 * @return the picture URLs followed by the video URLs, in document order
 * @throws IOException when the request fails or a {@code src} value is not a valid URL
 */
public static List<URL> getURLs(URL url) throws IOException{

        Response resp = Http.url(url)
                            .ignoreContentType()
                            .response();

        Document doc = resp.parse();

        List<URL> URLs = new ArrayList<>();
        //Pictures
        Elements imgs = doc.getElementsByTag("img");
        for (Element img : imgs) {
            if (img.hasClass("album-image")) {
                String imageURL = img.attr("src");
                URLs.add(new URL(imageURL));
            }
        }
        //Videos
        Elements vids = doc.getElementsByTag("video");
        for (Element vid : vids) {
            if (vid.hasClass("album-video")) {
                // first() returns null when the video has no <source>; skip it
                // rather than failing the whole album with a NullPointerException.
                Element source = vid.getElementsByTag("source").first();
                if (source == null) {
                    continue;
                }
                String videoURL = source.attr("src");
                URLs.add(new URL(videoURL));
            }
        }

        return URLs;
    }
 
Example #26
Source File: EroShareRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Loads the first page of the album.
 *
 * @return the parsed page
 * @throws IOException when the request fails
 */
@Override
public Document getFirstPage() throws IOException {
    // NOTE(review): the host is rewritten to "eroshae.com"; this looks like a
    // deliberate mirror domain rather than a typo - confirm.
    String mirrorUrl = this.url.toExternalForm().replace("eroshare.com", "eroshae.com");
    Response resp = Http.url(mirrorUrl).ignoreContentType().response();
    return resp.parse();
}
 
Example #27
Source File: ThechiveRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Moves on to the next page of uploads.
 * <p>
 * URLs matching {@code p1} (thechive.com/YEAR/MONTH/DAY/POSTTITLE/) have a
 * single page, so {@code null} is returned for them. For other URLs the next
 * JSON object is fetched with {@code nextSeed} and checked for images up
 * front, which avoids the IOException in rip() that an empty list from
 * getURLsFromPage() would cause.
 *
 * @param doc the current page (not read by this method)
 * @return an empty Document when the next page has uploads, otherwise {@code null}
 * @throws IOException when there is no next seed or the JSON cannot be fetched
 */
@Override
public Document getNextPage(Document doc) throws IOException {
    boolean singlePagePost = p1.matcher(url.toExternalForm()).matches();
    if (singlePagePost) {
        return null;
    }
    if (nextSeed == null) {
        throw new IOException("No more pages.");
    }

    JSONArray uploads;
    try {
        Response response = Http.url(jsonUrl)
                .data("seed", nextSeed)
                .data("queryType", "by-username")
                .data("username", username)
                .ignoreContentType()
                .cookies(cookies)
                .response();
        cookies = response.cookies();
        uploads = new JSONObject(response.body()).getJSONArray("uploads");
    } catch (Exception e) {
        throw new IOException("Error fetching next page.", e);
    }

    boolean hasUploads = uploads != null && uploads.length() > 0;
    // The Document content is of no use for the userName url type, so an empty
    // one is passed; null marks the last page.
    return hasUploads ? new Document(url.toString()) : null;
}
 
Example #28
Source File: EightmusesRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Returns the album page, loading it on first use.
 * The response cookies are merged into {@code cookies} when the page is loaded.
 *
 * @return the cached or freshly loaded album Document
 * @throws IOException when the request fails
 */
@Override
public Document getFirstPage() throws IOException {
    if (albumDoc != null) {
        return albumDoc;
    }
    Response firstResponse = Http.url(url).response();
    cookies.putAll(firstResponse.cookies());
    albumDoc = firstResponse.parse();
    return albumDoc;
}
 
Example #29
Source File: FuraffinityRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Fetches an image page and extracts its description as plain text.
 * <p>
 * The description is read from the first {@code td[class=alt1][width="70%"]}
 * cell, so this breaks if FurAffinity changes its layout. Line breaks and
 * paragraphs are converted to line separators before all markup is removed.
 *
 * @param page address of the image page
 * @return the page's {@code og:title} on the first line followed by the
 *         description, or {@code null} when the page cannot be loaded or has
 *         no description
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse once and reuse the Document; the original parsed the response
        // twice and ran the same selector on both copies.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line breaks with a literal "\n" so they survive the tag stripping below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
Example #30
Source File: VkRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Resolves the best source URL for a single photo.
 * <p>
 * Posts a "show photo" request to {@code al_photos.php}, parses the JSON
 * response, and looks up the object that describes the photo.
 *
 * @param photoID id of the photo to resolve
 * @return a map with one entry (photo id to URL), or an empty map when no
 *         source URL was found
 * @throws IOException when the request fails
 */
private Map<String,String> getPhotoIDsToURLs(String photoID) throws IOException {
    // Form body, e.g. act=show&al=1&list=album45506334_172415053&module=photos&photo=45506334_304658196
    Map<String,String> form = new HashMap<>();
    form.put("list", getGID(this.url));
    form.put("act", "show");
    form.put("al", "1");
    form.put("module", "photos");
    form.put("photo", photoID);

    Response res = Jsoup.connect("https://vk.com/al_photos.php")
            .header("Referer", this.url.toExternalForm())
            .header("Accept", "*/*")
            .header("Accept-Language", "en-US,en;q=0.5")
            .header("Content-Type", "application/x-www-form-urlencoded")
            .header("X-Requested-With", "XMLHttpRequest")
            .ignoreContentType(true)
            .userAgent(USER_AGENT)
            .timeout(5000)
            .data(form)
            .method(Method.POST)
            .execute();

    JSONObject json = new JSONObject(res.body());
    JSONObject photoObject = findJSONObjectContainingPhotoId(photoID, json);
    String bestSourceUrl = getBestSourceUrl(photoObject);

    Map<String,String> photoIDsToURLs = new HashMap<>();
    if (bestSourceUrl == null) {
        LOGGER.error("Could not find image source for " + photoID);
    } else {
        photoIDsToURLs.put(photoID, bestSourceUrl);
    }
    return photoIDsToURLs;
}