Java Code Examples for org.jsoup.Connection#execute()

The following examples show how to use org.jsoup.Connection#execute(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Test fetching a form, and submitting it with a file attached.
 * <p>
 * Loads the page, takes the first form matched by {@code [name=tidy]}, attaches a local HTML
 * file to the form's {@code _file} entry as an input stream, submits it, and asserts that the
 * text of the parsed response contains "HTML Tidy Complete".
 *
 * @throws IOException if fetching the form, reading the file, or submitting fails
 */
@Test
public void postHtmlFile() throws IOException {
    Document index = Jsoup.connect("http://direct.infohound.net/tidy/").get();
    FormElement form = index.select("[name=tidy]").forms().get(0);
    Connection post = form.submit();

    File uploadFile = ParseTest.getFile("/htmltests/google-ipod.html");
    FileInputStream stream = new FileInputStream(uploadFile);

    Connection.Response res;
    try {
        // The stream is wired into the request inside the try so that the file handle is
        // released even when one of these setup calls throws before execute() is reached.
        Connection.KeyVal fileData = post.data("_file");
        fileData.value("check.html");
        fileData.inputStream(stream);

        res = post.execute();
    } finally {
        stream.close();
    }

    Document out = res.parse();
    assertTrue(out.text().contains("HTML Tidy Complete"));
}
 
Example 2
Source File: ArtStationRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Requests {@code url} with browser-like headers and wraps the response body in a
 * {@link JSONObject}.
 *
 * @param url the address to request
 * @return the response body as a JSON object
 * @throws IOException if the request fails or the status code is not in the 2xx range
 */
private JSONObject getJson(URL url) throws IOException {
    Connection request = Http.url(url).method(Method.GET).connection();

    // HTTP error statuses and unexpected content types are handled by the status check below.
    request.ignoreHttpErrors(true);
    request.ignoreContentType(true);

    request.userAgent(
            "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11");
    request.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    request.header("Accept-Language", "en-US,en;q=0.5");
    // An explicit Accept-Encoding header ("gzip, deflate, br") is left disabled here.
    request.header("Upgrade-Insecure-Requests", "1");

    Response response = request.execute();
    int statusCode = response.statusCode();

    // Anything outside the 2xx family is reported to the caller as an error.
    if (statusCode / 100 != 2) {
        throw new IOException("Error fetching json. Status code:" + statusCode);
    }
    return new JSONObject(response.body());
}
 
Example 3
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
/**
 * Requests the 200-no-content endpoint twice, first with the default parser and then with
 * the XML parser, parsing each response and asserting a 200 status code both times.
 */
@Test
public void handles200WithNoContent() throws IOException {
    Connection.Response defaultParserRes = Jsoup
        .connect("http://direct.infohound.net/tools/200-no-content.pl")
        .userAgent(browserUa)
        .execute();
    // The parsed document is not inspected; the call only has to complete without throwing.
    defaultParserRes.parse();
    assertEquals(200, defaultParserRes.statusCode());

    Connection.Response xmlParserRes = Jsoup
        .connect("http://direct.infohound.net/tools/200-no-content.pl")
        .parser(Parser.xmlParser())
        .userAgent(browserUa)
        .execute();
    xmlParserRes.parse();
    assertEquals(200, xmlParserRes.statusCode());
}
 
Example 4
Source File: PasswordNetworkManager.java    From Shaarlier with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Publishes a link to shaarli by posting the edit form.
 * Assumes the session is already logged in.
 * TODO: use the prefetch function
 *
 * @param link the link whose URL, title, description, tags and flags are posted
 * @throws IOException if encoding the URL or one of the requests fails
 */
@Override
public void pushLink(Link link) throws IOException {
    String encodedShareUrl = URLEncoder.encode(link.getUrl(), "UTF-8");
    retrievePostLinkToken(encodedShareUrl);

    // Only take over the link's URL when it passes the isUrl check; otherwise mSharedUrl is
    // left untouched so that the value chosen by the server gets posted.
    if (NetworkUtils.isUrl(link.getUrl())) {
        this.mSharedUrl = link.getUrl();
    }

    Connection submission = this
            .newConnection(this.mShaarliUrl + "?post=" + encodedShareUrl, Connection.Method.POST)
            .data("save_edit", "Save")
            .data("token", this.mToken)
            .data("lf_tags", link.getTags())
            .data("lf_linkdate", this.mDatePostLink)
            .data("lf_url", this.mSharedUrl)
            .data("lf_title", link.getTitle())
            .data("lf_description", link.getDescription());

    // Optional flags are only sent when they are switched on.
    if (link.isPrivate()) {
        submission.data("lf_private", "on");
    }
    if (link.isTweet()) {
        submission.data("tweet", "on");
    }
    if (link.isToot()) {
        submission.data("toot", "on");
    }

    // Finally send the POST.
    submission.execute();
}
 
Example 5
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 500 endpoint with {@code ignoreHttpErrors(true)} and checks the status code,
 * the status message, and the text of the first {@code h1} in the parsed body.
 */
@Test
public void ignores500tExceptionIfSoConfigured() throws IOException {
    Connection.Response response = Jsoup
        .connect("http://direct.infohound.net/tools/500.pl")
        .ignoreHttpErrors(true)
        .execute();
    Document page = response.parse();

    assertEquals(500, response.statusCode());
    assertEquals("Application Error", response.statusMessage());
    assertEquals("Woops", page.select("h1").first().text());
}
 
Example 6
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 200-no-content endpoint with {@code ignoreHttpErrors(true)}, parses the
 * response, and checks the status code and status message.
 */
@Test
public void ignores200NoWithContentExceptionIfSoConfigured() throws IOException {
    Connection.Response response = Jsoup
        .connect("http://direct.infohound.net/tools/200-no-content.pl")
        .ignoreHttpErrors(true)
        .execute();
    // The parsed document is not inspected; parsing only has to complete.
    response.parse();

    assertEquals(200, response.statusCode());
    assertEquals("All Good", response.statusMessage());
}
 
Example 7
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 404 endpoint with {@code ignoreHttpErrors(true)} and checks the status code
 * and the text of the first {@code h1} in the parsed body.
 */
@Test
public void ignoresExceptionIfSoConfigured() throws IOException {
    Connection.Response response = Jsoup
        .connect("http://direct.infohound.net/tools/404")
        .ignoreHttpErrors(true)
        .execute();
    Document errorPage = response.parse();

    assertEquals(404, response.statusCode());
    assertEquals("404 Not Found", errorPage.select("h1").first().text());
}
 
Example 8
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * With HTTP errors ignored, the 500 endpoint's response is parsed and its status code,
 * status message and first {@code h1} text are asserted.
 */
@Test
public void ignores500tExceptionIfSoConfigured() throws IOException {
    final String errorUrl = "http://direct.infohound.net/tools/500.pl";
    Connection request = Jsoup.connect(errorUrl).ignoreHttpErrors(true);

    Connection.Response reply = request.execute();
    Document body = reply.parse();

    assertEquals(500, reply.statusCode());
    assertEquals("Application Error", reply.statusMessage());

    String heading = body.select("h1").first().text();
    assertEquals("Woops", heading);
}
 
Example 9
Source File: UrlConnectTest.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Reads the cookies from the 302-cookie endpoint's response, asserts the {@code token} and
 * {@code uid} values, then sends the same cookie map to the echo URL and asserts the
 * echoed {@code HTTP_COOKIE} value.
 */
@Test
public void multiCookieSet() throws IOException {
    Connection.Response redirected = Jsoup
        .connect("http://direct.infohound.net/tools/302-cookie.pl")
        .execute();

    // Cookies set by the redirect:
    Map<String, String> received = redirected.cookies();
    assertEquals("asdfg123", received.get("token"));
    assertEquals("jhy", received.get("uid"));

    // Pass the whole map on to the echo URL and check what arrived there:
    Document echoed = Jsoup.connect(echoURL).cookies(received).get();
    assertEquals("uid=jhy; token=asdfg123", ihVal("HTTP_COOKIE", echoed));
}
 
Example 10
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 302 endpoint with {@code followRedirects(false)} and asserts the 302 status
 * code and the {@code Location} header of the response.
 */
@Test
public void doesntRedirectIfSoConfigured() throws IOException {
    Connection.Response response = Jsoup
        .connect("http://direct.infohound.net/tools/302.pl")
        .followRedirects(false)
        .execute();

    assertEquals(302, response.statusCode());
    assertEquals("http://jsoup.org", response.header("Location"));
}
 
Example 11
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 302-cookie endpoint and asserts both the {@code token} cookie on the final
 * response and the {@code HTTP_COOKIE} value read from the parsed document.
 */
@Test
public void redirectsResponseCookieToNextResponse() throws IOException {
    Connection.Response finalResponse = Jsoup
        .connect("http://direct.infohound.net/tools/302-cookie.pl")
        .execute();

    // Cookies set on the first hit must be present in the final result.
    assertEquals("asdfg123", finalResponse.cookie("token"));

    // The redirected hit must have seen the cookie as well.
    Document echoed = finalResponse.parse();
    assertEquals("token=asdfg123; uid=jhy", ihVal("HTTP_COOKIE", echoed));
}
 
Example 12
Source File: LoadUserDetailsTask.java    From SteamGifts with MIT License 5 votes vote down vote up
/**
 * Fetches one page of the user's giveaway search results and parses the giveaways from it.
 *
 * @param params unused
 * @return the giveaways loaded from the page, or {@code null} when the status code is not 200
 *         or any exception is raised while fetching or parsing
 */
@Override
protected List<Giveaway> doInBackground(Void... params) {
    Log.d(TAG, "Fetching giveaways for user " + path + " on page " + page);

    try {
        // Build the request for the giveaway search page.
        Connection request = Jsoup.connect("https://www.steamgifts.com/user/" + path + "/search")
                .userAgent(Constants.JSOUP_USER_AGENT)
                .timeout(Constants.JSOUP_TIMEOUT);
        request.data("page", Integer.toString(page));

        // Logged-in users send their session cookie and do not follow redirects.
        if (SteamGiftsUserData.getCurrent(fragment.getContext()).isLoggedIn()) {
            request.cookie("PHPSESSID", SteamGiftsUserData.getCurrent(fragment.getContext()).getSessionId());
            request.followRedirects(false);
        }

        Connection.Response response = request.execute();
        Document document = response.parse();

        if (response.statusCode() != 200) {
            Log.w(TAG, "Got status code " + response.statusCode());
            return null;
        }

        SteamGiftsUserData.extract(fragment.getContext(), document);

        if (!user.isLoaded()) {
            foundXsrfToken = Utils.loadUserProfile(user, document);
        }

        // Parse all rows of giveaways.
        return Utils.loadGiveawaysFromList(document);
    } catch (Exception e) {
        Log.e(TAG, "Error fetching URL", e);
        return null;
    }
}
 
Example 13
Source File: UrlConnectTest.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * With HTTP errors ignored, the 404 endpoint's response is parsed and its status code and
 * first {@code h1} text are asserted.
 */
@Test
public void ignoresExceptionIfSoConfigured() throws IOException {
    final String missingUrl = "http://direct.infohound.net/tools/404";
    Connection request = Jsoup.connect(missingUrl).ignoreHttpErrors(true);

    Connection.Response reply = request.execute();
    Document body = reply.parse();

    assertEquals(404, reply.statusCode());

    String heading = body.select("h1").first().text();
    assertEquals("404 Not Found", heading);
}
 
Example 14
Source File: AccResHead2ParseHTML.java    From crawler-jsoup-maven with Apache License 2.0 5 votes vote down vote up
/**
 * Demonstrates picking the charset for parsing from the response of a preliminary request.
 * <p>
 * Sends a POST to the rate-list URL, prints the charset reported by that response, falls back
 * to UTF-8 when none is reported, then re-opens the URL as a raw stream, parses it with the
 * chosen charset, and prints the document and the charset returned by
 * {@code JsoupUtil.getStaticCharset}.
 *
 * @param args unused
 * @throws Exception if either request or the parse fails
 */
public static void main(String[] args) throws Exception {

    // String _url = "http://61.161.221.4:8088/mainservlet?actionType=INDEX";
    String _url = "https://rate.taobao.com/feedRateList.htm?auctionNumId=552463737787&userNumId=1703495879&currentPageNum=1&pageSize=20&rateType=&orderType=sort_weight&attribute=&sku=&hasSku=false&folded=0&ua=098%23E1hvqvvRvPpvUpCkvvvvvjiPPLcyljlbRsqwsjnEPmPpsjt8RLMvtjiER2q9ljnvRsyCvvBvpvvv9phv2n1w3xqBzYswM20S79wCvvNwzHi4z0CNiQhvChCvCCptvpvhphvvvvyCvh1vVxOvITlz8eQEfaAK53n5WDKt5BwsWD6rfuVHR4hSoAZnD704deDHEcqhaXTAVAIanixreTt%2BCclWQRp4e0Q4b64B9CkaU6UsxI2hKphv8hCvvvvvvhCvphvZJ9vvpu1vpC9CvvC216CvHjIvvhPjphvZK9vvpYJivpvUphvh3cUYvR7EvpvVp6WUCEIXvphvCyCCvvvvvvGCvvpvvvvv3QhvChCCvvvtvpvhphvvv86CvvDvppWpJpCv7OQ%3D&_ksTS=1519956772500_2207&callback=jsonp_tbcrate_reviews_list";
    String defaultCharset = "UTF-8";

    // according to response header to get parsed page char-set.
    Connection connection = Jsoup.connect(_url)
            .userAgent("Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36") // User-Agent of Chrome 55
            .referrer("http://blog.csdn.net/")
            .header("Content-Type", "application/json; charset=GBK")
            .header("Accept", "text/plain, */*; q=0.01")
            .header("Accept-Encoding", "gzip,deflate,sdch")
            .header("Accept-Language", "es-ES,es;q=0.8")
            .header("Connection", "keep-alive")
            .header("X-Requested-With", "XMLHttpRequest")
            .maxBodySize(100)
            .timeout(1000 * 10)
            .method(Connection.Method.POST);

    Response response = connection.execute();
    String charset = response.charset();
    System.out.println("charset:" + charset);

    // Only override the UTF-8 default when the response actually reported a charset.
    if (null != charset && !charset.isEmpty()) {
        defaultCharset = charset;
    }

//        Document doc = Jsoup.connect("http://blog.csdn.net/")
//                .cookies(response.cookies())
//                .timeout(10 * 10000)
//                .get();

    // The URL stream is opened by this method, so it is closed here as well; the type is
    // fully qualified so that no additional import is required.
    Document doc;
    try (java.io.InputStream pageStream = new URL(_url).openStream()) {
        doc = Jsoup.parse(pageStream, defaultCharset, _url);
    }
    System.out.println(doc);

    System.out.println("----------:" + JsoupUtil.getStaticCharset(doc));
}
 
Example 15
Source File: TestApiOfConnect.java    From crawler-jsoup-maven with Apache License 2.0 5 votes vote down vote up
/**
 * Builds a POST request to bluetata.com that carries a JSON request body and browser-like
 * headers, then executes it. The response is not inspected.
 *
 * @param args unused
 * @throws IOException if executing the request fails
 */
public static void main(String[] args) throws IOException {
    // Earlier variant, kept for reference:
    //
    //   Connection connection = Jsoup.connect("http://bluetata.com");
    //   // connection.data("aaa","ccc");  // this is the key point
    //   connection.header("Content-Type", "application/json; charset=UTF-8");  // this is the key point
    //   connection.header("Accept", "text/plain, */*; q=0.01");
    //   connection.timeout(15000);
    //   //String body = "{\"CategoryType\":\"SiteHome\",\"ParentCategoryId\":0,\"CategoryId\":808,\"PageIndex\":2,\"TotalPostCount\":4000,\"ItemListActionName\":\"PostList\"}";
    //   //connection.requestBody(body);
    //   Document document = connection.post();

    final String payload = "{\"name\":\"ACTIVATE\",\"value\":\"E0010\"}";

    Connection request = Jsoup.connect("http://bluetata.com/")
            .userAgent("Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36") // User-Agent of Chrome 55
            .referrer("http://bluetata.com/")
            .header("Content-Type", "application/json; charset=UTF-8")
            .header("Accept", "text/plain, */*; q=0.01")
            .header("Accept-Encoding", "gzip,deflate,sdch")
            .header("Accept-Language", "es-ES,es;q=0.8")
            .header("Connection", "keep-alive")
            .header("X-Requested-With", "XMLHttpRequest")
            .requestBody(payload)
            .maxBodySize(100)
            .timeout(1000 * 10)
            .method(Connection.Method.POST);

    // Fire the request; the returned response is intentionally not used.
    request.execute();
}
 
Example 16
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Requests the 500-no-content endpoint with {@code ignoreHttpErrors(true)}, parses the
 * response, and checks the status code and status message.
 */
@Test
public void ignores500WithNoContentExceptionIfSoConfigured() throws IOException {
    Connection.Response response = Jsoup
        .connect("http://direct.infohound.net/tools/500-no-content.pl")
        .ignoreHttpErrors(true)
        .execute();
    // The parsed document is not inspected; parsing only has to complete.
    response.parse();

    assertEquals(500, response.statusCode());
    assertEquals("Application Error", response.statusMessage());
}
 
Example 17
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sends a HEAD request for the XML test resource and asserts the {@code Content-Type}
 * header, an empty body, and empty text in the parsed document.
 */
@Test
public void sendHeadRequest() throws IOException {
    final Connection.Response headResponse = Jsoup
        .connect("http://direct.infohound.net/tools/parse-xml.xml")
        .method(Connection.Method.HEAD)
        .execute();

    assertEquals("text/xml", headResponse.header("Content-Type"));

    // A HEAD response ought to carry no body.
    assertEquals("", headResponse.body());

    Document parsed = headResponse.parse();
    assertEquals("", parsed.text());
}
 
Example 18
Source File: GITHUBLoginApater.java    From crawler-jsoup-maven with Apache License 2.0 4 votes vote down vote up
/**
 * Simulates a login in two requests: the login page is fetched first, then the fields of
 * its forms (with the login and password filled in) are posted to
 * {@code https://github.com/session} together with the cookies of the first response.
 * The login page, the body of the login response and its cookies are printed to stdout.
 *
 * @param userName user name
 * @param pwd password
 * @throws Exception if either request fails
 */
public static void simulateLogin(String userName, String pwd) throws Exception {

    /*
     * First request: grab the login form page, so its form fields can be collected and the
     * submitted data (login, password) adjusted. Its cookies are reused for the POST below.
     */
    Connection loginPageRequest = Jsoup.connect(LOGIN_URL);
    loginPageRequest.header(USER_AGENT, USER_AGENT_VALUE);   // imitate a browser
    Response rs = loginPageRequest.execute();
    Document loginPage = Jsoup.parse(rs.body());             // build the DOM tree

    System.out.println(loginPage);

    // The forms to submit; which ones exist can be seen in the page source.
    List<Element> forms = loginPage.select("form");

    // Collects every named parameter found in the forms together with its value.
    Map<String, String> formData = new HashMap<>();

    // Updated 01/24/2019 (bluetata): after several GitHub redesigns the submitted request
    // data consists of authenticity_token, commit, login, password and utf8. An older
    // version of this logic only scanned the first form; now every form is scanned.
    for (Element form : forms) {
        for (Element field : form.getAllElements()) {
            String fieldName = field.attr("name");

            // Fill in the user name.
            if (fieldName.equals("login")) {
                field.attr("value", userName);
            }
            // Fill in the password.
            if (fieldName.equals("password")) {
                field.attr("value", pwd);
            }
            // Skip elements that carry no name.
            if (fieldName.length() > 0) {
                formData.put(fieldName, field.attr("value"));
            }
        }
    }

    /*
     * Second request: submit the form data and the cookies via POST.
     */
    Connection sessionRequest = Jsoup.connect("https://github.com/session");
    sessionRequest.header(USER_AGENT, USER_AGENT_VALUE);
    Response login = sessionRequest
            .ignoreContentType(true)
            .followRedirects(true)
            .method(Method.POST)
            .data(formData)
            .cookies(rs.cookies())
            .execute();

    // Print the content returned after logging in.
    System.out.println(login.body());

    // Cookies of the login response; they could be stored locally so that later runs only
    // need to log in once.
    Map<String, String> sessionCookies = login.cookies();
    for (Map.Entry<String, String> cookie : sessionCookies.entrySet()) {
        System.out.println(cookie.getKey() + " : " + cookie.getValue());
    }
}
 
Example 19
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads the page source for a request through jsoup.
 *
 * @param pageRequest supplies the URL, parameters, cookies, headers, user agent, referrer,
 *                    timeout, TLS validation flag, proxy and HTTP method to use
 * @return the response body, or {@code null} when the URL fails the {@code UrlUtil.isUrl}
 *         check or an {@link IOException} is raised (the exception is logged)
 */
public static String loadPageSource(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }

    try {
        // Request settings: optional maps are only applied when present and non-empty.
        Connection request = Jsoup.connect(pageRequest.getUrl());
        if (pageRequest.getParamMap() != null && !pageRequest.getParamMap().isEmpty()) {
            request.data(pageRequest.getParamMap());
        }
        if (pageRequest.getCookieMap() != null && !pageRequest.getCookieMap().isEmpty()) {
            request.cookies(pageRequest.getCookieMap());
        }
        if (pageRequest.getHeaderMap() != null && !pageRequest.getHeaderMap().isEmpty()) {
            request.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            request.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            request.referrer(pageRequest.getReferrer());
        }
        request.timeout(pageRequest.getTimeoutMillis());
        request.validateTLSCertificates(pageRequest.isValidateTLSCertificates());
        request.maxBodySize(0);    // lift the default 1M body limit

        // Proxy
        if (pageRequest.getProxy() != null) {
            request.proxy(pageRequest.getProxy());
        }

        request.ignoreContentType(true);
        if (pageRequest.isIfPost()) {
            request.method(Connection.Method.POST);
        } else {
            request.method(Connection.Method.GET);
        }

        // Send the request and hand back the raw body.
        Connection.Response response = request.execute();
        return response.body();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        return null;
    }
}
 
Example 20
Source File: ArtStationRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
 * Parses an ArtStation URL.
 * <p>
 * The page is fetched and its HTML is searched for a project or user JSON reference. If the
 * page cannot be fetched (non-2xx status or an I/O error), the failure is logged and the
 * search runs on an empty string, which yields an {@code UNKNOWN} result. A 403 on a URL
 * containing {@code artwork/} is treated as a single project whose id is the last path
 * segment of the URL.
 *
 * @param url URL to an ArtStation user profile
 *            (https://www.artstation.com/username) or single project
 *            (https://www.artstation.com/artwork/projectid)
 * @return ParsedURL object containing URL type, JSON location and ID (stores
 *         account name or project hash, depending of the URL type identified)
 *
 */
private ParsedURL parseURL(URL url) {
    // Stays empty when the page cannot be loaded, so neither pattern below can match.
    String htmlSource = "";

    // Load HTML Source of the specified URL
    try {
        // htmlSource = Http.url(url).get().html();
        Connection con = Http.url(url).method(Method.GET).connection();
        con.ignoreHttpErrors(true);
        con.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
        con.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        con.header("Accept-Language", "en-US,en;q=0.5");
//        con.header("Accept-Encoding", "gzip, deflate, br");
        con.header("Upgrade-Insecure-Requests", "1");
        Response res = con.execute();
        int status = res.statusCode();

        if (status / 100 == 2) {
            htmlSource = res.parse().html();
        } else if (status == 403 && url.toString().contains("artwork/")) {
            // Catches cloudflare page. Error 403.
            // Usually caused by artwork URLs( arstation.com/artwork/someProjectId)
            String address = url.toString();
            String urlId = address.substring(address.lastIndexOf("/") + 1);
            String jsonURL = "https://www.artstation.com/projects/" + urlId + ".json";
            return new ParsedURL(URL_TYPE.SINGLE_PROJECT, jsonURL, urlId);
        } else {
            // Log the status directly instead of throwing an exception that is caught a few
            // lines further down; htmlSource stays empty.
            LOGGER.error("Couldnt fetch URL: " + url + " Status Code: " + status);
        }
    } catch (IOException e) {
        // Best effort: report the failure and continue with an empty page source.
        LOGGER.error("Error fetching URL: " + url, e);
    }

    // Check if HTML Source of the specified URL references a project
    Pattern p = Pattern.compile("'/projects/(\\w+)\\.json'");
    Matcher m = p.matcher(htmlSource);
    if (m.find()) {
        return new ParsedURL(URL_TYPE.SINGLE_PROJECT,
                "https://www.artstation.com/projects/" + m.group(1) + ".json", m.group(1));
    }

    // Check if HTML Source of the specified URL references a user profile
    p = Pattern.compile("'/users/([\\w-]+)/quick\\.json'");
    m = p.matcher(htmlSource);
    if (m.find()) {
        return new ParsedURL(URL_TYPE.USER_PORTFOLIO,
                "https://www.artstation.com/users/" + m.group(1) + "/projects.json", m.group(1));
    }

    // HTML Source of the specified URL doesn't reference a user profile or project
    return new ParsedURL(URL_TYPE.UNKNOWN, null, null);
}