Java Code Examples for org.jsoup.Connection#get()

The following examples show how to use org.jsoup.Connection#get(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: LoadGiveawayWinnersTask.java    From SteamGifts with MIT License 6 votes vote down vote up
/**
 * Fetches one page of the winners list for the giveaway identified by {@code path}.
 *
 * The session cookie is attached only when the current user is logged in.
 *
 * @param params unused
 * @return the result of {@code loadAll} on the fetched page, or {@code null} if the
 *         request failed with an {@link IOException}
 */
@Override
protected List<Winner> doInBackground(Void... params) {
    Log.d(TAG, "Fetching giveaways for page " + page);

    try {
        // Build the request for the requested page of the winners list.
        Connection request = Jsoup.connect("https://www.steamgifts.com/giveaway/" + path + "/winners/search")
                .userAgent(Constants.JSOUP_USER_AGENT)
                .timeout(Constants.JSOUP_TIMEOUT)
                .data("page", Integer.toString(page));

        // Only logged-in users have a session id worth sending.
        if (SteamGiftsUserData.getCurrent(fragment.getContext()).isLoggedIn()) {
            request.cookie("PHPSESSID", SteamGiftsUserData.getCurrent(fragment.getContext()).getSessionId());
        }

        Document winnersPage = request.get();
        SteamGiftsUserData.extract(fragment.getContext(), winnersPage);
        return loadAll(winnersPage);
    } catch (IOException e) {
        Log.e(TAG, "Error fetching URL", e);
        return null;
    }
}
 
Example 2
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Verifies that an explicitly requested HTML parser is used even though the fetched
 * resource is XML.
 */
@Test
public void fetchHandlesXmlAsHtmlWhenParserSet() throws IOException {
    // XML would normally be auto-detected and handled by the XML parser; here the HTML parser is forced.
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/parse-xml.xml").parser(Parser.htmlParser());
    Document parsed = con.get();

    Connection.Request sentRequest = con.request();
    assertTrue(sentRequest.parser().getTreeBuilder() instanceof HtmlTreeBuilder);

    String normalisedHtml = StringUtil.normaliseWhitespace(parsed.outerHtml());
    assertEquals("<html> <head></head> <body> <xml> <link>one <table> Two </table> </xml> </body> </html>", normalisedHtml);
}
 
Example 3
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Verifies that repeated response headers with the same name are exposed as a single
 * comma-separated value.
 */
@Test
public void combinesSameHeadersWithComma() throws IOException {
    // See http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/q.pl");
    con.get();

    Connection.Response res = con.response();
    assertEquals("text/html", res.header("Content-Type"));
    assertEquals("no-cache, no-store", res.header("Cache-Control"));
}
 
Example 4
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Sends a GET to the echo endpoint and checks that the query string, data parameter,
 * user agent and referrer all arrive as configured.
 */
@Test
public void doesGet() throws IOException {
    Connection con = Jsoup.connect(echoURL + "?what=the");
    con.userAgent("Mozilla");
    con.referrer("http://example.com");
    con.data("what", "about & me?");

    Document echoed = con.get();

    // The data parameter is appended to the existing query string, URL-encoded.
    assertEquals("what=the&what=about+%26+me%3F", ihVal("QUERY_STRING", echoed));
    assertEquals("the, about & me?", ihVal("what", echoed));
    assertEquals("Mozilla", ihVal("HTTP_USER_AGENT", echoed));
    assertEquals("http://example.com", ihVal("HTTP_REFERER", echoed));
}
 
Example 5
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Verifies that a 307 redirect is followed and that the response reports the final URL.
 */
@Test
public void followsNewTempRedirect() throws IOException {
    // The 307 endpoint is expected to redirect to http://jsoup.org
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/307.pl");
    Document landingPage = con.get();

    assertTrue(landingPage.title().contains("jsoup"));

    String finalUrl = con.response().url().toString();
    assertEquals("http://jsoup.org", finalUrl);
}
 
Example 6
Source File: Internet.java    From aurous-app with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Fetches and parses the document at the given URL.
 *
 * @param url the address to fetch
 * @return the parsed document, or {@code null} if the fetch failed with an
 *         {@link IOException} (the stack trace is printed)
 */
public static Document document(final String url) {
	final Connection conn = connect(url);
	Document fetched = null;
	try {
		fetched = conn.get();
	} catch (final IOException e) {
		e.printStackTrace();
	}
	return fetched;
}
 
Example 7
Source File: LoadGameListTask.java    From SteamGifts with MIT License 5 votes vote down vote up
/**
 * Fetches one page of the search listing under {@code pathSegment}, optionally filtered
 * by {@code searchQuery}, and parses it.
 *
 * As a side effect, {@code foundXsrfToken} is updated when the page contains an
 * {@code xsrf_token} input.
 *
 * @param params unused
 * @return the result of {@code loadAll} on the fetched page, or {@code null} if anything
 *         went wrong
 */
@Override
protected List<IEndlessAdaptable> doInBackground(Void... params) {
    try {
        // Build the request for the requested page of the listing.
        Connection request = Jsoup.connect("https://www.steamgifts.com/" + pathSegment + "/search")
                .userAgent(Constants.JSOUP_USER_AGENT)
                .timeout(Constants.JSOUP_TIMEOUT)
                .data("page", Integer.toString(page));

        if (searchQuery != null) {
            request.data("q", searchQuery);
        }

        request.cookie("PHPSESSID", SteamGiftsUserData.getCurrent(context).getSessionId());

        Document listing = request.get();
        SteamGiftsUserData.extract(context, listing);

        // Remember the xsrf token if the page carries one.
        Element tokenInput = listing.select("input[name=xsrf_token]").first();
        if (tokenInput != null) {
            foundXsrfToken = tokenInput.attr("value");
        }

        // Blank out the pinned giveaways before parsing the rows.
        listing.select(".pinned-giveaways__outer-wrap").html("");

        return loadAll(listing);
    } catch (Exception e) {
        Log.e(TAG, "Error fetching URL", e);
        return null;
    }
}
 
Example 8
Source File: UrlConnectTest.java    From jsoup-learning with MIT License 5 votes vote down vote up
/**
 * Issues a GET against the echo endpoint and confirms the server saw the configured
 * query string, data parameter, user agent and referrer.
 */
@Test
public void doesGet() throws IOException {
    Connection con = Jsoup.connect(echoURL + "?what=the");
    con.userAgent("Mozilla");
    con.referrer("http://example.com");
    con.data("what", "about & me?");

    Document echoed = con.get();

    // The extra "what" value is URL-encoded and appended to the query already in the URL.
    assertEquals("what=the&what=about+%26+me%3F", ihVal("QUERY_STRING", echoed));
    assertEquals("the, about & me?", ihVal("what", echoed));
    assertEquals("Mozilla", ihVal("HTTP_USER_AGENT", echoed));
    assertEquals("http://example.com", ihVal("HTTP_REFERER", echoed));
}
 
Example 9
Source File: LoadMessagesTask.java    From SteamGifts with MIT License 5 votes vote down vote up
/**
 * Fetches one page of the messages listing and parses it.
 *
 * As a side effect, {@code foundXsrfToken} is updated when the page contains an
 * {@code xsrf_token} input.
 *
 * @param params unused
 * @return the result of {@code loadMessages} on the fetched page, or {@code null} if
 *         anything went wrong
 */
@Override
protected List<IEndlessAdaptable> doInBackground(Void... params) {
    try {
        // Build the request for the requested page of messages.
        Connection request = Jsoup.connect("https://www.steamgifts.com/messages/search")
                .userAgent(Constants.JSOUP_USER_AGENT)
                .timeout(Constants.JSOUP_TIMEOUT)
                .data("page", Integer.toString(page))
                .cookie("PHPSESSID", SteamGiftsUserData.getCurrent(context).getSessionId());

        Document messagesPage = request.get();
        SteamGiftsUserData.extract(context, messagesPage);

        // Remember the xsrf token if the page carries one.
        Element tokenInput = messagesPage.select("input[name=xsrf_token]").first();
        if (tokenInput != null) {
            foundXsrfToken = tokenInput.attr("value");
        }

        // Parse the messages on the page.
        return loadMessages(messagesPage);
    } catch (Exception e) {
        Log.e(TAG, "Error fetching URL", e);
        return null;
    }
}
 
Example 10
Source File: LoadWhitelistBlacklistTask.java    From SteamGifts with MIT License 5 votes vote down vote up
/**
 * Fetches one page of the account list named by {@code what} (lower-cased into the URL),
 * optionally filtered by {@code searchQuery}, and parses it. Redirects are not followed.
 *
 * As a side effect, {@code foundXsrfToken} is updated when the page contains an
 * {@code xsrf_token} input.
 *
 * @param params unused
 * @return the result of {@code loadAll} on the fetched page, or {@code null} if anything
 *         went wrong
 */
@Override
protected List<BasicUser> doInBackground(Void... params) {
    try {
        String listName = what.name().toLowerCase(Locale.ENGLISH);
        String url = "https://www.steamgifts.com/account/manage/" + listName + "/search";
        Log.d(TAG, "Fetching URL " + url);

        Connection request = Jsoup.connect(url)
                .userAgent(Constants.JSOUP_USER_AGENT)
                .timeout(Constants.JSOUP_TIMEOUT)
                .followRedirects(false)
                .data("page", Integer.toString(page));

        if (searchQuery != null) {
            request.data("q", searchQuery);
        }

        request.cookie("PHPSESSID", SteamGiftsUserData.getCurrent(fragment.getContext()).getSessionId());

        Document listPage = request.get();
        SteamGiftsUserData.extract(fragment.getContext(), listPage);

        // Remember the xsrf token if the page carries one.
        Element tokenInput = listPage.select("input[name=xsrf_token]").first();
        if (tokenInput != null) {
            foundXsrfToken = tokenInput.attr("value");
        }

        // Blank out the pinned giveaways before parsing the rows.
        listPage.select(".pinned-giveaways__outer-wrap").html("");

        return loadAll(listPage);
    } catch (Exception e) {
        Log.e(TAG, "Error fetching URL", e);
        return null;
    }
}
 
Example 11
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Verifies that a redirect from an http URL to an https URL is followed.
 */
@Test
public void followsRedirectToHttps() throws IOException {
    // The endpoint is expected to redirect to https://www.google.com
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-secure.pl").data("id", "5");
    Document landingPage = con.get();
    assertTrue(landingPage.title().contains("Google"));
}
 
Example 12
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Performs a GET on the echo endpoint and asserts that the query string, data parameter,
 * user agent and referrer were transmitted as configured.
 */
@Test
public void doesGet() throws IOException {
    Connection con = Jsoup.connect(echoURL + "?what=the");
    con.userAgent("Mozilla");
    con.referrer("http://example.com");
    con.data("what", "about & me?");

    Document echoed = con.get();

    // Both "what" values are expected: one from the URL, one URL-encoded from data().
    assertEquals("what=the&what=about+%26+me%3F", ihVal("QUERY_STRING", echoed));
    assertEquals("the, about & me?", ihVal("what", echoed));
    assertEquals("Mozilla", ihVal("HTTP_USER_AGENT", echoed));
    assertEquals("http://example.com", ihVal("HTTP_REFERER", echoed));
}
 
Example 13
Source File: UrlConnectTest.java    From jsoup-learning with MIT License 4 votes vote down vote up
/**
 * Verifies that a 302 redirect is followed to the target page.
 */
@Test
public void followsTempRedirect() throws IOException {
    // The 302 endpoint is expected to redirect to http://jsoup.org
    Document landingPage = Jsoup.connect("http://direct.infohound.net/tools/302.pl").get();
    assertTrue(landingPage.title().contains("jsoup"));
}
 
Example 14
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Verifies that a URL containing whitespace (here in the fragment) can be fetched.
 */
@Test
public void fetchURIWithWihtespace() throws IOException {
    Document fetched = Jsoup.connect("http://try.jsoup.org/#with whitespaces").get();
    assertTrue(fetched.title().contains("jsoup"));
}
 
Example 15
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that fetching a URL whose fragment contains a space succeeds.
 */
@Test
public void fetchURIWithWihtespace() throws IOException {
    String urlWithSpace = "http://try.jsoup.org/#with whitespaces";
    Document fetched = Jsoup.connect(urlWithSpace).get();
    assertTrue(fetched.title().contains("jsoup"));
}
 
Example 16
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Verifies that a redirect whose target URL contains whitespace is followed.
 */
@Test
public void followsRedirectsWithWithespaces() throws IOException {
    // The short link is expected to redirect to http://www.google.com/?q=white spaces
    Document landingPage = Jsoup.connect("http://tinyurl.com/kgofxl8").get();
    assertTrue(landingPage.title().contains("Google"));
}
 
Example 17
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that a temporary (302) redirect ends up on the redirect target.
 */
@Test
public void followsTempRedirect() throws IOException {
    // The 302 endpoint is expected to redirect to http://jsoup.org
    String redirectingUrl = "http://direct.infohound.net/tools/302.pl";
    Document landingPage = Jsoup.connect(redirectingUrl).get();
    assertTrue(landingPage.title().contains("jsoup"));
}
 
Example 18
Source File: JsoupUtil.java    From crawler-jsoup-maven with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches {@code url} with a GET request while imitating a browser, retrying on failure.
 *
 * {@code bindData} is attached to the request as headers, data parameters or cookies,
 * depending on {@code requestType}. A user agent is picked at random from
 * {@code UserAgent} for every attempt. The number of attempts and the pause between
 * them are read from the properties file on every call.
 *
 * If the thread is interrupted, the interrupt flag is restored, the failure is logged
 * and no further attempts are made.
 *
 * @param url           request url
 * @param bindData      data to bind to the request; ignored when {@code null} or empty
 * @param requestType   how to bind {@code bindData}: headers, data or cookies; ignored
 *                      when empty, and unrecognised values bind nothing
 * @return              the fetched document, or {@code null} if no attempt succeeded
 * @throws Exception    declared for callers; failures of individual attempts are logged
 *                      rather than thrown, but a {@link NumberFormatException} from a
 *                      malformed retry/sleep property propagates
 */
public static Document getDocument(String url, Map<String, String> bindData, String requestType) throws Exception {

    // Maximum number of attempts, read from the properties file (com-constants.properties).
    int maxRetry = Integer.parseInt(PropertyReader.getProperties(SystemConstants.COM_CONSTANTS)
            .getProperty(UtilsConstants.MAX_RETRY_COUNT));
    // Pause between attempts in milliseconds, read from the same properties file.
    int sleepTime = Integer.parseInt(PropertyReader.getProperties(SystemConstants.COM_CONSTANTS)
            .getProperty(UtilsConstants.SLEEP_TIME_COUNT));

    Document doc = null;

    // Keep trying until an attempt succeeds or the attempts are used up.
    for (int attempt = 1; attempt <= maxRetry; attempt++) {
        try {
            if (attempt != 1) {
                Thread.sleep(sleepTime);
            }

            // Pick a user agent uniformly at random.
            // TODO There is a plan to configure userAgent to load that userAgent from a property file.
            int agentIndex = (int) (Math.random() * UserAgent.length);
            Connection conn = Jsoup.connect(url).timeout(10000).userAgent(UserAgent[agentIndex]);

            if (bindData != null && bindData.size() != 0 && !StringUtil.isEmpty(requestType)) {
                switch (requestType) {
                case UtilsConstants.REQUEST_HEADERS:
                    // Adds each of the supplied headers to the request.
                    conn.headers(bindData);
                    break;
                case UtilsConstants.REQUEST_DATA:
                    // Adds all of the supplied data to the request data parameters.
                    conn.data(bindData);
                    break;
                case UtilsConstants.REQUEST_COOKIES:
                    // Adds each of the supplied cookies to the request.
                    conn.cookies(bindData);
                    break;
                default:
                    // TODO stream etc. logic is still to be added.
                    break;
                }
            }
            doc = conn.get();

            // Success: stop retrying.
            break;

        } catch (InterruptedException ie) {
            // Restore the interrupt flag and stop: a further Thread.sleep would fail immediately.
            Thread.currentThread().interrupt();
            Log4jUtil.error("第" + attempt + "次请求异常。");
            Log4jUtil.error(stackTraceOf(ie));
            break;
        } catch (Exception ex) {
            // Deliberately not rethrown, so that the remaining attempts still run.
            Log4jUtil.error("第" + attempt + "次请求异常。");
            Log4jUtil.error(stackTraceOf(ex));
        }
    }
    return doc;
}

/**
 * Renders the stack trace of {@code t} as a string, using a fresh buffer on every call so
 * that each logged failure shows its own trace.
 */
private static String stackTraceOf(Throwable t) {
    StringWriter buffer = new StringWriter();
    PrintWriter printer = new PrintWriter(buffer);
    t.printStackTrace(printer);
    printer.flush();
    return buffer.toString();
}
 
Example 19
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Verifies that a response with a malformed {@code Location} header does not make the
 * fetch throw.
 */
@Test
public void gracefullyHandleBrokenLocationRedirect() throws IOException {
    // The site answers with "Location: http:/temp/AAG_New/en/index.php"
    Connection con = Jsoup.connect("http://aag-ye.com");
    // Reaching the assertion is the test: an error would have thrown here.
    con.get();
    assertTrue(true);
}
 
Example 20
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads a page as described by the given request.
 *
 * Parameters, cookies, headers, user agent, referrer and proxy are applied only when the
 * request supplies them. Timeout, TLS certificate validation and an unlimited body size
 * are always applied. The request is sent as a POST when {@code isIfPost()} is true,
 * otherwise as a GET.
 *
 * @param pageRequest the description of the page to load
 *
 * @return the parsed Document, or {@code null} when the URL is rejected by
 *         {@code UrlUtil.isUrl} or the request fails with an {@link IOException}
 */
public static Document load(PageRequest pageRequest) {
    boolean validUrl = UrlUtil.isUrl(pageRequest.getUrl());
    if (!validUrl) {
        return null;
    }

    try {
        // Request settings
        Connection conn = Jsoup.connect(pageRequest.getUrl());

        if (!(pageRequest.getParamMap() == null || pageRequest.getParamMap().isEmpty())) {
            conn.data(pageRequest.getParamMap());
        }
        if (!(pageRequest.getCookieMap() == null || pageRequest.getCookieMap().isEmpty())) {
            conn.cookies(pageRequest.getCookieMap());
        }
        if (!(pageRequest.getHeaderMap() == null || pageRequest.getHeaderMap().isEmpty())) {
            conn.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            conn.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            conn.referrer(pageRequest.getReferrer());
        }

        conn.timeout(pageRequest.getTimeoutMillis());
        conn.validateTLSCertificates(pageRequest.isValidateTLSCertificates());
        // Lift the default 1M body size limit.
        conn.maxBodySize(0);

        // Proxy
        if (pageRequest.getProxy() != null) {
            conn.proxy(pageRequest.getProxy());
        }

        // Send the request
        return pageRequest.isIfPost() ? conn.post() : conn.get();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        return null;
    }
}