Java Code Examples for org.jsoup.Jsoup#connect()

The following examples show how to use org.jsoup.Jsoup#connect(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: HttpUtil.java    From JsDroidCmd with Mozilla Public License 2.0 6 votes vote down vote up
/**
 * Issues a request to {@code url} through jsoup and returns the response body as text.
 *
 * <p>The connection is configured to ignore both the response content type and HTTP
 * error statuses before it is executed. No request method is set here, so the
 * connection's default applies.
 *
 * @param url     the address to request
 * @param headers request headers to send, or {@code null} to send none
 * @param params  request data to send, or {@code null} to send none
 * @return the response body, or {@code null} when the request could not be completed
 */
public static String get(String url, Map<String, String> headers,
		Map<String, String> params) {
	try {
		Connection connect = Jsoup.connect(url);
		connect.ignoreContentType(true);
		connect.ignoreHttpErrors(true);
		if (params != null) {
			connect.data(params);
		}
		if (headers != null) {
			connect.headers(headers);
		}
		return connect.execute().body();
	} catch (Exception ignored) {
		// Deliberate best effort: any request failure is reported to the caller as null.
		// Only Exception is caught (not Throwable) so that JVM Errors such as
		// OutOfMemoryError are no longer silently swallowed.
		return null;
	}
}
 
Example 2
Source File: JsoupRequestor.java    From http-api-invoker with MIT License 6 votes vote down vote up
/**
 * Prepares a POST connection for {@code request} and hands it, together with a
 * multipart body, to {@code handleMultiPart}.
 *
 * @param request the request; supplies the URL, timeout and body used here
 * @return whatever {@code handleMultiPart} returns for the prepared connection
 * @throws IOException declared for failures raised while handling the request
 */
private Response uploadFile(HttpRequest request) throws IOException {
    Connection conn = Jsoup.connect(request.getUrl())
            .method(Method.POST)
            .timeout(request.getTimeout())
            .ignoreHttpErrors(true)
            .maxBodySize(0) // 0 = unlimited size
            .ignoreContentType(true);
    addHeadersAndCookies(request, conn);

    Object payload = request.getBody();
    if (!(payload instanceof MultiPart)) {
        // Body is not multipart yet: convert the request first.
        return handleMultiPart(conn, convertInputStreamAndFile(request));
    }
    return handleMultiPart(conn, (MultiPart) payload);
}
 
Example 3
Source File: PostComment.java    From WordPressHelper with MIT License 6 votes vote down vote up
/**
 * Posts the comment fields to the WordPress comment endpoint and stores the
 * resulting HTTP status code in {@code response}.
 *
 * @param params not read by this method
 * @return always {@code null}
 */
@Override
protected Object doInBackground(Object[] params) {
    Connection post = Jsoup.connect(URL_WORDPRESS + "/wp-comments-post.php")
            .method(Connection.Method.POST)
            .data("author", name)
            .data("email", email)
            .data("url", url)
            .data("comment", comment)
            .data("comment_post_ID", id)
            .data("comment_parent", commentParent);

    try {
        response = post.execute().statusCode();
    } catch (IOException e) {
        // On failure, response keeps whatever value it had before.
        e.printStackTrace();
    }
    return null;
}
 
Example 4
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void followsNewTempRedirect() throws IOException {
    // The 307.pl endpoint is expected to forward to http://jsoup.org
    Connection redirecting = Jsoup.connect("http://direct.infohound.net/tools/307.pl");
    Document landing = redirecting.get();

    String title = landing.title();
    assertTrue(title.contains("jsoup"));

    String finalUrl = redirecting.response().url().toString();
    assertEquals("https://jsoup.org/", finalUrl);
}
 
Example 5
Source File: AbstractHotProcessor.java    From hot-crawler with MIT License 5 votes vote down vote up
/**
 * Fetches the document at the request's URL using a timeout of {@code 10 * 1000}.
 *
 * <p>Headers are only applied when the request carries its own header map; in that
 * case the basic headers are set first and the request's headers afterwards.
 *
 * @param httpRequest supplies the URL and optional headers
 * @return the fetched document, or {@code null} if an {@link IOException} occurred
 */
protected Document getDocument(HttpRequest httpRequest){
    Connection conn = Jsoup.connect(httpRequest.getUrl());
    if (httpRequest.getHeader() != null) {
        conn.headers(getBasicHeaders())
                .headers(httpRequest.getHeader());
    }

    try {
        return conn.timeout(10 * 1000).get();
    } catch (IOException e) {
        log.error("Fail to connect!", e);
        return null;
    }
}
 
Example 6
Source File: InternetBrowser.java    From petscii-bbs with Mozilla Public License 2.0 5 votes vote down vote up
/**
 * Fetches and returns the document at {@code url}.
 *
 * @param url the address to fetch
 * @return the fetched document, or {@code null} if creating the connection failed
 * @throws Exception if the fetch itself fails; only connection creation is guarded
 */
public Document getWebpage(String url) throws Exception {
    final Connection page;
    try {
        page = Jsoup.connect(url);
    } catch (Exception e1) {
        log("Couldn't connect with the website.");
        return null;
    }

    // Sent without any custom headers.
    return page.get();
}
 
Example 7
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void followsRelativeDotRedirect() throws IOException {
    // The endpoint redirects to "./ok.html", which should resolve to
    // http://direct.infohound.net/tools/ok.html
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/302-rel-dot.pl"); // to ./ok.html
    Document doc = con.post();
    assertTrue(doc.title().contains("OK"));
    // Expected value goes first so a failure message reports expected/actual correctly.
    assertEquals("http://direct.infohound.net/tools/ok.html", doc.location());
}
 
Example 8
Source File: Http.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Applies the default configuration: reads the retry count from the
 * {@code download.retries} config entry and builds a GET connection for
 * {@code this.url} with the shared user agent, timeout, no body-size cap,
 * and the cookies resolved for that URL.
 */
private void defaultSettings() {
    this.retries = Utils.getConfigInteger("download.retries", 1);

    // Assign the field first, then configure it fluently.
    connection = Jsoup.connect(this.url);
    connection.userAgent(AbstractRipper.USER_AGENT)
            .method(Method.GET)
            .timeout(TIMEOUT)
            .maxBodySize(0);

    // Cookies are extracted from a config entry, for example:
    //   cookies.reddit.com = reddit_session=<value>; other_cookie=<value>
    connection.cookies(cookiesForURL(this.url));
}
 
Example 9
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void multiCookieSet() throws IOException {
    Connection.Response redirected =
            Jsoup.connect("http://direct.infohound.net/tools/302-cookie.pl").execute();

    // Cookies set along the redirect must be visible on the final response.
    Map<String, String> jar = redirected.cookies();
    assertEquals("asdfg123", jar.get("token"));
    assertEquals("jhy", jar.get("uid"));

    // Replay the whole cookie map against the echo URL and check what was sent.
    Connection echo = Jsoup.connect(echoURL);
    echo.cookies(jar);
    Document echoed = echo.get();
    assertEquals("token=asdfg123; uid=jhy", ihVal("HTTP_COOKIE", echoed));
}
 
Example 10
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void followsRedirectToHttps() throws IOException {
    // 302-secure.pl is expected to redirect to https://www.google.com
    Document landing = Jsoup.connect("http://direct.infohound.net/tools/302-secure.pl")
            .data("id", "5")
            .get();

    String title = landing.title();
    assertTrue(title.contains("Google"));
}
 
Example 11
Source File: WebpageLoader.java    From TinkerTime with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Fetches the document at {@code url}, creating the connection on first use and
 * re-pointing the existing one on later calls.
 *
 * @param url the page to load
 * @return the fetched document
 * @throws IOException if the fetch fails
 */
@Override
protected Document loadPage(URL url) throws IOException {
	if (connection != null){
		// Reuse the existing connection; only its target changes.
		connection.url(url);
	} else {
		connection = Jsoup.connect(url.toString());
	}

	return connection.get();
}
 
Example 12
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void combinesSameHeadersWithComma() throws IOException {
    // Background: http://www.w3.org/Protocols/rfc2616/rfc2616-sec4.html#sec4.2
    Connection con = Jsoup.connect("http://direct.infohound.net/tools/q.pl");
    con.get();

    Connection.Response res = con.response();
    assertEquals("text/html", res.header("Content-Type"));
    // Repeated Cache-Control headers should surface as one comma-joined value.
    assertEquals("no-cache, no-store", res.header("Cache-Control"));
}
 
Example 13
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test
public void followsRedirectToHttps() throws IOException {
    // Target of the redirect: https://www.google.com
    Connection request = Jsoup.connect("http://direct.infohound.net/tools/302-secure.pl")
            .data("id", "5");
    Document page = request.get();
    assertTrue(page.title().contains("Google"));
}
 
Example 14
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test
public void followsRedirectsWithWithespaces() throws IOException {
    // The short link forwards to http://www.google.com/?q=white spaces
    Document landing = Jsoup.connect("http://tinyurl.com/kgofxl8").get();
    String title = landing.title();
    assertTrue(title.contains("Google"));
}
 
Example 15
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test
public void fetchURIWithWihtespace() throws IOException {
    // The URL deliberately contains unencoded spaces in its fragment.
    Document page = Jsoup.connect("http://try.jsoup.org/#with whitespaces").get();
    String title = page.title();
    assertTrue(title.contains("jsoup"));
}
 
Example 16
Source File: Jsoup403ForbiddenExample.java    From crawler-jsoup-maven with Apache License 2.0 4 votes vote down vote up
/**
 * Fetches http://www.bluetata.com, extracts the page text and prints it to
 * standard output. An {@link IOException} is reported via its stack trace.
 *
 * @param args not read by this method
 */
public static void main(String[] args) {
    try {
        // Connect, fetch the HTML document and reduce it to its text in one chain.
        String pageText = Jsoup.connect("http://www.bluetata.com")
                .get()
                .text();
        System.out.println(pageText);
    } catch (IOException ioex) {
        ioex.printStackTrace();
    }
}
 
Example 17
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test
public void followsRelativeRedirect() throws IOException {
    // 302-rel.pl answers with a relative redirect to /tidy/
    Document landing = Jsoup.connect("http://direct.infohound.net/tools/302-rel.pl").post();
    String title = landing.title();
    assertTrue(title.contains("HTML Tidy Online"));
}
 
Example 18
Source File: JsoupUtil.java    From xxl-crawler with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Loads a page with jsoup as described by the given request.
 *
 * @param pageRequest supplies the URL, parameters, cookies, headers, user agent,
 *                    referrer, timeout, TLS validation flag, proxy and POST/GET choice
 *
 * @return the fetched Document, or {@code null} when {@code UrlUtil.isUrl} rejects
 *         the URL or an {@link IOException} occurs
 */
public static Document load(PageRequest pageRequest) {
    if (!UrlUtil.isUrl(pageRequest.getUrl())) {
        return null;
    }
    try {
        // Request settings: optional maps are applied only when present and non-empty.
        Connection conn = Jsoup.connect(pageRequest.getUrl());
        if (!(pageRequest.getParamMap() == null || pageRequest.getParamMap().isEmpty())) {
            conn.data(pageRequest.getParamMap());
        }
        if (!(pageRequest.getCookieMap() == null || pageRequest.getCookieMap().isEmpty())) {
            conn.cookies(pageRequest.getCookieMap());
        }
        if (!(pageRequest.getHeaderMap() == null || pageRequest.getHeaderMap().isEmpty())) {
            conn.headers(pageRequest.getHeaderMap());
        }
        if (pageRequest.getUserAgent() != null) {
            conn.userAgent(pageRequest.getUserAgent());
        }
        if (pageRequest.getReferrer() != null) {
            conn.referrer(pageRequest.getReferrer());
        }
        conn.timeout(pageRequest.getTimeoutMillis())
                .validateTLSCertificates(pageRequest.isValidateTLSCertificates())
                .maxBodySize(0);    // lift the default 1M body limit

        // Proxy
        if (pageRequest.getProxy() != null) {
            conn.proxy(pageRequest.getProxy());
        }

        // Send the request
        return pageRequest.isIfPost() ? conn.post() : conn.get();
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        return null;
    }
}
 
Example 19
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test
public void gracefullyHandleBrokenLocationRedirect() throws IOException {
    // The site responds with the malformed header "Location: http:/temp/AAG_New/en/index.php"
    Connection broken = Jsoup.connect("http://aag-ye.com");
    // Any failure while fetching surfaces as an exception and fails the test.
    broken.get();
    assertTrue(true);
}
 
Example 20
Source File: UrlConnectTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
@Test
public void followsRelativeRedirect() throws IOException {
    // The endpoint redirects relatively, to /tidy/
    Connection request = Jsoup.connect("http://direct.infohound.net/tools/302-rel.pl");
    Document page = request.post();
    String pageTitle = page.title();
    assertTrue(pageTitle.contains("HTML Tidy Online"));
}