Java Code Examples for org.jsoup.Connection#header()

The following examples show how to use org.jsoup.Connection#header(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. Related API usage is listed in the sidebar.
Example 1
Source File: AiPaUtil.java    From AIPa with Apache License 2.0 6 votes vote down vote up
/**
 * Default crawl method: requests {@code link} and returns the parsed page.
 *
 * The request uses the {@code method}, {@code header}, {@code cookies} and
 * {@code charset} values that are in scope for this method (presumably
 * fields of the enclosing class; they are not declared here).
 *
 * @param link URL to fetch
 * @return Document parsed from the response
 * @throws IOException if executing the request or parsing the response fails
 */
public Document getHtmlDocument(String link) throws IOException {
    // Start the crawl: open a connection using the configured HTTP method.
    Connection request = Jsoup.connect(link).method(method);

    // Apply the request headers, when any are configured.
    if (header != null) {
        for (Map.Entry<String, String> headerEntry : header.entrySet()) {
            String headerName = headerEntry.getKey();
            String headerValue = headerEntry.getValue();
            request.header(headerName, headerValue);
        }
    }

    // Apply the cookies, when any are configured.
    if (cookies != null) {
        request.cookies(cookies);
    }

    // Run the request, then set the response charset before parsing.
    Connection.Response fetched = request.execute();
    Connection.Response decoded = fetched.charset(charset.name());
    return decoded.parse();
}
 
Example 2
Source File: ArtStationRipper.java    From ripme with MIT License 6 votes vote down vote up
private JSONObject getJson(URL url) throws IOException {
        // Issue a GET with browser-like headers. HTTP error statuses and the response
        // content type are ignored by the connection so the status can be checked here.
        // NOTE: an explicit "Accept-Encoding: gzip, deflate, br" header is left disabled.
        Response response = Http.url(url).method(Method.GET).connection()
                .ignoreHttpErrors(true)
                .ignoreContentType(true)
                .userAgent(
                        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.95 Safari/537.11")
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                .header("Accept-Language", "en-US,en;q=0.5")
                .header("Upgrade-Insecure-Requests", "1")
                .execute();

        // Anything outside the 2xx range is reported as an IOException.
        int statusCode = response.statusCode();
        if (statusCode / 100 != 2) {
            throw new IOException("Error fetching json. Status code:" + statusCode);
        }

        // 2xx: the response body is the JSON document.
        return new JSONObject(response.body());
    }
 
Example 3
Source File: PasswordNetworkManager.java    From Shaarlier with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Builds a pre-configured jsoup connection to a Shaarli instance.
 *
 * The "Authorization: Basic ..." header is added unless {@code mBasicAuth} is the
 * empty string, and the stored cookies are attached when {@code mCookies} is not null.
 *
 * @param url    the url of the shaarli
 * @param method the HTTP method to use
 * @return the configured connection (not yet executed)
 */
private Connection newConnection(String url, Connection.Method method) {
    Connection connection = Jsoup.connect(url);

    // Optional HTTP basic authentication.
    boolean hasBasicAuth = !"".equals(this.mBasicAuth);
    if (hasBasicAuth) {
        connection = connection.header("Authorization", "Basic " + this.mBasicAuth);
    }

    // Optional cookies.
    if (this.mCookies != null) {
        connection = connection.cookies(this.mCookies);
    }

    // Settings applied to every connection.
    connection = connection.validateTLSCertificates(this.mValidateCert);
    connection = connection.timeout(NetworkUtils.TIME_OUT);
    connection = connection.followRedirects(true);
    return connection.method(method);
}
 
Example 4
Source File: WebConnector.java    From JavaSkype with MIT License 5 votes vote down vote up
/**
 * Sends a request and returns the raw response.
 *
 * @param method          HTTP method to use
 * @param apiPath         full URL when {@code absoluteApiPath} is true, otherwise a path
 *                        appended to {@code SERVER_HOSTNAME}
 * @param absoluteApiPath whether {@code apiPath} is already a complete URL
 * @param keyval          request data passed straight to {@code Connection.data(String...)}
 * @return the response of the executed request
 * @throws IOException if executing the request fails
 */
private Response sendRequest(Method method, String apiPath, boolean absoluteApiPath, String... keyval) throws IOException {
  final String url;
  if (absoluteApiPath) {
    url = apiPath;
  } else {
    url = SERVER_HOSTNAME + apiPath;
  }

  // 100 MiB body limit, 10 s timeout; content type and HTTP error statuses are ignored.
  Connection conn = Jsoup.connect(url)
      .maxBodySize(100 * 1024 * 1024)
      .timeout(10000)
      .method(method)
      .ignoreContentType(true)
      .ignoreHttpErrors(true);
  logger.finest("Sending " + method + " request at " + url);

  // Attach the Skype token when one is available; otherwise just note its absence.
  if (skypeToken == null) {
    logger.fine("No token sent for the request at: " + url);
  } else {
    conn.header("X-Skypetoken", skypeToken);
  }

  conn.data(keyval);
  return conn.execute();
}
 
Example 5
Source File: ConnectionProvider.java    From yahnac with Apache License 2.0 5 votes vote down vote up
/**
 * Creates the default connection for a path below {@code BASE_URL}.
 *
 * @param baseUrlExtension text appended to {@code BASE_URL} to form the target URL
 * @return a connection with timeout, user agent and "Accept-Encoding: gzip" set
 */
private static Connection defaultConnection(String baseUrlExtension) {
    String targetUrl = BASE_URL + baseUrlExtension;

    Connection connection = Jsoup.connect(targetUrl);
    connection = connection.timeout(TIMEOUT_MILLIS);
    connection = connection.userAgent(USER_AGENT);

    // Ask the server for gzip-compressed responses.
    connection.header("Accept-Encoding", "gzip");
    return connection;
}
 
Example 6
Source File: JsoupRequestor.java    From http-api-invoker with MIT License 4 votes vote down vote up
/**
 * Sets a default Content-Type on the connection when the request does not provide one.
 *
 * @param request request whose headers are inspected (the header map may be null)
 * @param conn    connection that receives the default header
 */
private void setContentType(HttpRequest request, Connection conn) {
    boolean contentTypeProvided = request.getHeaders() != null
            && request.getHeaders().containsKey(CONTENT_TYPE);
    if (contentTypeProvided) {
        // The caller's own Content-Type is left untouched.
        return;
    }
    conn.header(CONTENT_TYPE, APPLICATION_JSON);
}
 
Example 7
Source File: ArtStationRipper.java    From ripme with MIT License 4 votes vote down vote up
/**
     * Parses an ArtStation URL.
     *
     * The page behind the URL is downloaded and its HTML is searched for a
     * reference to a project JSON or to a user "quick.json". If the download
     * fails, the HTML is treated as empty and the result is of type UNKNOWN,
     * except for a 403 on an "artwork/" URL, which is resolved from the URL itself.
     *
     * @param url URL to an ArtStation user profile
     *            (https://www.artstation.com/username) or single project
     *            (https://www.artstation.com/artwork/projectid)
     * @return ParsedURL object containing URL type, JSON location and ID (stores
     *         account name or project hash, depending of the URL type identified)
     */
    private ParsedURL parseURL(URL url) {
        String pageHtml;

        // Download the HTML of the given URL using browser-like request headers.
        // NOTE: an explicit "Accept-Encoding: gzip, deflate, br" header is left disabled.
        try {
            Connection request = Http.url(url).method(Method.GET).connection();
            request.ignoreHttpErrors(true);
            request.userAgent("Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:67.0) Gecko/20100101 Firefox/67.0");
            request.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
            request.header("Accept-Language", "en-US,en;q=0.5");
            request.header("Upgrade-Insecure-Requests", "1");
            Response response = request.execute();
            int statusCode = response.statusCode();

            if (statusCode / 100 == 2) {
                pageHtml = response.parse().html();
            } else if (statusCode == 403 && url.toString().contains("artwork/")) {
                // Cloudflare page (error 403), usually seen for artwork URLs
                // (artstation.com/artwork/someProjectId): take the ID from the
                // last path segment instead of from the page.
                String address = url.toString();
                String projectId = address.substring(address.lastIndexOf("/") + 1);
                String projectJson = "https://www.artstation.com/projects/" + projectId + ".json";
                return new ParsedURL(URL_TYPE.SINGLE_PROJECT, projectJson, projectId);
            } else {
                LOGGER.error("Couldnt fetch URL: " + url);
                throw new IOException("Error fetching URL: " + url + " Status Code: " + statusCode);
            }
        } catch (IOException e) {
            // Any I/O failure, including the one thrown just above, continues with
            // empty HTML; neither pattern below matches it, so the result is UNKNOWN.
            pageHtml = "";
        }

        // Does the page reference a project?
        Matcher projectRef = Pattern.compile("'/projects/(\\w+)\\.json'").matcher(pageHtml);
        if (projectRef.find()) {
            String projectHash = projectRef.group(1);
            return new ParsedURL(URL_TYPE.SINGLE_PROJECT,
                    "https://www.artstation.com/projects/" + projectHash + ".json", projectHash);
        }

        // Does the page reference a user profile?
        Matcher userRef = Pattern.compile("'/users/([\\w-]+)/quick\\.json'").matcher(pageHtml);
        if (userRef.find()) {
            String accountName = userRef.group(1);
            return new ParsedURL(URL_TYPE.USER_PORTFOLIO,
                    "https://www.artstation.com/users/" + accountName + "/projects.json", accountName);
        }

        // Neither a user profile nor a project is referenced.
        return new ParsedURL(URL_TYPE.UNKNOWN, null, null);
    }
 
Example 8
Source File: GITHUBLoginApater.java    From crawler-jsoup-maven with Apache License 2.0 4 votes vote down vote up
/**
     * Simulates a login in two requests: first the login page is fetched to
     * collect its form fields and cookies, then the filled-in form is posted
     * to https://github.com/session together with those cookies. The page DOM,
     * the login response body and the resulting cookies are printed to stdout.
     *
     * @param userName user name
     * @param pwd password
     * @throws Exception if either request fails
     */
    public static void simulateLogin(String userName, String pwd) throws Exception {

        /*
         * First request: grab the login form page. Its response supplies the
         * form fields to submit and the cookies (rs.cookies()) that are sent
         * along with the POST to the action URL.
         */
        Connection loginPageRequest = Jsoup.connect(LOGIN_URL);
        loginPageRequest.header(USER_AGENT, USER_AGENT_VALUE);   // present ourselves as a browser
        Response loginPage = loginPageRequest.execute();
        Document loginDom = Jsoup.parse(loginPage.body());      // build the DOM tree

        System.out.println(loginDom);

        // The forms to submit; their structure can be checked in the page source.
        List<Element> forms = loginDom.select("form");

        // Map of all parameter names and values found in the forms.
        Map<String, String> formData = new HashMap<>();

        // Updated 01/24/2019 17:45 by bluetata: after several GitHub redesigns
        // the submitted request data looks like this:
        //
        // authenticity_token   ll0RJnG1f9XDAaN1DxnyTDzCs+YXweEZWel9kGkq8TvXH83HjCwPG048sJ/VVjDA94YmbF0qvUgcJx8/IKlP8Q==
        // commit  Sign+in
        // login   bluetata
        // password    password123
        // utf8    ✓
        //
        // Every form on the page is scanned (the previous logic only looked at
        // the first form).
        for (Element form : forms) {
            for (Element field : form.getAllElements()) {
                String fieldName = field.attr("name");
                // Fill in the user name.
                if (fieldName.equals("login")) {
                    field.attr("value", userName);
                }
                // Fill in the password.
                if (fieldName.equals("password")) {
                    field.attr("value", pwd);
                }
                // Skip elements without a name attribute.
                if (fieldName.length() > 0) {
                    formData.put(fieldName, field.attr("value"));
                }
            }
        }

        /*
         * Second request: POST the form data together with the cookies of the
         * first response.
         */
        Connection sessionRequest = Jsoup.connect("https://github.com/session");
        sessionRequest.header(USER_AGENT, USER_AGENT_VALUE);
        Response sessionResponse = sessionRequest
                .ignoreContentType(true)
                .followRedirects(true)
                .method(Method.POST)
                .data(formData)
                .cookies(loginPage.cookies())
                .execute();
        // Print the page returned after the login attempt.
        System.out.println(sessionResponse.body());

        // Cookies after login; they could be stored locally and reused so that
        // only a single login is needed.
        Map<String, String> sessionCookies = sessionResponse.cookies();
        for (Map.Entry<String, String> cookie : sessionCookies.entrySet()) {
            System.out.println(cookie.getKey() + " : " + cookie.getValue());
        }
    }
 
Example 9
Source File: CentralnicLoinApater.java    From crawler-jsoup-maven with Apache License 2.0 4 votes vote down vote up
/**
 * Demo: logs in to the CentralNic registrar console in two requests and then
 * prints two console pages fetched with the resulting cookies.
 *
 * @param args unused
 * @throws Exception if any request fails
 */
public static void main(String[] args) throws Exception {

    String url = "https://registrar-console.centralnic.com/dashboard/login";
    String userAgent = "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

    // First request: load the login page (3 s timeout).
    Response loginPage = Jsoup.connect(url).userAgent(userAgent).timeout(1000 * 3).execute();

    Document loginDom = Jsoup.parse(loginPage.body());

    // The login form.
    List<Element> loginForms = loginDom.select("#login_form");
    // Value of the form's XSRF field; captured here but not used further in this method.
    String xsrfToken = null;
    // Form fields to send with the POST.
    Map<String, String> formData = new java.util.HashMap<>();
    for (Element field : loginForms.get(0).getAllElements()) {
      String fieldName = field.attr("name");
      switch (fieldName) {
        case "regid":    // registrar id
        case "user":     // user name
        case "password": // password
          field.attr("value", "*****");
          break;
        case "XSRF":
          xsrfToken = field.attr("value");
          break;
        default:
          break;
      }
      // Skip elements without a name attribute.
      if (fieldName.length() > 0) {
        formData.put(fieldName, field.attr("value"));
      }
    }

    System.out.println(formData);
    System.out.println("======================");
    System.out.println(loginPage.url());
    System.out.println(loginPage.statusCode());

    // Add two extra cookies to the ones returned by the first response.
    Map<String, String> requestCookies = loginPage.cookies();
    requestCookies.put("remember_me", "H2454482%3Atoddhan");
    requestCookies.put("lang", "en");

    System.out.println(loginPage.cookies());

    /*
     * Second request: POST the form data together with the cookies.
     */
    Connection loginRequest = Jsoup.connect("https://registrar-console.centralnic.com/dashboard/login_target");
    loginRequest.header("User-Agent", userAgent);
    Response loginResult = loginRequest
        .ignoreContentType(true)
        .followRedirects(true)
        .method(Method.POST)
        .data(formData)
        .cookies(requestCookies)
        .referrer("https://registrar-console.centralnic.com/dashboard/login")
        .header("host", "registrar-console.centralnic.com")
        .execute();

    // Cookies of the login response, extended with the same two extra cookies.
    Map<String, String> sessionCookies = loginResult.cookies();
    sessionCookies.put("remember_me", "H2454482%3Atoddhan");
    sessionCookies.put("lang", "en");
    System.out.println(sessionCookies);

    System.out.println(loginResult.statusCode());

    // Fetch two console pages with the session cookies (the first goes to stderr).
    System.err.println(JsoupUtil.getDocumentWithCookies("https://registrar-console.centralnic.com/", sessionCookies));

    System.out.println(JsoupUtil.getDocumentWithCookies("https://registrar-console.centralnic.com/graphs/file_browser/droplist", sessionCookies));
}