Java Code Examples for org.jsoup.nodes.Document#toString()

The following examples show how to use org.jsoup.nodes.Document#toString(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: Downloader.java    From MMDownloader with Apache License 2.0 5 votes vote down vote up
/**
 * Fetches an archive page with Jsoup and returns its HTML.
 *
 * @param eachArchiveAddress address of the archive page that holds the actual comic
 * @return the HTML of the fetched page, as produced by {@code Document#toString()}
 * @throws RuntimeException if the parsed page contains no {@code div.gallery-template} element
 * @throws Exception        declared for any failure raised while connecting or parsing
 */
private String getHtmlPageJsoup(String eachArchiveAddress) throws Exception {
	print.info("고속 연결 시도중...\n");

	// The password is attached as request data from the very first request.
	// NOTE(review): the original comment says this is sent via POST, but no .method(...)
	// call is visible here — confirm which HTTP method is actually used.
	Response response = Jsoup.connect(eachArchiveAddress)
			.userAgent(UserAgent.getUserAgent())
			.header("charset", "utf-8")
			.header("Accept-Encoding", "gzip") // gzip added on 2017-11-26
			.timeout(MAX_WAIT_TIME)
			.data("pass", PASSWORD) // as of 2018-04-29 the site uses reCaptcha, so this has no effect
			.followRedirects(true)
			.execute();

	// Parse the received HTML.
	Document preDoc = response.parse();

	// <div class="gallery-template"> is where the comic lives; without it the parse is a failure.
	if (preDoc.select("div.gallery-template").isEmpty()) {
		throw new RuntimeException("Jsoup Parsing Failed: No tag found");
	}

	// Serialize before reporting success, matching the original statement order.
	String pageSource = preDoc.toString();

	print.info("고속 연결 성공!\n");
	return pageSource;
}
 
Example 2
Source File: TemplateRender.java    From jpress with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Parses the given HTML, rewrites resource attributes through {@code replace},
 * optionally rewrites anchors for template preview, and returns the serialized document.
 *
 * @param content HTML to process; returned unchanged when blank
 * @return the processed HTML, or {@code content} itself when it is blank
 */
public String buildNormalHtml(String content) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    Document document = Jsoup.parse(content);

    // Emit the markup without pretty-printing or outline mode.
    document.outputSettings().prettyPrint(false).outline(false);

    replace(document.select("script"), "src");
    replace(document.select("img"), "src");
    replace(document.select("link"), "href");

    // Template preview is enabled and a preview template is set: rewrite anchors too.
    boolean previewActive = templatePreviewEnable
            && TemplateManager.me().getPreviewTemplate() != null;
    if (previewActive) {
        replacePreviewHref(document.select("a"));
    }

    return document.toString();
}
 
Example 3
Source File: _WechatArticleImport.java    From jpress with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites every {@code <img>} in the given HTML to point at a local attachment path
 * and records each rewritten path.
 *
 * <p>The image URL is taken from {@code src}, falling back to {@code data-src} when
 * {@code src} is missing or empty. Images with no URL at all are left untouched and
 * are not added to {@code imageUrls}.
 *
 * @param content   HTML whose images should be rewritten
 * @param imageUrls output list; each rewritten image path is appended to it
 * @return the serialized document after rewriting
 */
private String processContentImages(String content, List<String> imageUrls) {
    Document doc = Jsoup.parse(content);

    // select() returns an empty collection when nothing matches, so no null check is needed.
    for (Element element : doc.select("img")) {
        // attr() yields "" for a missing attribute, so this also covers a present-but-empty src.
        String imageUrl = element.attr("src");
        if (imageUrl.isEmpty()) {
            imageUrl = element.attr("data-src");
        }
        if (imageUrl.isEmpty()) {
            // Nothing to download or rewrite; do not record an empty entry.
            continue;
        }

        // Example input:
        // http://mmbiz.qpic.cn/mmbiz/4gZTdZfnQeDvQqCZFuVvYv8scGS7sEQTRETgISib1blz5iclAtnsccaJhaugmKc
        // hhm8mFOtjnicibibumazy8wPS6Xg/640?tp=webp&wxfrom=5&wx_lazy=1&wx_co=1

        imageUrl = replaceLast(imageUrl, "/", "__");

        // NOTE(review): URLs that start with neither scheme pass through this step unchanged — confirm intended.
        imageUrl = imageUrl.startsWith("http://")
                ? imageUrl.replace("http://", "/attachment/")
                : imageUrl.replace("https://", "/attachment/s");

        imageUrl = imageUrl.replace("?", ".png?");

        element.removeAttr("data-src");
        element.attr("src", imageUrl);

        imageUrls.add(imageUrl);
    }

    return doc.toString();
}
 
Example 4
Source File: LinkRewriterServiceImpl.java    From publick-sling-blog with Apache License 2.0 5 votes vote down vote up
/**
 * Rewrites the {@code href} of every anchor and the {@code content} of every meta tag
 * in an HTML string by passing them through {@code updateAttribute}.
 *
 * @param html The HTML string.
 * @param requestHost The host name from the request.
 * @return The serialized HTML document after the attributes have been updated.
 */
public String rewriteAllLinks(final String html, final String requestHost) {
    final Document parsed = Jsoup.parse(html);

    // Collect both element sets before any attribute is modified.
    final Elements anchors = parsed.select("a[href]");
    final Elements metaTags = parsed.select("meta[content]");

    updateAttribute(anchors, "href", requestHost);
    updateAttribute(metaTags, "content", requestHost);

    return parsed.toString();
}
 
Example 5
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Parses a fixture with invalid table contents and checks that both marker strings
 * appear in the pretty-printed output, with the search text after the comment.
 */
@Test
public void testInvalidTableContents() throws IOException {
    final File fixture = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    final Document parsed = Jsoup.parse(fixture, "UTF-8");
    parsed.outputSettings().prettyPrint(true);

    final String html = parsed.toString();
    final int commentAt = html.indexOf("Comment");
    final int searchTextAt = html.indexOf("Why am I here?");

    assertTrue("Comment not found", commentAt >= 0);
    assertTrue("Search text not found", searchTextAt >= 0);
    assertTrue("Search text did not come after comment", commentAt < searchTextAt);
}
 
Example 6
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
/**
 * Renders the invalid-table fixture with pretty printing and verifies that the
 * "Comment" marker and the "Why am I here?" text are both present, in that order.
 */
@Test
public void testInvalidTableContents() throws IOException {
    final File source = ParseTest.getFile("/htmltests/table-invalid-elements.html");
    final Document document = Jsoup.parse(source, "UTF-8");
    document.outputSettings().prettyPrint(true);

    final String output = document.toString();
    final int commentIndex = output.indexOf("Comment");
    final int textIndex = output.indexOf("Why am I here?");

    assertTrue("Comment not found", commentIndex >= 0);
    assertTrue("Search text not found", textIndex >= 0);
    assertTrue("Search text did not come after comment", commentIndex < textIndex);
}
 
Example 7
Source File: DownloadPostFragment.java    From Instagram-Profile-Downloader with MIT License 4 votes vote down vote up
/**
 * Downloads the page at {@code f_url[0]} and scrapes the caption, the video marker
 * and the image URL out of its serialized HTML.
 *
 * <p>The caption is reported through {@code publishProgress("0", caption)} and the
 * {@code type} field is set to {@code true} when the page is treated as a video.
 *
 * @param f_url the page URL is read from index 0
 * @return the scraped image URL, or {@code null} if anything failed
 */
@Override
protected String doInBackground(String... f_url) {
    try {
        Document doc = Jsoup.connect(f_url[0]).get();
        String html = doc.toString();

        type = false;

        // Caption. NOTE(review): 48 is presumably the distance from the key to the
        // caption text in the embedded JSON — confirm against a current page.
        String strCaption = quotedValueAfter(html, "edge_media_to_caption", 48);

        // Progress flag "0" = caption, "1" = video, "2" = image.
        publishProgress("0", strCaption);

        // Video: 11 is the length of the quoted "video_url" marker.
        String urlVid = quotedValueAfter(html, "\"video_url\"", 11);

        // NOTE(review): when the marker is absent the search starts near the top of the
        // page and presumably lands on a lang="en" attribute; anything else is treated
        // as a video — confirm this heuristic still holds.
        if (!urlVid.equalsIgnoreCase("en")) {
            // It is a video: show the play button.
            type = true;
        }

        // Image URL: skip the key name plus the two characters that follow it.
        return quotedValueAfter(html, "display_url", 13);

    } catch (Exception e) {
        // Log.e(tag, msg) rejects a null msg, and getMessage() may be null; build a
        // non-null message and pass the throwable so the stack trace is kept.
        Log.e("Error: ", "doInBackground failed: " + e.getMessage(), e);
    }

    return null;
}

/**
 * Returns the text between the first pair of double quotes found at or after
 * {@code skip} characters past the first occurrence of {@code marker}.
 * No bounds checking is done; a failed lookup surfaces as an exception from
 * {@code substring} or as an unrelated quoted string.
 */
private String quotedValueAfter(String html, String marker, int skip) {
    int from = html.indexOf(marker) + skip;
    int open = html.indexOf("\"", from) + 1;
    int close = html.indexOf("\"", open);
    return html.substring(open, close);
}
 
Example 8
Source File: DownloadIGTVFragment.java    From Instagram-Profile-Downloader with MIT License 4 votes vote down vote up
/**
 * Downloads the page at {@code f_url[0]} and scrapes the caption, the video marker
 * and the image URL out of its serialized HTML.
 *
 * <p>The caption is reported through {@code publishProgress("0", caption)} and the
 * {@code type} field is set to {@code true} when the page is treated as a video.
 *
 * @param f_url the page URL is read from index 0
 * @return the scraped image URL, or {@code null} if anything failed
 */
@Override
protected String doInBackground(String... f_url) {
    try {
        Document doc = Jsoup.connect(f_url[0]).get();
        String html = doc.toString();

        type = false;

        // Caption. NOTE(review): 48 is presumably the distance from the key to the
        // caption text in the embedded JSON — confirm against a current page.
        String strCaption = quotedValueAfter(html, "edge_media_to_caption", 48);

        // Progress flag "0" = caption, "1" = video, "2" = image.
        publishProgress("0", strCaption);

        // Video: 11 is the length of the quoted "video_url" marker.
        String urlVid = quotedValueAfter(html, "\"video_url\"", 11);

        // NOTE(review): when the marker is absent the search starts near the top of the
        // page and presumably lands on a lang="en" attribute; anything else is treated
        // as a video — confirm this heuristic still holds.
        if (!urlVid.equalsIgnoreCase("en")) {
            // It is a video: show the play button.
            type = true;
        }

        // Image URL: skip the key name plus the two characters that follow it.
        return quotedValueAfter(html, "display_url", 13);

    } catch (Exception e) {
        // Log.e(tag, msg) rejects a null msg, and getMessage() may be null; build a
        // non-null message and pass the throwable so the stack trace is kept.
        Log.e("Error: ", "doInBackground failed: " + e.getMessage(), e);
    }

    return null;
}

/**
 * Returns the text between the first pair of double quotes found at or after
 * {@code skip} characters past the first occurrence of {@code marker}.
 * No bounds checking is done; a failed lookup surfaces as an exception from
 * {@code substring} or as an unrelated quoted string.
 */
private String quotedValueAfter(String html, String marker, int skip) {
    int from = html.indexOf(marker) + skip;
    int open = html.indexOf("\"", from) + 1;
    int close = html.indexOf("\"", open);
    return html.substring(open, close);
}
 
Example 9
Source File: HtmlUtil.java    From V2EX with GNU General Public License v3.0 4 votes vote down vote up
/**
 * Injects a theme-coloured stylesheet and charset meta tags into the given HTML and
 * forces every image to full width.
 *
 * <p>Text, link, code and code-background colours are resolved from the theme of
 * {@code context} and written as six-digit {@code #rrggbb} values.
 *
 * @param html    HTML to style; may be {@code null}
 * @param context context whose theme supplies the colours
 * @return the styled document serialized to a string, or {@code ""} when {@code html}
 *         is {@code null} or empty
 */
public static String applyHtmlStyle(String html, Context context){
    // Bail out before touching the theme: there is nothing to style.
    if (html == null || html.isEmpty()) {
        return "";
    }

    String textColorStr = resolveCssColor(context, R.attr.attr_color_text);
    String linkColorStr = resolveCssColor(context, R.attr.attr_color_text_link);
    String codeColorStr = resolveCssColor(context, R.attr.attr_color_accent);
    String codeBackgroundStr = resolveCssColor(context, R.attr.attr_color_text_secondary);

    Document document = Jsoup.parse(html);
    document.head()
            .append(
            "<style type=\"text/css\">" +
            "body{width:95%;}" +
                    "* {" +
            "   color:" + textColorStr + ";" +
            "}" +
            "a {" +
            "   color:" + linkColorStr + ";" +
                    "word-wrap:break-word;" +
            "}" +
            "code,pre {" +
            "    color: " + codeColorStr + ";" +
            "    background: " + codeBackgroundStr + ";" +
            "    padding: 3px;" +
            "    border-radius: 5px;" +
                    "word-wrap:normal;" +
            "} img { border:1px solid grey;}" +
            "</style>");
    // The attribute name was previously misspelled as "http-equlv".
    document.head()
            .append("<meta name=\"content-type\" content=\"text/html; charset=utf-8\">" +
                    "<meta http-equiv=\"Content-Type\" content=\"text/html;charset=utf-8\">");
    for (Element img : document.select("img")) {
        img.attr("width", "100%");
        img.attr("height", "auto");
    }
    document.charset(Charset.forName("utf-8"));
    return document.toString();
}

/**
 * Resolves a theme colour attribute and formats its RGB part as {@code #rrggbb}.
 * The value is zero-padded to six hex digits so that channels below 0x10 still
 * produce a valid CSS colour; the alpha byte is discarded.
 */
private static String resolveCssColor(Context context, int attrId) {
    TypedValue resolved = new TypedValue();
    context.getTheme().resolveAttribute(attrId, resolved, true);
    return String.format("#%06x", resolved.data & 0xFFFFFF);
}
 
Example 10
Source File: RenderHelpler.java    From jboot with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the given HTML and passes the {@code src} of scripts and images and the
 * {@code href} of links through {@code replace} together with the given domain.
 *
 * @param content HTML to process; returned unchanged when blank
 * @param domain  domain handed to {@code replace} for each matched element set
 * @return the serialized document, or {@code content} itself when it is blank
 */
public static String processCDN(String content, String domain) {
    if (StrUtil.isBlank(content)) {
        return content;
    }

    final Document parsed = Jsoup.parse(content);

    // {selector, attribute} pairs, handled in this order.
    final String[][] targets = {
            {"script[src]", "src"},
            {"img[src]", "src"},
            {"link[href]", "href"},
    };
    for (String[] target : targets) {
        replace(parsed.select(target[0]), target[1], domain);
    }

    return parsed.toString();
}
 
Example 11
Source File: HtmlBeautifier.java    From cute-proxy with BSD 2-Clause "Simplified" License 4 votes vote down vote up
/**
 * Re-serializes the given HTML with an indent amount of 4.
 *
 * @param s       HTML text to beautify
 * @param charset not read by this implementation
 * @return the parsed document serialized back to a string
 */
@Override
public String beautify(String s, Charset charset) {
    final Document parsed = Jsoup.parse(s);
    final Document.OutputSettings settings = parsed.outputSettings();
    settings.indentAmount(4);
    return parsed.toString();
}