Java Code Examples for org.jsoup.nodes.Document#outerHtml()

The following examples show how to use org.jsoup.nodes.Document#outerHtml(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source Project: v9porn   File: GoogleRecaptchaVerifyPresenter.java    License: MIT License 6 votes vote down vote up
/**
 * Appends a {@code getPostData()} helper script to the head of the given page.
 * The script returns the challenge form's action, the "r" and "id" values and the
 * reCAPTCHA response, joined by commas.
 *
 * @param oldHtml the page markup to instrument
 * @return "" when {@code TextUtils.isEmpty} reports the input as empty, otherwise the
 *         full document markup with the script added
 */
private String injectJs(String oldHtml) {
    if (TextUtils.isEmpty(oldHtml)) {
        return "";
    }

    // JavaScript snippet inserted verbatim into <head>.
    final String postDataScript = "<script type=\"text/javascript\">\n" +
            "        function getPostData() {\n" +
            "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
            "            if (!recaptcha || recaptcha === '') {\n" +
            "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
            "            }\n" +
            "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
            "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
            "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
            "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
            "        }\n" +
            "    </script>";

    Document page = Jsoup.parse(oldHtml);
    page.head().append(postDataScript);
    String injected = page.outerHtml();
    // Log message reads "JS injection complete".
    Log.d(TAG, "JS注入完成");
    return injected;
}
 
Example 2
Source Project: v9porn   File: GoogleRecaptchaVerifyPresenter.java    License: MIT License 6 votes vote down vote up
/**
 * Parses the supplied markup, adds a script defining {@code getPostData()} to the
 * document head, and serialises the whole document again.
 *
 * @param oldHtml original page markup
 * @return the instrumented markup, or "" if {@code TextUtils.isEmpty(oldHtml)} is true
 */
private String injectJs(String oldHtml) {
    // Nothing to instrument.
    if (TextUtils.isEmpty(oldHtml)) {
        return "";
    }

    final String script = "<script type=\"text/javascript\">\n" +
            "        function getPostData() {\n" +
            "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
            "            if (!recaptcha || recaptcha === '') {\n" +
            "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
            "            }\n" +
            "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
            "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
            "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
            "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
            "        }\n" +
            "    </script>";

    Document parsed = Jsoup.parse(oldHtml);
    parsed.head().append(script);

    String result = parsed.outerHtml();
    Log.d(TAG, "JS注入完成"); // "JS injection complete"
    return result;
}
 
Example 3
Source Project: ripme   File: VkRipper.java    License: MIT License 6 votes vote down vote up
/**
 * Downloads the page at {@code url} and extracts an embedded video URL from its markup.
 * Qualities are tried in the order 1080, 720, 480, 240; the first one found wins.
 *
 * @param url address of the page to fetch
 * @return the video URL with all backslashes removed
 * @throws IOException if the page cannot be fetched or no complete video URL is present
 */
public static String getVideoURLAtPage(String url) throws IOException {
    Document doc = Http.url(url)
                       .userAgent(USER_AGENT)
                       .get();
    String html = doc.outerHtml();
    for (String quality : new String[] {"1080", "720", "480", "240"}) {
        // Escaped key prefix as it appears in the page, e.g. url720\":\"
        String marker = "url" + quality + "\\\":\\\"";
        int markerAt = html.indexOf(marker);
        if (markerAt < 0) {
            continue;
        }
        int valueStart = markerAt + marker.length();
        int valueEnd = html.indexOf('"', valueStart);
        if (valueEnd < 0) {
            // Value is not terminated (truncated page); treat this quality as absent
            // instead of failing with StringIndexOutOfBoundsException.
            continue;
        }
        // Strip the backslash escaping from the extracted value.
        return html.substring(valueStart, valueEnd).replace("\\", "");
    }
    throw new IOException("Could not find video URL at " + url);
}
 
Example 4
/**
 * Re-serialises {@code code} through jsoup using the parser selected by
 * {@code formatter.syntax()} and the formatter's output settings.
 *
 * @param code   the source text to format
 * @param ending not used by this implementation
 * @return the formatted text, or {@code null} when formatting changes nothing
 * @throws IllegalArgumentException if the configured syntax is neither html nor xml
 */
@Override
public String doFormat(String code, LineEnding ending) {
    final Parser parser;
    switch (formatter.syntax()) {
    case html:
        parser = Parser.htmlParser();
        break;
    case xml:
        parser = Parser.xmlParser();
        break;
    default:
        throw new IllegalArgumentException(formatter.syntax() + " is not allowed as syntax");
    }

    Document document = Jsoup.parse(code, "", parser);
    document.outputSettings(formatter);
    String formattedCode = document.outerHtml();

    // null signals "already formatted" to the caller.
    return code.equals(formattedCode) ? null : formattedCode;
}
 
Example 5
Source Project: ogham   File: JsoupAttachImageInliner.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites the {@code src} of matching images to a generated content id and builds one
 * inline attachment per image resource that is actually referenced in the HTML.
 *
 * @param htmlContent the HTML to process
 * @param images      the candidate image resources
 * @return the updated HTML together with the attachments that were generated
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document doc = Jsoup.parse(htmlContent);
	List<Attachment> attachments = new ArrayList<>(images.size());
	for (ImageResource image : images) {
		// <img> elements selected for this resource by getImagesToAttach
		Elements matches = getImagesToAttach(doc, image);
		if (matches.isEmpty()) {
			continue;
		}
		String contentId = idGenerator.generate(image.getName());
		ByteResource resource = new ByteResource(image.getName(), image.getContent());
		Attachment attachment = new Attachment(resource, null, INLINE, format(CONTENT_ID, contentId));
		// point every matching image at the content id and flag it as inlined
		for (Element match : matches) {
			match.attr(SRC_ATTR, format(SRC_VALUE, contentId));
			match.attr(INLINED_ATTR, true);
		}
		attachments.add(attachment);
	}
	return new ContentWithImages(doc.outerHtml(), attachments);
}
 
Example 6
Source Project: Natty   File: CheckUtils.java    License: GNU General Public License v3.0 5 votes vote down vote up
/**
 * Wraps the post body in a {@code <body>} element, parses it, and removes every
 * {@code a}, {@code code}, {@code img}, {@code pre} and {@code blockquote} element.
 *
 * @param post the post whose body is stripped
 * @return the outer HTML of the whole parsed document after removal
 */
public static String stripBody(Post post) {
    Document doc = Jsoup.parse("<body>" + post.getBody() + "</body>");
    // Removal order matches the original sequence of calls.
    for (String tag : new String[] {"a", "code", "img", "pre", "blockquote"}) {
        doc.getElementsByTag(tag).remove();
    }
    return doc.outerHtml();
}
 
Example 7
/**
 * Rewrites the HTML testcase files found under {@code RGAA3_TESTCASE_PATH}.
 * <p>
 * For each sub-directory, a rule key is derived from the directory name (after removing
 * "Rgaa30Rule" and ".java"): characters 0-2, 2-4 and 4-6 are the theme, criterion and
 * test numbers. In every regular file whose name contains "html", the first
 * {@code .test-detail} element is turned into an empty {@code div}, given a
 * {@code lang="fr"} attribute if it has none, and filled with the rule's raw HTML.
 * The document is then written back to the same file.
 *
 * @throws IOException if the source directory cannot be listed or a file cannot be
 *                     read or written
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        // listFiles() yields null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list testcase directory " + srcDir);
    }
    for (File file : ruleDirs) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): nothing to rewrite here.
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Parsing drops leading zeros, e.g. "01","02","03" -> "1-2-3".
        String testKey = Integer.parseInt(theme) + "-" + Integer.parseInt(crit) + "-" + Integer.parseInt(test);
        String wrongKey = theme + "." + crit + "." + test;
        for (File testcase : testcases) {
            if (!testcase.isFile() || !testcase.getName().contains("html")) {
                continue;
            }
            Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
            Element detail = doc.select(".test-detail").first();
            if (detail == null) {
                System.out.println(doc.outerHtml());
                continue;
            }
            detail.tagName("div");
            // text("") already clears all child nodes, so no separate child removal is needed.
            detail.text("");
            if (!detail.hasAttr("lang")) {
                detail.attr("lang", "fr");
            }
            detail.append("\n" + RGAA3.get(testKey).ruleRawHtml + "\n");
            doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
            doc.outputSettings().outline(false);
            doc.outputSettings().indentAmount(4);
            String outputHtml = doc.outerHtml();
            if (outputHtml.contains(wrongKey)) {
                // Literal replacement: wrongKey contains '.' which replaceAll would treat
                // as a regex wildcard, and the replacement must not be parsed for '$'.
                outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
            }
            FileUtils.writeStringToFile(testcase, outputHtml);
        }
    }
}
 
Example 8
/**
 * Cleans {@code dirtyHTML} and stores the serialised document in {@code result}.
 * The namespace fix-up is written back to {@code dirtyHTML}; output uses xhtml
 * escaping, outline mode and an indent of 2.
 */
@Override
public void run() {
    dirtyHTML = removeBadNamespaceDefinition(dirtyHTML);

    Document doc = Jsoup.parse(dirtyHTML);
    // The settings object is fluent, so the three options are configured in one chain.
    doc.outputSettings()
            .escapeMode(Entities.EscapeMode.xhtml)
            .outline(true)
            .indentAmount(2);

    removeComments(doc);
    removeMalformedAttributes(doc);

    result = doc.outerHtml();
}
 
Example 9
Source Project: ogham   File: JsoupCssInliner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Runs the CSS inlining steps on the parsed HTML, in order: {@code internStyles},
 * {@code fetchStyles}, {@code extractStyles}, {@code applyStyles}.
 *
 * @param htmlContent the HTML to process
 * @param cssContents the external stylesheets handed to {@code internStyles}
 * @return the outer HTML of the processed document
 */
@Override
public String inline(String htmlContent, List<ExternalCss> cssContents) {
	Document parsed = Jsoup.parse(htmlContent);
	internStyles(parsed, cssContents);
	// the stylesheet is fetched before extractStyles is invoked, as in the original sequence
	extractStyles(parsed, fetchStyles(parsed));
	applyStyles(parsed);
	return parsed.outerHtml();
}
 
Example 10
Source Project: ogham   File: JsoupBase64ImageInliner.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the {@code src} of every image selected by {@code getImagesToInline} with a
 * value built from {@code BASE64_URI}, the image mimetype and its base64-encoded content,
 * and marks the element as inlined.
 *
 * @param htmlContent the HTML to process
 * @param images      the image resources to embed
 * @return the updated HTML with an empty attachment list
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document doc = Jsoup.parse(htmlContent);
	for (ImageResource image : images) {
		for (Element target : getImagesToInline(doc, image)) {
			// built per element, exactly as often as in the original loop
			String encoded = Base64Utils.encodeToString(image.getContent());
			String dataUri = MessageFormat.format(BASE64_URI, image.getMimetype(), encoded);
			target.attr(SRC_ATTR, dataUri);
			target.attr(INLINED_ATTR, true);
		}
	}
	List<Attachment> noAttachments = new ArrayList<Attachment>(0);
	return new ContentWithImages(doc.outerHtml(), noAttachments);
}
 
Example 11
Source Project: wisdom   File: BrowserWatchFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Inserts a script tag for {@code /assets/javascript/browserWatch.js} after the last
 * element sibling of the document body.
 *
 * @param content the page markup
 * @return the outer HTML of the document with the script tag added
 */
private String addJavascript(String content) {
	Document sourcePage = Jsoup.parse(content);
	String scriptTag = String.format("<script src=\"%s\"></script>", "/assets/javascript/browserWatch.js");
	sourcePage.body().lastElementSibling().after(scriptTag);
	return sourcePage.outerHtml();
}
 
Example 12
Source Project: apogen   File: DomDistance.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Parses the file as HTML and returns its markup with every entry of
 * {@code stringsToRemove} deleted.
 * <p>
 * The static {@code stringsToRemove} list is replaced with a fresh list on every call;
 * presumably {@code getStringsToRemove} fills it from the document's elements (confirm
 * against that helper).
 *
 * @param f the HTML file to read
 * @return the document markup with the collected strings removed
 * @throws IOException if the file cannot be read or parsed
 */
public static String cleanDomFromText(File f) throws IOException {
	Document d = Jsoup.parse(f, null);

	stringsToRemove = new LinkedList<String>();
	getStringsToRemove(d.getAllElements());

	String cleaned = d.outerHtml();
	for (String unwanted : stringsToRemove) {
		cleaned = cleaned.replace(unwanted, "");
	}
	return cleaned;
}
 
Example 13
/** Checks that an {@code <isindex>} element is expanded into the expected form markup. */
@Test public void testNormalisesIsIndex() {
    Document doc = Jsoup.parse("<body><isindex action='/submit'></body>");
    // Serialise the whole document first; the value itself is not asserted on.
    String fullHtml = doc.outerHtml();

    String expected = "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>";
    String actual = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals(expected, actual);
}
 
Example 14
/**
 * Parses a body containing {@code <isindex action='/submit'>} and verifies the
 * whitespace-normalised body HTML.
 */
@Test public void testNormalisesIsIndex() {
    final String expectedBody = "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>";

    Document parsed = Jsoup.parse("<body><isindex action='/submit'></body>");
    String serialised = parsed.outerHtml(); // result unused; the call is kept as in the original test

    String bodyHtml = parsed.body().html();
    assertEquals(expectedBody, StringUtil.normaliseWhitespace(bodyHtml));
}
 
Example 15
Source Project: ogham   File: CssInlineUtils.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Strip the Ogham-only attributes from every element that carries one of them:
 * <ul>
 * <li>{@link CssInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link CssInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the outer HTML of the parsed document without those attributes
 */
public static String removeOghamAttributes(String html) {
	Document doc = Jsoup.parse(html);
	String selector = "["+INLINE_MODE_ATTR+"], ["+INLINED_ATTR+"]";
	// bulk removal on the matched set: each attribute is dropped from every element
	doc.select(selector)
			.removeAttr(INLINE_MODE_ATTR)
			.removeAttr(INLINED_ATTR);
	return doc.outerHtml();
}
 
Example 16
Source Project: ogham   File: ImageInlineUtils.java    License: Apache License 2.0 3 votes vote down vote up
/**
 * Strip the Ogham-only attributes from every {@code img} element:
 * <ul>
 * <li>{@link ImageInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link ImageInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the outer HTML of the parsed document without those attributes
 */
public static String removeOghamAttributes(String html) {
	Document doc = Jsoup.parse(html);
	for (Element image : doc.select("img")) {
		image.removeAttr(INLINE_MODE_ATTR).removeAttr(INLINED_ATTR);
	}
	return doc.outerHtml();
}