Java Code Examples for org.jsoup.nodes.Document#outerHtml()

The following examples show how to use org.jsoup.nodes.Document#outerHtml(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: GoogleRecaptchaVerifyPresenter.java    From v9porn with MIT License 6 votes vote down vote up
/**
 * Appends a helper script to the {@code <head>} of the given page.
 *
 * <p>The script defines a JavaScript function {@code getPostData()} that reads the
 * {@code g-recaptcha-response} element (its {@code value}, falling back to its
 * {@code innerHTML}), the {@code action} attribute of {@code challenge-form}, and the
 * {@code value} attributes of the first element named {@code r} and of the element with id
 * {@code id}. It returns them as {@code action,r,id,recaptcha}.
 *
 * @param oldHtml the page markup to extend
 * @return the serialised document including the script, or an empty string when
 *         {@code TextUtils.isEmpty(oldHtml)} is true
 */
private String injectJs(String oldHtml) {
    if (TextUtils.isEmpty(oldHtml)) {
        return "";
    }

    final String script = "<script type=\"text/javascript\">\n" +
            "        function getPostData() {\n" +
            "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
            "            if (!recaptcha || recaptcha === '') {\n" +
            "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
            "            }\n" +
            "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
            "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
            "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
            "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
            "        }\n" +
            "    </script>";

    Document page = Jsoup.parse(oldHtml);
    page.head().append(script);
    String injected = page.outerHtml();
    // Log message reads "JS injection finished".
    Log.d(TAG, "JS注入完成");
    return injected;
}
 
Example 2
Source File: GoogleRecaptchaVerifyPresenter.java    From v9porn with MIT License 6 votes vote down vote up
/**
 * Parses the supplied markup, adds a {@code getPostData()} JavaScript helper to the document
 * head and returns the re-serialised page.
 *
 * <p>{@code getPostData()} collects the reCAPTCHA response (element
 * {@code g-recaptcha-response}), the {@code action} of {@code challenge-form}, and the
 * {@code value} attributes of the elements {@code r} and {@code id}, joined with commas in the
 * order action, r, id, recaptcha.
 *
 * @param oldHtml markup of the page to instrument
 * @return the instrumented markup; {@code ""} if {@code TextUtils.isEmpty(oldHtml)} holds
 */
private String injectJs(String oldHtml) {
    String result = "";
    if (!TextUtils.isEmpty(oldHtml)) {
        Document document = Jsoup.parse(oldHtml);
        Element head = document.head();
        head.append("<script type=\"text/javascript\">\n" +
                "        function getPostData() {\n" +
                "            let recaptcha = document.getElementById(\"g-recaptcha-response\").value;\n" +
                "            if (!recaptcha || recaptcha === '') {\n" +
                "                recaptcha = document.getElementById(\"g-recaptcha-response\").innerHTML;\n" +
                "            }\n" +
                "            const action = document.getElementById('challenge-form').getAttribute(\"action\");\n" +
                "            const r = document.getElementsByName(\"r\")[0].getAttribute(\"value\");\n" +
                "            const id = document.getElementById('id').getAttribute(\"value\");\n" +
                "            return action + \",\" + r + \",\" + id + \",\" + recaptcha;\n" +
                "        }\n" +
                "    </script>");
        result = document.outerHtml();
        // The logged text means "JS injection finished".
        Log.d(TAG, "JS注入完成");
    }
    return result;
}
 
Example 3
Source File: VkRipper.java    From ripme with MIT License 6 votes vote down vote up
/**
 * Downloads the page at {@code url} and extracts a video URL from its markup.
 *
 * <p>The serialised page is searched for the escaped keys {@code url1080}, {@code url720},
 * {@code url480} and {@code url240}, in that order. The value following the first key that is
 * present and terminated by a double quote is returned. A key whose value has no closing quote
 * is skipped instead of failing with a {@link StringIndexOutOfBoundsException}.
 *
 * @param url address of the page to inspect
 * @return the extracted video URL with every backslash removed
 * @throws IOException if none of the keys yields a usable value, or if thrown while the page
 *         is being fetched
 */
public static String getVideoURLAtPage(String url) throws IOException {
    Document doc = Http.url(url)
                       .userAgent(USER_AGENT)
                       .get();
    String html = doc.outerHtml();
    for (String quality : new String[] {"1080", "720", "480", "240"}) {
        // Key as it appears in the page source: url<quality>\":\"
        String marker = "url" + quality + "\\\":\\\"";
        int markerIndex = html.indexOf(marker);
        if (markerIndex < 0) {
            continue;
        }
        int valueStart = markerIndex + marker.length();
        int valueEnd = html.indexOf('"', valueStart);
        if (valueEnd < 0) {
            // Unterminated value: try the next quality rather than crash on substring.
            continue;
        }
        // The value is backslash-escaped in the page; strip the escapes.
        return html.substring(valueStart, valueEnd).replace("\\", "");
    }
    throw new IOException("Could not find video URL at " + url);
}
 
Example 4
Source File: JsoupBasedFormatter.java    From formatter-maven-plugin with Apache License 2.0 6 votes vote down vote up
/**
 * Parses {@code code} with the jsoup parser matching the configured syntax and serialises it
 * again using {@code formatter} as the document's output settings.
 *
 * @param code the text to format
 * @param ending not read by this implementation
 * @return the serialised document, or {@code null} when it equals {@code code}
 * @throws IllegalArgumentException if the configured syntax is neither {@code html} nor
 *         {@code xml}
 */
@Override
public String doFormat(String code, LineEnding ending) {
    final Parser parser;
    switch (formatter.syntax()) {
    case html:
        parser = Parser.htmlParser();
        break;
    case xml:
        parser = Parser.xmlParser();
        break;
    default:
        throw new IllegalArgumentException(formatter.syntax() + " is not allowed as syntax");
    }

    Document parsed = Jsoup.parse(code, "", parser);
    parsed.outputSettings(formatter);

    String rendered = parsed.outerHtml();
    // null signals "nothing changed" to the caller.
    return code.equals(rendered) ? null : rendered;
}
 
Example 5
Source File: JsoupAttachImageInliner.java    From ogham with Apache License 2.0 6 votes vote down vote up
/**
 * Rewrites the images of an HTML document so that they reference generated content ids, and
 * builds one inline attachment per image resource that is actually used.
 *
 * @param htmlContent the HTML to process
 * @param images the candidate image resources
 * @return the rewritten HTML together with the attachments that were created
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document document = Jsoup.parse(htmlContent);
	List<Attachment> attachments = new ArrayList<>(images.size());
	for (ImageResource image : images) {
		// <img> elements in the HTML that use this image's path or URL and
		// are not skipped
		Elements matches = getImagesToAttach(document, image);
		if (matches.isEmpty()) {
			continue;
		}
		String contentId = idGenerator.generate(image.getName());
		// the attachment that carries the image bytes
		ByteResource resource = new ByteResource(image.getName(), image.getContent());
		Attachment attachment = new Attachment(resource, null, INLINE, format(CONTENT_ID, contentId));
		// point every matching <img> at the generated content id and
		// mark it as inlined
		for (Element match : matches) {
			match.attr(SRC_ATTR, format(SRC_VALUE, contentId));
			match.attr(INLINED_ATTR, true);
		}
		attachments.add(attachment);
	}
	String html = document.outerHtml();
	return new ContentWithImages(html, attachments);
}
 
Example 6
Source File: CheckUtils.java    From Natty with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Returns the post body, wrapped in a {@code <body>} element and parsed by jsoup, with all
 * {@code a}, {@code code}, {@code img}, {@code pre} and {@code blockquote} elements removed.
 *
 * @param post the post whose body is stripped
 * @return the outer HTML of the whole parsed document after the removals
 */
public static String stripBody(Post post) {
    Document doc = Jsoup.parse("<body>" + post.getBody() + "</body>");
    // Removal order matches the tag order below.
    for (String tag : new String[] {"a", "code", "img", "pre", "blockquote"}) {
        doc.getElementsByTag(tag).remove();
    }
    return doc.outerHtml();
}
 
Example 7
Source File: Rgaa3Extractor.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Rewrites the HTML testcase files found below {@code RGAA3_TESTCASE_PATH}.
 *
 * <p>Each entry of the source directory is expected to be a directory whose name (after
 * removing {@code Rgaa30Rule} and {@code .java}) starts with three two-digit numbers: theme,
 * criterion and test. For every regular file in it whose name contains {@code html}, the
 * {@code .test-detail} element is turned into a {@code div}, emptied, given a {@code lang}
 * attribute if missing, and filled with the raw rule HTML looked up in {@code RGAA3}. The
 * document is then written back to the same file. Documents without a {@code .test-detail}
 * element are printed to standard output and left untouched.
 *
 * <p>Entries that cannot be listed (for example plain files) are skipped, and the zero-padded
 * dotted key is replaced literally rather than as a regular expression.
 *
 * @throws IOException if the source directory cannot be listed, or if reading or writing a
 *         testcase file fails
 */
private static void createTestcaseFiles() throws IOException {
    File srcDir = new File(RGAA3_TESTCASE_PATH);
    File[] ruleDirs = srcDir.listFiles();
    if (ruleDirs == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IOException("Cannot list testcase directory " + srcDir);
    }
    for (File file : ruleDirs) {
        File[] testcases = file.listFiles();
        if (testcases == null) {
            // Not a directory (or unreadable): nothing to rewrite here.
            continue;
        }
        String fileName = file.getName().replace("Rgaa30Rule", "").replace(".java", "");
        String theme = fileName.substring(0, 2);
        String crit = fileName.substring(2, 4);
        String test = fileName.substring(4, 6);
        // Integer.valueOf strips the zero padding: "01", "02", "03" -> "1-2-3".
        String testKey = Integer.valueOf(theme).toString()+"-"+Integer.valueOf(crit).toString()+"-"+Integer.valueOf(test).toString();
        String wrongKey = theme+"."+crit+"."+test;
        for (File testcase : testcases) {
            if (testcase.isFile() && testcase.getName().contains("html")) {
                Document doc = Jsoup.parse(FileUtils.readFileToString(testcase));
                Element detail = doc.select(".test-detail").first();
                if (detail == null) {
                    System.out.println(doc.outerHtml());
                } else {
                    detail.tagName("div");
                    detail.text("");
                    for (Element el : detail.children()) {
                        el.remove();
                    }
                    if (!detail.hasAttr("lang")) {
                        detail.attr("lang", "fr");
                    }
                    detail.append("\n"+RGAA3.get(testKey).ruleRawHtml+"\n");
                    doc.outputSettings().escapeMode(Entities.EscapeMode.xhtml);
                    doc.outputSettings().outline(false);
                    doc.outputSettings().indentAmount(4);
                    String outputHtml = doc.outerHtml();
                    if (outputHtml.contains(wrongKey)) {
                        // Literal replacement: the dots in wrongKey must not act as regex
                        // wildcards, and the replacement must not be parsed for $ or \.
                        outputHtml = outputHtml.replace(wrongKey, RGAA3.get(testKey).getRuleDot());
                    }
                    FileUtils.writeStringToFile(testcase, outputHtml);
                }
            }
        }
    }
}
 
Example 8
Source File: HTMLJsoupCleanerImpl.java    From Asqatasun with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Cleans {@code dirtyHTML} and stores the outcome in {@code result}.
 *
 * <p>The markup is first passed through {@code removeBadNamespaceDefinition} (the field is
 * overwritten with the outcome), then parsed. The document is serialised with the xhtml escape
 * mode, outline mode enabled and an indent of 2, after {@code removeComments} and
 * {@code removeMalformedAttributes} have been applied to it.
 */
@Override
public void run() {
    dirtyHTML = removeBadNamespaceDefinition(dirtyHTML);
    Document document = Jsoup.parse(dirtyHTML);
    document.outputSettings()
            .escapeMode(Entities.EscapeMode.xhtml)
            .outline(true)
            .indentAmount(2);
    removeComments(document);
    removeMalformedAttributes(document);
    result = document.outerHtml();
}
 
Example 9
Source File: JsoupCssInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the HTML and runs the CSS inlining steps on it: {@code internStyles},
 * {@code fetchStyles}, {@code extractStyles} and {@code applyStyles}, in that order.
 *
 * @param htmlContent the HTML to process
 * @param cssContents the external stylesheets handed to {@code internStyles}
 * @return the outer HTML of the document after all steps have run
 */
@Override
public String inline(String htmlContent, List<ExternalCss> cssContents) {
	Document document = Jsoup.parse(htmlContent);

	internStyles(document, cssContents);
	// the stylesheet collected from the document feeds the extraction step
	extractStyles(document, fetchStyles(document));
	applyStyles(document);

	return document.outerHtml();
}
 
Example 10
Source File: JsoupBase64ImageInliner.java    From ogham with Apache License 2.0 5 votes vote down vote up
/**
 * Replaces the source of matching images with a base64 URI built from the image resource, and
 * marks each rewritten element as inlined.
 *
 * @param htmlContent the HTML to process
 * @param images the image resources to embed
 * @return the rewritten HTML with an empty attachment list
 */
@Override
public ContentWithImages inline(String htmlContent, List<ImageResource> images) {
	Document document = Jsoup.parse(htmlContent);
	for (ImageResource image : images) {
		for (Element img : getImagesToInline(document, image)) {
			String mimetype = image.getMimetype();
			String encoded = Base64Utils.encodeToString(image.getContent());
			img.attr(SRC_ATTR, MessageFormat.format(BASE64_URI, mimetype, encoded));
			img.attr(INLINED_ATTR, true);
		}
	}
	String html = document.outerHtml();
	// the bytes are embedded in the HTML, so nothing is attached
	List<Attachment> noAttachments = new ArrayList<Attachment>(0);
	return new ContentWithImages(html, noAttachments);
}
 
Example 11
Source File: BrowserWatchFilter.java    From wisdom with Apache License 2.0 5 votes vote down vote up
/**
 * Parses the page and inserts a {@code <script>} tag that loads
 * {@code /assets/javascript/browserWatch.js} after the element returned by
 * {@code body().lastElementSibling()}.
 *
 * @param content the HTML of the page
 * @return the outer HTML of the page including the script tag
 */
private String addJavascript(String content) {
	Document page = Jsoup.parse(content);
	String scriptTag = String.format("<script src=\"%s\"></script>", "/assets/javascript/browserWatch.js");
	page.body().lastElementSibling().after(scriptTag);
	return page.outerHtml();
}
 
Example 12
Source File: DomDistance.java    From apogen with Apache License 2.0 4 votes vote down vote up
/**
 * Parses the given file and returns its outer HTML with every string listed in
 * {@code stringsToRemove} deleted.
 *
 * <p>{@code stringsToRemove} is reset to a new empty list on every call and is then handed,
 * indirectly, to {@code getStringsToRemove}, which receives all elements of the document.
 * NOTE(review): that helper presumably fills the list; confirm against its definition.
 *
 * @param f the HTML file to read
 * @return the serialised document without the collected strings
 * @throws IOException propagated from parsing the file
 */
public static String cleanDomFromText(File f) throws IOException {
	Document document = Jsoup.parse(f, null);

	stringsToRemove = new LinkedList<String>();
	getStringsToRemove(document.getAllElements());

	String html = document.outerHtml();
	for (String unwanted : stringsToRemove) {
		html = html.replace(unwanted, "");
	}
	return html;
}
 
Example 13
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that an {@code <isindex>} element is parsed into the equivalent form markup.
 */
@Test
public void testNormalisesIsIndex() {
    Document doc = Jsoup.parse("<body><isindex action='/submit'></body>");
    // The full document is serialised as well; its value is not inspected.
    doc.outerHtml();

    String expectedBody = "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>";
    String actualBody = StringUtil.normaliseWhitespace(doc.body().html());
    assertEquals(expectedBody, actualBody);
}
 
Example 14
Source File: HtmlParserTest.java    From astor with GNU General Public License v2.0 4 votes vote down vote up
/**
 * Verifies the body markup produced when the parser encounters {@code <isindex>}: a form
 * with two rules and a labelled {@code isindex} input.
 */
@Test
public void testNormalisesIsIndex() {
    final String input = "<body><isindex action='/submit'></body>";
    final String expected = "<form action=\"/submit\"> <hr> <label>This is a searchable index. Enter search keywords: <input name=\"isindex\"></label> <hr> </form>";

    Document parsed = Jsoup.parse(input);
    // Serialise the whole document too; only the body is asserted on below.
    parsed.outerHtml();

    assertEquals(expected, StringUtil.normaliseWhitespace(parsed.body().html()));
}
 
Example 15
Source File: CssInlineUtils.java    From ogham with Apache License 2.0 3 votes vote down vote up
/**
 * Strip the attributes that only Ogham uses from every element that
 * carries at least one of them:
 * <ul>
 * <li>{@link CssInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link CssInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the outer HTML of the parsed document without those attributes
 */
public static String removeOghamAttributes(String html) {
	Document document = Jsoup.parse(html);
	String selector = "["+INLINE_MODE_ATTR+"], ["+INLINED_ATTR+"]";
	// Elements.removeAttr applies the removal to every selected element
	document.select(selector)
			.removeAttr(INLINE_MODE_ATTR)
			.removeAttr(INLINED_ATTR);
	return document.outerHtml();
}
 
Example 16
Source File: ImageInlineUtils.java    From ogham with Apache License 2.0 3 votes vote down vote up
/**
 * Strip the attributes that only Ogham uses from every {@code img}
 * element of the document:
 * <ul>
 * <li>{@link ImageInlinerConstants#INLINE_MODE_ATTR}</li>
 * <li>{@link ImageInlinerConstants#INLINED_ATTR}</li>
 * </ul>
 * 
 * @param html
 *            the html to clean
 * @return the outer HTML of the parsed document without those attributes
 */
public static String removeOghamAttributes(String html) {
	Document document = Jsoup.parse(html);
	for (Element image : document.select("img")) {
		image.removeAttr(INLINE_MODE_ATTR);
		image.removeAttr(INLINED_ATTR);
	}
	String cleaned = document.outerHtml();
	return cleaned;
}