Java Code Examples for org.jsoup.nodes.Document#html()

The following examples show how to use org.jsoup.nodes.Document#html(). You can vote up the examples you like or vote down the ones you don't, and go to the original project or source file by following the links above each example. You may also check out related API usage in the sidebar.
Example 1
Source File: EmailServiceImpl.java    From graviteeio-access-management with Apache License 2.0 7 votes vote down vote up
/**
 * Rewrites the {@code src} of every {@code img} element that does not start with
 * {@code "http"} to a {@code cid:} reference, sets the resulting HTML as the message text and
 * adds each rewritten source as an inline resource loaded from {@code templatesPath}.
 *
 * @param mailMessage helper of the message being assembled
 * @param htmlText    HTML body to process
 * @return the HTML after the image sources have been rewritten
 * @throws Exception if assembling the message fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document parsed = Jsoup.parse(htmlText);

    // Collect the original (pre-rewrite) sources; the map step rewrites each element in place.
    final List<String> inlineSources = new ArrayList<>(
            parsed.getElementsByTag("img").stream()
                    .filter(img -> img.hasAttr("src") && !img.attr("src").startsWith("http"))
                    .map(img -> {
                        final String originalSrc = img.attr("src");
                        img.attr("src", "cid:" + originalSrc);
                        return originalSrc;
                    })
                    .collect(Collectors.toList()));

    final String rewrittenHtml = parsed.html();
    mailMessage.setText(rewrittenHtml, true);

    // The content id of each inline part is the original src value.
    for (final String source : inlineSources) {
        final File resourceFile = new File(templatesPath, source);
        mailMessage.addInline(source, new FileSystemResource(resourceFile), getContentTypeByFileName(source));
    }

    return rewrittenHtml;
}
 
Example 2
Source File: HtmlHelper.java    From FairEmail with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Converts the given spanned text to HTML and turns every {@code span} whose style attribute
 * starts with an em-based {@code font-size:} into a {@code small} (size below 1.0) or
 * {@code big} element, removing the whole style attribute of that element.
 *
 * @param spanned text to convert
 * @param context context handed to the HTML converter
 * @return the resulting HTML document as a string
 */
static String toHtml(Spanned spanned, Context context) {
    String html = new HtmlEx(context).toHtml(spanned, TO_HTML_PARAGRAPH_LINES_CONSECUTIVE);

    // @Google: why convert size to and from in a different way?
    Document doc = JsoupEx.parse(html);
    for (Element span : doc.select("span")) {
        String style = span.attr("style");
        if (!style.startsWith("font-size:")) {
            continue;
        }

        int colon = style.indexOf(':');
        int unit = style.indexOf("em;", colon);
        if (unit <= colon) {
            // No "em;" suffix after the colon: not an em-based size, leave the span alone.
            continue;
        }

        try {
            // The size may use a decimal comma; normalise it before parsing.
            float size = Float.parseFloat(style.substring(colon + 1, unit).replace(',', '.'));
            span.tagName(size < 1.0f ? "small" : "big");
            span.attributes().remove("style");
        } catch (NumberFormatException ex) {
            Log.e(ex);
        }
    }

    return doc.html();
}
 
Example 3
Source File: CSSReverter.java    From BlogManagePlatform with Apache License 2.0 6 votes vote down vote up
/**
 * Converts externally linked CSS in the HTML into inline {@code <style>} elements and removes
 * the link elements. Every {@code link[href]} element is removed, not only those ending in
 * {@code .css}. If anything fails, the error is logged and the input is returned unchanged.
 * @author Frodez
 * @date 2019-03-21
 */
@Override
public String revert(String html) {
	Assert.notNull(html, "html must not be null");
	try {
		Document document = Jsoup.parse(html);
		Elements links = document.select("link[href]");
		Elements htmlElement = document.select("html");
		for (Element link : links) {
			String href = link.attr("href");
			if (href.endsWith(".css")) {
				// Resolve the stylesheet relative to the template loader path and inline its content.
				String cssPath = StrUtil.concat(FreemarkerRender.getLoaderPath(), href);
				String css = FileUtil.readString(ResourceUtils.getFile(cssPath));
				htmlElement.prepend(StrUtil.concat("<style type=\"text/css\">", css, "</style>"));
			}
		}
		links.remove();
		return document.html();
	} catch (Exception e) {
		log.error("[frodez.util.renderer.reverter.CSSReverter.revert]", e);
		return html;
	}
}
 
Example 4
Source File: EmailServiceImpl.java    From graviteeio-access-management with Apache License 2.0 6 votes vote down vote up
/**
 * Replaces non-{@code http} image sources in the HTML with {@code cid:} references, uses the
 * result as the message text and registers every replaced source as an inline resource read
 * from {@code templatesPath}.
 *
 * @param mailMessage helper of the message being assembled
 * @param htmlText    HTML body to process
 * @return the HTML with rewritten image sources
 * @throws Exception if assembling the message fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document htmlDocument = Jsoup.parse(htmlText);
    final Elements images = htmlDocument.getElementsByTag("img");

    // Each matching element is rewritten in place while its original src is collected.
    final List<String> embedded = new ArrayList<>(images.stream()
            .filter(image -> image.hasAttr("src") && !image.attr("src").startsWith("http"))
            .map(image -> {
                final String source = image.attr("src");
                image.attr("src", "cid:" + source);
                return source;
            })
            .collect(Collectors.toList()));

    final String body = htmlDocument.html();
    mailMessage.setText(body, true);

    for (final String source : embedded) {
        // The original src doubles as the content id of the inline part.
        final FileSystemResource inlineFile = new FileSystemResource(new File(templatesPath, source));
        mailMessage.addInline(source, inlineFile, getContentTypeByFileName(source));
    }

    return body;
}
 
Example 5
Source File: HTMLUtil.java    From arma-intellij-plugin with MIT License 6 votes vote down vote up
/**
 * Writes the document's HTML to the file {@code saveName} (under {@code basePath} when it is
 * set), after running {@code convertURLsToBase} on the document. Documents whose HTML length
 * is at most {@code skipLength} are skipped.
 *
 * @param document the document to save
 * @return {@code true} if the file was written, {@code false} if the document was skipped
 * @throws Exception if creating or writing the output file fails
 */
public boolean save(@NotNull Document document) throws Exception {
	convertURLsToBase(document);
	String documentAsString = document.html();

	if (documentAsString.length() <= skipLength) {
		System.out.println("Skipping:" + url);
		return false;
	}

	File outputFile = new File((basePath != null ? basePath + "/" : "") + saveName);
	outputFile.createNewFile();
	System.out.println("Saving " + url + " to " + outputFile);

	// try-with-resources guarantees the stream is closed even when write() throws;
	// the previous manual close() was skipped on failure and leaked the file handle.
	// NOTE(review): getBytes() uses the platform default charset — confirm that is intended.
	try (FileOutputStream fos = new FileOutputStream(outputFile)) {
		fos.write(documentAsString.getBytes());
		fos.flush();
	}

	// Only reached after a successful write.
	if (latch != null) {
		latch.countDown();
	}

	return true;
}
 
Example 6
Source File: EmailServiceImpl.java    From gravitee-management-rest-api with Apache License 2.0 6 votes vote down vote up
/**
 * Points every {@code img} whose {@code src} does not start with {@code "http"} at a
 * {@code cid:} reference, sets the rewritten HTML as the message text and attaches the
 * referenced files from {@code templatesPath} as inline resources.
 *
 * @param mailMessage helper of the message being assembled
 * @param htmlText    HTML body to process
 * @return the rewritten HTML
 * @throws Exception if assembling the message fails
 */
private String addResourcesInMessage(final MimeMessageHelper mailMessage, final String htmlText) throws Exception {
    final Document doc = Jsoup.parse(htmlText);

    // Collect the original sources of all local images, rewriting each element as we go.
    final List<String> localImages = new ArrayList<>(
            doc.getElementsByTag("img").stream()
                    .filter(element -> element.hasAttr("src") && !element.attr("src").startsWith("http"))
                    .map(element -> {
                        final String path = element.attr("src");
                        element.attr("src", "cid:" + path);
                        return path;
                    })
                    .collect(Collectors.toList()));

    final String result = doc.html();
    mailMessage.setText(result, true);

    for (final String path : localImages) {
        final File imageFile = new File(templatesPath, path);
        mailMessage.addInline(path, new FileSystemResource(imageFile), getContentTypeByFileName(path));
    }

    return result;
}
 
Example 7
Source File: HTMLUtil.java    From arma-intellij-plugin with MIT License 6 votes vote down vote up
/**
 * Saves the HTML of the given document to {@code saveName}, placed under {@code basePath}
 * when that is non-null. {@code convertURLsToBase} is applied to the document first, and
 * nothing is written when the HTML length does not exceed {@code skipLength}.
 *
 * @param document the document to save
 * @return {@code true} if the file was written, {@code false} if the document was skipped
 * @throws Exception if creating or writing the output file fails
 */
public boolean save(@NotNull Document document) throws Exception {
	convertURLsToBase(document);
	String documentAsString = document.html();

	if (documentAsString.length() <= skipLength) {
		System.out.println("Skipping:" + url);
		return false;
	}

	File outputFile = new File((basePath != null ? basePath + "/" : "") + saveName);
	outputFile.createNewFile();
	System.out.println("Saving " + url + " to " + outputFile);

	// The stream is opened in try-with-resources so it is closed on every path;
	// previously an exception from write() left the file handle open.
	// NOTE(review): getBytes() uses the platform default charset — confirm that is intended.
	try (FileOutputStream fos = new FileOutputStream(outputFile)) {
		fos.write(documentAsString.getBytes());
		fos.flush();
	}

	// Only reached after a successful write.
	if (latch != null) {
		latch.countDown();
	}

	return true;
}
 
Example 8
Source File: JerryExtractor.java    From web-data-extractor with Apache License 2.0 6 votes vote down vote up
/**
 * Parses the input with the XML parser and renders it according to {@code outType}:
 * as HTML for {@code TYPE_HTML}, as plain text for {@code TYPE_TEXT} and any other value.
 *
 * @param str markup to parse
 * @return the rendered HTML or text
 */
private String parse(String str) {
    Document document = Jsoup.parse(str, "", Parser.xmlParser());
    switch (outType) {
        case TYPE_HTML:
            return document.html();
        case TYPE_TEXT:
        default:
            // Text output is both the explicit TYPE_TEXT behaviour and the fallback.
            return document.text();
    }
}
 
Example 9
Source File: ProfileEditFragment.java    From 4pdaClient-plus with Apache License 2.0 6 votes vote down vote up
/**
 * Extracts the profile edit form from the page body and adapts it for display: wraps it in a
 * {@code form}, simplifies two label cells, replaces {@code legend}/{@code fieldset} markup,
 * swaps the original buttons for a single save button and limits the first textarea to 500
 * characters. If the form cannot be located, the body is returned unchanged.
 *
 * @param body raw page HTML
 * @return the adapted form HTML, or {@code body} as given when no form was found
 */
private String parseBody(String body) {
    Matcher m = PatternExtensions.compile("br \\/>\\s*(<fieldset>[\\S\\s]*<.form>)").matcher(body);
    if (m.find()) {
        body = "<form>" + m.group(1);
        body = body.replaceAll("<td class=\"row1\" width=\"30%\"><b>О себе:</b>[\\s\\S]*?</td>",
                "<td class=\"row1\" width=\"30%\"><b>О себе</b></td>");
        body = body.replaceAll("<td width=\"30%\" class=\"row1\" style='padding:6px;'><b>Город</b>[\\s\\S]*?</td>",
                "<td class=\"row1\" width=\"30%\" style='padding:6px;'><b>Город</b></td>");
        body = body.replaceAll("legend", "h2").replaceAll("<fieldset>", "<div class=\"field\">").replaceAll("</fieldset>", "</div>");
        Document doc = Jsoup.parse(body);
        doc.select(".formbuttonrow .button").remove();
        doc.select(".formbuttonrow").append("<input type=\"button\" value=\"Сохранить\" onclick=\"jsonElem();\">");
        // eq(0) yields an empty selection when the form has no textarea, so this is a no-op
        // instead of the NullPointerException that first().attr(...) would raise.
        doc.select("textarea").eq(0).attr("maxlength", "500");
        body = doc.html();
    }
    return body;
}
 
Example 10
Source File: UKTools.java    From youkefu with Apache License 2.0 5 votes vote down vote up
/**
* Processes the images in a chat message: every {@code img} whose {@code src} contains
* {@code "/res/image"} gets its {@code class} attribute set to {@code "ukefu-media-image"}.
* @param message HTML of the chat message
* @return the message HTML after processing
*/
  public static String filterChatMessage(String message){
  	Document parsed = Jsoup.parse(message) ;
  	for (Element image : parsed.select("img[src]")) {
  		if(image.attr("src").contains("/res/image")){
  			image.attr("class", "ukefu-media-image") ;
  		}
  	}
  	return parsed.html() ;
  }
 
Example 11
Source File: HtmlView.java    From JavaRushTasks with MIT License 5 votes vote down vote up
/**
 * Builds the HTML of the vacancies page: loads the document, removes the previously rendered
 * vacancy rows and inserts one row per vacancy, cloned from the template row, before the
 * template.
 *
 * @param vacancies vacancies to render
 * @return the updated HTML, or {@code "Some exception occurred"} if the document could not
 *         be loaded
 */
private String getUpdatedFileContent(List<Vacancy> vacancies) {
    try {
        Document document = getDocument();

        // The clone loses the style attribute and the "template" class so it renders as a normal row.
        Element templateOriginal = document.getElementsByClass("template").first();
        Element copyTemplate = templateOriginal.clone();
        copyTemplate.removeAttr("style");
        copyTemplate.removeClass("template");

        // The selector requires the class attribute to equal "vacancy" exactly, so the
        // template row (which carries an additional class) is not removed. The former
        // trailing .not("tr[class=vacancy template") used a malformed selector and its
        // result was discarded, so it has been dropped.
        document.select("tr[class=vacancy]").remove();

        for (Vacancy vacancy : vacancies) {
            Element localClone = copyTemplate.clone();
            localClone.getElementsByClass("city").first().text(vacancy.getCity());
            localClone.getElementsByClass("companyName").first().text(vacancy.getCompanyName());
            localClone.getElementsByClass("salary").first().text(vacancy.getSalary());
            Element link = localClone.getElementsByTag("a").first();
            link.text(vacancy.getTitle());
            link.attr("href", vacancy.getUrl());

            templateOriginal.before(localClone.outerHtml());
        }
        return document.html();
    } catch (IOException e) {
        e.printStackTrace();
        return "Some exception occurred";
    }
}
 
Example 12
Source File: SpecificationAction.java    From livingdoc-confluence with GNU General Public License v3.0 5 votes vote down vote up
/**
 * Removes every element with class {@code wysiwyg-unknown-macro} whose {@code src} attribute
 * contains {@code "livingdoc"} and returns the resulting HTML.
 *
 * @param result HTML to clean up
 * @return the HTML without the matching elements
 */
private String removeUnknownMacroElements(String result) {
    Document parsed = Jsoup.parse(result);
    parsed.getElementsByClass("wysiwyg-unknown-macro").forEach(macro -> {
        if (macro.attr("src").contains("livingdoc")) {
            macro.remove();
        }
    });

    return parsed.html();
}
 
Example 13
Source File: PrintableDetailField.java    From commcare-android with Apache License 2.0 5 votes vote down vote up
/**
 * Sets the {@code style} attribute of the first {@code html} element to {@code height: 100%}
 * and of the first {@code body} element to {@code height: 90%; margin:0;}.
 *
 * @param htmlString HTML to adjust
 * @return the HTML with both style attributes set
 */
private static String addStyleAttributes(String htmlString) {
    Document graphDoc = Jsoup.parse(htmlString);
    graphDoc.getElementsByTag("html").get(0).attr("style", "height: 100%");
    graphDoc.getElementsByTag("body").get(0).attr("style", "height: 90%; margin:0;");
    return graphDoc.html();
}
 
Example 14
Source File: TestPDFDomTree.java    From Pdf2Dom with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Converts the three-page sample PDF with the page range arguments 0 and 1 and checks that the
 * HTML contains the marker {@code #1} but neither {@code #2} nor {@code #3}.
 */
@Test
public void givenMultiPagePdf_renderOnlyFirstPage_outputHtmlOnlyHasFirstPage() throws Exception
{
    String renderedHtml = convertWithPageRange(testPath + "3-page-document.pdf", 0, 1).html();

    Assert.assertThat(renderedHtml, containsString("#1"));

    // Markers presumably identify the later pages, which must not be rendered — confirm against the fixture.
    for (String laterPageMarker : new String[] {"#2", "#3"}) {
        Assert.assertThat(renderedHtml, not(containsString(laterPageMarker)));
    }
}