Java Code Examples for org.jsoup.nodes.Document.OutputSettings

The following examples show how to use org.jsoup.nodes.Document.OutputSettings. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: docx4j-template   Author: hiwepy   File: XHTMLDocumentHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses an HTML input stream into a jsoup {@link Document} via
 * {@code Jsoup.parse(in, charsetName, baseUri)} and switches pretty-printing off on it.
 *
 * <p>The charset name and base URI are looked up through {@code Docx4jProperties}; the
 * charset falls back to {@code Docx4jConstants.DEFAULT_CHARSETNAME} and the base URI to
 * the empty string.
 *
 * @param input the HTML stream to parse
 * @return the parsed document, with output settings whose pretty-print flag is {@code false}
 * @throws IOException declared by this method; presumably raised when the stream cannot be read
 */
@Override
public Document handle(InputStream input) throws IOException {
	// Look up the jsoup parsing parameters.
	final String charsetName = Docx4jProperties.getProperty(
			Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME);
	final String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, "");

	// Let jsoup turn the HTML into a Document.
	final Document parsed = Jsoup.parse(input, charsetName, baseUri);

	// Install fresh output settings with pretty-printing disabled.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	return parsed;
}
 
Example #2
Source Project: crawler-jsoup-maven   Author: bluetata   File: JsoupTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Scratch demo: parses a small fragment, edits the first {@code div}, wraps the first
 * {@code span}, prints the document, then prints a cleaned copy without pretty-printing.
 */
public static void main(String[] args) {
    final String markup = "<span><div>test</div></span>";
    final Document parsed = Jsoup.parse(markup);

    // Replace the div's inner HTML, then add a paragraph before and after it.
    final Element firstDiv = parsed.select("div").first();
    firstDiv.html("<p>lorem ipsum</p>");
    firstDiv.prepend("<p>First</p>");
    firstDiv.append("<p>Last</p>");
    // The div now contains: <p>First</p><p>lorem ipsum</p><p>Last</p>

    // NOTE(review): the entire markup string is passed as the tag name here - confirm this is intended.
    firstDiv.appendElement(markup);

    // Wrap the first span in a list item that holds a link.
    final Element firstSpan = parsed.select("span").first();
    firstSpan.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(parsed.html());

    // Clean the serialized document with the relaxed whitelist, pretty-printing off.
    final String serialized = parsed.html();
    final String cleaned = Jsoup.clean(serialized, "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));

    System.out.println(cleaned);
}
 
Example #3
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes the same parsed document under HTML syntax and then XML syntax and checks
 * the exact output of each.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";

    // Expected output while the syntax is set to html.
    final String expectedHtml = "<!doctype html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    // Expected output while the syntax is set to xml.
    final String expectedXml = "<!DOCTYPE html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    final Document doc = Jsoup.parse(input);
    final Document.OutputSettings settings = doc.outputSettings();

    settings.syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    settings.syntax(Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example #4
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes the same parsed document under HTML syntax and then XML syntax and checks
 * the exact output of each.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";

    // Expected output while the syntax is set to html.
    final String expectedHtml = "<!doctype html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    // Expected output while the syntax is set to xml.
    final String expectedXml = "<!DOCTYPE html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    final Document doc = Jsoup.parse(input);
    final Document.OutputSettings settings = doc.outputSettings();

    settings.syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    settings.syntax(Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example #5
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes the same parsed document under HTML syntax and then XML syntax and checks
 * the exact output of each.
 */
@Test public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";

    // Expected output while the syntax is set to html.
    final String expectedHtml = "<!doctype html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    // Expected output while the syntax is set to xml.
    final String expectedXml = "<!DOCTYPE html>\n"
            + "<html>\n"
            + " <head></head>\n"
            + " <body>\n"
            + "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n"
            + "  <foo />bar\n"
            + " </body>\n"
            + "</html>";

    final Document doc = Jsoup.parse(input);
    final Document.OutputSettings settings = doc.outputSettings();

    settings.syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    settings.syntax(Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example #6
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters the tags of rich-text content.
 *
 * @param request the current request; handed to {@code Configuration.getUrl} to obtain the
 *                base URI used for cleaning
 * @param html the rich-text markup to filter
 * @return the empty string when {@code html} is blank, otherwise the output of
 *         {@code Jsoup.clean} with pretty-printing disabled
 */
public String filterTag(HttpServletRequest request,String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	// The whitelist is built by filterParameter, invoked here with null.
	final Whitelist allowedTags = this.filterParameter(null);
	final String baseUri = Configuration.getUrl(request);

	// prettyPrint(false): do not re-format the cleaned markup.
	return Jsoup.clean(html, baseUri, allowedTags, new OutputSettings().prettyPrint(false));
}
 
Example #7
Source Project: document-management-software   Author: logicaldoc   File: HTMLSanitizer.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Cleans the given HTML against the simple-text whitelist (relative links not preserved,
 * no pretty-printing, zero indent) and then unescapes HTML entities in the result.
 *
 * @param unsafeHtmlContent the untrusted HTML to sanitize
 * @return the cleaned and entity-unescaped text
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	final OutputSettings compact = new OutputSettings().indentAmount(0).prettyPrint(false);
	final Whitelist simpleText = Whitelist.simpleText().preserveRelativeLinks(false);

	final String cleaned = Jsoup.clean(unsafeHtmlContent, "", simpleText, compact);

	// NOTE(review): unescaping after cleaning can turn escaped entities back into markup
	// characters - confirm callers treat the result as plain text.
	return StringEscapeUtils.unescapeHtml(cleaned);
}
 
Example #8
Source Project: formatter-maven-plugin   Author: revelc   File: JsoupBasedFormatter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Initializes the formatter's output settings from the option map.
 *
 * <p>Recognized keys and their fallbacks: {@code charset} (UTF-8), {@code escapeMode}
 * ({@code xhtml}), {@code indentAmount} ({@code 4}), {@code outlineMode} ({@code true}),
 * {@code pretty} ({@code true}) and {@code syntax} ({@code html}). Each option is read and
 * applied in that order.
 *
 * @param options formatter options keyed by name
 * @param cfg configuration source forwarded to {@code super.initCfg}
 */
@Override
public void init(Map<String, String> options, ConfigurationSource cfg) {
    super.initCfg(cfg);

    formatter = new OutputSettings();

    final String charsetName = options.getOrDefault("charset", StandardCharsets.UTF_8.name());
    formatter.charset(Charset.forName(charsetName));

    final String escapeModeName = options.getOrDefault("escapeMode", EscapeMode.xhtml.name());
    formatter.escapeMode(EscapeMode.valueOf(escapeModeName));

    final String indentText = options.getOrDefault("indentAmount", "4");
    formatter.indentAmount(Integer.parseInt(indentText));

    final String outlineText = options.getOrDefault("outlineMode", Boolean.TRUE.toString());
    formatter.outline(Boolean.parseBoolean(outlineText));

    final String prettyText = options.getOrDefault("pretty", Boolean.TRUE.toString());
    formatter.prettyPrint(Boolean.parseBoolean(prettyText));

    final String syntaxName = options.getOrDefault("syntax", Syntax.html.name());
    formatter.syntax(Syntax.valueOf(syntaxName));
}
 
Example #9
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * With pretty-printing off, writing the document to an appendable must reproduce the
 * source markup exactly.
 */
@Test public void testHtmlAppendable() {
	final String source = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document parsed = Jsoup.parse(source);

	// Swap in fresh settings with pretty-printing disabled.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	final String rendered = parsed.html(new StringWriter()).toString();
	assertEquals(source, rendered);
}
 
Example #10
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * With pretty-printing off, writing the document to an appendable must reproduce the
 * source markup exactly.
 */
@Test public void testHtmlAppendable() {
	final String source = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document parsed = Jsoup.parse(source);

	// Swap in fresh settings with pretty-printing disabled.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	final String rendered = parsed.html(new StringWriter()).toString();
	assertEquals(source, rendered);
}
 
Example #11
Source Project: astor   Author: SpoonLabs   File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * With pretty-printing off, writing the document to an appendable must reproduce the
 * source markup exactly.
 */
@Test public void testHtmlAppendable() {
	final String source = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document parsed = Jsoup.parse(source);

	// Swap in fresh settings with pretty-printing disabled.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	final String rendered = parsed.html(new StringWriter()).toString();
	assertEquals(source, rendered);
}
 
Example #12
Source Project: doov   Author: doov-io   File: HtmlSampleRulesTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Renders the document pretty-printed with a two-space indent, preceded by an HTML comment
 * line holding the string form of the context's root metadata.
 *
 * @param context supplies the root metadata placed in the leading comment
 * @param doc the document to render; its output settings are replaced
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings pretty = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(pretty).toString();
}
 
Example #13
Source Project: doov   Author: doov-io   File: HtmlAnyMatchTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Renders the document pretty-printed with a two-space indent, preceded by an HTML comment
 * line holding the string form of the context's root metadata.
 *
 * @param context supplies the root metadata placed in the leading comment
 * @param doc the document to render; its output settings are replaced
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment = "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings pretty = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(pretty).toString();
}
 
Example #14
Source Project: SkyTube   Author: ram-on   File: NewPipeService.java    License: GNU General Public License v3.0 4 votes vote down vote up
/**
 * Cleans the given markup against the basic whitelist, with an empty base URI and
 * pretty-printing disabled.
 *
 * @param content the markup to clean
 * @return the cleaned markup
 */
private String filterHtml(String content) {
    final Whitelist basicTags = Whitelist.basic();
    final OutputSettings compact = new OutputSettings().prettyPrint(false);
    return Jsoup.clean(content, "", basicTags, compact);
}
 
Example #15
Source Project: zeppelin   Author: apache   File: ZeppelinRDisplay.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a text display from the inner HTML of {@code body} with every tag removed.
 *
 * @param body the element whose inner HTML is converted
 * @return an {@code RDisplay} of type {@code TEXT} with code {@code SUCCESS}
 */
private static RDisplay textDisplay(Element body) {
  final String innerHtml = body.html();
  final Whitelist noTags = Whitelist.none();
  // Pretty-printing stays off so that whitespace and newlines are preserved.
  final OutputSettings compact = new OutputSettings().prettyPrint(false);

  final String plainText = Jsoup.clean(innerHtml, "", noTags, compact);
  return new RDisplay(plainText, Type.TEXT, Code.SUCCESS);
}
 
Example #16
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * Filters the tags of rich-text content.
 *
 * @param request the current request; handed to {@code Configuration.getUrl} to obtain the
 *                base URI used for cleaning
 * @param html the content to filter
 * @param editorTag the comment-editor tags, passed to {@code filterParameter} to build the whitelist
 * @return the empty string when {@code html} is blank, otherwise the output of
 *         {@code Jsoup.clean} with pretty-printing disabled
 */
public String filterTag(HttpServletRequest request,String html,EditorTag editorTag) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	final Whitelist allowedTags = this.filterParameter(editorTag);
	final String baseUri = Configuration.getUrl(request);

	// prettyPrint(false): do not re-format the cleaned markup.
	return Jsoup.clean(html, baseUri, allowedTags, new OutputSettings().prettyPrint(false));
}
 
Example #17
Source Project: cia   Author: Hack23   File: DocumentDataPageModContentFactoryImpl.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Builds the document-data page content.
 *
 * <p>Creates the panel layout, the document menu bar and a header label. When content rows
 * exist for the page id, the first row's content is cleaned with the simple-text whitelist
 * and shown as a preformatted label; a word-count request is then issued and, when the
 * response carries a word-count map, a word-cloud label is appended. Finally the layout is
 * installed on the panel and a page-visit event is created.
 *
 * @param parameters page parameters from which the page id is derived
 * @param menuBar the menu bar to populate
 * @param panel the panel that receives the created layout
 * @return the layout obtained from {@code createPanelContent()}
 */
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
	final VerticalLayout pageLayout = createPanelContent();
	final String pageId = getPageId(parameters);

	getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);
	LabelFactory.createHeader2Label(pageLayout, DOCUMENT_DATA);

	// Load the content rows whose id equals the page id.
	final DataContainer<DocumentContentData, String> contentContainer = getApplicationManager()
			.getDataContainer(DocumentContentData.class);
	final List<DocumentContentData> contents = contentContainer.getAllBy(DocumentContentData_.id, pageId);

	if (!contents.isEmpty()) {
		// Full-size panel hosting a form layout for the document body.
		final Panel formPanel = new Panel();
		formPanel.setSizeFull();
		pageLayout.addComponent(formPanel);

		final FormLayout formContent = new FormLayout();
		formPanel.setContent(formContent);

		// Clean the first row's content against the simple-text whitelist.
		final String rawContent = contents.get(0).getContent();
		final Whitelist simpleText = Whitelist.simpleText();
		final OutputSettings indented = new OutputSettings().indentAmount(4);
		final String sanitized = Jsoup.clean(rawContent, "", simpleText, indented);

		final Label contentLabel = new Label(sanitized, ContentMode.PREFORMATTED);
		formContent.addComponent(contentLabel);

		// Request the word counts for this document within the current session.
		final DocumentWordCountRequest wordCountRequest = new DocumentWordCountRequest();
		wordCountRequest.setDocumentId(pageId);
		wordCountRequest.setMaxResults(MAX_RESULTS);
		final String sessionId = RequestContextHolder.currentRequestAttributes().getSessionId();
		wordCountRequest.setSessionId(sessionId);

		final DocumentWordCountResponse wordCountResponse = (DocumentWordCountResponse) getApplicationManager()
				.service(wordCountRequest);

		// The word cloud is rendered only when the response carries a word-count map.
		if (wordCountResponse.getWordCountMap() != null) {
			final String cloudHtml = createWordCloud(wordCountResponse.getWordCountMap());
			final Label wordCloud = new Label(cloudHtml, ContentMode.HTML);
			formContent.addComponent(wordCloud);
		}

		pageLayout.setExpandRatio(formPanel, ContentRatio.GRID);
	}

	panel.setContent(pageLayout);
	getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW, ApplicationEventGroup.USER, NAME,
			parameters, pageId);

	return pageLayout;
}