org.jsoup.nodes.Document.OutputSettings Java Examples

The following examples show how to use org.jsoup.nodes.Document.OutputSettings. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: XHTMLDocumentHandler.java    From docx4j-template with Apache License 2.0 6 votes vote down vote up
/**
 * Jsoup.parse(in, charsetName, baseUri)
 */
@Override
public Document handle( InputStream input) throws IOException{
	//获取Jsoup参数
	String charsetName = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME );
	String baseUri = Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI,"");
	//使用Jsoup将html转换成Document对象
	Document doc = Jsoup.parse(input, charsetName, baseUri);
	
	OutputSettings outputSettings = new OutputSettings();
	
	outputSettings.prettyPrint(false);
	
	/*
	outputSettings.syntax(syntax)
	outputSettings.charset(charset)
	outputSettings*/
	doc.outputSettings(outputSettings);
	
	//返回Document对象
	return doc;
}
 
Example #2
Source File: JsoupTest.java    From crawler-jsoup-maven with Apache License 2.0 6 votes vote down vote up
public static void main(String[] args) {
    
    String d = "<span><div>test</div></span>";
    Document doc = Jsoup.parse(d);
    Element div = doc.select("div").first(); // <div></div>
    div.html("<p>lorem ipsum</p>"); // <div><p>lorem ipsum</p></div>
    div.prepend("<p>First</p>");
    div.append("<p>Last</p>");
    // now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>
    div.appendElement(d);
    Element span = doc.select("span").first(); // <span>One</span>
    span.wrap("<li><a href='http://example.com/'></a></li>");
    // now: <li><a href="http://example.com"><span>One</span></a></li>
    System.out.println(doc.html());
    
    String s = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));
    
    System.out.println(s);
}
 
Example #3
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void testHtmlAndXmlSyntax() {
    String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    Document doc = Jsoup.parse(h);

    doc.outputSettings().syntax(Syntax.html);
    assertEquals("<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());

    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals("<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());
}
 
Example #4
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void testHtmlAndXmlSyntax() {
    String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    Document doc = Jsoup.parse(h);

    doc.outputSettings().syntax(Syntax.html);
    assertEquals("<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());

    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals("<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());
}
 
Example #5
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 6 votes vote down vote up
@Test public void testHtmlAndXmlSyntax() {
    String h = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    Document doc = Jsoup.parse(h);

    doc.outputSettings().syntax(Syntax.html);
    assertEquals("<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());

    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals("<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>", doc.html());
}
 
Example #6
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * 富文本过滤标签
 * @param request
 * @param html
 * @return
 */
public String filterTag(HttpServletRequest request,String html) {  
	if(StringUtils.isBlank(html)) return ""; 
	Whitelist whitelist = this.filterParameter(null);

    //return Jsoup.clean(html, Configuration.getUrl(request),whitelist); 
	return Jsoup.clean(html, Configuration.getUrl(request),whitelist,new OutputSettings().prettyPrint(false)); //prettyPrint(是否重新格式化)
}
 
Example #7
Source File: HTMLSanitizer.java    From document-management-software with GNU Lesser General Public License v3.0 5 votes vote down vote up
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	OutputSettings outputSettings = new OutputSettings().indentAmount(0).prettyPrint(false);
	Whitelist whiteList = Whitelist.simpleText().preserveRelativeLinks(false);
	String sanitized = Jsoup.clean(unsafeHtmlContent, "", whiteList, outputSettings);
	sanitized = StringEscapeUtils.unescapeHtml(sanitized);
	return sanitized;
}
 
Example #8
Source File: JsoupBasedFormatter.java    From formatter-maven-plugin with Apache License 2.0 5 votes vote down vote up
@Override
public void init(Map<String, String> options, ConfigurationSource cfg) {
    super.initCfg(cfg);

    formatter = new OutputSettings();
    formatter.charset(Charset.forName(options.getOrDefault("charset", StandardCharsets.UTF_8.name())));
    formatter.escapeMode(EscapeMode.valueOf(options.getOrDefault("escapeMode", EscapeMode.xhtml.name())));
    formatter.indentAmount(Integer.parseInt(options.getOrDefault("indentAmount", "4")));
    formatter.outline(Boolean.parseBoolean(options.getOrDefault("outlineMode", Boolean.TRUE.toString())));
    formatter.prettyPrint(Boolean.parseBoolean(options.getOrDefault("pretty", Boolean.TRUE.toString())));
    formatter.syntax(Syntax.valueOf(options.getOrDefault("syntax", Syntax.html.name())));
}
 
Example #9
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void testHtmlAppendable() {
	String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	Document document = Jsoup.parse(htmlContent);
	OutputSettings outputSettings = new OutputSettings();
	
	outputSettings.prettyPrint(false);
	document.outputSettings(outputSettings);
	assertEquals(htmlContent, document.html(new StringWriter()).toString());
}
 
Example #10
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void testHtmlAppendable() {
	String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	Document document = Jsoup.parse(htmlContent);
	OutputSettings outputSettings = new OutputSettings();
	
	outputSettings.prettyPrint(false);
	document.outputSettings(outputSettings);
	assertEquals(htmlContent, document.html(new StringWriter()).toString());
}
 
Example #11
Source File: DocumentTest.java    From astor with GNU General Public License v2.0 5 votes vote down vote up
@Test public void testHtmlAppendable() {
	String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	Document document = Jsoup.parse(htmlContent);
	OutputSettings outputSettings = new OutputSettings();
	
	outputSettings.prettyPrint(false);
	document.outputSettings(outputSettings);
	assertEquals(htmlContent, document.html(new StringWriter()).toString());
}
 
Example #12
Source File: HtmlSampleRulesTest.java    From doov with Apache License 2.0 4 votes vote down vote up
static String format(Context context, Document doc) {
    return "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n"
            + doc.outputSettings(new OutputSettings().prettyPrint(true).indentAmount(2)).toString();
}
 
Example #13
Source File: HtmlAnyMatchTest.java    From doov with Apache License 2.0 4 votes vote down vote up
static String format(Context context, Document doc) {
    return "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n"
            + doc.outputSettings(new OutputSettings().prettyPrint(true).indentAmount(2)).toString();
}
 
Example #14
Source File: NewPipeService.java    From SkyTube with GNU General Public License v3.0 4 votes vote down vote up
private String filterHtml(String content) {
    return Jsoup.clean(content, "", Whitelist.basic(), new OutputSettings().prettyPrint(false));
}
 
Example #15
Source File: ZeppelinRDisplay.java    From zeppelin with Apache License 2.0 4 votes vote down vote up
private static RDisplay textDisplay(Element body) {
  // remove HTML tag while preserving whitespaces and newlines
  String text = Jsoup.clean(body.html(), "",
    Whitelist.none(), new OutputSettings().prettyPrint(false));
  return new RDisplay(text, Type.TEXT, Code.SUCCESS);
}
 
Example #16
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * 富文本过滤标签
 * @param request
 * @param html 内容
 * @param editorTag 评论编辑器标签
 * @return
 */
public String filterTag(HttpServletRequest request,String html,EditorTag editorTag) {  
	if(StringUtils.isBlank(html)) return ""; 
	Whitelist whitelist = this.filterParameter(editorTag);
	

	//return Jsoup.clean(html, Configuration.getUrl(request),whitelist); 

	return Jsoup.clean(html, Configuration.getUrl(request),whitelist,new OutputSettings().prettyPrint(false)); //prettyPrint(是否重新格式化)
}
 
Example #17
Source File: DocumentDataPageModContentFactoryImpl.java    From cia with Apache License 2.0 2 votes vote down vote up
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
	final VerticalLayout panelContent = createPanelContent();

	final String pageId = getPageId(parameters);

	getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);

	LabelFactory.createHeader2Label(panelContent, DOCUMENT_DATA);

	final DataContainer<DocumentContentData, String> documentContentDataDataContainer = getApplicationManager()
			.getDataContainer(DocumentContentData.class);

	final List<DocumentContentData> documentContentlist = documentContentDataDataContainer
			.getAllBy(DocumentContentData_.id, pageId);

	if (!documentContentlist.isEmpty()) {

		final Panel formPanel = new Panel();
		formPanel.setSizeFull();

		panelContent.addComponent(formPanel);

		final FormLayout formContent = new FormLayout();
		formPanel.setContent(formContent);

		final String cleanContent = Jsoup.clean(documentContentlist.get(0).getContent(), "", Whitelist.simpleText(),
				new OutputSettings().indentAmount(4));

		final Label htmlContent = new Label(cleanContent, ContentMode.PREFORMATTED);

		formContent.addComponent(htmlContent);

		final DocumentWordCountRequest documentWordCountRequest = new DocumentWordCountRequest();
		documentWordCountRequest.setDocumentId(pageId);
		documentWordCountRequest.setMaxResults(MAX_RESULTS);
		documentWordCountRequest.setSessionId(RequestContextHolder.currentRequestAttributes().getSessionId());
		final DocumentWordCountResponse resp = (DocumentWordCountResponse) getApplicationManager()
				.service(documentWordCountRequest);

		if (resp.getWordCountMap() != null) {
			final Label wordCloud = new Label(createWordCloud(resp.getWordCountMap()), ContentMode.HTML);
			formContent.addComponent(wordCloud);
		}

		panelContent.setExpandRatio(formPanel, ContentRatio.GRID);

	}

	panel.setContent(panelContent);
	getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW, ApplicationEventGroup.USER, NAME,
			parameters, pageId);

	return panelContent;

}