Java Code Examples for org.jsoup.nodes.Document.OutputSettings

The following examples show how to use org.jsoup.nodes.Document.OutputSettings. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: docx4j-template   Source File: XHTMLDocumentHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Parses an HTML stream with {@code Jsoup.parse(in, charsetName, baseUri)} and switches
 * pretty-printing off on the resulting document.
 *
 * <p>The charset name and base URI are read from {@code Docx4jProperties}; the charset falls
 * back to {@code Docx4jConstants.DEFAULT_CHARSETNAME} and the base URI to the empty string.
 * No other output setting (syntax, charset, ...) is changed here.
 *
 * @param input stream holding the HTML to parse
 * @return the parsed document, with output settings whose pretty-printing is disabled
 * @throws IOException declared by the signature; presumably propagated from reading {@code input}
 */
@Override
public Document handle(InputStream input) throws IOException {
	// Look up the jsoup parse parameters and convert the HTML into a Document in one step.
	final Document parsed = Jsoup.parse(
			input,
			Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_CHARSETNAME, Docx4jConstants.DEFAULT_CHARSETNAME),
			Docx4jProperties.getProperty(Docx4jConstants.DOCX4J_JSOUP_PARSE_BASEURI, ""));

	// Install fresh output settings on the document with pretty-printing turned off.
	parsed.outputSettings(new OutputSettings().prettyPrint(false));

	return parsed;
}
 
Example 2
Source Project: crawler-jsoup-maven   Source File: JsoupTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demo: parses a small fragment, edits the first {@code div}, wraps the first {@code span},
 * prints the document, then prints the result of cleaning it with the relaxed whitelist
 * and pretty-printing disabled.
 *
 * @param args unused
 */
public static void main(String[] args) {

    final String markup = "<span><div>test</div></span>";
    final Document document = Jsoup.parse(markup);

    // Replace the div's inner HTML, then add one paragraph before and one after it.
    final Element divElement = document.select("div").first();
    divElement.html("<p>lorem ipsum</p>");
    divElement.prepend("<p>First</p>");
    divElement.append("<p>Last</p>");

    // NOTE(review): appendElement receives the whole markup string rather than a bare
    // tag name -- confirm this is intended.
    divElement.appendElement(markup);

    // Wrap the first span in a list item containing a link.
    final Element spanElement = document.select("span").first();
    spanElement.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(document.html());

    // Clean the serialized document without reformatting the output.
    final String cleaned = Jsoup.clean(
            document.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));

    System.out.println(cleaned);
}
 
Example 3
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes one parsed document under the HTML syntax and then under the XML syntax and
 * compares each result with its expected text. Per the expected strings, the HTML form has
 * a lower-case doctype and bare boolean attributes, while the XML form keeps the upper-case
 * doctype, writes attribute values explicitly and self-closes the {@code img} tag.
 */
@Test
public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document doc = Jsoup.parse(input);

    // Expected output with HTML syntax.
    final String expectedHtml = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    // Expected output with XML syntax.
    final String expectedXml = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example 4
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes one parsed document under the HTML syntax and then under the XML syntax and
 * compares each result with its expected text. Per the expected strings, the HTML form has
 * a lower-case doctype and bare boolean attributes, while the XML form keeps the upper-case
 * doctype, writes attribute values explicitly and self-closes the {@code img} tag.
 */
@Test
public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document doc = Jsoup.parse(input);

    // Expected output with HTML syntax.
    final String expectedHtml = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    // Expected output with XML syntax.
    final String expectedXml = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example 5
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 6 votes vote down vote up
/**
 * Serializes one parsed document under the HTML syntax and then under the XML syntax and
 * compares each result with its expected text. Per the expected strings, the HTML form has
 * a lower-case doctype and bare boolean attributes, while the XML form keeps the upper-case
 * doctype, writes attribute values explicitly and self-closes the {@code img} tag.
 */
@Test
public void testHtmlAndXmlSyntax() {
    final String input = "<!DOCTYPE html><body><img async checked='checked' src='&<>\"'>&lt;&gt;&amp;&quot;<foo />bar";
    final Document doc = Jsoup.parse(input);

    // Expected output with HTML syntax.
    final String expectedHtml = "<!doctype html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async checked src=\"&amp;<>&quot;\">&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Syntax.html);
    assertEquals(expectedHtml, doc.html());

    // Expected output with XML syntax.
    final String expectedXml = "<!DOCTYPE html>\n" +
            "<html>\n" +
            " <head></head>\n" +
            " <body>\n" +
            "  <img async=\"\" checked=\"checked\" src=\"&amp;<>&quot;\" />&lt;&gt;&amp;\"\n" +
            "  <foo />bar\n" +
            " </body>\n" +
            "</html>";
    doc.outputSettings().syntax(Document.OutputSettings.Syntax.xml);
    assertEquals(expectedXml, doc.html());
}
 
Example 6
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters the tags of rich-text HTML through a jsoup whitelist.
 *
 * @param request current HTTP request; handed to {@code Configuration.getUrl} to obtain
 *                the base URI argument for cleaning
 * @param html    rich-text markup to filter
 * @return the cleaned markup, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	// The whitelist is built with a null argument to filterParameter.
	final Whitelist allowedTags = this.filterParameter(null);

	// prettyPrint(false): do not let jsoup re-format the cleaned output.
	return Jsoup.clean(
			html,
			Configuration.getUrl(request),
			allowedTags,
			new OutputSettings().prettyPrint(false));
}
 
Example 7
/**
 * Cleans HTML against jsoup's simple-text whitelist (relative links not preserved) and
 * returns the result with HTML entities unescaped.
 *
 * @param unsafeHtmlContent markup to sanitize
 * @return the cleaned content after {@code StringEscapeUtils.unescapeHtml}
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	// No indentation and no pretty-printing, so the cleaner does not re-format the output.
	final OutputSettings compact = new OutputSettings().indentAmount(0).prettyPrint(false);
	final Whitelist simpleText = Whitelist.simpleText().preserveRelativeLinks(false);

	// NOTE(review): entities are unescaped after cleaning, so the returned value should
	// presumably be treated as plain text rather than safe HTML -- confirm against callers.
	return StringEscapeUtils.unescapeHtml(
			Jsoup.clean(unsafeHtmlContent, "", simpleText, compact));
}
 
Example 8
/**
 * Initializes the configuration and builds the output formatter from string options.
 *
 * <p>Recognized option keys and the defaults used when a key is absent:
 * {@code charset} (UTF-8), {@code escapeMode} (xhtml), {@code indentAmount} (4),
 * {@code outlineMode} (true), {@code pretty} (true) and {@code syntax} (html).
 *
 * @param options formatter options keyed by name
 * @param cfg     configuration source forwarded to {@code super.initCfg}
 */
@Override
public void init(Map<String, String> options, ConfigurationSource cfg) {
    super.initCfg(cfg);

    formatter = new OutputSettings();

    // Each option is read and applied in turn, so a malformed value fails at its own setter.
    final String charsetName = options.getOrDefault("charset", StandardCharsets.UTF_8.name());
    formatter.charset(Charset.forName(charsetName));

    final String escapeModeName = options.getOrDefault("escapeMode", EscapeMode.xhtml.name());
    formatter.escapeMode(EscapeMode.valueOf(escapeModeName));

    final String indentText = options.getOrDefault("indentAmount", "4");
    formatter.indentAmount(Integer.parseInt(indentText));

    final String outlineText = options.getOrDefault("outlineMode", Boolean.TRUE.toString());
    formatter.outline(Boolean.parseBoolean(outlineText));

    final String prettyText = options.getOrDefault("pretty", Boolean.TRUE.toString());
    formatter.prettyPrint(Boolean.parseBoolean(prettyText));

    final String syntaxName = options.getOrDefault("syntax", Syntax.html.name());
    formatter.syntax(Syntax.valueOf(syntaxName));
}
 
Example 9
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that writing a document into an appendable, with pretty-printing disabled,
 * reproduces the original markup exactly.
 */
@Test
public void testHtmlAppendable() {
	final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document document = Jsoup.parse(htmlContent);

	// Pretty-printing is switched off before the document is serialized.
	document.outputSettings(new OutputSettings().prettyPrint(false));

	final StringWriter sink = new StringWriter();
	assertEquals(htmlContent, document.html(sink).toString());
}
 
Example 10
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that writing a document into an appendable, with pretty-printing disabled,
 * reproduces the original markup exactly.
 */
@Test
public void testHtmlAppendable() {
	final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document document = Jsoup.parse(htmlContent);

	// Pretty-printing is switched off before the document is serialized.
	document.outputSettings(new OutputSettings().prettyPrint(false));

	final StringWriter sink = new StringWriter();
	assertEquals(htmlContent, document.html(sink).toString());
}
 
Example 11
Source Project: astor   Source File: DocumentTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that writing a document into an appendable, with pretty-printing disabled,
 * reproduces the original markup exactly.
 */
@Test
public void testHtmlAppendable() {
	final String htmlContent = "<html><head><title>Hello</title></head><body><p>One</p><p>Two</p></body></html>";
	final Document document = Jsoup.parse(htmlContent);

	// Pretty-printing is switched off before the document is serialized.
	document.outputSettings(new OutputSettings().prettyPrint(false));

	final StringWriter sink = new StringWriter();
	assertEquals(htmlContent, document.html(sink).toString());
}
 
Example 12
Source Project: doov   Source File: HtmlSampleRulesTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Renders the document pretty-printed with a two-space indent, preceded by an HTML comment
 * line containing the string form of the context's root metadata.
 *
 * @param context supplies the root metadata rendered into the leading comment
 * @param doc     document to serialize; its output settings are replaced by this call
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment =
            "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings pretty = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(pretty).toString();
}
 
Example 13
Source Project: doov   Source File: HtmlAnyMatchTest.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Renders the document pretty-printed with a two-space indent, preceded by an HTML comment
 * line containing the string form of the context's root metadata.
 *
 * @param context supplies the root metadata rendered into the leading comment
 * @param doc     document to serialize; its output settings are replaced by this call
 * @return the comment line followed by the document's string form
 */
static String format(Context context, Document doc) {
    final String astComment =
            "<!-- " + AstVisitorUtils.astToString(context.getRootMetadata(), LOCALE) + " -->\n";
    final OutputSettings pretty = new OutputSettings().prettyPrint(true).indentAmount(2);
    return astComment + doc.outputSettings(pretty).toString();
}
 
Example 14
Source Project: SkyTube   Source File: NewPipeService.java    License: GNU General Public License v3.0 4 votes vote down vote up
/**
 * Cleans the content against jsoup's basic whitelist, with an empty base URI and
 * pretty-printing disabled.
 *
 * @param content markup to clean
 * @return the cleaned markup
 */
private String filterHtml(String content) {
    final Whitelist basicTags = Whitelist.basic();
    final OutputSettings compact = new OutputSettings().prettyPrint(false);
    return Jsoup.clean(content, "", basicTags, compact);
}
 
Example 15
Source Project: zeppelin   Source File: ZeppelinRDisplay.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Builds a text display from the body element's inner HTML after cleaning it with the
 * empty whitelist.
 *
 * @param body element whose inner HTML is converted
 * @return an {@code RDisplay} of type {@code TEXT} with code {@code SUCCESS}
 */
private static RDisplay textDisplay(Element body) {
  // Remove the HTML tags; pretty-printing is off to preserve whitespace and newlines.
  final String innerHtml = body.html();
  final Whitelist noTags = Whitelist.none();
  final OutputSettings verbatim = new OutputSettings().prettyPrint(false);
  final String plainText = Jsoup.clean(innerHtml, "", noTags, verbatim);
  return new RDisplay(plainText, Type.TEXT, Code.SUCCESS);
}
 
Example 16
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 3 votes vote down vote up
/**
 * Filters the tags of rich-text HTML through a jsoup whitelist built for the given editor tag.
 *
 * @param request   current HTTP request; handed to {@code Configuration.getUrl} to obtain
 *                  the base URI argument for cleaning
 * @param html      content to filter
 * @param editorTag comment-editor tag, passed to {@code filterParameter} to build the whitelist
 * @return the cleaned markup, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html,EditorTag editorTag) {
	if (StringUtils.isBlank(html)) {
		return "";
	}

	final Whitelist allowedTags = this.filterParameter(editorTag);

	// prettyPrint(false): do not let jsoup re-format the cleaned output.
	return Jsoup.clean(
			html,
			Configuration.getUrl(request),
			allowedTags,
			new OutputSettings().prettyPrint(false));
}
 
Example 17
Source Project: cia   Source File: DocumentDataPageModContentFactoryImpl.java    License: Apache License 2.0 2 votes vote down vote up
/**
 * Builds the content layout for the document data page.
 *
 * <p>Creates the panel content, fills the document menu bar for the page id taken from
 * {@code parameters}, adds a header label, and looks up the {@code DocumentContentData}
 * entries for that id. When at least one entry exists, the first entry's content is cleaned
 * with jsoup and shown as preformatted text, followed by a word cloud if the word-count
 * service returns a word-count map. Finally the layout is set on the panel and a page
 * visit event is created.
 *
 * @param parameters page parameters; the page id is obtained from them via {@code getPageId}
 * @param menuBar    menu bar handed to the document menu item factory
 * @param panel      panel that receives the built content
 * @return the layout that was set as the panel's content
 */
@Secured({ "ROLE_ANONYMOUS", "ROLE_USER", "ROLE_ADMIN" })
@Override
public Layout createContent(final String parameters, final MenuBar menuBar, final Panel panel) {
	final VerticalLayout panelContent = createPanelContent();

	final String pageId = getPageId(parameters);

	getDocumentMenuItemFactory().createDocumentMenuBar(menuBar, pageId);

	LabelFactory.createHeader2Label(panelContent, DOCUMENT_DATA);

	// Look up the stored content entries whose id matches the page id.
	final DataContainer<DocumentContentData, String> documentContentDataDataContainer = getApplicationManager()
			.getDataContainer(DocumentContentData.class);

	final List<DocumentContentData> documentContentlist = documentContentDataDataContainer
			.getAllBy(DocumentContentData_.id, pageId);

	// The form panel is only built when content was found; otherwise just header and menu remain.
	if (!documentContentlist.isEmpty()) {

		final Panel formPanel = new Panel();
		formPanel.setSizeFull();

		panelContent.addComponent(formPanel);

		final FormLayout formContent = new FormLayout();
		formPanel.setContent(formContent);

		// Only the first matching entry is displayed. Its content is cleaned against the
		// simple-text whitelist with an indent amount of 4.
		final String cleanContent = Jsoup.clean(documentContentlist.get(0).getContent(), "", Whitelist.simpleText(),
				new OutputSettings().indentAmount(4));

		// Shown in PREFORMATTED mode, i.e. not as HTML.
		final Label htmlContent = new Label(cleanContent, ContentMode.PREFORMATTED);

		formContent.addComponent(htmlContent);

		// Request word counts for this document, limited to MAX_RESULTS, for the current session.
		final DocumentWordCountRequest documentWordCountRequest = new DocumentWordCountRequest();
		documentWordCountRequest.setDocumentId(pageId);
		documentWordCountRequest.setMaxResults(MAX_RESULTS);
		documentWordCountRequest.setSessionId(RequestContextHolder.currentRequestAttributes().getSessionId());
		final DocumentWordCountResponse resp = (DocumentWordCountResponse) getApplicationManager()
				.service(documentWordCountRequest);

		// The word cloud is added only when the response carries a word-count map.
		if (resp.getWordCountMap() != null) {
			final Label wordCloud = new Label(createWordCloud(resp.getWordCountMap()), ContentMode.HTML);
			formContent.addComponent(wordCloud);
		}

		panelContent.setExpandRatio(formPanel, ContentRatio.GRID);

	}

	panel.setContent(panelContent);
	// Record the page visit as a user-group application event.
	getPageActionEventHelper().createPageEvent(ViewAction.VISIT_DOCUMENT_VIEW, ApplicationEventGroup.USER, NAME,
			parameters, pageId);

	return panelContent;

}