Java Code Examples for org.jsoup.Jsoup#clean()

The following examples show how to use org.jsoup.Jsoup#clean(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: blog-hunter   File: CommonUtil.java    License: MIT License 5 votes vote down vote up
/**
 * Derives the description to use for a blog post.
 *
 * <p>When {@code StringUtils.isNotEmpty(description)} holds, the description is returned with
 * every CRLF sequence and every space character removed; this branch does not truncate.
 * Otherwise, when the content is not empty, it is trimmed, cleaned with
 * {@code Whitelist.simpleText()} and shortened to at most 100 characters.
 *
 * @param description the description of the original blog post
 * @param content     the body content of the original blog post
 * @return the derived description, or {@code null} when neither argument is usable
 */
public static String getRealDescription(String description, String content) {
    if (StringUtils.isNotEmpty(description)) {
        return description.replaceAll("\r\n| ", "");
    }
    if (!StringUtils.isNotEmpty(content)) {
        return null;
    }

    final String cleaned = Jsoup.clean(content.trim(), Whitelist.simpleText());
    if (cleaned.length() > 100) {
        return cleaned.substring(0, 100);
    }
    return cleaned;
}
 
Example 2
/**
 * Checks that self-closing {@code <br/>} tags produce no parse errors, pass
 * {@code Jsoup.isValid} against the basic whitelist, and are emitted as {@code <br>} by
 * {@code Jsoup.clean}.
 */
@Test public void selfClosingVoidIsNotAnError() {
    final String markup = "<p>test<br/>test<br/></p>";

    // Parse with error tracking switched on and expect an empty error list.
    final Parser trackingParser = Parser.htmlParser().setTrackErrors(5);
    trackingParser.parseInput(markup, "");
    assertEquals(0, trackingParser.getErrors().size());

    assertTrue(Jsoup.isValid(markup, Whitelist.basic()));
    assertEquals("<p>test<br>test<br></p>", Jsoup.clean(markup, Whitelist.basic()));
}
 
Example 3
/**
 * Removes tags and attributes that the whitelist does not permit.
 *
 * @param xssStr the markup to sanitize; may be {@code null}
 * @return the empty string when {@code xssStr} is {@code null} or empty, otherwise the result
 *         of {@code Jsoup.clean} using the whitelist returned by {@code custome()}
 */
public static String clean(String xssStr) {
    final boolean nothingToClean = xssStr == null || xssStr.isEmpty();
    return nothingToClean ? "" : Jsoup.clean(xssStr, custome());
}
 
Example 4
Source Project: Aooms   File: XssFilter.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Filters user-supplied content.
 *
 * @param value the user input to filter
 * @return {@code value} unchanged when {@code StrUtil.isNotBlank(value)} is false, otherwise
 *         the result of {@code Jsoup.clean} with {@code user_content_filter}
 */
public String filter(String value) {
    if (!StrUtil.isNotBlank(value)) {
        return value;
    }
    return Jsoup.clean(value, user_content_filter);
}
 
Example 5
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that an attribute registered under the {@code ":all"} pseudo tag survives on each
 * allowed tag, while an attribute that was never registered ({@code src}) is dropped.
 */
@Test public void handlesAllPseudoTag() {
    final Whitelist allowed = new Whitelist()
            .addAttributes(":all", "class")
            .addAttributes("p", "style")
            .addTags("p", "a");
    final String input = "<p class='foo' src='bar'><a class='qux'>link</a></p>";

    final String expected = "<p class=\"foo\"><a class=\"qux\">link</a></p>";
    assertEquals(expected, Jsoup.clean(input, allowed));
}
 
Example 6
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that a frameset document cleans down to nothing, both through the string-based
 * {@code Jsoup.clean} and through a {@code Cleaner} applied to the parsed document.
 */
@Test public void handlesFramesets() {
    final String framesetHtml = "<html><head><script></script><noscript></noscript></head><frameset><frame src=\"foo\" /><frame src=\"foo\" /></frameset></html>";

    // String API: nothing good can come out of that
    assertEquals("", Jsoup.clean(framesetHtml, Whitelist.basic()));

    // Document API: the result must exist and have an empty body
    final Cleaner basicCleaner = new Cleaner(Whitelist.basic());
    final Document cleaned = basicCleaner.clean(Jsoup.parse(framesetHtml));
    assertFalse(cleaned == null);
    assertEquals(0, cleaned.body().childNodeSize());
}
 
Example 7
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 5 votes vote down vote up
/**
 * Checks that after removing the enforced {@code rel} attribute for {@code a} from the basic
 * whitelist, the cleaned link carries no {@code rel} attribute.
 */
@Test public void testRemoveEnforcedAttributes() {
    final String input = "<div><p><A HREF='HTTP://nice.com'>Nice</a></p><blockquote>Hello</blockquote>";
    final Whitelist basicWithoutRel = Whitelist.basic().removeEnforcedAttribute("a", "rel");

    final String actual = TextUtil.stripNewlines(Jsoup.clean(input, basicWithoutRel));
    assertEquals("<p><a href=\"http://nice.com\">Nice</a></p><blockquote>Hello</blockquote>", actual);
}
 
Example 8
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks cleaning with {@code Whitelist.basicWithImages()}: the {@code http} image keeps its
 * {@code src} and {@code alt}, while the {@code ftp} image loses its {@code src}.
 */
@Test public void basicWithImagesTest() {
    final String input = "<div><p><img src='http://example.com/' alt=Image></p><p><img src='ftp://ftp.example.com'></p></div>";

    final String actual = TextUtil.stripNewlines(Jsoup.clean(input, Whitelist.basicWithImages()));
    final String expected = "<p><img src=\"http://example.com/\" alt=\"Image\"></p><p><img></p>";
    assertEquals(expected, actual);
}
 
Example 9
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that markup using only a {@code <b>} tag passes through
 * {@code Whitelist.simpleText()} unchanged.
 */
@Test public void simpleBehaviourTest2() {
    final String input = "Hello <b>there</b>!";

    final String actual = TextUtil.stripNewlines(Jsoup.clean(input, Whitelist.simpleText()));
    assertEquals("Hello <b>there</b>!", actual);
}
 
Example 10
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that a tag unknown to {@code Whitelist.relaxed()} is removed while its text content
 * is kept.
 */
@Test public void testDropsUnknownTags() {
    final String input = "<p><custom foo=true>Test</custom></p>";

    assertEquals("<p>Test</p>", Jsoup.clean(input, Whitelist.relaxed()));
}
 
Example 11
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that the {@code <custom>} element is stripped by {@code Whitelist.relaxed()} and
 * only its text remains inside the paragraph.
 */
@Test public void testDropsUnknownTags() {
    final String withCustomTag = "<p><custom foo=true>Test</custom></p>";
    final String expected = "<p>Test</p>";

    assertEquals(expected, Jsoup.clean(withCustomTag, Whitelist.relaxed()));
}
 
Example 12
Source Project: springboot-admin   File: JsoupUtils.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Cleans {@code html} with the {@code whitelist} defined outside this method.
 *
 * @param html the markup to clean
 * @return {@code html} unchanged when {@code StringUtils.isNotBlank(html)} is false, otherwise
 *         the result of {@code Jsoup.clean}
 */
public static String clear(String html) {
	if (!StringUtils.isNotBlank(html)) {
		return html;
	}
	return Jsoup.clean(html, whitelist);
}
 
Example 13
Source Project: spring-boot   File: MyHtmlUtils.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Cleans {@code html} with {@code Whitelist.none()}.
 *
 * @param html the markup to clean; may be {@code null}
 * @return {@code null} when {@code html} is {@code null}, otherwise the cleaned string
 */
public static String getText(String html) {
    return html == null ? null : Jsoup.clean(html, Whitelist.none());
}
 
Example 14
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that relative {@code href} and {@code src} values are resolved against the base URI
 * passed to {@code Jsoup.clean}.
 */
@Test public void resolvesRelativeLinks() {
    final String relativeMarkup = "<a href='/foo'>Link</a><img src='/bar'>";
    final String baseUri = "http://example.com/";

    final String expected = "<a href=\"http://example.com/foo\" rel=\"nofollow\">Link</a>\n<img src=\"http://example.com/bar\">";
    assertEquals(expected, Jsoup.clean(relativeMarkup, baseUri, Whitelist.basicWithImages()));
}
 
Example 15
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that a control character (octal escape {@code \06}) following the tag name still
 * yields a well-formed anchor after cleaning with the basic whitelist.
 */
@Test public void handlesControlCharactersAfterTagName() {
    final String input = "<a/\06>";

    assertEquals("<a rel=\"nofollow\"></a>", Jsoup.clean(input, Whitelist.basic()));
}
 
Example 16
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/**
 * Checks that {@code Whitelist.simpleText()} reduces nested markup to its text plus the
 * {@code <b>} tag, dropping the {@code id} attribute on that tag.
 */
@Test public void simpleBehaviourTest() {
    final String input = "<div><p class=foo><a href='http://evil.com'>Hello <b id=bar>there</b>!</a></div>";

    final String actual = TextUtil.stripNewlines(Jsoup.clean(input, Whitelist.simpleText()));
    assertEquals("Hello <b>there</b>!", actual);
}
 
Example 17
Source Project: spring-boot   File: MyHtmlUtils.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Cleans {@code html} with {@code Whitelist.basic()}.
 *
 * @param html the markup to clean; may be {@code null}
 * @return {@code null} when {@code html} is {@code null}, otherwise the cleaned markup
 */
public static String getBasicHtml(String html) {
    return html == null ? null : Jsoup.clean(html, Whitelist.basic());
}
 
Example 18
Source Project: symphonyx   File: ArticleProcessor.java    License: Apache License 2.0 4 votes vote down vote up
/**
 * Gets article preview content.
 *
 * <p>
 * Renders the response with a json object, for example,
 * <pre>
 * {
 *     "html": ""
 * }
 * </pre>
 * </p>
 *
 * <p>
 * The "html" value is one of:
 * <ul>
 * <li>the "articleContentBlockLabel" text when the author or the article has the invalid status;</li>
 * <li>the "articleDiscussionLabel" text (with a link to the author) when the article is a
 * discussion, the current user is not the author and the current user's name is not among the
 * user names returned for the content;</li>
 * <li>otherwise the article content converted to HTML, cleaned with {@code Whitelist.none()} and
 * cut to 150 characters followed by " ...." when it is at least that long.</li>
 * </ul>
 * A false result is rendered when the article or its author cannot be found.
 * </p>
 *
 * @param request the specified http servlet request
 * @param response the specified http servlet response
 * @param context the specified http request context
 * @param articleId the specified article id
 * @throws Exception exception
 */
@RequestProcessing(value = "/article/{articleId}/preview", method = HTTPRequestMethod.GET)
@Before(adviceClass = StopwatchStartAdvice.class)
@After(adviceClass = StopwatchEndAdvice.class)
public void getArticlePreviewContent(final HttpServletRequest request, final HttpServletResponse response,
        final HTTPRequestContext context, final String articleId) throws Exception {
    // Default payload; overwritten below once the real preview is known.
    context.renderJSON(true).renderJSONValue("html", "");

    final JSONObject article = articleQueryService.getArticle(articleId);
    if (null == article) {
        context.renderFalseResult();

        return;
    }

    // Maximum number of characters kept in the preview.
    final int length = 150;
    String content = article.optString(Article.ARTICLE_CONTENT);
    final String authorId = article.optString(Article.ARTICLE_AUTHOR_ID);
    final JSONObject author = userQueryService.getUser(authorId);

    // Blocked when a known author is invalid, or when the article itself is invalid.
    if (null != author && UserExt.USER_STATUS_C_INVALID == author.optInt(UserExt.USER_STATUS)
            || Article.ARTICLE_STATUS_C_INVALID == article.optInt(Article.ARTICLE_STATUS)) {
        context.renderJSONValue("html", langPropsService.get("articleContentBlockLabel"));

        return;
    }

    // The check above tolerates a missing author, but the author is dereferenced below.
    // Fail the request like the missing-article case instead of throwing a NullPointerException.
    if (null == author) {
        context.renderFalseResult();

        return;
    }

    final Set<String> userNames = userQueryService.getUserNames(content);
    final JSONObject currentUser = userQueryService.getCurrentUser(request);
    final String currentUserName = null == currentUser ? "" : currentUser.optString(User.USER_NAME);
    final String authorName = author.optString(User.USER_NAME);
    if (Article.ARTICLE_TYPE_C_DISCUSSION == article.optInt(Article.ARTICLE_TYPE)
            && !authorName.equals(currentUserName)) {
        // A discussion is only previewed for the author and for users named in userNames.
        boolean invited = false;
        for (final String userName : userNames) {
            if (userName.equals(currentUserName)) {
                invited = true;

                break;
            }
        }

        if (!invited) {
            String blockContent = langPropsService.get("articleDiscussionLabel");
            blockContent = blockContent.replace("{user}", "<a href='" + Latkes.getServePath()
                    + "/member/" + authorName + "'>" + authorName + "</a>");

            context.renderJSONValue("html", blockContent);

            return;
        }
    }

    content = Emotions.convert(content);
    content = Markdowns.toHTML(content);

    // Clean the rendered HTML with the empty whitelist before truncating it.
    content = Jsoup.clean(content, Whitelist.none());
    if (content.length() >= length) {
        content = StringUtils.substring(content, 0, length)
                + " ....";
    }

    context.renderJSONValue("html", content);
}
 
Example 19
Source Project: astor   File: CleanerTest.java    License: GNU General Public License v2.0 4 votes vote down vote up
/** Checks that an HTML comment is removed when cleaning with {@code Whitelist.relaxed()}. */
@Test public void testDropComments() {
    final String withComment = "<p>Hello<!-- no --></p>";

    assertEquals("<p>Hello</p>", Jsoup.clean(withComment, Whitelist.relaxed()));
}
 
Example 20
/**
 * Cleans {@code html} with a fresh whitelist to which only the {@code hide} tag is added.
 *
 * @param html the markup to clean; may be {@code null}
 * @return {@code null} when {@code html} is {@code null}, otherwise the cleaned markup
 */
public static String removeHideHtml(String html) {
    if (html == null) {
        return null;
    }
    final Whitelist hideOnly = new Whitelist().addTags("hide");
    return Jsoup.clean(html, hideOnly);
}