org.jsoup.safety.Whitelist Java Examples

The following examples show how to use org.jsoup.safety.Whitelist. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: XssHttpServletRequestWrapper.java    From supplierShop with MIT License 6 votes vote down vote up
/**
 * Returns all values of the named request parameter after HTML cleaning.
 * <p>
 * Each value is passed through {@code Jsoup.clean} with {@code Whitelist.relaxed()}
 * and then trimmed.
 *
 * @param name the request parameter name
 * @return a new array holding the cleaned, trimmed values, or {@code null} when the
 *         wrapped request returns {@code null} for this parameter
 */
@Override
public String[] getParameterValues(String name)
{
    String[] values = super.getParameterValues(name);
    if (values == null)
    {
        // Nothing to clean; no need to query the wrapped request a second time.
        return null;
    }
    // The whitelist is only read by Jsoup.clean, so one instance serves every value.
    Whitelist relaxed = Whitelist.relaxed();
    String[] escapedValues = new String[values.length];
    for (int i = 0; i < values.length; i++)
    {
        // Guard against XSS and strip leading/trailing whitespace.
        escapedValues[i] = Jsoup.clean(values[i], relaxed).trim();
    }
    return escapedValues;
}
 
Example #2
Source File: XssHttpServletRequestWrapper.java    From ruoyiplus with MIT License 6 votes vote down vote up
/**
 * Returns all values of the named request parameter after HTML cleaning.
 * <p>
 * Each value is passed through {@code Jsoup.clean} with {@code Whitelist.relaxed()}
 * and then trimmed.
 *
 * @param name the request parameter name
 * @return a new array holding the cleaned, trimmed values, or {@code null} when the
 *         wrapped request returns {@code null} for this parameter
 */
@Override
public String[] getParameterValues(String name)
{
    String[] values = super.getParameterValues(name);
    if (values == null)
    {
        // Nothing to clean; no need to query the wrapped request a second time.
        return null;
    }
    // The whitelist is only read by Jsoup.clean, so one instance serves every value.
    Whitelist relaxed = Whitelist.relaxed();
    String[] escapedValues = new String[values.length];
    for (int i = 0; i < values.length; i++)
    {
        // Guard against XSS and strip leading/trailing whitespace.
        escapedValues[i] = Jsoup.clean(values[i], relaxed).trim();
    }
    return escapedValues;
}
 
Example #3
Source File: XHTMLDocumentHandler.java    From docx4j-template with Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: cleans one fixed anchor snippet with each of the five built-in
 * {@code Whitelist} presets and prints every result, separated by a line of asterisks.
 *
 * @param args unused
 */
public static void main(String[] args) {
	final String baseUri = "http://www.baidu.com";
	final String html = "<a href=\"http://www.baidu.com/gaoji/preferences.html\"name=\"tj_setting\">搜索设置</a>";

	// Presets in the order in which their output is printed.
	final Whitelist[] presets = {
			Whitelist.none(),
			Whitelist.simpleText(),
			Whitelist.basic(),
			Whitelist.basicWithImages(),
			Whitelist.relaxed()
	};

	for (int i = 0; i < presets.length; i++) {
		if (i > 0) {
			// Separator goes between results, never after the last one.
			System.out.println("*******");
		}
		System.out.println(Jsoup.clean(html, baseUri, presets[i]));
	}
}
 
Example #4
Source File: JsoupTest.java    From crawler-jsoup-maven with Apache License 2.0 6 votes vote down vote up
/**
 * Scratch demo of jsoup DOM manipulation followed by cleaning.
 * <p>
 * Parses a small fragment, rewrites the first {@code div}, wraps the first {@code span},
 * prints the resulting document, then prints the same HTML after
 * {@code Jsoup.clean} with {@code Whitelist.relaxed()} and pretty-printing disabled.
 *
 * @param args unused
 */
public static void main(String[] args) {
    
    String d = "<span><div>test</div></span>";
    Document doc = Jsoup.parse(d);
    Element div = doc.select("div").first(); // first <div> of the parsed document
    div.html("<p>lorem ipsum</p>"); // replaces the div's inner HTML: <div><p>lorem ipsum</p></div>
    div.prepend("<p>First</p>");
    div.append("<p>Last</p>");
    // now: <div><p>First</p><p>lorem ipsum</p><p>Last</p></div>
    // NOTE(review): jsoup documents appendElement as taking a tag name, but d holds a whole
    // HTML fragment here -- this looks unintended; confirm what the author meant to append.
    div.appendElement(d);
    Element span = doc.select("span").first(); // first <span> of the document
    span.wrap("<li><a href='http://example.com/'></a></li>");
    // the span is now wrapped by the supplied <li><a href='http://example.com/'> markup
    System.out.println(doc.html());
    
    // Clean the manipulated document with an empty base URI and without re-formatting the output.
    String s = Jsoup.clean(doc.html(), "", Whitelist.relaxed(), new OutputSettings().prettyPrint(false));
    
    System.out.println(s);
}
 
Example #5
Source File: TRECAquaintDocumentIndexer.java    From lucene4ir with Apache License 2.0 6 votes vote down vote up
/**
 * Creates the indexer and prepares its HTML whitelist and document fields.
 * <p>
 * The whitelist starts from {@code Whitelist.relaxed()} and additionally allows the
 * tags {@code docno}, {@code doc}, {@code headline}, {@code text}, {@code date_time}
 * and {@code slug}. Any exception raised while building it is printed to standard
 * output and otherwise ignored.
 *
 * @param indexPath       forwarded to the superclass constructor
 * @param tokenFilterFile forwarded to the superclass constructor
 * @param positional      forwarded to the superclass constructor
 */
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
    super(indexPath, tokenFilterFile, positional);

    try {
        whiteList = Whitelist.relaxed();
        // A single varargs call registers the extra tags in the same order as separate calls would.
        whiteList.addTags("docno", "doc", "headline", "text", "date_time", "slug");
    } catch (Exception e){
        System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
    }

    doc = new Document();
    initFields();
    initAQUAINTDoc();
}
 
Example #6
Source File: JEditorPaneBrowser.java    From MtgDesktopCompanion with GNU General Public License v3.0 6 votes vote down vote up
/**
 * Loads the given URL on a worker thread and shows its cleaned HTML in the browser pane.
 * <p>
 * The fetched markup is cleaned with {@code Whitelist.basic()} extended by the
 * {@code img} tag and its {@code src} attribute. On any exception the error is
 * logged and an "Error ..." text is shown instead.
 *
 * @param url the address to load
 */
@Override
public void loadURL(String url) {
	logger.debug("loading " + url);

	ThreadManager.getInstance().executeThread(() -> {
		try {
			Whitelist allowed = Whitelist.basic()
					.addTags("img")
					.addAttributes("img", "src");

			String rawHtml = RequestBuilder.build().clean().url(url).method(METHOD.GET).setClient(client).toHtml().html();
			browse.setText(Jsoup.clean(rawHtml, allowed));
		} catch (Exception e) {
			logger.error(e);
			browse.setText("Error " + e);
		}
	}, "loading " + url);
}
 
Example #7
Source File: RouteNotFoundError.java    From flow with Apache License 2.0 6 votes vote down vote up
/**
 * Renders the "route not found" page for the failed navigation.
 * <p>
 * The requested path and the optional custom message are stripped of all HTML
 * ({@code Whitelist.none()}) before being substituted into the error template.
 * The {@code {{routes}}} placeholder is only resolved when the template contains it.
 *
 * @param event     the navigation event that failed
 * @param parameter carries the optional custom message
 * @return {@code HttpServletResponse.SC_NOT_FOUND}
 */
@Override
public int setErrorParameter(BeforeEnterEvent event,
        ErrorParameter<NotFoundException> parameter) {
    String rawPath = event.getLocation().getPath();
    String rawInfo = parameter.hasCustomMessage()
            ? "Reason: " + parameter.getCustomMessage()
            : "";

    // Both values end up inside HTML, so remove every tag first.
    String safePath = Jsoup.clean(rawPath, Whitelist.none());
    String safeInfo = Jsoup.clean(rawInfo, Whitelist.none());

    boolean productionMode = event.getUI().getSession().getConfiguration()
            .isProductionMode();

    String page = getErrorHtml(productionMode)
            .replace("{{path}}", safePath)
            .replace("{{additionalInfo}}", safeInfo);
    if (page.contains("{{routes}}")) {
        page = page.replace("{{routes}}", getRoutes(event));
    }

    getElement().appendChild(new Html(page).getElement());
    return HttpServletResponse.SC_NOT_FOUND;
}
 
Example #8
Source File: TopicApiController.java    From pybbs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Updates the title and content of the topic with the given id.
 * <p>
 * The title must be non-empty and the topic must belong to the current API user.
 * The title is cleaned with {@code Whitelist.none()} plus the {@code video} tag;
 * the content is stored as received. Sensitive words are masked only on the
 * object returned to the caller, after the update has been issued.
 *
 * @param id   id of the topic to edit
 * @param body request payload; the keys {@code title} and {@code content} are read
 * @return a success result wrapping the updated topic
 */
@PutMapping(value = "/{id}")
public Result edit(@PathVariable Integer id, @RequestBody Map<String, String> body) {
    User currentUser = getApiUser();
    String newTitle = body.get("title");
    String newContent = body.get("content");
    ApiAssert.notEmpty(newTitle, "请输入标题");

    // Load the topic and reject the edit unless the caller owns it.
    Topic target = topicService.selectById(id);
    boolean ownedByCaller = target.getUserId().equals(currentUser.getId());
    ApiAssert.isTrue(ownedByCaller, "谁给你的权限修改别人的话题的?");

    String cleanedTitle = Jsoup.clean(newTitle, Whitelist.none().addTags("video"));
    target.setTitle(cleanedTitle);
    target.setContent(newContent);
    target.setModifyTime(new Date());
    topicService.update(target, null);

    // Mask sensitive words on the response object only.
    target.setContent(SensitiveWordUtil.replaceSensitiveWord(target.getContent(), "*", SensitiveWordUtil.MinMatchType));
    return success(target);
}
 
Example #9
Source File: TopicService.java    From pybbs with GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Persists the topic, refreshes its tags when any are supplied, and re-indexes it.
 *
 * @param topic the topic to update
 * @param tags  raw tag string; when {@code null} or empty the tag handling is skipped.
 *              All HTML is stripped from it ({@code Whitelist.none()}) before the tags are saved.
 */
@Override
public void update(Topic topic, String tags) {
    topicMapper.updateById(topic);
    // Tag handling: the emptiness check is done once (a nested duplicate check was removed).
    if (!StringUtils.isEmpty(tags)) {
        // Per the original comment: decrement topicCount of every tag previously attached.
        tagService.reduceTopicCount(topic.getId());
        // Save the tags.
        List<Tag> tagList = tagService.insertTag(Jsoup.clean(tags, Whitelist.none()));
        // Link the saved tags to the topic.
        topicTagService.insertTopicTag(topic.getId(), tagList);
    }
    // Index the topic.
    indexedService.indexTopic(String.valueOf(topic.getId()), topic.getTitle(), topic.getContent());
}
 
Example #10
Source File: WordCounterImpl.java    From cia with Apache License 2.0 6 votes vote down vote up
/**
 * Counts term frequencies in a document's content.
 * <p>
 * The content is cleaned with {@code Whitelist.basic()}, added to a
 * {@code SimpleCorpus}, and every term the corpus reports is mapped to its frequency.
 *
 * @param documentContentData supplies the HTML content
 * @param maxResult           NOTE(review): not used by this method -- confirm whether
 *                            the result was meant to be limited
 * @return a new map from term to its frequency in the corpus
 */
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
	final String html = documentContentData.getContent();

	final SimpleCorpus corpus = new SimpleCorpus(SimpleSentenceSplitter.getInstance(), new SimpleTokenizer(),
			new SwedishStopWords(), EnglishPunctuations.getInstance());
	final String cleaned = Jsoup.clean(html, Whitelist.basic());
	corpus.add(new Text(cleaned));

	final Map<String, Integer> frequencies = new HashMap<>();
	for (final Iterator<String> it = corpus.getTerms(); it.hasNext();) {
		final String term = it.next();
		frequencies.put(term, corpus.getTermFrequency(term));
	}
	return frequencies;
}
 
Example #11
Source File: SanitizeActivity.java    From android-opensource-library-56 with Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the sanitize demo screen.
 * <p>
 * Pre-fills the input field with a sample link carrying an {@code onclick} handler and
 * wires the button so that a click writes the input, cleaned with
 * {@code Whitelist.basic()}, into the output field.
 *
 * @param savedInstanceState forwarded to the superclass
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText source = (EditText) findViewById(R.id.input_text);
    source.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText target = (EditText) findViewById(R.id.sanitized_text);

    final OnClickListener sanitizeOnClick = new OnClickListener() {
        @Override
        public void onClick(View v) {
            final String dirty = source.getText().toString();
            target.setText(Jsoup.clean(dirty, Whitelist.basic()));
        }
    };
    findViewById(R.id.sanitize_button).setOnClickListener(sanitizeOnClick);
}
 
Example #12
Source File: PreviewTextUtils.java    From plumemo with Apache License 2.0 5 votes vote down vote up
/**
 * Extracts the plain text of an HTML fragment.
 *
 * @param html HTML markup; may be {@code null}
 * @return the input cleaned with {@code Whitelist.none()} and trimmed,
 *         or {@code null} when {@code html} is {@code null}
 */
public static String getText(String html) {
    return html == null ? null : Jsoup.clean(html, Whitelist.none()).trim();
}
 
Example #13
Source File: PreviewTextUtils.java    From plumemo with Apache License 2.0 5 votes vote down vote up
/**
 * Reduces an HTML fragment to simple text formatting.
 * <p>
 * Per the original comment, only b, em, i, strong, u and plain text pass through
 * (this is {@code Whitelist.simpleText()}).
 *
 * @param html HTML markup; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code html} is {@code null}
 */
public static String getSimpleHtml(String html) {
    String result = null;
    if (html != null) {
        result = Jsoup.clean(html, Whitelist.simpleText());
    }
    return result;
}
 
Example #14
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Strips all markup from the text except {@code <br>} tags.
 *
 * @param html the markup to filter
 * @return the filtered text, or an empty string when {@code html} is blank
 */
public String filterTag_br(String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}
	// Start from the empty whitelist (text only) and allow just <br>.
	// Output formatting is left at Jsoup.clean's defaults (no explicit OutputSettings).
	return Jsoup.clean(html, Whitelist.none().addTags("br"));
}
 
Example #15
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters rich-text markup using the whitelist produced by {@code filterParameter(null)}.
 * <p>
 * The base URI is taken from {@code Configuration.getUrl(request)} and the output is
 * not re-formatted (pretty-printing is disabled).
 *
 * @param request the current request, used to obtain the base URI
 * @param html    the markup to filter
 * @return the filtered markup, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}
	final Whitelist whitelist = this.filterParameter(null);
	final String baseUri = Configuration.getUrl(request);
	// prettyPrint(false): keep the cleaned output as-is instead of re-formatting it.
	final OutputSettings rawOutput = new OutputSettings().prettyPrint(false);
	return Jsoup.clean(html, baseUri, whitelist, rawOutput);
}
 
Example #16
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes every tag and returns only the text.
 * <p>
 * Per the original author's note: {@code doc.text()} turns tags such as {@code p} into
 * spaces, whereas {@code Jsoup.clean} with default settings turns them into line breaks.
 *
 * @param html the markup to filter
 * @return the text content, or an empty string when {@code html} is blank
 */
public String filterText(String html) {
	final boolean nothingToFilter = StringUtils.isBlank(html);
	// Whitelist.none(): keep text only, drop all other HTML content.
	return nothingToFilter ? "" : Jsoup.clean(html, Whitelist.none());
}
 
Example #17
Source File: TextFilterManage.java    From bbs with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes {@code <hide>} sections (via {@code deleteHiddenTag}) and then every remaining
 * tag, returning only the text.
 *
 * @param html the markup to filter
 * @return the visible text, or an empty string when the input is blank either before
 *         or after the hidden sections are removed
 */
public String filterHideText(String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}
	final String withoutHidden = this.deleteHiddenTag(html);
	if (StringUtils.isBlank(withoutHidden)) {
		return "";
	}
	// Keep text only; all other HTML content is removed.
	return Jsoup.clean(withoutHidden, Whitelist.none());
}
 
Example #18
Source File: Utilities.java    From inception with Apache License 2.0 5 votes vote down vote up
/**
 * Cleans a highlight snippet so that only {@code <em>} elements remain, then renames
 * those elements to {@code <mark>}.
 *
 * @param aHighlight the highlight markup
 * @return the inner HTML of the cleaned document's body
 */
public static String cleanHighlight(String aHighlight) {
    // An empty whitelist plus <em>: every other tag is dropped by the cleaner.
    Cleaner emOnly = new Cleaner(new Whitelist().addTags("em"));
    Document sanitized = emOnly.clean(Jsoup.parseBodyFragment(aHighlight, ""));
    sanitized.select("em").tagName("mark");
    return sanitized.body().html();
}
 
Example #19
Source File: Stringcut.java    From FlyCms with MIT License 5 votes vote down vote up
/**
 * Template directive that strips all HTML from the {@code content} parameter,
 * abbreviates it to {@code num} characters and exposes the result to the directive
 * body as the variable {@code info_content}.
 *
 * @param env      the template environment; receives the variable and provides the output writer
 * @param params   directive parameters; {@code content} and {@code num} are read
 * @param loopVars not used by this method
 * @param body     the directive body, rendered after the variable is set
 * @throws TemplateException declared by the directive contract
 * @throws IOException       declared by the directive contract
 */
@SuppressWarnings("rawtypes")
public void execute(Environment env, Map params, TemplateModel[] loopVars,
		TemplateDirectiveBody body) throws TemplateException, IOException {
	DefaultObjectWrapperBuilder wrapperBuilder = new DefaultObjectWrapperBuilder(Configuration.VERSION_2_3_28);

	// Read the directive parameters passed from the page.
	final String rawContent = params.get("content").toString();
	final int maxWidth = Integer.parseInt(params.get("num").toString());

	final String plainText = Jsoup.clean(rawContent, Whitelist.none());
	final String shortened = StringUtils.abbreviate(plainText, maxWidth);

	env.setVariable("info_content", wrapperBuilder.build().wrap(shortened));
	body.render(env.getOut());
}
 
Example #20
Source File: HTMLSanitizer.java    From document-management-software with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Sanitizes HTML with an extended {@code Whitelist.relaxed()} configuration.
 * <p>
 * On top of the relaxed preset, relative links are preserved, the document-level tags
 * listed below are allowed, a set of attributes is allowed on every tag, and
 * {@code img src} may use the http, https, data and cid protocols.
 *
 * @param unsafeHtmlContent the untrusted markup
 * @return the cleaned markup
 */
public static String sanitize(String unsafeHtmlContent) {
	// NOTE(review): "style" is allowed both as a tag and as an attribute on all elements,
	// and "data:" is allowed for images -- confirm this is acceptable for untrusted input.
	final Whitelist extended = Whitelist.relaxed()
			.preserveRelativeLinks(true)
			.addTags("head", "html", "style", "body", "fieldsMap", "area")
			.addAttributes(":all", "name", "class", "style", "id", "src", "type", "cellpadding",
					"cellspacing", "alt", "title", "shape", "coords", "width", "height", "dir")
			.addProtocols("img", "src", "http", "https", "data", "cid");

	return Jsoup.clean(unsafeHtmlContent, extended);
}
 
Example #21
Source File: HTMLSanitizer.java    From document-management-software with GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Cleans markup with {@code Whitelist.simpleText()} (relative links not preserved,
 * no pretty-printing, zero indent) and then unescapes HTML entities in the result.
 *
 * @param unsafeHtmlContent the untrusted markup
 * @return the cleaned text with HTML entities unescaped
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	final OutputSettings compact = new OutputSettings().indentAmount(0).prettyPrint(false);
	final Whitelist textOnly = Whitelist.simpleText().preserveRelativeLinks(false);
	final String cleaned = Jsoup.clean(unsafeHtmlContent, "", textOnly, compact);
	// NOTE(review): unescaping after cleaning can turn escaped entities back into literal
	// '<' / '>' characters -- confirm callers treat the result as text, not as HTML.
	return StringEscapeUtils.unescapeHtml(cleaned);
}
 
Example #22
Source File: XssHttpServletRequestWrapper.java    From RuoYi with Apache License 2.0 5 votes vote down vote up
/**
 * Returns all values of the named request parameter after HTML cleaning.
 * <p>
 * Each value is passed through {@code Jsoup.clean} with {@code Whitelist.relaxed()}
 * and then trimmed.
 *
 * @param name the request parameter name
 * @return a new array holding the cleaned, trimmed values, or {@code null} when the
 *         wrapped request returns {@code null} for this parameter
 */
@Override
public String[] getParameterValues(String name) {
    String[] values = super.getParameterValues(name);
    if (values == null) {
        // Nothing to clean; no need to query the wrapped request a second time.
        return null;
    }
    // The whitelist is only read by Jsoup.clean, so one instance serves every value.
    Whitelist relaxed = Whitelist.relaxed();
    String[] escapedValues = new String[values.length];
    for (int i = 0; i < values.length; i++) {
        // Guard against XSS and strip leading/trailing whitespace.
        escapedValues[i] = Jsoup.clean(values[i], relaxed).trim();
    }
    return escapedValues;
}
 
Example #23
Source File: FuraffinityRipper.java    From ripme with MIT License 5 votes vote down vote up
/**
 * Fetches an image page and extracts its description as plain text.
 * <p>
 * The response body is parsed exactly once; the parsed document is used both to locate
 * the description cell and to read the {@code og:title} meta tag.
 *
 * @param page URL of the image page
 * @return the page title on the first line followed by the description text, or
 *         {@code null} when the page cannot be fetched or has no description
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse once and reuse the document: jsoup documents that parsing the same
        // response repeatedly requires bufferUp(), so a second parse() is not safe.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Mark line breaks with a literal backslash-n so they survive tag stripping below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        // Turn the literal markers into real line separators, then strip every tag.
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
Example #24
Source File: GeneralUtils.java    From FlareBot with MIT License 5 votes vote down vote up
/**
 * Resolves an {@link AudioItem} from a string.
 * This can be a url or search terms
 * <p>
 * Up to three attempts are made, sleeping between failed attempts with a delay that
 * doubles each time (starting at 2 ms, as passed to {@code Thread.sleep}). If the
 * calling thread is interrupted, its interrupt flag is restored and no further
 * attempts are made.
 *
 * @param player The music player
 * @param input  The string to get the AudioItem from.
 * @return {@link AudioItem} from the string.
 * @throws IllegalArgumentException If the Item couldn't be found due to it not existing on Youtube.
 * @throws IllegalStateException    If the Video is unavailable for Flare, for example if it was published by VEVO.
 */
public static AudioItem resolveItem(Player player, String input) throws IllegalArgumentException, IllegalStateException {
    AudioItem item = null;
    Throwable cause = null;
    // NOTE(review): the initial delay is 2 ms -- confirm the intended unit/size.
    long backoff = 2;
    for (int attempt = 0; attempt <= 2; attempt++) {
        try {
            item = player.resolve(input);
            cause = null;
            break;
        } catch (FriendlyException | InterruptedException | ExecutionException e) {
            cause = e;
            if (e instanceof InterruptedException) {
                // Keep the interruption visible to callers and stop retrying.
                Thread.currentThread().interrupt();
                break;
            }
            String message = e.getMessage();
            // The message may be null; guard before searching it.
            if (message != null && message.contains("Vevo")) {
                throw new IllegalStateException(Jsoup.clean(message, Whitelist.none()), e);
            }
            FlareBot.LOGGER.error(Markers.NO_ANNOUNCE, "Cannot get video '" + input + "'");
            try {
                Thread.sleep(backoff);
            } catch (InterruptedException interrupted) {
                Thread.currentThread().interrupt();
                break;
            }
            // Double the delay (the previous "^= 2" was XOR and toggled between 2 and 0).
            backoff *= 2;
        }
    }
    if (cause != null) {
        // Fall back to toString() so a null message cannot make Jsoup.clean fail and hide the cause.
        String message = cause.getMessage() != null ? cause.getMessage() : cause.toString();
        throw new IllegalStateException(Jsoup.clean(message, Whitelist.none()), cause);
    }
    if (item == null) {
        throw new IllegalArgumentException();
    }
    return item;
}
 
Example #25
Source File: GmailClientFormatter.java    From matrix-appservice-email with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the message content from a Gmail-composed HTML e-mail.
 * <p>
 * The first {@code div[dir='ltr']} in the body is taken as the content, trailing
 * {@code <br>} children are removed, and the remainder is cleaned with
 * {@code Whitelist.basic()}.
 *
 * @param content the raw HTML of the e-mail
 * @return the cleaned content, or an empty string when no content div is found
 */
@Override
protected String formatHtml(String content) {
    Element contentDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (contentDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Drop trailing <br> children; last() yields null once no children remain.
    for (Element tail = contentDiv.children().last();
            tail != null && tail.is("br");
            tail = contentDiv.children().last()) {
        tail.remove();
    }

    return Jsoup.clean(contentDiv.html(), Whitelist.basic());
}
 
Example #26
Source File: ThunderbirdClientFormatter.java    From matrix-appservice-email with GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the message content from a Thunderbird-composed HTML e-mail.
 * <p>
 * Elements matching {@code blockquote[cite]} and {@code div.moz-cite-prefix} are removed
 * from the body, trailing {@code <br>} children are dropped, and the remainder is
 * cleaned with {@code Whitelist.basic()}.
 *
 * @param content the raw HTML of the e-mail
 * @return the cleaned body HTML
 */
@Override
protected String formatHtml(String content) {
    Element body = Jsoup.parse(content).body();
    body.select("blockquote[cite]").remove();
    body.select("div.moz-cite-prefix").remove();

    // Drop trailing <br> children; last() yields null once no children remain.
    Element tail = body.children().last();
    while (tail != null && tail.is("br")) {
        tail.remove();
        tail = body.children().last();
    }

    return Jsoup.clean(body.html(), Whitelist.basic());
}
 
Example #27
Source File: HtmlParser.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Adjusts the class-level whitelist by removing and/or adding a single tag.
 * <p>
 * NOTE(review): this operates on the {@code whitelist} field itself, not on a copy, so
 * the change is visible to every later use of that field -- confirm this is intended.
 *
 * @param extraTagToRemoveWhitelist tag to remove; skipped when {@code null} or empty
 * @param extraTagToAddWhiteList    tag to add; skipped when {@code null} or empty
 * @return the (same) whitelist instance after the adjustments
 */
private static Whitelist whitelist(String extraTagToRemoveWhitelist, String extraTagToAddWhiteList)
{
	final boolean hasRemoval = extraTagToRemoveWhitelist != null && !extraTagToRemoveWhitelist.isEmpty();
	final boolean hasAddition = extraTagToAddWhiteList != null && !extraTagToAddWhiteList.isEmpty();

	// Removal is applied before addition.
	if (hasRemoval)
	{
		whitelist.removeTags(extraTagToRemoveWhitelist);
	}
	if (hasAddition)
	{
		whitelist.addTags(extraTagToAddWhiteList);
	}
	return whitelist;
}
 
Example #28
Source File: HtmlParser.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Adjusts the class-level whitelist by removing and/or adding several tags.
 * <p>
 * NOTE(review): this operates on the {@code whitelist} field itself, not on a copy, so
 * the change is visible to every later use of that field -- confirm this is intended.
 *
 * @param extraTagsToRemoveWhitelist tags to remove; skipped when {@code null} or empty
 * @param extraTagsToAddWhiteList    tags to add; skipped when {@code null} or empty
 * @return the (same) whitelist instance after the adjustments
 */
private static Whitelist whitelist(String[] extraTagsToRemoveWhitelist, String[] extraTagsToAddWhiteList)
{		
	Whitelist newWhiteList = whitelist;
	if(extraTagsToRemoveWhitelist!=null && extraTagsToRemoveWhitelist.length>0)
		newWhiteList.removeTags(extraTagsToRemoveWhitelist);
	if(extraTagsToAddWhiteList!=null && extraTagsToAddWhiteList.length>0)
		newWhiteList.addTags(extraTagsToAddWhiteList);
	// Return the adjusted whitelist directly. The earlier code re-ran removeTags(...) here
	// without the null/empty guard, which failed for a null array and removed again any
	// tag that had just been added.
	return newWhiteList;
}
 
Example #29
Source File: HtmlParser.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup, and collects
 * text from the body's child nodes via {@code readNodes}.
 *
 * @param input the raw markup
 * @param wl    the whitelist used for cleaning
 * @return the list filled by {@code readNodes}
 */
private static List<String> parse(String input, Whitelist wl)
{
	List<String> collected = new ArrayList<String>();

	// Clean with an empty base URI and the shared output settings, then parse the result.
	Document cleaned = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleaned.outputSettings(outputSettings);

	readNodes(cleaned.body().childNodes(), collected);
	return collected;
}
 
Example #30
Source File: HtmlParser.java    From scava with Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup, and collects
 * entries from the body's child nodes via {@code readNodesWithTags}, starting with the
 * label {@code "body"}.
 *
 * @param input the raw markup
 * @param wl    the whitelist used for cleaning
 * @return the list filled by {@code readNodesWithTags}
 */
private static List<Map.Entry<String,String>> parseWithTags(String input, Whitelist wl)
{
	List<Map.Entry<String,String>> collected = new ArrayList<Map.Entry<String,String>>();

	// Clean with an empty base URI and the shared output settings, then parse the result.
	Document cleaned = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleaned.outputSettings(outputSettings);

	readNodesWithTags(cleaned.body().childNodes(), collected, "body");
	return collected;
}