Java Code Examples for org.jsoup.safety.Whitelist

The following examples show how to use org.jsoup.safety.Whitelist. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source Project: supplierShop   Author: guchengwuyue   File: XssHttpServletRequestWrapper.java    License: MIT License 6 votes vote down vote up
/**
 * Returns the values of the named request parameter, each one cleaned with
 * jsoup's relaxed whitelist and trimmed of leading/trailing whitespace.
 *
 * @param name the request parameter name
 * @return the cleaned values, or the wrapped request's own result when it
 *         reports no values for {@code name}
 */
@Override
public String[] getParameterValues(String name)
{
    String[] rawValues = super.getParameterValues(name);
    if (rawValues == null)
    {
        return super.getParameterValues(name);
    }

    String[] cleanedValues = new String[rawValues.length];
    int position = 0;
    for (String rawValue : rawValues)
    {
        // Guard against XSS and strip surrounding whitespace.
        cleanedValues[position++] = Jsoup.clean(rawValue, Whitelist.relaxed()).trim();
    }
    return cleanedValues;
}
 
Example #2
Source Project: ruoyiplus   Author: kongshanxuelin   File: XssHttpServletRequestWrapper.java    License: MIT License 6 votes vote down vote up
/**
 * Sanitizes every value of the named request parameter: each value is passed
 * through {@code Jsoup.clean} with the relaxed whitelist and then trimmed.
 *
 * @param name the request parameter name
 * @return a new array holding the sanitized values, or the wrapped request's
 *         own result when it reports no values for {@code name}
 */
@Override
public String[] getParameterValues(String name)
{
    final String[] original = super.getParameterValues(name);
    if (original == null)
    {
        // Nothing to sanitize; defer to the wrapped request.
        return super.getParameterValues(name);
    }

    final String[] sanitized = new String[original.length];
    for (int idx = 0; idx < sanitized.length; idx++)
    {
        // Protect against XSS and drop leading/trailing whitespace.
        sanitized[idx] = Jsoup.clean(original[idx], Whitelist.relaxed()).trim();
    }
    return sanitized;
}
 
Example #3
Source Project: docx4j-template   Author: hiwepy   File: XHTMLDocumentHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: cleans one sample anchor tag with each of jsoup's
 * built-in whitelists and prints every result, with a line of asterisks
 * between consecutive results.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
	final String baseUri = "http://www.baidu.com";
	final String html = "<a href=\"http://www.baidu.com/gaoji/preferences.html\"name=\"tj_setting\">搜索设置</a>";

	// The built-in presets, exercised in this order.
	final Whitelist[] presets = {
			Whitelist.none(),
			Whitelist.simpleText(),
			Whitelist.basic(),
			Whitelist.basicWithImages(),
			Whitelist.relaxed()
	};

	for (int i = 0; i < presets.length; i++) {
		if (i > 0) {
			System.out.println("*******");
		}
		System.out.println(Jsoup.clean(html, baseUri, presets[i]));
	}
}
 
Example #4
Source Project: crawler-jsoup-maven   Author: bluetata   File: JsoupTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: parses a small fragment, edits the first {@code <div>},
 * wraps the first {@code <span>}, prints the resulting document, then prints
 * the same document cleaned with the relaxed whitelist and pretty-printing
 * turned off.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {

    String markup = "<span><div>test</div></span>";
    Document document = Jsoup.parse(markup);

    // Replace the div's content, then add one paragraph before and one after it.
    Element firstDiv = document.select("div").first();
    firstDiv.html("<p>lorem ipsum</p>");
    firstDiv.prepend("<p>First</p>");
    firstDiv.append("<p>Last</p>");
    // NOTE(review): appendElement receives the whole markup string as its
    // argument here rather than a plain tag name — confirm this is intended.
    firstDiv.appendElement(markup);

    // Wrap the first span in a list item that contains a link.
    Element firstSpan = document.select("span").first();
    firstSpan.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(document.html());

    OutputSettings compactOutput = new OutputSettings().prettyPrint(false);
    String cleaned = Jsoup.clean(document.html(), "", Whitelist.relaxed(), compactOutput);

    System.out.println(cleaned);
}
 
Example #5
Source Project: lucene4ir   Author: lucene4ir   File: TRECAquaintDocumentIndexer.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Builds the indexer: delegates the index settings to the superclass,
 * prepares the HTML whitelist, creates a fresh document and initialises
 * its fields.
 *
 * @param indexPath       passed through to the superclass constructor
 * @param tokenFilterFile passed through to the superclass constructor
 * @param positional      passed through to the superclass constructor
 */
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
    super(indexPath, tokenFilterFile, positional);

    try {
        // Start from the relaxed preset and additionally allow these six tags.
        whiteList = Whitelist.relaxed();
        whiteList.addTags("docno", "doc", "headline", "text", "date_time", "slug");
    } catch (Exception e){
        // Report the problem on stdout and carry on with construction.
        String report = " caught a " + e.getClass() +
                "\n with message: " + e.getMessage();
        System.out.println(report);
    }

    doc = new Document();
    initFields();
    initAQUAINTDoc();
}
 
Example #6
Source Project: MtgDesktopCompanion   Author: nicho92   File: JEditorPaneBrowser.java    License: GNU General Public License v3.0 6 votes vote down vote up
/**
 * Hands a task to the {@code ThreadManager} that fetches the page at
 * {@code url}, cleans its HTML down to the basic whitelist plus
 * {@code <img src>}, and shows the result in the browser component.
 * Any exception is logged and shown in the component as "Error ...".
 *
 * @param url address of the page to load
 */
@Override
public void loadURL(String url) {
	logger.debug("loading " + url);

	ThreadManager.getInstance().executeThread(()->{
		try {
			// Basic whitelist, extended with <img> elements and their src attribute.
			Whitelist allowed = Whitelist.basic()
					.addTags("img")
					.addAttributes("img", "src");

			String rawHtml = RequestBuilder.build().clean().url(url).method(METHOD.GET).setClient(client).toHtml().html();
			browse.setText(Jsoup.clean(rawHtml, allowed));
		}
		catch(Exception e)
		{
			logger.error(e);
			browse.setText("Error " + e);
		}
	}, "loading " + url);
}
 
Example #7
Source Project: flow   Author: vaadin   File: RouteNotFoundError.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders the "route not found" page: fills the error template with the
 * requested path, an optional reason and (when the template asks for it)
 * the route listing, then appends it to this component.
 *
 * @param event     the navigation event that failed
 * @param parameter carries the exception and an optional custom message
 * @return {@code HttpServletResponse.SC_NOT_FOUND}
 */
@Override
public int setErrorParameter(BeforeEnterEvent event,
        ErrorParameter<NotFoundException> parameter) {
    String rawPath = event.getLocation().getPath();
    String rawInfo = parameter.hasCustomMessage()
            ? "Reason: " + parameter.getCustomMessage()
            : "";

    // Strip all markup before the values are embedded in the HTML template.
    String safePath = Jsoup.clean(rawPath, Whitelist.none());
    String safeInfo = Jsoup.clean(rawInfo, Whitelist.none());

    boolean productionMode = event.getUI().getSession().getConfiguration()
            .isProductionMode();

    String page = getErrorHtml(productionMode)
            .replace("{{path}}", safePath)
            .replace("{{additionalInfo}}", safeInfo);
    if (page.contains("{{routes}}")) {
        page = page.replace("{{routes}}", getRoutes(event));
    }

    getElement().appendChild(new Html(page).getElement());
    return HttpServletResponse.SC_NOT_FOUND;
}
 
Example #8
Source Project: pybbs   Author: tomoya92   File: TopicApiController.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Updates the title and content of the topic identified by {@code id}.
 * The title must be non-empty and the current API user must own the topic.
 *
 * @param id   id of the topic to edit
 * @param body request payload; the "title" and "content" entries are read
 * @return a success result wrapping the updated topic, with sensitive words
 *         in its content replaced by "*"
 */
@PutMapping(value = "/{id}")
public Result edit(@PathVariable Integer id, @RequestBody Map<String, String> body) {
    User currentUser = getApiUser();
    String newTitle = body.get("title");
    String newContent = body.get("content");
    ApiAssert.notEmpty(newTitle, "请输入标题");

    // Load the topic and make sure the caller is its author.
    Topic existing = topicService.selectById(id);
    boolean ownedByCaller = existing.getUserId().equals(currentUser.getId());
    ApiAssert.isTrue(ownedByCaller, "谁给你的权限修改别人的话题的?");

    // The title is stripped of every tag except <video>; the content is stored as submitted.
    String safeTitle = Jsoup.clean(newTitle, Whitelist.none().addTags("video"));
    existing.setTitle(safeTitle);
    existing.setContent(newContent);
    existing.setModifyTime(new Date());
    topicService.update(existing, null);

    // Sensitive words are replaced on the object only after the update call.
    String masked = SensitiveWordUtil.replaceSensitiveWord(existing.getContent(), "*", SensitiveWordUtil.MinMatchType);
    existing.setContent(masked);
    return success(existing);
}
 
Example #9
Source Project: pybbs   Author: tomoya92   File: TopicService.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Persists the topic, refreshes its tag associations when tags are
 * supplied, and re-indexes the topic.
 *
 * @param topic the topic to store
 * @param tags  raw tag string; when null or empty the tag handling is skipped
 */
@Override
public void update(Topic topic, String tags) {
    topicMapper.updateById(topic);

    // Tag handling runs only when some tags were supplied.
    boolean hasTags = !StringUtils.isEmpty(tags);
    if (hasTags) {
        // Decrement topicCount on each of the topic's old tags.
        tagService.reduceTopicCount(topic.getId());

        // Save the tags (stripped of all markup) and link them to the topic.
        String plainTags = Jsoup.clean(tags, Whitelist.none());
        List<Tag> savedTags = tagService.insertTag(plainTags);
        topicTagService.insertTopicTag(topic.getId(), savedTags);
    }

    // Index the topic.
    indexedService.indexTopic(String.valueOf(topic.getId()), topic.getTitle(), topic.getContent());
}
 
Example #10
Source Project: cia   Author: Hack23   File: WordCounterImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Counts term frequencies in a document's content. The content is first
 * cleaned with jsoup's basic whitelist, then fed into a {@code SimpleCorpus};
 * every term the corpus reports is mapped to its frequency.
 *
 * @param documentContentData supplies the content to analyse
 * @param maxResult           not used by this implementation
 * @return a map from term to its frequency in the corpus
 */
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
	final String rawContent = documentContentData.getContent();

	final SimpleCorpus corpus = new SimpleCorpus(
			SimpleSentenceSplitter.getInstance(),
			new SimpleTokenizer(),
			new SwedishStopWords(),
			EnglishPunctuations.getInstance());

	final String cleanedContent = Jsoup.clean(rawContent, Whitelist.basic());
	corpus.add(new Text(cleanedContent));

	final Map<String, Integer> frequencies = new HashMap<>();
	for (final Iterator<String> it = corpus.getTerms(); it.hasNext();) {
		final String word = it.next();
		frequencies.put(word, corpus.getTermFrequency(word));
	}
	return frequencies;
}
 
Example #11
Source Project: android-opensource-library-56   Author: android-opensource-library-56   File: SanitizeActivity.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Sets up the sanitize demo screen: pre-fills the input field with a sample
 * link carrying an {@code onclick} handler and wires the button so that a
 * click writes the input, cleaned with jsoup's basic whitelist, into the
 * output field.
 *
 * @param savedInstanceState forwarded to the superclass
 */
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText source = (EditText) findViewById(R.id.input_text);
    source.setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText target = (EditText) findViewById(R.id.sanitized_text);

    OnClickListener sanitizeOnClick = new OnClickListener() {
        @Override
        public void onClick(View v) {
            String dirty = source.getText().toString();
            target.setText(Jsoup.clean(dirty, Whitelist.basic()));
        }
    };
    findViewById(R.id.sanitize_button).setOnClickListener(sanitizeOnClick);
}
 
Example #12
Source Project: plumemo   Author: byteblogs168   File: PreviewTextUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts plain text: removes every HTML tag and trims the result.
 *
 * @param html the markup to strip; may be null
 * @return the trimmed plain text, or null when {@code html} is null
 */
public static String getText(String html) {
    return html == null
            ? null
            : Jsoup.clean(html, Whitelist.none()).trim();
}
 
Example #13
Source Project: plumemo   Author: byteblogs168   File: PreviewTextUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cleans the markup with jsoup's simple-text whitelist, which lets through
 * plain text and the tags b, em, i, strong and u.
 *
 * @param html the markup to clean; may be null
 * @return the cleaned markup, or null when {@code html} is null
 */
public static String getSimpleHtml(String html) {
    return html == null
            ? null
            : Jsoup.clean(html, Whitelist.simpleText());
}
 
Example #14
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters the text so that {@code <br>} is the only tag left in it.
 *
 * @param html the markup to filter
 * @return the filtered text, or an empty string when {@code html} is blank
 */
public String filterTag_br(String html) {
	if(StringUtils.isBlank(html)) {
		return "";
	}

	// Start from the text-only whitelist and allow <br> on top of it.
	Whitelist brOnly = Whitelist.none().addTags("br");

	// Default output settings are used here; an OutputSettings with
	// prettyPrint(false) could be passed instead to skip re-formatting.
	return Jsoup.clean(html, brOnly);
}
 
Example #15
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters rich-text markup using the whitelist returned by
 * {@code filterParameter(null)}, with pretty-printing disabled.
 *
 * @param request used to obtain the base URI via {@code Configuration.getUrl}
 * @param html    the markup to filter
 * @return the filtered markup, or an empty string when {@code html} is blank
 */
public String filterTag(HttpServletRequest request,String html) {
	if(StringUtils.isBlank(html)) {
		return "";
	}

	Whitelist allowed = this.filterParameter(null);
	String baseUri = Configuration.getUrl(request);
	// prettyPrint(false): do not let jsoup re-format the output.
	OutputSettings rawOutput = new OutputSettings().prettyPrint(false);

	return Jsoup.clean(html, baseUri, allowed, rawOutput);
}
 
Example #16
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes every tag and returns only the text.
 *
 * @param html the markup to strip
 * @return the remaining text, or an empty string when {@code html} is blank
 */
public String filterText(String html) {
	// Original author's note: both doc.text() and Jsoup.clean can extract the
	// text, but text() turns tags such as <p> into spaces rather than line
	// breaks, whereas clean converts them to line breaks by default.
	// Passing an OutputSettings with prettyPrint(false) would skip re-formatting.
	return StringUtils.isBlank(html)
			? ""
			: Jsoup.clean(html, Whitelist.none());
}
 
Example #17
Source Project: bbs   Author: diyhi   File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Runs the markup through {@code deleteHiddenTag} (per the original
 * documentation this drops everything inside {@code <hide>} tags), then
 * removes all remaining tags and returns only the text.
 *
 * @param html the markup to process
 * @return the remaining text, or an empty string when the input, or the
 *         result of {@code deleteHiddenTag}, is blank
 */
public String filterHideText(String html) {
	if(StringUtils.isBlank(html)) {
		return "";
	}

	String visibleHtml = this.deleteHiddenTag(html);

	// Keep text only; every other piece of HTML is removed.
	return StringUtils.isBlank(visibleHtml)
			? ""
			: Jsoup.clean(visibleHtml, Whitelist.none());
}
 
Example #18
Source Project: inception   Author: inception-project   File: Utilities.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cleans a highlight fragment so that only {@code <em>} elements survive,
 * then renames those elements to {@code <mark>}.
 *
 * @param aHighlight the HTML fragment to clean
 * @return the inner HTML of the cleaned document's body
 */
public static String cleanHighlight(String aHighlight) {
    // An otherwise empty whitelist that allows only <em>.
    Whitelist emphasisOnly = new Whitelist().addTags("em");

    Document parsed = Jsoup.parseBodyFragment(aHighlight, "");
    Document sanitized = new Cleaner(emphasisOnly).clean(parsed);

    // Rename the surviving <em> elements to <mark>.
    sanitized.select("em").tagName("mark");
    return sanitized.body().html();
}
 
Example #19
Source Project: FlyCms   Author: sunkaifei   File: Stringcut.java    License: MIT License 5 votes vote down vote up
/**
 * Template directive: strips all markup from the "content" parameter,
 * abbreviates it to the width given by the "num" parameter, exposes the
 * result as the {@code info_content} variable and renders the directive body.
 *
 * @param env      the template environment that receives the variable
 * @param params   directive parameters; "content" and "num" are read
 * @param loopVars not used
 * @param body     the nested directive body to render
 * @throws TemplateException propagated from wrapping or rendering
 * @throws IOException       propagated from rendering
 */
@SuppressWarnings("rawtypes")
public void execute(Environment env, Map params, TemplateModel[] loopVars,
		TemplateDirectiveBody body) throws TemplateException, IOException {
	DefaultObjectWrapperBuilder wrapperBuilder = new DefaultObjectWrapperBuilder(Configuration.VERSION_2_3_28);

	// Read the directive's parameters.
	String rawContent = params.get("content").toString();
	int maxWidth = Integer.parseInt(params.get("num").toString());

	// Strip all markup first, then shorten the plain text.
	String plainText = Jsoup.clean(rawContent, Whitelist.none());
	String shortened = StringUtils.abbreviate(plainText, maxWidth);

	env.setVariable("info_content", wrapperBuilder.build().wrap(shortened));
	body.render(env.getOut());
}
 
Example #20
Source Project: document-management-software   Author: logicaldoc   File: HTMLSanitizer.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Cleans HTML with an extended version of jsoup's relaxed whitelist:
 * relative links are preserved, several extra tags and a set of attributes
 * on all tags are allowed, and {@code img src} may use the http, https,
 * data and cid protocols.
 *
 * @param unsafeHtmlContent the HTML to clean
 * @return the cleaned HTML
 */
public static String sanitize(String unsafeHtmlContent) {
	Whitelist permitted = Whitelist.relaxed()
			.preserveRelativeLinks(true)
			.addTags("head", "html", "style", "body", "fieldsMap", "area")
			.addAttributes(":all", "name", "class", "style", "id", "src", "type", "cellpadding",
					"cellspacing", "alt", "title", "shape", "coords", "width", "height", "dir")
			.addProtocols("img", "src", "http", "https", "data", "cid");

	return Jsoup.clean(unsafeHtmlContent, permitted);
}
 
Example #21
Source Project: document-management-software   Author: logicaldoc   File: HTMLSanitizer.java    License: GNU Lesser General Public License v3.0 5 votes vote down vote up
/**
 * Cleans HTML with jsoup's simple-text whitelist (no pretty-printing, no
 * indentation, relative links not preserved) and then unescapes HTML
 * entities in the result.
 *
 * @param unsafeHtmlContent the HTML to clean
 * @return the cleaned and entity-unescaped text
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	String cleaned = Jsoup.clean(
			unsafeHtmlContent,
			"",
			Whitelist.simpleText().preserveRelativeLinks(false),
			new OutputSettings().indentAmount(0).prettyPrint(false));

	// NOTE(review): unescaping after cleaning turns entities such as &lt; back
	// into literal characters — confirm callers treat the result as plain text.
	return StringEscapeUtils.unescapeHtml(cleaned);
}
 
Example #22
Source Project: RuoYi   Author: lerry903   File: XssHttpServletRequestWrapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the named parameter's values after running each through
 * {@code Jsoup.clean} with the relaxed whitelist and trimming it.
 *
 * @param name the request parameter name
 * @return the cleaned values, or the wrapped request's own result when it
 *         reports no values for {@code name}
 */
@Override
public String[] getParameterValues(String name) {
    String[] incoming = super.getParameterValues(name);
    if (incoming == null) {
        return super.getParameterValues(name);
    }

    String[] escaped = new String[incoming.length];
    int slot = 0;
    for (String value : incoming) {
        // Guard against XSS and remove leading/trailing whitespace.
        escaped[slot] = Jsoup.clean(value, Whitelist.relaxed()).trim();
        slot++;
    }
    return escaped;
}
 
Example #23
Source Project: ripme   Author: RipMeApp   File: FuraffinityRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Fetches a page and extracts its description as plain text.
 * <p>
 * The description is taken from the first {@code td.alt1} cell with
 * {@code width="70%"}. {@code <br>} and {@code <p>} elements are turned into
 * line breaks before all markup is stripped.
 *
 * @param page URL of the page to fetch
 * @return the content of the page's {@code og:title} meta tag, a newline, and
 *         the plain-text description; or null when the page cannot be
 *         fetched or contains no description cell
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        cookies.putAll(resp.cookies());

        // Parse the body exactly once and reuse the document: jsoup may refuse to
        // re-read a response stream that has already been consumed, and a second
        // parse would only repeat the same work.
        Document documentz = resp.parse();

        // Try to find the description
        Elements els = documentz.select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        Element ele = els.get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));

        // Mark line breaks with a literal backslash-n placeholder so they can be
        // swapped for the platform line separator in the element's HTML below.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);

        // Literal replacement of the placeholder (no regex needed).
        String withLineBreaks = ele.html().replace("\\n", System.getProperty("line.separator"));
        String tempPage = Jsoup.clean(withLineBreaks, "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
Example #24
Source Project: FlareBot   Author: FlareBot   File: GeneralUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Resolves an {@link AudioItem} from a string.
 * This can be a url or search terms.
 * <p>
 * Resolution is attempted up to three times, sleeping between failed
 * attempts with a delay that doubles each time (2 ms, then 4 ms). If the
 * thread is interrupted, the interrupt flag is restored and no further
 * attempts are made.
 *
 * @param player The music player
 * @param input  The string to get the AudioItem from.
 * @return {@link AudioItem} from the string.
 * @throws IllegalArgumentException If the Item couldn't be found due to it not existing on Youtube.
 * @throws IllegalStateException    If the Video is unavailable for Flare, for example if it was published by VEVO,
 *                                  or if every attempt to resolve it failed.
 */
public static AudioItem resolveItem(Player player, String input) throws IllegalArgumentException, IllegalStateException {
    final int maxAttempts = 3;
    Optional<AudioItem> item = Optional.empty();
    boolean failed = false;
    long backoff = 2;
    Throwable cause = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
        try {
            item = Optional.ofNullable(player.resolve(input));
            failed = false;
            break;
        } catch (FriendlyException | InterruptedException | ExecutionException e) {
            failed = true;
            cause = e;
            if (e instanceof InterruptedException) {
                // Restore the flag so the sleep below (and callers) observe the interrupt.
                Thread.currentThread().interrupt();
            }
            // Exceptions are allowed to carry no message; guard before inspecting it.
            String message = e.getMessage();
            if (message != null && message.contains("Vevo")) {
                throw new IllegalStateException(Jsoup.clean(message, Whitelist.none()), cause);
            }
            FlareBot.LOGGER.error(Markers.NO_ANNOUNCE, "Cannot get video '" + input + "'");
            if (attempt == maxAttempts) {
                // No retry follows the last attempt, so there is nothing to wait for.
                break;
            }
            try {
                Thread.sleep(backoff);
            } catch (InterruptedException interrupted) {
                // Stop retrying and keep the interrupt visible to the caller.
                Thread.currentThread().interrupt();
                break;
            }
            // Double the delay. The previous "backoff ^= 2" was a bitwise XOR,
            // which toggled the delay between 2 and 0 instead of growing it.
            backoff *= 2;
        }
    }
    if (failed) {
        String message = cause.getMessage();
        String cleanedMessage = message == null ? null : Jsoup.clean(message, Whitelist.none());
        throw new IllegalStateException(cleanedMessage, cause);
    }
    return item.orElseThrow(IllegalArgumentException::new);
}
 
Example #25
Source Project: matrix-appservice-email   Author: kamax-matrix   File: GmailClientFormatter.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Extracts the message content from a Gmail e-mail body: takes the first
 * {@code div[dir='ltr']}, drops trailing {@code <br>} children and cleans
 * the rest with jsoup's basic whitelist.
 *
 * @param content the HTML of the e-mail
 * @return the cleaned HTML, or an empty string when no matching div exists
 */
@Override
protected String formatHtml(String content) {
    Element parsedBody = Jsoup.parse(content).body();
    Element gmailContent = parsedBody.select("div[dir='ltr']").first();
    if (gmailContent == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Remove trailing <br> children one at a time until the last child is something else.
    while (!gmailContent.children().isEmpty()) {
        Element trailing = gmailContent.children().last();
        if (!trailing.is("br")) {
            break;
        }
        trailing.remove();
    }

    return Jsoup.clean(gmailContent.html(), Whitelist.basic());
}
 
Example #26
/**
 * Formats an e-mail body: removes {@code blockquote[cite]} and
 * {@code div.moz-cite-prefix} elements, drops trailing {@code <br>}
 * children of the body and cleans the rest with jsoup's basic whitelist.
 *
 * @param content the HTML of the e-mail
 * @return the cleaned HTML of the body
 */
@Override
protected String formatHtml(String content) {
    Element mailBody = Jsoup.parse(content).body();

    // Remove cited blockquotes and their "moz-cite-prefix" divs.
    mailBody.select("blockquote[cite]").remove();
    mailBody.select("div.moz-cite-prefix").remove();

    // Remove trailing <br> children one at a time until the last child is something else.
    while (!mailBody.children().isEmpty()) {
        Element trailing = mailBody.children().last();
        if (!trailing.is("br")) {
            break;
        }
        trailing.remove();
    }

    return Jsoup.clean(mailBody.html(), Whitelist.basic());
}
 
Example #27
Source Project: scava   Author: crossminer   File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Returns the class's whitelist after optionally removing one tag from it
 * and optionally adding one tag to it.
 *
 * @param extraTagToRemoveWhitelist tag to remove; skipped when null or empty
 * @param extraTagToAddWhiteList    tag to add; skipped when null or empty
 * @return the adjusted whitelist
 */
private static Whitelist whitelist(String extraTagToRemoveWhitelist, String extraTagToAddWhiteList)
{
	// NOTE(review): this is a reference to the shared static whitelist, not a
	// copy, so the changes made below appear to persist across calls — confirm
	// that this is intended.
	Whitelist adjusted = whitelist;

	boolean hasTagToRemove = extraTagToRemoveWhitelist != null && !extraTagToRemoveWhitelist.isEmpty();
	if (hasTagToRemove)
	{
		adjusted.removeTags(extraTagToRemoveWhitelist);
	}

	boolean hasTagToAdd = extraTagToAddWhiteList != null && !extraTagToAddWhiteList.isEmpty();
	if (hasTagToAdd)
	{
		adjusted.addTags(extraTagToAddWhiteList);
	}

	return adjusted;
}
 
Example #28
Source Project: scava   Author: crossminer   File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Returns the class's whitelist after optionally removing some tags from it
 * and optionally adding some tags to it. Removal is applied before addition,
 * so a tag present in both arrays ends up allowed.
 *
 * @param extraTagsToRemoveWhitelist tags to remove; skipped when null or empty
 * @param extraTagsToAddWhiteList    tags to add; skipped when null or empty
 * @return the adjusted whitelist
 */
private static Whitelist whitelist(String[] extraTagsToRemoveWhitelist, String[] extraTagsToAddWhiteList)
{
	// NOTE(review): this is a reference to the shared static whitelist, not a
	// copy, so the changes made below appear to persist across calls — confirm
	// that this is intended.
	Whitelist newWhiteList = whitelist;
	if(extraTagsToRemoveWhitelist!=null && extraTagsToRemoveWhitelist.length>0)
		newWhiteList.removeTags(extraTagsToRemoveWhitelist);
	if(extraTagsToAddWhiteList!=null && extraTagsToAddWhiteList.length>0)
		newWhiteList.addTags(extraTagsToAddWhiteList);
	// Return the configured whitelist directly. The previous code called
	// removeTags a second time here without the null guard, which failed for a
	// null array and undid the addition of any tag listed in both arrays.
	return newWhiteList;
}
 
Example #29
Source Project: scava   Author: crossminer   File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup
 * and collects text from the body's child nodes via {@code readNodes}.
 *
 * @param input the HTML to process
 * @param wl    the whitelist applied when cleaning
 * @return the list filled by {@code readNodes}
 */
private static List<String> parse(String input, Whitelist wl)
{
	// Clean first, then parse the cleaned markup using the same output settings.
	Document cleanedDocument = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleanedDocument.outputSettings(outputSettings);

	List<String> collectedText = new ArrayList<>();
	readNodes(cleanedDocument.body().childNodes(), collectedText);
	return collectedText;
}
 
Example #30
Source Project: scava   Author: crossminer   File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup
 * and collects entries from the body's child nodes via
 * {@code readNodesWithTags}, starting with "body" as the tag argument.
 *
 * @param input the HTML to process
 * @param wl    the whitelist applied when cleaning
 * @return the list of entries filled by {@code readNodesWithTags}
 */
private static List<Map.Entry<String,String>> parseWithTags(String input, Whitelist wl)
{
	// Clean first, then parse the cleaned markup using the same output settings.
	Document cleanedDocument = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleanedDocument.outputSettings(outputSettings);

	List<Map.Entry<String,String>> taggedText = new ArrayList<>();
	readNodesWithTags(cleanedDocument.body().childNodes(), taggedText, "body");
	return taggedText;
}