Java Code Examples for org.jsoup.safety.Whitelist

The following examples show how to use org.jsoup.safety.Whitelist. These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source Project: supplierShop   Source File: XssHttpServletRequestWrapper.java    License: MIT License 6 votes vote down vote up
/**
 * Returns the values of the named request parameter after cleaning each one
 * with jsoup's relaxed whitelist and trimming leading/trailing whitespace.
 *
 * @param name the request parameter name
 * @return the cleaned values, or the wrapped request's own result when it
 *         reports no values for the parameter
 */
@Override
public String[] getParameterValues(String name)
{
    String[] rawValues = super.getParameterValues(name);
    if (rawValues == null)
    {
        return super.getParameterValues(name);
    }

    String[] cleanedValues = new String[rawValues.length];
    int index = 0;
    for (String rawValue : rawValues)
    {
        // Guard against XSS and strip surrounding whitespace.
        cleanedValues[index++] = Jsoup.clean(rawValue, Whitelist.relaxed()).trim();
    }
    return cleanedValues;
}
 
Example 2
Source Project: ruoyiplus   Source File: XssHttpServletRequestWrapper.java    License: MIT License 6 votes vote down vote up
/**
 * Cleans every value of the given request parameter: each value is run
 * through jsoup with the relaxed whitelist and then trimmed.
 *
 * @param name the request parameter name
 * @return a new array of cleaned values; when the wrapped request has no
 *         values for the parameter, its result is returned unchanged
 */
@Override
public String[] getParameterValues(String name)
{
    String[] original = super.getParameterValues(name);
    if (original == null)
    {
        return super.getParameterValues(name);
    }

    String[] sanitized = new String[original.length];
    for (int pos = 0; pos < sanitized.length; pos++)
    {
        // XSS protection plus removal of leading/trailing spaces.
        String cleaned = Jsoup.clean(original[pos], Whitelist.relaxed());
        sanitized[pos] = cleaned.trim();
    }
    return sanitized;
}
 
Example 3
Source Project: docx4j-template   Source File: XHTMLDocumentHandler.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: cleans the same anchor snippet with each built-in
 * whitelist preset and prints the results, separated by a line of asterisks.
 *
 * @param args unused
 */
public static void main(String[] args) {
	final String baseUri = "http://www.baidu.com";
	final String html = "<a href=\"http://www.baidu.com/gaoji/preferences.html\"name=\"tj_setting\">搜索设置</a>";

	// Presets in the order they are demonstrated.
	final Whitelist[] presets = {
		Whitelist.none(),
		Whitelist.simpleText(),
		Whitelist.basic(),
		Whitelist.basicWithImages(),
		Whitelist.relaxed()
	};

	for (int i = 0; i < presets.length; i++) {
		if (i > 0) {
			System.out.println("*******");
		}
		System.out.println(Jsoup.clean(html, baseUri, presets[i]));
	}
}
 
Example 4
Source Project: crawler-jsoup-maven   Source File: JsoupTest.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Demo entry point: parses a small fragment, edits the first div and span
 * through the jsoup DOM API, prints the document, then prints the same
 * document cleaned with the relaxed whitelist (pretty-printing disabled).
 *
 * @param args unused
 */
public static void main(String[] args) {
    String markup = "<span><div>test</div></span>";
    Document document = Jsoup.parse(markup);

    // Replace the first div's content, then add a paragraph before and after it.
    Element firstDiv = document.select("div").first();
    firstDiv.html("<p>lorem ipsum</p>");
    firstDiv.prepend("<p>First</p>");
    firstDiv.append("<p>Last</p>");

    // NOTE(review): the whole markup string is passed as the tag name here - confirm this is intended.
    firstDiv.appendElement(markup);

    // Wrap the first span in a list item that contains a link.
    Element firstSpan = document.select("span").first();
    firstSpan.wrap("<li><a href='http://example.com/'></a></li>");
    System.out.println(document.html());

    OutputSettings compactOutput = new OutputSettings().prettyPrint(false);
    String cleaned = Jsoup.clean(document.html(), "", Whitelist.relaxed(), compactOutput);

    System.out.println(cleaned);
}
 
Example 5
Source Project: lucene4ir   Source File: TRECAquaintDocumentIndexer.java    License: Apache License 2.0 6 votes vote down vote up
public TRECAquaintDocumentIndexer(String indexPath, String tokenFilterFile, boolean positional){
    super(indexPath, tokenFilterFile, positional);

    try {
        whiteList = Whitelist.relaxed();
        whiteList.addTags("docno");
        whiteList.addTags("doc");
        whiteList.addTags("headline");
        whiteList.addTags("text");
        whiteList.addTags("date_time");
        whiteList.addTags("slug");
    } catch (Exception e){
        System.out.println(" caught a " + e.getClass() +
                "\n with message: " + e.getMessage());
    }

    doc = new Document();
    initFields();
    initAQUAINTDoc();
}
 
Example 6
/**
 * Fetches the given URL on a managed thread, cleans the returned HTML with
 * the basic whitelist extended to allow {@code img} tags with a {@code src}
 * attribute, and shows the result in the browse component. Any exception is
 * logged and shown as an error text instead.
 *
 * @param url the address to load
 */
@Override
public void loadURL(String url) {
	logger.debug("loading " + url);

	ThreadManager.getInstance().executeThread(()->{
		try {
			// Basic preset plus images (src attribute only).
			Whitelist allowed = Whitelist.basic();
			allowed.addTags("img");
			allowed.addAttributes("img", "src");

			String rawHtml = RequestBuilder.build().clean().url(url).method(METHOD.GET).setClient(client).toHtml().html();
			browse.setText(Jsoup.clean(rawHtml, allowed));
		}
		catch(Exception e)
		{
			logger.error(e);
			browse.setText("Error " + e);
		}
	}, "loading " + url);
}
 
Example 7
Source Project: flow   Source File: RouteNotFoundError.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Renders the "route not found" page: fills the error template with the
 * requested path and an optional reason (both stripped of all HTML), adds
 * the route listing when the template asks for it, and appends the result
 * to this component's element.
 *
 * @param event     the navigation event that led to the error
 * @param parameter the error details, possibly carrying a custom message
 * @return {@link HttpServletResponse#SC_NOT_FOUND}
 */
@Override
public int setErrorParameter(BeforeEnterEvent event,
        ErrorParameter<NotFoundException> parameter) {
    String rawPath = event.getLocation().getPath();
    String rawInfo = parameter.hasCustomMessage()
            ? "Reason: " + parameter.getCustomMessage()
            : "";

    // Strip every tag before the values are placed into the HTML template.
    String safePath = Jsoup.clean(rawPath, Whitelist.none());
    String safeInfo = Jsoup.clean(rawInfo, Whitelist.none());

    boolean productionMode = event.getUI().getSession().getConfiguration()
            .isProductionMode();

    String html = getErrorHtml(productionMode)
            .replace("{{path}}", safePath)
            .replace("{{additionalInfo}}", safeInfo);
    if (html.contains("{{routes}}")) {
        html = html.replace("{{routes}}", getRoutes(event));
    }

    getElement().appendChild(new Html(html).getElement());
    return HttpServletResponse.SC_NOT_FOUND;
}
 
Example 8
/**
 * Updates the title and content of a topic owned by the current API user.
 * The title is cleaned so that only {@code video} tags survive; the content
 * is stored as received and sensitive words are masked only in the returned
 * object.
 *
 * @param id   the topic id from the request path
 * @param body request body; the "title" and "content" entries are read
 * @return a success result wrapping the updated topic
 */
@PutMapping(value = "/{id}")
public Result edit(@PathVariable Integer id, @RequestBody Map<String, String> body) {
    User currentUser = getApiUser();
    String newTitle = body.get("title");
    String newContent = body.get("content");
    ApiAssert.notEmpty(newTitle, "请输入标题");

    // Load the topic and make sure the caller owns it.
    Topic topic = topicService.selectById(id);
    boolean ownedByCaller = topic.getUserId().equals(currentUser.getId());
    ApiAssert.isTrue(ownedByCaller, "谁给你的权限修改别人的话题的?");

    Whitelist titleWhitelist = Whitelist.none().addTags("video");
    topic.setTitle(Jsoup.clean(newTitle, titleWhitelist));
    topic.setContent(newContent);
    topic.setModifyTime(new Date());
    topicService.update(topic, null);

    // Mask sensitive words after the update call, on the object being returned.
    String maskedContent = SensitiveWordUtil.replaceSensitiveWord(topic.getContent(), "*", SensitiveWordUtil.MinMatchType);
    topic.setContent(maskedContent);
    return success(topic);
}
 
Example 9
Source Project: pybbs   Source File: TopicService.java    License: GNU Affero General Public License v3.0 6 votes vote down vote up
/**
 * Persists the topic, replaces its tag associations when a non-empty tag
 * string is supplied, and re-indexes the topic.
 *
 * @param topic the topic to update
 * @param tags  raw tag string; when empty or null the tags are left untouched
 */
@Override
public void update(Topic topic, String tags) {
    topicMapper.updateById(topic);

    boolean hasTags = !StringUtils.isEmpty(tags);
    if (hasTags) {
        // Decrement the topic count of every tag previously attached.
        tagService.reduceTopicCount(topic.getId());
        // Store the new tags (stripped of all HTML) and link them to the topic.
        String plainTags = Jsoup.clean(tags, Whitelist.none());
        List<Tag> savedTags = tagService.insertTag(plainTags);
        topicTagService.insertTopicTag(topic.getId(), savedTags);
    }

    // Index the topic.
    String topicId = String.valueOf(topic.getId());
    indexedService.indexTopic(topicId, topic.getTitle(), topic.getContent());
}
 
Example 10
Source Project: cia   Source File: WordCounterImpl.java    License: Apache License 2.0 6 votes vote down vote up
/**
 * Counts term frequencies in a document's content after cleaning its HTML
 * with the basic whitelist.
 *
 * @param documentContentData source of the HTML content
 * @param maxResult           not used by this implementation
 * @return a map from each corpus term to its frequency
 */
public Map<String, Integer> calculateWordCount(final DocumentContentData documentContentData, final int maxResult) {
	final String html = documentContentData.getContent();

	final SimpleCorpus corpus = new SimpleCorpus(SimpleSentenceSplitter.getInstance(), new SimpleTokenizer(),
			new SwedishStopWords(), EnglishPunctuations.getInstance());

	final String cleanedHtml = Jsoup.clean(html, Whitelist.basic());
	corpus.add(new Text(cleanedHtml));

	final Map<String, Integer> frequencies = new HashMap<>();
	for (final Iterator<String> it = corpus.getTerms(); it.hasNext();) {
		final String term = it.next();
		frequencies.put(term, corpus.getTermFrequency(term));
	}
	return frequencies;
}
 
Example 11
@Override
protected void onCreate(Bundle savedInstanceState) {
    super.onCreate(savedInstanceState);
    setContentView(R.layout.activity_sanitize);

    final EditText inputText = (EditText) findViewById(R.id.input_text);
    inputText
            .setText("<p><a href='http://example.com/' onclick='doAttack()'>Link</a></p>");
    final EditText sanitizedText = (EditText) findViewById(R.id.sanitized_text);
    findViewById(R.id.sanitize_button).setOnClickListener(
            new OnClickListener() {
                @Override
                public void onClick(View v) {
                    String sanitized = Jsoup.clean(inputText.getText()
                            .toString(), Whitelist.basic());
                    sanitizedText.setText(sanitized);
                }
            });
}
 
Example 12
Source Project: plumemo   Source File: PreviewTextUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Extracts plain text by removing every HTML tag and trimming the result.
 *
 * @param html the markup to strip; may be {@code null}
 * @return the trimmed text, or {@code null} when {@code html} is {@code null}
 */
public static String getText(String html) {
    return html == null
            ? null
            : Jsoup.clean(html, Whitelist.none()).trim();
}
 
Example 13
Source Project: plumemo   Source File: PreviewTextUtils.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cleans markup with the simple-text whitelist (per the original note:
 * b, em, i, strong, u and plain text pass through).
 *
 * @param html the markup to clean; may be {@code null}
 * @return the cleaned markup, or {@code null} when {@code html} is {@code null}
 */
public static String getSimpleHtml(String html) {
    return html == null
            ? null
            : Jsoup.clean(html, Whitelist.simpleText());
}
 
Example 14
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Strips all markup from the text except {@code <br>} tags.
 *
 * @param html the markup to filter
 * @return the filtered text, or an empty string when the input is blank
 */
public String filterTag_br(String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}
	// Start from the text-only preset and allow just <br>.
	Whitelist brOnly = Whitelist.none().addTags("br");
	return Jsoup.clean(html, brOnly);
}
 
Example 15
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Filters rich-text markup using the whitelist produced by
 * {@code filterParameter(null)}, resolving links against the request URL and
 * with pretty-printing disabled.
 *
 * @param request the current request, used to obtain the base URL
 * @param html    the rich-text markup to filter
 * @return the filtered markup, or an empty string when the input is blank
 */
public String filterTag(HttpServletRequest request,String html) {
	if (StringUtils.isBlank(html)) {
		return "";
	}
	Whitelist richTextWhitelist = this.filterParameter(null);
	String baseUri = Configuration.getUrl(request);
	// prettyPrint(false): do not re-format the output.
	OutputSettings rawOutput = new OutputSettings().prettyPrint(false);
	return Jsoup.clean(html, baseUri, richTextWhitelist, rawOutput);
}
 
Example 16
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes every tag and returns only the text.
 *
 * @param html the markup to strip
 * @return the text content, or an empty string when the input is blank
 */
public String filterText(String html) {
	// Original author's note: doc.text() turns tags such as <p> into spaces,
	// whereas Jsoup.clean by default turns them into line breaks.
	return StringUtils.isBlank(html)
			? ""
			: Jsoup.clean(html, Whitelist.none());
}
 
Example 17
Source Project: bbs   Source File: TextFilterManage.java    License: GNU Affero General Public License v3.0 5 votes vote down vote up
/**
 * Removes the hidden sections via {@code deleteHiddenTag}, then strips every
 * remaining tag and returns only the text.
 *
 * @param html the markup to process
 * @return the remaining text, or an empty string when the input, or what is
 *         left after removing hidden sections, is blank
 */
public String filterHideText(String html) {
	if (!StringUtils.isBlank(html)) {
		String withoutHidden = this.deleteHiddenTag(html);
		if (!StringUtils.isBlank(withoutHidden)) {
			// Keep text only; all other HTML is dropped.
			return Jsoup.clean(withoutHidden, Whitelist.none());
		}
	}
	return "";
}
 
Example 18
Source Project: inception   Source File: Utilities.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Cleans a highlight fragment so that only {@code em} tags remain, then
 * renames those tags to {@code mark}.
 *
 * @param aHighlight the highlight fragment to clean
 * @return the inner HTML of the cleaned document body
 */
public static String cleanHighlight(String aHighlight) {
    Whitelist emphasisOnly = new Whitelist().addTags("em");
    Document parsed = Jsoup.parseBodyFragment(aHighlight, "");
    Document cleaned = new Cleaner(emphasisOnly).clean(parsed);

    // Present the surviving emphasis as <mark>.
    cleaned.select("em").tagName("mark");

    return cleaned.body().html();
}
 
Example 19
Source Project: FlyCms   Source File: Stringcut.java    License: MIT License 5 votes vote down vote up
/**
 * Template directive: strips all HTML from the "content" parameter,
 * abbreviates it to "num" characters, exposes it as the
 * {@code info_content} variable and renders the directive body.
 *
 * @param env      the template environment
 * @param params   directive parameters; "content" and "num" are read
 * @param loopVars unused
 * @param body     the directive body to render
 * @throws TemplateException propagated from the template engine calls
 * @throws IOException       propagated from rendering
 */
@SuppressWarnings("rawtypes")
public void execute(Environment env, Map params, TemplateModel[] loopVars,
		TemplateDirectiveBody body) throws TemplateException, IOException {
	DefaultObjectWrapperBuilder wrapperBuilder = new DefaultObjectWrapperBuilder(Configuration.VERSION_2_3_28);
	// Read the directive parameters.
	String rawContent = params.get("content").toString();
	int maxWidth = Integer.parseInt(params.get("num").toString());

	String plainText = Jsoup.clean(rawContent, Whitelist.none());
	String shortened = StringUtils.abbreviate(plainText, maxWidth);

	env.setVariable("info_content", wrapperBuilder.build().wrap(shortened));
	body.render(env.getOut());
}
 
Example 20
/**
 * Cleans HTML with the relaxed whitelist, extended with document-level tags,
 * a set of attributes allowed on every tag, and extra protocols for image
 * sources. Relative links are preserved.
 *
 * @param unsafeHtmlContent the untrusted markup
 * @return the cleaned markup
 */
public static String sanitize(String unsafeHtmlContent) {
	Whitelist allowed = Whitelist.relaxed()
			.preserveRelativeLinks(true)
			.addTags("head", "html", "style", "body", "fieldsMap", "area")
			.addAttributes(":all", "name", "class", "style", "id", "src", "type", "cellpadding",
					"cellspacing", "alt", "title", "shape", "coords", "width", "height", "dir")
			.addProtocols("img", "src", "http", "https", "data", "cid");

	return Jsoup.clean(unsafeHtmlContent, allowed);
}
 
Example 21
/**
 * Cleans HTML with the simple-text whitelist (relative links not preserved,
 * no pretty-printing or indentation) and then unescapes HTML entities in the
 * result.
 *
 * @param unsafeHtmlContent the untrusted markup
 * @return the cleaned, entity-unescaped text
 */
public static String sanitizeSimpleText(String unsafeHtmlContent) {
	OutputSettings compactOutput = new OutputSettings().indentAmount(0).prettyPrint(false);
	Whitelist simpleText = Whitelist.simpleText().preserveRelativeLinks(false);
	String cleaned = Jsoup.clean(unsafeHtmlContent, "", simpleText, compactOutput);
	return StringEscapeUtils.unescapeHtml(cleaned);
}
 
Example 22
Source Project: RuoYi   Source File: XssHttpServletRequestWrapper.java    License: Apache License 2.0 5 votes vote down vote up
/**
 * Returns the named parameter's values with each one cleaned by jsoup's
 * relaxed whitelist and trimmed.
 *
 * @param name the request parameter name
 * @return the cleaned values, or the wrapped request's own result when it
 *         has no values for the parameter
 */
@Override
public String[] getParameterValues(String name) {
    String[] incoming = super.getParameterValues(name);
    if (incoming == null) {
        return super.getParameterValues(name);
    }

    String[] cleaned = new String[incoming.length];
    int slot = 0;
    for (String value : incoming) {
        // Prevent XSS and drop leading/trailing spaces.
        cleaned[slot] = Jsoup.clean(value, Whitelist.relaxed()).trim();
        slot++;
    }
    return cleaned;
}
 
Example 23
Source Project: ripme   Source File: FuraffinityRipper.java    License: MIT License 5 votes vote down vote up
/**
 * Downloads an image page and extracts its description as plain text.
 *
 * The result is the page's {@code og:title} meta content on the first line,
 * followed by the description with all HTML removed.
 *
 * @param page URL of the image page to fetch
 * @return title and description separated by a newline, or {@code null}
 *         when fetching fails or no description cell is found
 */
public String getDescription(String page) {
    try {
        // Fetch the image page
        Response resp = Http.url(page)
                .referrer(this.url)
                .response();
        // Remember the cookies returned with this response.
        cookies.putAll(resp.cookies());

        // Try to find the description
        Elements els = resp.parse().select("td[class=alt1][width=\"70%\"]");
        if (els.isEmpty()) {
            LOGGER.debug("No description at " + page);
            // Handled by the catch below, which logs and returns null.
            throw new IOException("No description found");
        }
        LOGGER.debug("Description found!");
        // NOTE(review): the response is parsed a second time here instead of reusing the first parse - confirm this is intended.
        Document documentz = resp.parse();
        Element ele = documentz.select("td[class=alt1][width=\"70%\"]").get(0); // This is where the description is.
        // Would break completely if FurAffinity changed site layout.
        documentz.outputSettings(new Document.OutputSettings().prettyPrint(false));
        // Insert literal backslash-n markers after <br> and before <p>; they are
        // turned into real line separators below, before the tags are stripped.
        ele.select("br").append("\\n");
        ele.select("p").prepend("\\n\\n");
        LOGGER.debug("Returning description at " + page);
        // The regex matches a literal backslash followed by 'n' (the markers added above).
        String tempPage = Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false));
        return documentz.select("meta[property=og:title]").attr("content") + "\n" + tempPage; // Overridden saveText takes first line and makes it the file name.
    } catch (IOException ioe) {
        LOGGER.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'");
        return null;
    }
}
 
Example 24
Source Project: FlareBot   Source File: GeneralUtils.java    License: MIT License 5 votes vote down vote up
/**
 * Resolves an {@link AudioItem} from a string.
 * This can be a url or search terms
 *
 * Resolution is attempted up to three times; after each failed attempt the
 * thread sleeps for the current backoff (in milliseconds), which then doubles.
 * If the thread is interrupted while sleeping, the interrupt flag is restored
 * and no further attempts are made.
 *
 * @param player The music player
 * @param input  The string to get the AudioItem from.
 * @return {@link AudioItem} from the string.
 * @throws IllegalArgumentException If the Item couldn't be found due to it not existing on Youtube.
 * @throws IllegalStateException    If the Video is unavailable for Flare, for example if it was published by VEVO.
 */
public static AudioItem resolveItem(Player player, String input) throws IllegalArgumentException, IllegalStateException {
    Optional<AudioItem> item = Optional.empty();
    boolean failed = false;
    int backoff = 2;
    Throwable cause = null;
    for (int i = 0; i <= 2; i++) {
        try {
            item = Optional.ofNullable(player.resolve(input));
            failed = false;
            break;
        } catch (FriendlyException | InterruptedException | ExecutionException e) {
            failed = true;
            cause = e;
            // The message may be null; guard before inspecting it.
            String message = e.getMessage();
            if (message != null && message.contains("Vevo")) {
                throw new IllegalStateException(Jsoup.clean(message, Whitelist.none()), cause);
            }
            FlareBot.LOGGER.error(Markers.NO_ANNOUNCE, "Cannot get video '" + input + "'");
            try {
                Thread.sleep(backoff);
            } catch (InterruptedException interrupted) {
                // Preserve the interrupt for callers and stop retrying.
                Thread.currentThread().interrupt();
                break;
            }
            // Grow the delay. The previous "backoff ^= 2" was a bitwise XOR,
            // which toggled the delay between 2 and 0 instead of increasing it.
            backoff *= 2;
        }
    }
    if (failed) {
        // Fall back to the exception's string form when it carries no message.
        String message = cause.getMessage();
        String description = message != null ? message : cause.toString();
        throw new IllegalStateException(Jsoup.clean(description, Whitelist.none()), cause);
    }
    return item.orElseThrow(IllegalArgumentException::new);
}
 
Example 25
/**
 * Extracts the left-to-right content div from the parsed e-mail body,
 * removes trailing {@code <br>} children, and cleans the remaining markup
 * with the basic whitelist.
 *
 * @param content the raw HTML of the e-mail
 * @return the cleaned content, or an empty string when no matching div exists
 */
@Override
protected String formatHtml(String content) {
    Element contentDiv = Jsoup.parse(content).body().select("div[dir='ltr']").first();
    if (contentDiv == null) {
        log.warn("Found no valid content in e-mail from Gmail, returning empty");
        return "";
    }

    // Drop trailing line breaks one at a time until the last child is something else.
    while (!contentDiv.children().isEmpty() && contentDiv.children().last().is("br")) {
        Element trailingBreak = contentDiv.children().last();
        trailingBreak.remove();
    }

    String innerHtml = contentDiv.html();
    return Jsoup.clean(innerHtml, Whitelist.basic());
}
 
Example 26
/**
 * Removes quoted-reply markup ({@code blockquote[cite]} and
 * {@code div.moz-cite-prefix}) and trailing {@code <br>} children from the
 * parsed body, then cleans what is left with the basic whitelist.
 *
 * @param content the raw HTML of the e-mail
 * @return the cleaned body markup
 */
@Override
protected String formatHtml(String content) {
    Element root = Jsoup.parse(content).body();

    // Strip the quoted original message and its prefix line.
    root.select("blockquote[cite]").remove();
    root.select("div.moz-cite-prefix").remove();

    // Trim trailing line breaks.
    while (!root.children().isEmpty() && root.children().last().is("br")) {
        Element trailingBreak = root.children().last();
        trailingBreak.remove();
    }

    String remaining = root.html();
    return Jsoup.clean(remaining, Whitelist.basic());
}
 
Example 27
Source Project: scava   Source File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Applies an optional tag removal and an optional tag addition to the
 * class-level whitelist and returns it.
 *
 * NOTE(review): the result is the same object as the shared {@code whitelist}
 * field, not a copy, so changes made here persist across calls - confirm
 * this is intended.
 *
 * @param extraTagToRemoveWhitelist tag to remove; ignored when null or empty
 * @param extraTagToAddWhiteList    tag to add; ignored when null or empty
 * @return the (shared) whitelist after the adjustments
 */
private static Whitelist whitelist(String extraTagToRemoveWhitelist, String extraTagToAddWhiteList)
{
	boolean hasTagToRemove = extraTagToRemoveWhitelist != null && !extraTagToRemoveWhitelist.isEmpty();
	boolean hasTagToAdd = extraTagToAddWhiteList != null && !extraTagToAddWhiteList.isEmpty();

	Whitelist adjusted = whitelist;
	if (hasTagToRemove) {
		adjusted.removeTags(extraTagToRemoveWhitelist);
	}
	if (hasTagToAdd) {
		adjusted.addTags(extraTagToAddWhiteList);
	}
	return adjusted;
}
 
Example 28
Source Project: scava   Source File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Applies optional tag removals and additions to the class-level whitelist
 * and returns it.
 *
 * The previous version ended with an unconditional second
 * {@code removeTags(extraTagsToRemoveWhitelist)} call. That bypassed the
 * null/empty guard and removed again any tag that had just been added when
 * it appeared in both arrays. The adjusted whitelist is now returned
 * directly, matching the single-tag overload.
 *
 * NOTE(review): the result is the same object as the shared {@code whitelist}
 * field, not a copy, so changes made here persist across calls - confirm
 * this is intended.
 *
 * @param extraTagsToRemoveWhitelist tags to remove; ignored when null or empty
 * @param extraTagsToAddWhiteList    tags to add; ignored when null or empty
 * @return the (shared) whitelist after the adjustments
 */
private static Whitelist whitelist(String[] extraTagsToRemoveWhitelist, String[] extraTagsToAddWhiteList)
{
	Whitelist newWhiteList = whitelist;
	if(extraTagsToRemoveWhitelist!=null && extraTagsToRemoveWhitelist.length>0)
		newWhiteList.removeTags(extraTagsToRemoveWhitelist);
	if(extraTagsToAddWhiteList!=null && extraTagsToAddWhiteList.length>0)
		newWhiteList.addTags(extraTagsToAddWhiteList);
	return newWhiteList;
}
 
Example 29
Source Project: scava   Source File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup
 * and collects text from the body's child nodes via {@code readNodes}.
 *
 * @param input the raw HTML
 * @param wl    the whitelist used for cleaning
 * @return the list filled by {@code readNodes}
 */
private static List<String> parse(String input, Whitelist wl)
{
	Document cleanedDocument = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleanedDocument.outputSettings(outputSettings);

	List<String> collected = new ArrayList<String>();
	readNodes(cleanedDocument.body().childNodes(), collected);
	return collected;
}
 
Example 30
Source Project: scava   Source File: HtmlParser.java    License: Eclipse Public License 2.0 5 votes vote down vote up
/**
 * Cleans the input with the given whitelist, re-parses the cleaned markup
 * and collects entries from the body's child nodes via
 * {@code readNodesWithTags}, starting with "body" as the tag argument.
 *
 * @param input the raw HTML
 * @param wl    the whitelist used for cleaning
 * @return the list filled by {@code readNodesWithTags}
 */
private static List<Map.Entry<String,String>> parseWithTags(String input, Whitelist wl)
{
	Document cleanedDocument = Jsoup.parse(Jsoup.clean(input, "", wl, outputSettings));
	cleanedDocument.outputSettings(outputSettings);

	List<Map.Entry<String,String>> collected = new ArrayList<Map.Entry<String,String>>();
	readNodesWithTags(cleanedDocument.body().childNodes(), collected, "body");
	return collected;
}