Python bleach.clean() Examples
The following are 30 code examples of bleach.clean().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module bleach, or try the search function.
Example #1
Source File: fetch.py From fb-feed-gen with GNU General Public License v2.0 | 7 votes |
def strip_invalid_html(content):
    """Sanitize ``content`` and return it as a normalized HTML document string.

    ``content`` is first cleaned with ``bleach.clean`` using the tag and
    attribute allow-lists below (``strip=True``). The cleaned markup is then
    parsed with BeautifulSoup's ``lxml`` parser and the ``<html>`` element is
    returned as a string.
    """
    permitted_attrs = {
        'a': ['href', 'target', 'title'],
        'img': ['src', 'alt', 'width', 'height'],
    }
    permitted_tags = [
        'a', 'abbr', 'acronym', 'address', 'b', 'br', 'div', 'dl', 'dt',
        'em', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img',
        'li', 'ol', 'p', 'pre', 'q', 's', 'small', 'strike', 'strong',
        'span', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th',
        'thead', 'tr', 'tt', 'u', 'ul',
    ]
    sanitized = bleach.clean(
        content,
        tags=permitted_tags,
        attributes=permitted_attrs,
        strip=True,
    )

    # Re-parse the bleached markup to handle HTML that is still malformed.
    soup = BeautifulSoup(sanitized, "lxml")
    return str(soup.html)
Example #2
Source File: markdown.py From karrot-backend with GNU Affero General Public License v3.0 | 6 votes |
def render(text, truncate_words=None):
    """Render markdown ``text`` to sanitized HTML.

    Args:
        text: Markdown source to render.
        truncate_words: If truthy, the sanitized HTML is truncated to this
            many words via ``Truncator(...).words(num=..., html=True)``.

    Returns:
        The sanitized (and possibly truncated) HTML.
    """
    html = markdown.markdown(
        text,
        extensions=[
            EmojiExtension(emoji_index=twemoji),
            SuperFencesCodeExtension(),
            MagiclinkExtension(),
            DeleteSubExtension(subscript=False),
            Nl2BrExtension(),
        ],
    )

    # Build per-call allow-lists instead of appending to the shared
    # ``markdown_tags`` / ``markdown_attrs`` objects: mutating them in place
    # made them grow by one entry on every call.
    allowed_tags = list(markdown_tags) + ['pre']
    allowed_attrs = dict(markdown_attrs)
    allowed_attrs['img'] = list(markdown_attrs['img']) + ['class']

    clean_html = bleach.clean(html, allowed_tags, allowed_attrs)
    if truncate_words:
        clean_html = Truncator(clean_html).words(num=truncate_words, html=True)
    return clean_html
Example #3
Source File: helpers.py From daf-recipes with GNU General Public License v3.0 | 6 votes |
def render_markdown(data, auto_link=True, allow_html=False):
    '''Render ``data`` as markdown and return it wrapped in ``literal``.

    :param data: markdown source; any falsy value yields ``''``
    :param auto_link: when true, the rendered output is passed through
        ``html_auto_link`` (ckan specific links, e.g. ``group:xxx``, or tags
        written as ``tag:...`` / ``tag:"...."``)
    :type auto_link: bool
    :param allow_html: when true, the markdown output is returned without
        being cleaned, which is dangerous if users have added malicious
        content. When false, matches of ``RE_MD_HTML_TAGS`` are removed
        before rendering and the result is passed through ``clean_html``.
    :type allow_html: bool
    '''
    if not data:
        return ''

    stripped = data.strip()
    if allow_html:
        rendered = markdown(stripped)
    else:
        without_tags = RE_MD_HTML_TAGS.sub('', stripped)
        rendered = clean_html(
            markdown(without_tags),
            strip=True,
            tags=MARKDOWN_TAGS,
            attributes=MARKDOWN_ATTRIBUTES,
        )

    if auto_link:
        rendered = html_auto_link(rendered)
    return literal(rendered)
Example #4
Source File: autocomplete.py From iguana with Creative Commons Attribution Share Alike 4.0 International | 6 votes |
def get_result_label_html(self, result):
    """Build the HTML label for ``result``.

    The ticket identifier and title are passed through ``bleach.clean``
    before being interpolated. Results whose kanban column type is "Done"
    are wrapped in ``<del>``, and archived results are wrapped in a muted
    ``<em>``; each wrapper appends a bracketed annotation.
    """
    identifier = bleach.clean(result.get_ticket_identifier())
    title = bleach.clean(result.title)
    label = '<strong>%s </strong>%s' % (identifier, title)

    # mark 'Done' issues
    if result.kanbancol.type == "Done":
        label = '<del>%s</del><small style="padding-left: 1em">[%s]</small>' % (
            label,
            result.kanbancol.get_type_display(),
        )

    # mark archived issues
    if result.archived:
        label = '<em class="text-muted">%s</em><small style="padding-left: 1em">[%s]</small>' % (
            label,
            _("Archived"),
        )

    return label
Example #5
Source File: _markdown.py From webviz-config with MIT License | 6 votes |
def __init__(self, markdown_file: Path):
    """Render ``markdown_file`` to sanitized HTML and store it on ``self.html``.

    The file located via ``get_path`` is read as text, converted with
    ``markdown.markdown`` and cleaned with ``bleach.clean`` using the
    ``Markdown.ALLOWED_*`` allow-lists.
    """
    super().__init__()

    self.markdown_file = markdown_file

    rendered = markdown.markdown(
        get_path(self.markdown_file).read_text(),
        extensions=[
            "tables",
            "sane_lists",
            _WebvizMarkdownExtension(base_path=markdown_file.parent),
        ],
    )
    self.html = bleach.clean(
        rendered,
        tags=Markdown.ALLOWED_TAGS,
        attributes=Markdown.ALLOWED_ATTRIBUTES,
        styles=Markdown.ALLOWED_STYLES,
    )

    # Workaround for upstream issue
    # https://github.com/plotly/dash-core-components/issues/746:
    # void html tags are converted from <tag> to <tag/>.
    self.html = re.sub("<img (.*?[^/])>", r"<img \1/>", self.html)
    for void_tag, self_closed in (("<br>", "<br/>"), ("<hr>", "<hr/>")):
        self.html = self.html.replace(void_tag, self_closed)
Example #6
Source File: serializers.py From ecommerce with GNU Affero General Public License v3.0 | 6 votes |
def create(self, validated_data):
    """Create an email template and deactivate older ones of the same type.

    The greeting and closing are passed through ``bleach.clean`` before
    being stored. After the new row is created, every other template for the
    same enterprise customer and email type is updated to ``active=False``.

    Returns the newly created ``OfferAssignmentEmailTemplates`` instance.
    """
    enterprise_customer = self.context['view'].kwargs.get('enterprise_customer')
    email_type = validated_data['email_type']

    create_data = {
        'enterprise_customer': enterprise_customer,
        'email_type': email_type,
        'email_greeting': bleach.clean(validated_data.get('email_greeting', '')),
        'email_closing': bleach.clean(validated_data.get('email_closing', '')),
    }
    if 'name' in validated_data:
        create_data['name'] = validated_data.get('name')

    instance = OfferAssignmentEmailTemplates.objects.create(**create_data)

    # deactivate old templates for enterprise for this specific email type
    superseded = OfferAssignmentEmailTemplates.objects.filter(
        enterprise_customer=enterprise_customer,
        email_type=email_type,
    ).exclude(pk=instance.pk)
    superseded.update(active=False)

    return instance
Example #7
Source File: utils.py From OpenBazaar-Server with MIT License | 6 votes |
try:
    _STRING_TYPES = basestring  # noqa: F821 - Python 2: ``str`` and ``unicode``
except NameError:
    _STRING_TYPES = str  # Python 3 has no ``basestring``


def sanitize_html(value):
    """
    Recursively sanitize all strings within a data structure.

    Dicts and lists are rebuilt with every contained value sanitized (dict
    keys are left untouched). Strings are passed through ``bleach.clean``
    with the module's ``ALLOWED_*`` allow-lists. Any other value is returned
    unchanged.
    """
    if isinstance(value, dict):
        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        return {key: sanitize_html(item) for key, item in value.items()}
    if isinstance(value, list):
        return [sanitize_html(item) for item in value]
    if isinstance(value, _STRING_TYPES):
        return bleach.clean(
            value,
            tags=ALLOWED_TAGS,
            attributes=ALLOWED_ATTRIBUTES,
            styles=ALLOWED_STYLES,
        )
    return value
Example #8
Source File: text_plugin.py From lambda-packs with MIT License | 6 votes |
def markdown_and_sanitize(markdown_string):
    """Convert a markdown string into sanitized HTML.

    The markdown ``tables`` extension is enabled; it is not part of standard
    markdown but is useful for TensorBoard users. Sanitizing uses the
    module-level ``ALLOWED_TAGS`` and ``ALLOWED_ATTRIBUTES``.

    Args:
      markdown_string: Markdown to convert. Binary input is decoded as
        UTF-8 first.

    Returns:
      A string containing the sanitized HTML for the input markdown.
    """
    text = markdown_string
    if isinstance(text, six.binary_type):
        # Binary input is always decoded as utf-8 before rendering.
        text = text.decode('utf-8')

    rendered = markdown.markdown(text, extensions=['markdown.extensions.tables'])
    return bleach.clean(rendered, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
Example #9
Source File: utils.py From ecommerce with GNU Affero General Public License v3.0 | 6 votes |
def format_email(template, placeholder_dict, greeting, closing):
    """
    Fill in the email template and wrap it in a sanitized greeting and closing.

    Arguments:
        template (String): Email template body
        placeholder_dict (SafeDict): Safe dictionary of placeholders and their values
        greeting (String): Email greeting (prefix); ``None`` is treated as ``''``
        closing (String): Email closing (suffix); ``None`` is treated as ``''``

    The greeting and closing are passed through ``bleach.clean``. Placeholders
    in the template without matching tokens are handled safely (the
    placeholders are just emitted).
    Reference: https://stackoverflow.com/questions/17215400/python-format-string-unused-named-arguments
    """
    safe_greeting = bleach.clean('' if greeting is None else greeting)
    safe_closing = bleach.clean('' if closing is None else closing)

    formatter = string.Formatter()
    email_body = formatter.vformat(template, SafeTuple(), placeholder_dict)
    return safe_greeting + email_body + safe_closing
Example #10
Source File: utils.py From ideascube with GNU Affero General Public License v3.0 | 6 votes |
def clean_html(html, with_media=False):
    """Clean ``html`` with bleach (``strip=True``) using a fixed allow-list.

    When ``with_media`` is true, the media tags ``img``, ``iframe``,
    ``video``, ``audio`` and ``source`` are allowed as well.
    """
    authorized_attributes = {
        'a': ['href', 'title'],
        'img': ['src', 'width', 'height', 'alt'],
        'iframe': ['src', 'width', 'height', 'allowfullscreen'],
        'video': [
            'controls', 'width', 'height', 'allowfullscreen', 'preload',
            'poster',
        ],
        'audio': ['controls', 'preload'],
        'source': ['src'],
    }
    authorized_tags = [
        'p', 'a', 'ul', 'ol', 'li', 'blockquote',
        'h1', 'h2', 'h3', 'h4', 'h5',
        'strong', 'em', 'br',
    ]
    if with_media:
        authorized_tags.extend(['img', 'iframe', 'video', 'audio', 'source'])

    return bleach.clean(html, authorized_tags, authorized_attributes, strip=True)
Example #11
Source File: test_bleach_field.py From django-localized-fields with MIT License | 6 votes |
def _validate(non_bleached_value, bleached_value):
    """Assert that ``bleached_value`` is the bleached form of ``non_bleached_value``.

    Arguments:
        non_bleached_value:
            The value before bleaching.

        bleached_value:
            The value after bleaching.

    For each language in ``settings.LANGUAGES``: a falsy source value must
    have a falsy bleached value; otherwise the bleached value must equal the
    result of ``bleach.clean`` on the source value.
    """
    for code, _name in settings.LANGUAGES:
        raw = non_bleached_value.get(code)
        if raw:
            # NOTE(review): the options are passed as the second positional
            # argument of ``bleach.clean`` rather than unpacked -- confirm
            # this matches how the field itself calls bleach.
            expected_value = bleach.clean(raw, get_bleach_default_options())
            assert bleached_value.get(code) == expected_value
        else:
            assert not bleached_value.get(code)
Example #12
Source File: blocks.py From hypha with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_searchable_content(self, value, data):
    """Return ``data`` cleaned by bleach with no tags allowed and ``strip=True``.

    Falsy ``data`` is treated as the empty string.
    """
    text = data or ''
    return bleach.clean(text, tags=[], strip=True)
Example #13
Source File: test_enterprise.py From ecommerce with GNU Affero General Public License v3.0 | 5 votes |
def test_post_with_unsafe_data(self, email_type):
    """
    Verify that the view stores bleached values when POSTed unsafe data.
    """
    unsafe_greeting = '<script>document.getElementById("greeting").innerHTML = "GREETING!";</script>'
    unsafe_closing = '<script>document.getElementById("closing").innerHTML = "CLOSING!";</script>'

    template = self.create_template_data(email_type, 'E Learning', unsafe_greeting, unsafe_closing)

    # The stored values must match what bleach.clean produces for the input.
    assert template['email_greeting'] == bleach.clean(unsafe_greeting)
    assert template['email_closing'] == bleach.clean(unsafe_closing)
Example #14
Source File: utils.py From edx-enterprise with GNU Affero General Public License v3.0 | 5 votes |
def strip_html_tags(text, allowed_tags=None):
    """
    Strip every tag from ``text`` except those listed in ``allowed_tags``.

    Args:
        text (str): string to strip html tags from; ``None`` is returned as-is
        allowed_tags (list): allowed list of html tags; defaults to the
            module-level ``ALLOWED_TAGS`` when ``None``

    Returns: a string without html tags (or ``None`` when ``text`` is ``None``)
    """
    if text is None:
        return None

    tags = ALLOWED_TAGS if allowed_tags is None else allowed_tags
    kept_attributes = ['id', 'class', 'style', 'href', 'title']
    return bleach.clean(text, tags=tags, attributes=kept_attributes, strip=True)
Example #15
Source File: __init__.py From diffengine with MIT License | 5 votes |
def _fingerprint(s):
    """Return a pseudo fingerprint of the text ``s`` for use during comparison.

    The string is normalized with ``_normal``, bleached with no tags allowed,
    stripped of all whitespace, and finally translated through the
    ``punctuation`` table.
    """
    bleached = bleach.clean(_normal(s), tags=[], strip=True)
    without_whitespace = re.sub(r"\s+", "", bleached, flags=re.MULTILINE)
    return without_whitespace.translate(punctuation)
Example #16
Source File: blocks.py From hypha with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_searchable_content(self, value, data):
    """Return ``data`` cleaned by bleach with no tags allowed and ``strip=True``.

    Falsy ``data`` is treated as the empty string.
    """
    text = data or ''
    return bleach.clean(text, tags=[], strip=True)
Example #17
Source File: utils.py From PonyConf with Apache License 2.0 | 5 votes |
def markdown_to_html(md):
    """Render the markdown string ``md`` to sanitized HTML marked as safe.

    On top of bleach's default allowed tags, ``p``, ``pre``, ``span`` and
    ``h1``-``h6`` are permitted. The cleaned result is wrapped with
    ``mark_safe``.
    """
    html = markdown(md)
    # ``list(...)`` keeps this working whether ``bleach.ALLOWED_TAGS`` is a
    # list or a (frozen)set; ``set + list`` would raise TypeError.
    allowed_tags = (
        list(bleach.ALLOWED_TAGS)
        + ['p', 'pre', 'span']
        + ['h%d' % level for level in range(1, 7)]
    )
    html = bleach.clean(html, tags=allowed_tags)
    return mark_safe(html)
Example #18
Source File: unfurl_message.py From gitlab-unfurly with MIT License | 5 votes |
def strip_html_tags(value):
    """Return ``value`` cleaned by bleach with no tags allowed and ``strip=True``."""
    no_tags_allowed = []
    return bleach.clean(value, tags=no_tags_allowed, strip=True)
Example #19
Source File: blocks.py From hypha with BSD 3-Clause "New" or "Revised" License | 5 votes |
def get_searchable_content(self, value, data):
    """Return ``str(data)`` cleaned by bleach with no tags allowed.

    CharField acts as a fallback, so ``data`` is forced to a string first;
    an empty result is passed to bleach as ``''``.
    """
    text = str(data) or ''
    return bleach.clean(text, tags=[], strip=True)
Example #20
Source File: jinja.py From maple-bbs with GNU General Public License v3.0 | 5 votes |
def markdown(text, clean=True):
    """Render ``text`` with a fenced-code-enabled ``Markdown`` and wrap it in ``Markup``.

    When ``clean`` is true (the default), the rendered output is passed
    through ``safe_clean`` before being wrapped.
    """
    render = Markdown(HtmlRenderer(), extensions=('fenced-code', ))
    if not clean:
        return Markup(render(text))
    return Markup(safe_clean(render(text)))
Example #21
Source File: jinja.py From maple-bbs with GNU General Public License v3.0 | 5 votes |
def safe_clean(text):
    """Clean ``text`` against a small allow-list and wrap the result in ``Markup``.

    Allowed: a handful of formatting tags; ``style``/``id``/``class`` on any
    tag, ``color`` on ``font`` and ``href`` on ``a``; and the ``color`` style.
    """
    allowed_tags = ['b', 'i', 'font', 'br', 'blockquote', 'div', 'h2', 'a', 'p']
    allowed_attributes = {
        '*': ['style', 'id', 'class'],
        'font': ['color'],
        'a': ['href'],
    }
    allowed_styles = ['color']
    cleaned = clean(
        text,
        tags=allowed_tags,
        attributes=allowed_attributes,
        styles=allowed_styles,
    )
    return Markup(cleaned)
Example #22
Source File: Post.py From maniwani with MIT License | 5 votes |
def render_post_collection(posts, context, extensions):
    """Replace each post's ``"body"`` with its rendered form, in place.

    A render cached under ``post_render_cache_key(context, post["id"])`` is
    reused when it is truthy. Otherwise the body is rendered with
    ``markdown`` (using ``extensions``), cleaned, stored in the cache and
    then assigned to the post.
    """
    render_cache = cache.Cache()
    for post in posts:
        key = post_render_cache_key(context, post["id"])
        body = render_cache.get(key)
        if not body:
            body = clean(
                markdown(post["body"], extensions=extensions),
                ALLOWED_TAGS,
                ALLOWED_ATTRIBUTES,
            )
            render_cache.set(key, body)
        post["body"] = body
Example #23
Source File: transforms.py From adhocracy4 with GNU Affero General Public License v3.0 | 5 votes |
def clean_html_field(text, setting='default'):
    """Clean ``text`` with the allow-lists from ``settings.BLEACH_LIST[setting]``.

    The ``tags`` and ``attributes`` entries are required; ``styles`` falls
    back to an empty list when absent. Cleaning uses ``strip=True``.
    """
    profile = settings.BLEACH_LIST[setting]
    return bleach.clean(
        text,
        tags=profile['tags'],
        attributes=profile['attributes'],
        styles=profile.get('styles', []),
        strip=True,
    )
Example #24
Source File: transforms.py From adhocracy4 with GNU Affero General Public License v3.0 | 5 votes |
def clean_html_all(text):
    """Clean ``text`` with empty tag, attribute and style allow-lists (``strip=True``)."""
    nothing_allowed = {'tags': [], 'attributes': {}, 'styles': []}
    return bleach.clean(text, strip=True, **nothing_allowed)
Example #25
Source File: app.py From activitypub with Mozilla Public License 2.0 | 5 votes |
def clean(self, html):
    """Return ``html`` after passing it through the ``clean_html`` helper."""
    cleaned = clean_html(html)
    return cleaned
Example #26
Source File: app.py From activitypub with Mozilla Public License 2.0 | 5 votes |
def clean_html(html):
    """Return ``html`` cleaned by bleach, allowing only ``ALLOWED_TAGS``."""
    cleaned = bleach.clean(html, tags=ALLOWED_TAGS)
    return cleaned
Example #27
Source File: main.py From python-docs-samples with Apache License 2.0 | 5 votes |
def index():
    """Render the request body as markdown and return the sanitized HTML.

    The body is read as text, converted with ``markdown.markdown`` and
    cleaned with ``bleach.clean`` (``strip=True``), allowing bleach's default
    tags plus ``<p>``.
    """
    data = request.get_data(as_text=True)

    # Parses the markdown and outputs the formatted HTML
    html = markdown.markdown(data)

    # Keep the paragraph tags. Use a per-request allow-list instead of
    # appending to the global ``bleach.sanitizer.ALLOWED_TAGS``: the append
    # grew the shared list on every request and fails outright if the
    # default is an immutable set.
    allowed_tags = list(bleach.sanitizer.ALLOWED_TAGS) + ['p']

    # Sanitize and return
    return bleach.clean(html, tags=allowed_tags, strip=True)
Example #28
Source File: markdown.py From appstore with GNU Affero General Public License v3.0 | 5 votes |
def markdown(sentence):
    """Render ``sentence`` with ``default_markdown``, clean it and mark it safe.

    Cleaning uses the ``MARKDOWN_ALLOWED_ATTRIBUTES`` and
    ``MARKDOWN_ALLOWED_TAGS`` settings.
    """
    rendered = default_markdown(sentence)
    cleaned = clean(
        rendered,
        attributes=settings.MARKDOWN_ALLOWED_ATTRIBUTES,
        tags=settings.MARKDOWN_ALLOWED_TAGS,
    )
    return mark_safe(cleaned)  # nosec
Example #29
Source File: feeds.py From appstore with GNU Affero General Public License v3.0 | 5 votes |
def item_description(self, item):
    """Return the cleaned, markdown-rendered description for a feed item.

    The text is the app description, followed by a "Changes" section when
    the item has a changelog, followed by a download link. If building the
    text raises ``TranslationDoesNotExist``, only the app description is
    used before the download link is appended.
    """
    try:
        changelog = (
            '\n\n# %s\n\n%s' % (_('Changes'), item.changelog)
            if item.changelog
            else ''
        )
        content = '%s%s' % (item.app.description, changelog)
    except TranslationDoesNotExist:
        content = item.app.description

    download_link = '\n\n [%s](%s)' % (_('Download'), item.download)
    content = content + download_link

    rendered = markdown(content)
    return clean(
        rendered,
        attributes=settings.MARKDOWN_ALLOWED_ATTRIBUTES,
        tags=settings.MARKDOWN_ALLOWED_TAGS,
    )
Example #30
Source File: models.py From cookiecutter-django with MIT License | 5 votes |
def create_hub_user(email, password, profile_type=Choices.Profiles.USER, username=None, first_name=None,
                    last_name=None, **kwargs):
    """
    Create a user followed by its HubUser, atomically.

    :param email: str email
    :param password: str password
    :param profile_type: Type of user to be created. Defaults to User
    :param username: str username; generated with
        ``HubUser.generate_username`` when falsy
    :param first_name: First name of a user (``None`` is stored as ``''``)
    :param last_name: Last name of a user (``None`` is stored as ``''``)
    :param kwargs: extra fields forwarded to ``HubUser.objects.create``
        (e.g. ``incomplete_signup``)
    :return: the created HubUser, or ``None`` if an ``IntegrityError`` occurred
    """
    # The ``try`` wraps the atomic block (not the other way round) so that an
    # IntegrityError leaves the block as an exception and the whole
    # transaction is rolled back. Catching it inside ``atomic()`` could commit
    # a user that has no HubUser.
    try:
        with transaction.atomic():
            username = username or HubUser.generate_username(first_name, last_name, email)
            user = get_user_model().objects.create_user(
                username=bleach.clean(username),
                email=bleach.clean(email),
                password=password,
                is_staff=False,
                is_superuser=False,
                # ``bleach.clean`` rejects non-text input, and both names
                # default to None, so fall back to an empty string.
                first_name=bleach.clean(first_name or ''),
                last_name=bleach.clean(last_name or ''),
            )
            hub_user = HubUser.objects.create(
                user=user,
                profile_type=profile_type,
                **kwargs
            )
    except IntegrityError as e:
        logger.exception("Error creating the HubUser: %s", e)
        return None
    return hub_user