Python bleach.clean() Examples

The following are 30 code examples of bleach.clean(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module bleach, or try the search function.
Example #1
Source File: fetch.py    From fb-feed-gen with GNU General Public License v2.0 7 votes vote down vote up
def strip_invalid_html(content):
    ''' Sanitize ``content`` against a fixed tag/attribute whitelist.

    The markup is first run through ``bleach.clean`` with ``strip=True`` and
    then re-parsed with BeautifulSoup's ``lxml`` parser so that malformed
    markup is normalized.  The string form of the parsed tree's ``html``
    element is returned.
    '''
    whitelisted_tags = (
        'a abbr acronym address b br div dl dt '
        'em h1 h2 h3 h4 h5 h6 hr i img '
        'li ol p pre q s small strike strong '
        'span sub sup table tbody td tfoot th '
        'thead tr tt u ul'
    ).split()

    # Only these attributes survive, and only on the tags listed here.
    whitelisted_attrs = dict(
        a=['href', 'target', 'title'],
        img=['src', 'alt', 'width', 'height'],
    )

    sanitized = bleach.clean(
        content, tags=whitelisted_tags, attributes=whitelisted_attrs, strip=True)

    # Re-parse to repair whatever malformed html is left after bleach.
    soup = BeautifulSoup(sanitized, "lxml")
    return str(soup.html)
Example #2
Source File: markdown.py    From karrot-backend with GNU Affero General Public License v3.0 6 votes vote down vote up
def render(text, truncate_words=None):
    """Render markdown ``text`` to sanitized HTML.

    :param text: markdown source to render.
    :param truncate_words: when truthy, the sanitized HTML is shortened to
        this many words via ``Truncator(...).words(..., html=True)``.
    :return: the sanitized (and possibly truncated) HTML.
    """
    html = markdown.markdown(
        text,
        extensions=[
            EmojiExtension(emoji_index=twemoji),
            SuperFencesCodeExtension(),
            MagiclinkExtension(),
            DeleteSubExtension(subscript=False),
            Nl2BrExtension(),
        ]
    )

    # Build per-call whitelists rather than appending to the shared
    # ``markdown_tags`` / ``markdown_attrs`` objects: in-place appends made
    # those shared collections grow by one entry on every call.
    allowed_tags = list(markdown_tags) + ['pre']
    allowed_attrs = dict(markdown_attrs)
    allowed_attrs['img'] = list(markdown_attrs['img']) + ['class']

    clean_html = bleach.clean(html, tags=allowed_tags, attributes=allowed_attrs)

    if truncate_words:
        clean_html = Truncator(clean_html).words(num=truncate_words, html=True)

    return clean_html
Example #3
Source File: helpers.py    From daf-recipes with GNU General Public License v3.0 6 votes vote down vote up
def render_markdown(data, auto_link=True, allow_html=False):
    ''' Render ``data`` as markdown and wrap the result in ``literal``.

    :param auto_link: when true, the rendered output is passed through
        ``html_auto_link`` (links for ``tag:...`` / ``tag:"...."`` style
        references).
    :type auto_link: bool
    :param allow_html: when true, the markdown output is returned without
        sanitizing; this is dangerous if users can submit malicious content.
        When false, matches of ``RE_MD_HTML_TAGS`` are removed from the input
        and the rendered output is cleaned with ``strip=True``.
    :type allow_html: bool
    :returns: ``''`` for falsy ``data``, otherwise ``literal(<rendered html>)``
    '''
    if not data:
        return ''

    stripped = data.strip()
    if allow_html:
        rendered = markdown(stripped)
    else:
        without_tags = RE_MD_HTML_TAGS.sub('', stripped)
        rendered = clean_html(
            markdown(without_tags),
            strip=True,
            tags=MARKDOWN_TAGS,
            attributes=MARKDOWN_ATTRIBUTES,
        )

    # tag:... or tag:"...." references are turned into links here
    if auto_link:
        rendered = html_auto_link(rendered)
    return literal(rendered)
Example #4
Source File: autocomplete.py    From iguana with Creative Commons Attribution Share Alike 4.0 International 6 votes vote down vote up
def get_result_label_html(self, result):
        """Build the HTML label shown for ``result``.

        The ticket identifier and title are passed through ``bleach.clean``
        before being interpolated.  Results in a "Done" kanban column are
        struck through, and archived results are additionally wrapped in a
        muted ``<em>``; each decoration appends a bracketed marker.
        """
        identifier = bleach.clean(result.get_ticket_identifier())
        title = bleach.clean(result.title)
        label = '<strong>%s </strong>%s' % (identifier, title)

        # mark 'Done' issues
        column = result.kanbancol
        if column.type == "Done":
            done_marker = result.kanbancol.get_type_display()
            label = '<del>%s</del><small style="padding-left: 1em">[%s]</small>' % (
                label, done_marker)

        # mark archived issues
        if result.archived:
            archived_marker = _("Archived")
            label = '<em class="text-muted">%s</em><small style="padding-left: 1em">[%s]</small>' % (
                label, archived_marker)

        return label
Example #5
Source File: _markdown.py    From webviz-config with MIT License 6 votes vote down vote up
def __init__(self, markdown_file: Path):
        """Render ``markdown_file`` to sanitized HTML and store it on ``self.html``.

        The file's text is converted with the ``tables`` and ``sane_lists``
        extensions plus ``_WebvizMarkdownExtension``, then cleaned with the
        ``Markdown.ALLOWED_*`` whitelists.
        """
        super().__init__()

        self.markdown_file = markdown_file

        source_text = get_path(self.markdown_file).read_text()
        md_extensions = [
            "tables",
            "sane_lists",
            _WebvizMarkdownExtension(base_path=markdown_file.parent),
        ]
        rendered = markdown.markdown(source_text, extensions=md_extensions)

        self.html = bleach.clean(
            rendered,
            tags=Markdown.ALLOWED_TAGS,
            attributes=Markdown.ALLOWED_ATTRIBUTES,
            styles=Markdown.ALLOWED_STYLES,
        )

        # Workaround for upstream issue https://github.com/plotly/dash-core-components/issues/746:
        # void html tags are rewritten from <tag> to <tag/>.
        self_closed_img = re.sub("<img (.*?[^/])>", r"<img \1/>", self.html)
        self.html = self_closed_img.replace("<br>", "<br/>").replace("<hr>", "<hr/>")
Example #6
Source File: serializers.py    From ecommerce with GNU Affero General Public License v3.0 6 votes vote down vote up
def create(self, validated_data):
        """Create a new email template and deactivate the ones it replaces.

        The greeting and closing are passed through ``bleach.clean`` before
        being stored.  After creation, every other template with the same
        enterprise customer and email type is updated to ``active=False``.

        Returns the newly created ``OfferAssignmentEmailTemplates`` instance.
        """
        enterprise_customer = self.context['view'].kwargs.get('enterprise_customer')
        email_type = validated_data['email_type']

        create_data = {
            'enterprise_customer': enterprise_customer,
            'email_type': email_type,
            'email_greeting': bleach.clean(validated_data.get('email_greeting', '')),
            'email_closing': bleach.clean(validated_data.get('email_closing', '')),
        }
        # ``name`` is only forwarded when the caller supplied it.
        if 'name' in validated_data:
            create_data['name'] = validated_data.get('name')

        instance = OfferAssignmentEmailTemplates.objects.create(**create_data)

        # deactivate old templates for enterprise for this specific email type
        superseded = OfferAssignmentEmailTemplates.objects.filter(
            enterprise_customer=enterprise_customer,
            email_type=email_type,
        ).exclude(pk=instance.pk)
        superseded.update(active=False)

        return instance
Example #7
Source File: utils.py    From OpenBazaar-Server with MIT License 6 votes vote down vote up
try:
    _STRING_TYPES = basestring  # noqa: F821 -- Python 2 text base class
except NameError:
    _STRING_TYPES = str  # Python 3: ``basestring`` no longer exists


def sanitize_html(value):
    """ Recursively sanitize all strings within a data structure.

    Dicts and lists are rebuilt with each contained value sanitized (dict
    keys are left as they are); strings are passed through ``bleach.clean``
    with the module's ``ALLOWED_*`` whitelists; any other value is returned
    unchanged.
    """
    if isinstance(value, dict):
        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        return {k: sanitize_html(v) for k, v in value.items()}
    if isinstance(value, list):
        return [sanitize_html(v) for v in value]
    if isinstance(value, _STRING_TYPES):
        return bleach.clean(value, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES, styles=ALLOWED_STYLES)
    return value
Example #8
Source File: text_plugin.py    From lambda-packs with MIT License 6 votes vote down vote up
def markdown_and_sanitize(markdown_string):
  """Renders a markdown string to HTML and sanitizes the result.

  Rendering enables the tables extension, which is not part of standard
  markdown.  Sanitizing uses the module-level ALLOWED_TAGS and
  ALLOWED_ATTRIBUTES whitelists.

  Args:
    markdown_string: Markdown source; binary input is decoded as utf-8.

  Returns:
    a string containing sanitized html for input markdown
  """
  source = markdown_string
  # Binary input is decoded first so that rendering works on text.
  if isinstance(source, six.binary_type):
    source = source.decode('utf-8')

  rendered = markdown.markdown(
      source, extensions=['markdown.extensions.tables'])
  return bleach.clean(
      rendered, tags=ALLOWED_TAGS, attributes=ALLOWED_ATTRIBUTES)
Example #9
Source File: utils.py    From ecommerce with GNU Affero General Public License v3.0 6 votes vote down vote up
def format_email(template, placeholder_dict, greeting, closing):
    """
    Build an email body from a template, wrapped in a greeting and a closing.

    Arguments:
        template (String): Email template body
        placeholder_dict (SafeDict): Safe dictionary of placeholders and their values
        greeting (String): Email greeting (prefix); ``None`` is treated as ''
        closing (String): Email closing (suffix); ``None`` is treated as ''

    The greeting and closing are passed through ``bleach.clean``; the template
    is formatted with ``placeholder_dict`` as the keyword mapping and a
    ``SafeTuple`` as the positional arguments.

    Reference: https://stackoverflow.com/questions/17215400/python-format-string-unused-named-arguments
    """
    safe_greeting = bleach.clean('' if greeting is None else greeting)
    safe_closing = bleach.clean('' if closing is None else closing)

    formatter = string.Formatter()
    email_body = formatter.vformat(template, SafeTuple(), placeholder_dict)
    return safe_greeting + email_body + safe_closing
Example #10
Source File: utils.py    From ideascube with GNU Affero General Public License v3.0 6 votes vote down vote up
def clean_html(html, with_media=False):
    """Sanitize ``html`` against a fixed whitelist, stripping other tags.

    With ``with_media`` set, the media tags (``img``, ``iframe``, ``video``,
    ``audio``, ``source``) are allowed in addition to the text tags.
    """
    text_tags = [
        'p', 'a', 'ul', 'ol', 'li', 'blockquote',
        'h1', 'h2', 'h3', 'h4', 'h5',
        'strong', 'em',
        'br',
    ]
    media_tags = ['img', 'iframe', 'video', 'audio', 'source']
    authorized_tags = text_tags + media_tags if with_media else text_tags

    # Attribute whitelist per tag; it is passed unchanged regardless of
    # ``with_media``.
    authorized_attributes = dict(
        a=['href', 'title'],
        img=['src', 'width', 'height', 'alt'],
        iframe=['src', 'width', 'height', 'allowfullscreen'],
        video=[
            'controls', 'width', 'height', 'allowfullscreen', 'preload',
            'poster'],
        audio=['controls', 'preload'],
        source=['src'],
    )

    return bleach.clean(
        html, authorized_tags, authorized_attributes, strip=True)
Example #11
Source File: test_bleach_field.py    From django-localized-fields with MIT License 6 votes vote down vote up
def _validate(non_bleached_value, bleached_value):
        """Asserts that ``bleached_value`` is the bleached form of
        ``non_bleached_value`` for every configured language.

        Arguments:
            non_bleached_value:
                The value before bleaching.

            bleached_value:
                The value after bleaching.
        """

        for lang_code, _ in settings.LANGUAGES:
            raw = non_bleached_value.get(lang_code)
            if raw:
                # NOTE(review): the options object is passed positionally,
                # i.e. as bleach.clean's second parameter -- confirm this is
                # intended rather than unpacking it as keyword arguments.
                expected_value = bleach.clean(raw, get_bleach_default_options())
                assert bleached_value.get(lang_code) == expected_value
            else:
                # Nothing to bleach: the bleached side must be empty as well.
                assert not bleached_value.get(lang_code)
Example #12
Source File: blocks.py    From hypha with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_searchable_content(self, value, data):
        """Return ``data`` cleaned with an empty tag whitelist and ``strip=True``.

        Falsy ``data`` is treated as the empty string.
        """
        text = data if data else ''
        return bleach.clean(text, tags=[], strip=True)
Example #13
Source File: test_enterprise.py    From ecommerce with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_post_with_unsafe_data(self, email_type):
        """
        Verify that a template created from unsafe greeting/closing markup
        stores the ``bleach.clean``-ed form of both fields.
        """
        unsafe_greeting = '<script>document.getElementById("greeting").innerHTML = "GREETING!";</script>'
        unsafe_closing = '<script>document.getElementById("closing").innerHTML = "CLOSING!";</script>'

        template = self.create_template_data(email_type, 'E Learning', unsafe_greeting, unsafe_closing)

        submitted = (
            ('email_greeting', unsafe_greeting),
            ('email_closing', unsafe_closing),
        )
        for field, raw in submitted:
            assert template[field] == bleach.clean(raw)
Example #14
Source File: utils.py    From edx-enterprise with GNU Affero General Public License v3.0 5 votes vote down vote up
def strip_html_tags(text, allowed_tags=None):
    """
    Clean ``text`` with bleach, keeping only the tags in ``allowed_tags``.

    Args:
        text (str): string to strip html tags from
        allowed_tags (list): allowed list of html tags; defaults to the
            module-level ``ALLOWED_TAGS`` when ``None``

    Returns: the cleaned string, or ``None`` when ``text`` is ``None``
    """
    if text is None:
        return None

    tags = ALLOWED_TAGS if allowed_tags is None else allowed_tags
    kept_attributes = ['id', 'class', 'style', 'href', 'title']
    return bleach.clean(text, tags=tags, attributes=kept_attributes, strip=True)
Example #15
Source File: __init__.py    From diffengine with MIT License 5 votes vote down vote up
def _fingerprint(s):
    """Reduce ``s`` to a pseudo fingerprint for use during comparison.

    The text is normalized via ``_normal``, cleaned with an empty tag
    whitelist, stripped of all whitespace, and finally translated through
    the ``punctuation`` table.
    """
    normalized = _normal(s)
    without_markup = bleach.clean(normalized, tags=[], strip=True)
    without_space = re.sub(r"\s+", "", without_markup, flags=re.MULTILINE)
    return without_space.translate(punctuation)
Example #16
Source File: blocks.py    From hypha with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_searchable_content(self, value, data):
        """Clean ``data`` (or '' when it is falsy) with no tags allowed and ``strip=True``."""
        searchable = bleach.clean(data or '', strip=True, tags=[])
        return searchable
Example #17
Source File: utils.py    From PonyConf with Apache License 2.0 5 votes vote down vote up
def markdown_to_html(md):
    """Render markdown ``md`` to HTML, sanitize it and mark it safe.

    The whitelist is bleach's default tag set extended with ``p``, ``pre``,
    ``span`` and the headings ``h1`` to ``h6``.
    """
    heading_tags = ['h%d' % level for level in range(1, 7)]
    # ``bleach.ALLOWED_TAGS`` is a list in older bleach releases and a
    # frozenset from bleach 6.0 on; ``list()`` makes the concatenation work
    # with either.
    allowed_tags = list(bleach.ALLOWED_TAGS) + ['p', 'pre', 'span'] + heading_tags
    html = bleach.clean(markdown(md), tags=allowed_tags)
    return mark_safe(html)
Example #18
Source File: unfurl_message.py    From gitlab-unfurly with MIT License 5 votes vote down vote up
def strip_html_tags(value):
    """Clean ``value`` with an empty tag whitelist and ``strip=True``."""
    no_tags_allowed = []
    return bleach.clean(value, tags=no_tags_allowed, strip=True)
Example #19
Source File: blocks.py    From hypha with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def get_searchable_content(self, value, data):
        """Coerce ``data`` to ``str`` and clean it with no tags allowed.

        CharField acts as a fallback, hence the forced string conversion.
        """
        text = str(data)
        return bleach.clean(text if text else '', tags=[], strip=True)
Example #20
Source File: jinja.py    From maple-bbs with GNU General Public License v3.0 5 votes vote down vote up
def markdown(text, clean=True):
    """Render ``text`` with the ``fenced-code`` extension and wrap it in ``Markup``.

    When ``clean`` is truthy the rendered HTML is passed through
    ``safe_clean`` before being wrapped.
    """
    md = Markdown(HtmlRenderer(), extensions=('fenced-code', ))
    rendered = md(text)
    if not clean:
        return Markup(rendered)
    return Markup(safe_clean(rendered))
Example #21
Source File: jinja.py    From maple-bbs with GNU General Public License v3.0 5 votes vote down vote up
def safe_clean(text):
    """Clean ``text`` against a small fixed whitelist and wrap it in ``Markup``."""
    allowed_tags = ['b', 'i', 'font', 'br', 'blockquote', 'div', 'h2', 'a', 'p']
    allowed_attributes = {
        '*': ['style', 'id', 'class'],
        'font': ['color'],
        'a': ['href'],
    }
    allowed_styles = ['color']
    cleaned = clean(
        text,
        tags=allowed_tags,
        attributes=allowed_attributes,
        styles=allowed_styles,
    )
    return Markup(cleaned)
Example #22
Source File: Post.py    From maniwani with MIT License 5 votes vote down vote up
def render_post_collection(posts, context, extensions):
    """Replace each post's ``"body"`` with its rendered, cleaned markdown.

    Renders are looked up in the cache first, keyed by
    ``post_render_cache_key(context, post["id"])``; on a miss (or a falsy
    cached value) the body is rendered, cleaned and stored back in the
    cache.  ``posts`` is modified in place.
    """
    store = cache.Cache()
    for post in posts:
        key = post_render_cache_key(context, post["id"])
        body = store.get(key)
        if not body:
            body = clean(markdown(post["body"], extensions=extensions),
                         ALLOWED_TAGS, ALLOWED_ATTRIBUTES)
            store.set(key, body)
        post["body"] = body
Example #23
Source File: transforms.py    From adhocracy4 with GNU Affero General Public License v3.0 5 votes vote down vote up
def clean_html_field(text, setting='default'):
    """Clean ``text`` using the ``settings.BLEACH_LIST[setting]`` whitelists.

    ``tags`` and ``attributes`` must be present in the chosen entry;
    ``styles`` falls back to an empty list.
    """
    options = settings.BLEACH_LIST[setting]
    whitelist = {
        'tags': options['tags'],
        'attributes': options['attributes'],
        'styles': options.get('styles', []),
    }
    return bleach.clean(text, strip=True, **whitelist)
Example #24
Source File: transforms.py    From adhocracy4 with GNU Affero General Public License v3.0 5 votes vote down vote up
def clean_html_all(text):
    """Clean ``text`` with empty tag, attribute and style whitelists and ``strip=True``."""
    nothing_allowed = dict(tags=[], attributes={}, styles=[])
    return bleach.clean(text, strip=True, **nothing_allowed)
Example #25
Source File: app.py    From activitypub with Mozilla Public License 2.0 5 votes vote down vote up
def clean(self, html):
    """Delegate to the module-level ``clean_html`` and return its result."""
    cleaned = clean_html(html)
    return cleaned
Example #26
Source File: app.py    From activitypub with Mozilla Public License 2.0 5 votes vote down vote up
def clean_html(html):
    """Clean ``html`` with bleach, allowing only the tags in ``ALLOWED_TAGS``."""
    sanitized = bleach.clean(html, tags=ALLOWED_TAGS)
    return sanitized
Example #27
Source File: main.py    From python-docs-samples with Apache License 2.0 5 votes vote down vote up
def index():
    """Render the request body as markdown and return sanitized HTML.

    The request data is read as text, converted with ``markdown.markdown``
    and cleaned with bleach's default tag set plus ``p``; disallowed tags
    are stripped (``strip=True``).
    """
    data = request.get_data(as_text=True)
    # Parses the markdown and outputs the formatted HTML
    html = markdown.markdown(data)

    # Keep the paragraph tags.  The whitelist is built per call instead of
    # appending to ``bleach.sanitizer.ALLOWED_TAGS``: that mutated library
    # state on every request and fails outright when it is a frozenset
    # (bleach 6.0+).
    allowed_tags = list(bleach.sanitizer.ALLOWED_TAGS) + ['p']

    # Sanitize and return
    return bleach.clean(html, tags=allowed_tags, strip=True)
Example #28
Source File: markdown.py    From appstore with GNU Affero General Public License v3.0 5 votes vote down vote up
def markdown(sentence):
    """Render ``sentence`` with ``default_markdown``, clean it and mark it safe.

    Cleaning uses the ``MARKDOWN_ALLOWED_TAGS`` and
    ``MARKDOWN_ALLOWED_ATTRIBUTES`` settings.
    """
    rendered = default_markdown(sentence)
    whitelist = dict(
        attributes=settings.MARKDOWN_ALLOWED_ATTRIBUTES,
        tags=settings.MARKDOWN_ALLOWED_TAGS,
    )
    return mark_safe(clean(rendered, **whitelist))  # nosec
Example #29
Source File: feeds.py    From appstore with GNU Affero General Public License v3.0 5 votes vote down vote up
def item_description(self, item):
        """Build the cleaned HTML description for a feed ``item``.

        The markdown source is the app description, followed by a "Changes"
        section when the item has a changelog, followed by a download link.
        If a ``TranslationDoesNotExist`` is raised while assembling the first
        two parts, only the app description is used.
        """
        try:
            changelog = ''
            if item.changelog:
                changelog = '\n\n# %s\n\n%s' % (_('Changes'), item.changelog)
            content = '%s%s' % (item.app.description, changelog)
        except TranslationDoesNotExist:
            content = item.app.description

        download_link = '\n\n [%s](%s)' % (_('Download'), item.download)
        content += download_link

        rendered = markdown(content)
        return clean(rendered,
                     attributes=settings.MARKDOWN_ALLOWED_ATTRIBUTES,
                     tags=settings.MARKDOWN_ALLOWED_TAGS)
Example #30
Source File: models.py    From cookiecutter-django with MIT License 5 votes vote down vote up
def create_hub_user(email, password, profile_type=Choices.Profiles.USER, username=None, first_name=None,
                        last_name=None, **kwargs):
        """
        Create a user followed by its ``HubUser`` in a single transaction.

        :param email: str email
        :param password: str password
        :param profile_type: Type of user to be created. Defaults to User
        :param username: str username; when falsy, one is generated with
            ``HubUser.generate_username(first_name, last_name, email)``
        :param first_name: First name of a user (optional)
        :param last_name: Last name of a user (optional)
        :param kwargs: Extra keyword arguments forwarded to
            ``HubUser.objects.create`` (for example ``incomplete_signup``)
        :return: the created HubUser, or None when an IntegrityError occurred
        """
        def _clean_optional(value):
            # first_name / last_name default to None, which bleach.clean()
            # rejects as non-text input; store an empty string instead.
            # NOTE(review): assumes the user model accepts '' for a missing
            # name -- confirm for custom user models.
            return '' if value is None else bleach.clean(value)

        # The try/except wraps the atomic block (rather than sitting inside
        # it) so that an IntegrityError propagates through
        # ``transaction.atomic()`` and triggers a clean rollback before it is
        # handled here.
        try:
            with transaction.atomic():
                username = username or HubUser.generate_username(first_name, last_name, email)
                user = get_user_model().objects.create_user(
                    username=bleach.clean(username),
                    email=bleach.clean(email),
                    password=password,
                    is_staff=False,
                    is_superuser=False,
                    first_name=_clean_optional(first_name),
                    last_name=_clean_optional(last_name),
                )
                hub_user = HubUser.objects.create(
                    user=user, profile_type=profile_type, **kwargs
                )
        except IntegrityError as e:
            logger.exception(f"Error creating the HubUser: {e}")
            return
        return hub_user