Python html.entities.entitydefs() Examples

The following are 8 code examples that use html.entities.entitydefs (a dictionary, not a callable). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module html.entities, or try the search function.
Example #1
Source File: strings.py    From acmpv with Do What The F*ck You Want To Public License 5 votes vote down vote up
def unescape_html(string):
        '''Decode HTML character references in ``string``.

        Numeric references (``&#...;``) are handed to ``_sharp2uni``; after
        that, every remaining ``&...;`` span is looked up by name in
        ``entitydefs``.  A span whose name is not in ``entitydefs`` is left
        in the text unchanged instead of raising ``KeyError``.

        Returns the decoded string.
        '''
        string = re.sub(r'&#[^;]+;', _sharp2uni, string)
        # The pattern is loose enough to match ordinary text such as
        # "AT&T; ..." so an unknown name must fall back to the original match.
        string = re.sub(
            r'&[^;]+;',
            lambda m: entitydefs.get(m.group(0)[1:-1], m.group(0)),
            string,
        )
        return string
Example #2
Source File: strings.py    From acmpv with Do What The F*ck You Want To Public License 5 votes vote down vote up
def unescape_html(string):
        '''HTML entity decode.

        First replaces numeric references (``&#...;``) via ``_sharp2uni``,
        then replaces named references (``&name;``) with their value from
        ``entitydefs``.  Text that merely looks like a named reference but
        is not a key of ``entitydefs`` is kept as-is rather than raising
        ``KeyError``.

        Returns the decoded string.
        '''
        def _named(match):
            # Strip the leading '&' and trailing ';' to get the lookup key;
            # keep the whole match when the name is unknown.
            whole = match.group(0)
            return entitydefs.get(whole[1:-1], whole)

        string = re.sub(r'&#[^;]+;', _sharp2uni, string)
        string = re.sub(r'&[^;]+;', _named, string)
        return string
Example #3
Source File: sanitizer.py    From yatl with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def handle_entityref(self, ref):
        """Append the entity reference ``ref`` to ``self.result``.

        Nothing is appended while the last entry of ``self.in_disallowed``
        is truthy.  A name found in ``entitydefs`` is emitted as
        ``&name;``; any other name is emitted as the ``xmlescape``-d text
        ``&name``.
        """
        if self.in_disallowed[-1]:
            return

        if ref in entitydefs:
            fragment = '&%s;' % ref
        else:
            # Unknown name: escape it instead of emitting a live reference.
            fragment = xmlescape('&%s' % ref)
        self.result += fragment
Example #4
Source File: html2text.py    From arlo with Apache License 2.0 5 votes vote down vote up
def name2cp(k):
    """Return the integer code point for the HTML entity named ``k``.

    ``'apos'`` is answered directly.  Otherwise the lookup uses
    ``htmlentitydefs.name2codepoint`` when the module has it, and falls
    back to parsing the ``htmlentitydefs.entitydefs`` value when it does
    not.  An unknown name raises ``KeyError`` from the dictionary lookup.
    """
    if k == 'apos':
        return ord("'")
    if hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        return htmlentitydefs.name2codepoint[k]

    definition = htmlentitydefs.entitydefs[k]
    is_numeric_ref = definition.startswith("&#") and definition.endswith(";")
    if is_numeric_ref:
        # "&#NNN;" form (not in latin-1): the digits are the code point.
        return int(definition[2:-1])
    decoded = codecs.latin_1_decode(definition)[0]
    return ord(decoded)
Example #5
Source File: html2text.py    From PyDataset with MIT License 5 votes vote down vote up
def name2cp(k):
    """Map the HTML entity name ``k`` to its code point (an ``int``).

    The name ``'apos'`` is special-cased.  If ``htmlentitydefs`` exposes
    ``name2codepoint`` that table is indexed with ``k``; otherwise the
    ``entitydefs`` entry for ``k`` is converted.  Dictionary lookups are
    not guarded, so an unknown name raises ``KeyError``.
    """
    if k == 'apos':
        return ord("'")

    # A private sentinel distinguishes "attribute missing" from any value.
    absent = object()
    table = getattr(htmlentitydefs, "name2codepoint", absent)  # requires Python 2.3
    if table is not absent:
        return table[k]

    value = htmlentitydefs.entitydefs[k]
    if value.startswith("&#") and value.endswith(";"):
        return int(value[2:-1])  # not in latin-1
    return ord(codecs.latin_1_decode(value)[0])
Example #6
Source File: _fetchtitle.py    From robot with MIT License 5 votes vote down vote up
def _mapEntity(m):
  """Return the replacement text for the entity extracted from ``m``.

  The name comes from ``_extract_entity_name(m)``.  Names starting with
  ``'#'`` are delegated to ``_sharp2uni``; other names are looked up in
  ``_entities``, and a name missing from it yields ``'&' + name``.
  NOTE(review): ``m`` is presumably a regex match object -- confirm
  against ``_extract_entity_name``.
  """
  entity = _extract_entity_name(m)
  if not entity.startswith('#'):
    try:
      return _entities[entity]
    except KeyError:
      # Unknown name: hand back the ampersand-prefixed name.
      return '&' + entity
  return _sharp2uni(entity)
Example #7
Source File: html2text.py    From RedditBots with MIT License 5 votes vote down vote up
def name2cp(k):
    """Look up the code point (``int``) of the HTML entity named ``k``.

    ``'apos'`` is handled up front.  When ``htmlentitydefs`` lacks a
    ``name2codepoint`` attribute, the ``entitydefs`` value is converted
    instead: a ``&#NNN;`` value yields ``NNN``, anything else is decoded
    as latin-1 and passed to ``ord``.  An unknown name raises ``KeyError``.
    """
    if k == 'apos':
        return ord("'")

    if not hasattr(htmlentitydefs, "name2codepoint"):
        # Fallback for modules without name2codepoint (requires Python 2.3).
        text = htmlentitydefs.entitydefs[k]
        if text.startswith("&#") and text.endswith(";"):
            return int(text[2:-1])  # not in latin-1
        return ord(codecs.latin_1_decode(text)[0])

    return htmlentitydefs.name2codepoint[k]
Example #8
Source File: extract_recipe.py    From extract_recipe with Apache License 2.0 4 votes vote down vote up
def name2cp(k):
    """Return the code point of the HTML entity called ``k`` as an ``int``.

    Three sources are tried in order: a hard-coded answer for ``'apos'``,
    the ``htmlentitydefs.name2codepoint`` table if the module has one, and
    finally the ``htmlentitydefs.entitydefs`` value for ``k``.  Lookups of
    an unknown name raise ``KeyError``.
    """
    if k == 'apos':
        codepoint = ord("'")
    elif hasattr(htmlentitydefs, "name2codepoint"):  # requires Python 2.3
        codepoint = htmlentitydefs.name2codepoint[k]
    else:
        entity_text = htmlentitydefs.entitydefs[k]
        if entity_text.startswith("&#") and entity_text.endswith(";"):
            # Numeric reference (not in latin-1): take the digits.
            codepoint = int(entity_text[2:-1])
        else:
            codepoint = ord(codecs.latin_1_decode(entity_text)[0])
    return codepoint