Python re Module Examples (re.html)
The following are 30 code examples that use the Python re module and reference its documentation page, re.html.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module re, or try the search function.

Example #1
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 6 votes |
def set_html(self, html):
    """
    When setting the html for this Google Document we do two things:

    1. We extract the content from the html. Using a regular expression we
       pull the meat of the document out of the body of the html, we also cut
       off the footer Google adds on automatically.

    2. We extract the various sections from the content of the document.
       Again using a regular expression, we look for h1, h2, ... tags to split
       the document up into sections.

    Note: it is important when you are writing your Google Document to use the
    heading text styles, so this code will split things correctly.
    """
    self._html = html
    self._extract_content()
    self._extract_sections()
Example #2
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 6 votes |
def _construct_section_tree(self):
    """
    For some weird reason Google Documents doesn't like nesting lists, so
    their table of contents requires a bunch of special formatting. Instead of
    trying to hack off what they provide us, we create a tree of sections
    based on each section's level. This tree will be used to construct the
    html for the table of contents.
    """
    self._section_tree = TreeNode(Section(level=0))
    current_node = self._section_tree
    for section in self._sections:
        while section['level'] <= current_node.value['level']:
            current_node = current_node.parent
        while section['level'] > current_node.value['level'] + 1:
            empty_section = Section(level=current_node.value['level'] + 1)
            current_node = current_node.add_child(empty_section)
        assert section['level'] == current_node.value['level'] + 1
        current_node = current_node.add_child(section)
Example #3
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 6 votes |
def _navigation_list(self, node=None):
    """
    Return an html representation of the table of contents for this document.
    This is done recursively, adding a list item for each element in the tree,
    and an unordered list if this node has children. I might want to double
    check that this html is the correct way to nest lists.
    """
    if node is None:
        self._construct_section_tree()
        return self._navigation_list(self._section_tree)
    result = ""
    if 'title' in node.value and 'id' in node.value:
        result += '<li>%s</li>' % node.value.url()
    if len(node) > 0:
        result += "<ul>%s</ul>" % \
            "\n".join([self._navigation_list(child) for child in node])
    return result
Example #4
Source File: test_reader.py From IRCLogParser with GNU General Public License v3.0 | 6 votes |
def test_linux_input_slack(self):
    expected_captured_output = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input_slack")
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    log_data = reader.linux_input_slack(self.current_directory + "/data/slackware/", self.starting_date, self.ending_date)
    output = capturedOutput.getvalue()
    capturedOutput.close()
    sys.stdout = sys.__stdout__
    # See https://docs.python.org/2/library/re.html for more details.
    # string 'Working on: /any_valid_path/IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n' is replaced by
    # 'Working on: IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n'
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)
    self.assertEqual(log_data, self.log_data)
    self.assertEqual(expected_captured_output, output)
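The substitution above relies on named capturing groups and \g<name> backreferences. A minimal, self-contained sketch of the same idea (the sample path below is invented for illustration):

import re

line = 'Working on: /home/user/build/IRCLogParser/test/data/sample.txt\n'
# keep the prefix before the first '/' and everything from 'IRCLogParser/' on,
# dropping the absolute part of the path in between
cleaned = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)',
                 r'\g<begin>\g<constant>', line)
print(cleaned)  # 'Working on: IRCLogParser/test/data/sample.txt'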
Example #5
Source File: test_reader.py From IRCLogParser with GNU General Public License v3.0 | 6 votes |
def test_linux_input(self):
    expected_capturedOutput = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input")
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    log_data = reader.linux_input(self.current_directory + "/data/log/", self.channel_name, self.starting_date, self.ending_date)
    output = capturedOutput.getvalue()
    capturedOutput.close()
    sys.stdout = sys.__stdout__
    # See https://docs.python.org/2/library/re.html for more details.
    # string 'Working on: /any_valid_path/IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n' is replaced by
    # 'Working on: IRCLogParser/test/unit-test/test_lib/test_in_out/data/log/2013/01/04/#kubuntu-devel.txt\n'
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)
    self.assertEqual(log_data, self.log_data)
    self.assertEqual(expected_capturedOutput, output)
Example #6
Source File: test_reader.py From IRCLogParser with GNU General Public License v3.0 | 6 votes |
def test_linux_input_all_channels(self):
    expected_capturedOutput = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input_all_channels")
    expected_log_data = util.load_from_disk(self.current_directory + "/data/log_data_for_test_linux_input_all_channels")
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    log_data = reader.linux_input(self.current_directory + "/data/log_to_test_for_all_channels/", ["ALL"], "2013-1-1", "2013-1-2")
    output = capturedOutput.getvalue()
    capturedOutput.close()
    sys.stdout = sys.__stdout__
    # See https://docs.python.org/2/library/re.html for more details.
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)
    self.assertEqual(expected_log_data, log_data)
    self.assertEqual(expected_capturedOutput, output)
Example #7
Source File: x3270.py From Robot-Framework-Mainframe-3270-Library with MIT License | 6 votes |
def take_screenshot(self, height='410', width='670'):
    """Generate a screenshot of the IBM 3270 Mainframe in html format. The
    default folder is the log folder of RobotFramework; to change it, see
    `Set Screenshot Folder`.

    The screenshot is embedded in the log as an iframe with the default values
    height=410 and width=670; you can change these values by passing them to
    the keyword.

    Examples:
        | Take Screenshot |
        | Take Screenshot | height=500 | width=700 |
    """
    filename_prefix = 'screenshot'
    extension = 'html'
    filename_sufix = str(int(round(time.time() * 1000)))
    filepath = os.path.join(self.imgfolder, '%s_%s.%s' % (filename_prefix, filename_sufix, extension))
    self.mf.save_screen(os.path.join(self.output_folder, filepath))
    logger.write('<iframe src="%s" height="%s" width="%s"></iframe>' % (filepath.replace("\\", "/"), height, width),
                 level='INFO', html=True)
Example #8
Source File: regex.py From wextracto with BSD 3-Clause "New" or "Revised" License | 6 votes |
def re_group(pattern, group=1, flags=0):
    """ Returns a :mod:`composable <wex.composed>` callable that extracts the
    specified group using a regular expression.

    :param pattern: The regular expression.
    :param group: The group from the `MatchObject
                  <https://docs.python.org/2/library/re.html#re.MatchObject.group>`_.
    :param flags: Flags to use when compiling the `pattern
                  <https://docs.python.org/2/library/re.html#re.compile>`_.
    """
    compiled = re.compile(pattern, flags)
    @composable
    def regroup(src):
        for string in flatten(src):
            for match in compiled.finditer(string):
                yield match.group(group)
    return regroup
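Stripped of the wex composable machinery, the core of re_group is just re.finditer plus MatchObject.group. A minimal, self-contained sketch (the sample text is invented):

import re

prices = re.compile(r'\$(\d+\.\d{2})')
text = 'apples $3.99, pears $2.50'
# group(1) returns only the capturing group, not the whole match
print([m.group(1) for m in prices.finditer(text)])  # ['3.99', '2.50']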
Example #9
Source File: regex.py From wextracto with BSD 3-Clause "New" or "Revised" License | 6 votes |
def re_groupdict(pattern, flags=0):
    """ Returns a :mod:`composable <wex.composed>` callable that extracts a
    group dictionary using a regular expression.

    :param pattern: The regular expression.
    :param flags: Flags to use when compiling the `pattern
                  <https://docs.python.org/2/library/re.html#re.compile>`_.
    """
    compiled = re.compile(pattern, flags)
    @composable
    def redict(src):
        for string in flatten(src):
            for match in compiled.finditer(string):
                yield match.groupdict()
    return redict
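The groupdict variant maps named groups to what they matched. A standalone equivalent without the wex decorators (the pattern and input are illustrative):

import re

pair = re.compile(r'(?P<key>\w+)=(?P<value>\w+)')
for m in pair.finditer('host=localhost port=8080'):
    print(m.groupdict())
# {'key': 'host', 'value': 'localhost'}
# {'key': 'port', 'value': '8080'}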
Example #10
Source File: sanity.py From reframe with BSD 3-Clause "New" or "Revised" License | 6 votes |
def assert_found(patt, filename, msg=None, encoding='utf-8'):
    '''Assert that regex pattern ``patt`` is found in the file ``filename``.

    :arg patt: The regex pattern to search.
        Any standard Python `regular expression
        <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_
        is accepted.
    :arg filename: The name of the file to examine.
        Any :class:`OSError` raised while processing the file will be
        propagated as a :class:`reframe.core.exceptions.SanityError`.
    :arg encoding: The name of the encoding used to decode the file.
    :returns: ``True`` on success.
    :raises reframe.core.exceptions.SanityError: if assertion fails.
    '''
    num_matches = count(finditer(patt, filename, encoding))
    try:
        evaluate(assert_true(num_matches))
    except SanityError:
        error_msg = msg or "pattern `{0}' not found in `{1}'"
        raise SanityError(_format(error_msg, patt, filename))
    else:
        return True
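Note that finditer, count and evaluate here are ReFrame's deferrable sanity functions rather than members of the re module. A rough, eager sketch of the same check using only the standard library (the helper name and the simplified exception type are assumptions):

import re

def assert_found_plain(patt, filename, encoding='utf-8'):
    # scan the file line by line and succeed on the first regex match
    with open(filename, encoding=encoding) as fp:
        if any(re.search(patt, line) for line in fp):
            return True
    raise AssertionError("pattern %r not found in %r" % (patt, filename))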
Example #11
Source File: sanity.py From reframe with BSD 3-Clause "New" or "Revised" License | 6 votes |
def extractall(patt, filename, tag=0, conv=None, encoding='utf-8'):
    '''Extract all values from the capturing group ``tag`` of a matching regex
    ``patt`` in the file ``filename``.

    :arg patt: The regex pattern to search.
        Any standard Python `regular expression
        <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_
        is accepted.
    :arg filename: The name of the file to examine.
    :arg encoding: The name of the encoding used to decode the file.
    :arg tag: The regex capturing group to be extracted.
        Group ``0`` refers always to the whole match.
        Since the file is processed line by line, this means that group ``0``
        returns the whole line that was matched.
    :arg conv: A callable that takes a single argument and returns a new value.
        If provided, it will be used to convert the extracted values before
        returning them.
    :returns: A list of the extracted values from the matched regex.
    :raises reframe.core.exceptions.SanityError: In case of errors.
    '''
    return list(evaluate(x)
                for x in extractiter(patt, filename, tag, conv, encoding))
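Again, extractiter and evaluate are ReFrame deferrables; the plain-re behaviour they wrap looks roughly like this eager, simplified sketch (the helper name is an assumption):

import re

def extractall_plain(patt, filename, tag=0, conv=None, encoding='utf-8'):
    # collect capturing group `tag` from every matching line, optionally converted
    results = []
    with open(filename, encoding=encoding) as fp:
        for line in fp:
            match = re.search(patt, line)
            if match:
                value = match.group(tag)
                results.append(conv(value) if conv else value)
    return results

# e.g. extractall_plain(r'time: (\S+)', 'bench.log', tag=1, conv=float)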
Example #12
Source File: test_configtypes.py From qutebrowser with GNU General Public License v3.0 | 5 votes |
def __init__(self, pattern, flags=0):
    # We compile the regex because re.compile also adds flags defined in
    # the pattern and implicit flags to its .flags.
    # See https://docs.python.org/3/library/re.html#re.regex.flags
    compiled = re.compile(pattern, flags)
    self.pattern = compiled.pattern
    self.flags = compiled.flags
    self._user_flags = flags
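A quick way to see the inline and implicit flags that re.compile folds into .flags; the comments assume CPython 3, where re.UNICODE is added automatically for str patterns:

import re

compiled = re.compile(r'(?i)spam', re.MULTILINE)
print(compiled.flags)                        # combined integer value of all flags
print(bool(compiled.flags & re.IGNORECASE))  # True, from the inline (?i)
print(bool(compiled.flags & re.MULTILINE))   # True, passed explicitly
print(bool(compiled.flags & re.UNICODE))     # True, implicit for str patterns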
Example #13
Source File: utils.py From sncli with MIT License | 5 votes |
def build_regex_search(search_string):
    """
    Build up a compiled regular expression from the search string.

    Supports the use of flags - ie. search for `nothing/i` will perform a
    case-insensitive regex for `nothing`
    """
    sspat = None
    valid_flags = {
        'i': re.IGNORECASE
    }
    if search_string:
        try:
            search_string, flag_letters = re.match(r'^(.+?)(?:/([a-z]+))?$', search_string).groups()
            flags = 0
            # if flags are given, OR together all the valid flags
            # see https://docs.python.org/3/library/re.html#re.compile
            if flag_letters:
                for letter in flag_letters:
                    if letter in valid_flags:
                        flags = flags | valid_flags[letter]
            sspat = re.compile(search_string, flags)
        except re.error:
            sspat = None
    return sspat
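Assuming the helper above is in scope, usage might look like this (the search strings are made up):

pat = build_regex_search('nothing/i')       # trailing /i turns on re.IGNORECASE
print(bool(pat.search('NOTHING to see')))   # True
print(build_regex_search('[unbalanced'))    # None: the re.error is swallowed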
Example #14
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def to_html(self):
    return render_to_string('section.html', self)
Example #15
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def _extract_sections(self):
    """
    Here is an example of what a section header looks like in the html of a
    Google Document:

    <h3 class="c1"><a name="h.699ffpepx6zs"></a><span>Hello World </span></h3>

    We split the content of the Google Document up using a regular expression
    that matches the above header. re.split is a pretty cool function if you
    haven't tried it before. It puts the matching groups into the list as well
    as the content between the matches. Check it out here:
    http://docs.python.org/library/re.html#re.split

    One big thing we do in this method is replace the ugly section id that
    Google creates with a nicely slugified version of the section title. This
    makes for pretty urls.
    """
    self._sections = []
    header = r'<h(?P<level>\d) class="[^"]+">' \
             r'<a name="(?P<id>[^"]+)"></a>' \
             r'<span>(?P<title>[^<]+)</span>' \
             r'</h\d>'
    l = re.split(header, self._content)
    l.pop(0)
    while l:
        section = Section(
            # hack: cause we started with h3 in google docs
            level=int(l.pop(0)) - 2,
            id=l.pop(0),
            title=l.pop(0).decode('utf8'),
            content=l.pop(0),
        )
        section['id'] = slugify(section['title'])
        if section['level'] >= 1:
            self._sections.append(section)
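The key trick above is that re.split keeps the text of every capturing group in the result list, interleaved with the text between matches. A small illustration with a simplified header pattern (the html snippet is invented):

import re

header = r'<h(\d)><span>([^<]+)</span></h\d>'
content = 'preamble<h3><span>Intro</span></h3>body one<h3><span>Usage</span></h3>body two'
print(re.split(header, content))
# ['preamble', '3', 'Intro', 'body one', '3', 'Usage', 'body two']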
Example #16
Source File: google_doc.py From kobo-predict with BSD 2-Clause "Simplified" License | 5 votes |
def to_html(self):
    """
    Return a cleaned up HTML representation of this Google Document.
    """
    return render_to_string('google_doc.html', {
        'nav': self._navigation_html(),
        'content': '\n'.join([s.to_html() for s in self._sections]),
    })
Example #17
Source File: dumpgenerator.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def undoHTMLEntities(text=''):
    """ Undo some HTML codes """
    # i guess only < > & " ' need conversion
    # http://www.w3schools.com/html/html_entities.asp
    text = re.sub('&lt;', '<', text)
    text = re.sub('&gt;', '>', text)
    text = re.sub('&amp;', '&', text)
    text = re.sub('&quot;', '"', text)
    text = re.sub('&#039;', '\'', text)
    return text
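These substitutions are plain string replacements for five common entities; on Python 3 the standard library's html.unescape covers the same entities and many more, as a quick check shows:

import html

print(html.unescape('&lt;b&gt;Tom &amp; Jerry&#039;s &quot;show&quot;&lt;/b&gt;'))
# <b>Tom & Jerry's "show"</b>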
Example #18
Source File: dumpgenerator.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def removeIP(raw=''):
    """ Remove IP from HTML comments <!-- --> """
    raw = re.sub(r'\d+\.\d+\.\d+\.\d+', '0.0.0.0', raw)
    # http://www.juniper.net/techpubs/software/erx/erx50x/swconfig-routing-vol1/html/ipv6-config5.html
    # weird cases as :: are not included
    raw = re.sub(
        r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}',
        '0:0:0:0:0:0:0:0',
        raw)
    return raw
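Assuming the function above is in scope, a quick check of what it does to an invented HTML comment; note that the IPv4 pattern is loose and would also rewrite version-like strings such as 1.2.3.4:

print(removeIP('<!-- edited by 192.168.1.77 and 2001:db8:0:0:0:0:0:1 -->'))
# <!-- edited by 0.0.0.0 and 0:0:0:0:0:0:0:0 -->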
Example #19
Source File: dumpgenerator.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def saveSpecialVersion(config={}, session=None):
    """ Save Special:Version as .html, to preserve extensions details """
    if os.path.exists('%s/Special:Version.html' % (config['path'])):
        print 'Special:Version.html exists, do not overwrite'
    else:
        print 'Downloading Special:Version with extensions and other related info'
        r = session.post(
            url=config['index'],
            params={'title': 'Special:Version'},
            timeout=10)
        raw = r.text
        delay(config=config, session=session)
        raw = removeIP(raw=raw)
        with open('%s/Special:Version.html' % (config['path']), 'w') as outfile:
            outfile.write(raw.encode('utf-8'))
Example #20
Source File: dumpgenerator.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def saveIndexPHP(config={}, session=None):
    """ Save index.php as .html, to preserve license details available at
    the bottom of the page """
    if os.path.exists('%s/index.html' % (config['path'])):
        print 'index.html exists, do not overwrite'
    else:
        print 'Downloading index.php (Main Page) as index.html'
        r = session.post(url=config['index'], params={}, timeout=10)
        raw = r.text
        delay(config=config, session=session)
        raw = removeIP(raw=raw)
        with open('%s/index.html' % (config['path']), 'w') as outfile:
            outfile.write(raw.encode('utf-8'))
Example #21
Source File: mediawiki.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def mwGetAPI(config={}):
    """ Returns API for a MediaWiki wiki, if available """
    api = ''
    html = wikiteam.getURL(url=config['wiki'])
    m = re.findall(
        r'(?im)<\s*link\s*rel="EditURI"\s*type="application/rsd\+xml"\s*href="([^>]+?)\?action=rsd"\s*/\s*>',
        html)
    if m:
        api = m[0]
        if api.startswith('//'):  # gentoo wiki and others
            # prepend the scheme taken from the wiki URL
            api = config['wiki'].split('//')[0] + api
    return api
Example #22
Source File: mediawiki.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def mwRemoveIP(raw=''):
    """ Remove IP from HTML comments <!-- --> """
    raw = re.sub(r'\d+\.\d+\.\d+\.\d+', '0.0.0.0', raw)
    # http://www.juniper.net/techpubs/software/erx/erx50x/swconfig-routing-vol1/html/ipv6-config5.html
    # weird cases as :: are not included
    raw = re.sub(
        r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}',
        '0:0:0:0:0:0:0:0',
        raw)
    return raw
Example #23
Source File: mediawiki.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def mwSaveIndexPHP(config={}):
    """ Save index.php as .html, to preserve license details available at
    the bottom of the page """
    if os.path.exists('%s/index.html' % (config['path'])):
        sys.stderr.write('index.html exists, do not overwrite')
    else:
        sys.stderr.write('Downloading index.php (Main Page) as index.html')
        raw = wikiteam.getURL(url=config['index'], data={})
        wikiteam.delay(config=config)
        raw = mwRemoveIP(raw=raw)
        with open('%s/index.html' % (config['path']), 'w') as outfile:
            outfile.write(raw)
Example #24
Source File: mediawiki.py From wikiteam with GNU General Public License v3.0 | 5 votes |
def mwSaveSpecialVersion(config={}):
    """ Save Special:Version as .html, to preserve extensions details """
    if os.path.exists('%s/Special:Version.html' % (config['path'])):
        sys.stderr.write('Special:Version.html exists, do not overwrite')
    else:
        sys.stderr.write('Downloading Special:Version with extensions and other related info')
        raw = wikiteam.getURL(url=config['index'], data={'title': 'Special:Version'})
        wikiteam.delay(config=config)
        raw = mwRemoveIP(raw=raw)
        with open('%s/Special:Version.html' % (config['path']), 'w') as outfile:
            outfile.write(raw)
Example #25
Source File: hicFindRestSite.py From HiCExplorer with GNU General Public License v3.0 | 5 votes |
def parse_arguments(args=None):
    parser = argparse.ArgumentParser(description='Identifies the genomic locations of restriction sites.',
                                     add_help=False,
                                     usage='%(prog)s --fasta mm10.fa '
                                           '--searchPattern AAGCTT -o rest_site_positions.bed')

    parserRequired = parser.add_argument_group('Required arguments')

    # define the arguments
    parserRequired.add_argument('--fasta', '-f',
                                help='Path to fasta file for the organism genome.',
                                type=argparse.FileType('r'),
                                required=True)

    # define the arguments
    parserRequired.add_argument('--searchPattern', '-p',
                                help='Search pattern. For example, for HindIII this pattern is "AAGCTT". '
                                     'Both forward and reverse strand are searched for a match. The pattern '
                                     'is a regexp and can contain regexp-specific syntax '
                                     '(see https://docs.python.org/2/library/re.html). For example, the pattern '
                                     'CG..GC will find all occurrences of CG followed by any two bases and then GC.',
                                required=True)

    parserRequired.add_argument('--outFile', '-o',
                                help='Name for the resulting bed file.',
                                type=argparse.FileType('w'),
                                required=True)

    parserOpt = parser.add_argument_group('Optional arguments')

    parserOpt.add_argument("--help", "-h", action="help", help="show this help message and exit")
    parserOpt.add_argument('--version', action='version',
                           version='%(prog)s {}'.format(__version__))
    return parser
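The --searchPattern help above describes plain re semantics. A hedged sketch of how such a pattern might be located in a sequence (the sequence is invented, and the real tool also scans the reverse strand):

import re

seq = 'TTCGAAGCAAGCTTCGTAGC'
# report start, end and matched text for every restriction-site hit
for m in re.finditer('AAGCTT', seq):
    print(m.start(), m.end(), m.group())  # 8 14 AAGCTT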
Example #26
Source File: recensor.py From calebj-cogs with GNU General Public License v3.0 | 5 votes |
async def recensor_help(self, ctx):
    """
    Posts links to online reference material
    """
    await self.bot.say(
        "ReCensor manual: <https://github.com/calebj/calebj-cogs/#recensor>\n"
        "A how-to for Python's regex: <https://docs.python.org/3/howto/regex.html>\n"
        "Full docs on regex syntax: <https://docs.python.org/3/library/re.html#regular-expression-syntax>"
    )
Example #27
Source File: test_reader.py From IRCLogParser with GNU General Public License v3.0 | 5 votes |
def test_linux_input_non_existent_file_slack(self):
    expected_captured_output = util.load_from_disk(self.current_directory + "/data/stdout_captured_linux_input_slack_non_existent_file")
    expected_log_data = util.load_from_disk(self.current_directory + "/data/log_data_for_test_linux_input_non_existent_file_slack")
    capturedOutput = StringIO.StringIO()
    sys.stdout = capturedOutput
    log_data = reader.linux_input_slack(self.current_directory + "/data/slackware_with_missing_files/", "2013-1-1", "2013-1-6")
    output = capturedOutput.getvalue()
    capturedOutput.close()
    sys.stdout = sys.__stdout__
    # See https://docs.python.org/2/library/re.html for more details.
    output = re.sub(r'(?P<begin>.+ )/.+/(?P<constant>IRCLogParser/.+\n)', r'\g<begin>\g<constant>', output)
    self.assertEqual(expected_log_data, log_data)
    self.assertEqual(expected_captured_output, output)
Example #28
Source File: x3270.py From Robot-Framework-Mainframe-3270-Library with MIT License | 5 votes |
def execute_command(self, cmd):
    """Execute an [http://x3270.bgp.nu/wc3270-man.html#Actions|x3270 command].

    Examples:
        | Execute Command | Enter |
        | Execute Command | Home |
        | Execute Command | Tab |
        | Execute Command | PF(1) |
    """
    self.mf.exec_command((str(cmd)).encode("utf-8"))
    time.sleep(self.wait)
Example #29
Source File: x3270.py From Robot-Framework-Mainframe-3270-Library with MIT License | 5 votes |
def set_screenshot_folder(self, path):
    """Set a folder to keep the html files generated by the `Take Screenshot` keyword.

    Example:
        | Set Screenshot Folder | C:\\\Temp\\\Images |
    """
    if os.path.exists(os.path.normpath(os.path.join(self.output_folder, path))):
        self.imgfolder = path
    else:
        logger.error('Given screenshots path "%s" does not exist' % path)
        logger.warn('Screenshots will be saved in "%s"' % self.imgfolder)
Example #30
Source File: x3270.py From Robot-Framework-Mainframe-3270-Library with MIT License | 5 votes |
def page_should_match_regex(self, regex_pattern):
    """Fails if string does not match pattern as a regular expression.

    Regular expression check is implemented using the Python
    [https://docs.python.org/2/library/re.html|re module]. Python's regular
    expression syntax is derived from Perl, and it is thus also very similar
    to the syntax used, for example, in Java, Ruby and .NET.

    Backslash is an escape character in the test data, and possible
    backslashes in the pattern must thus be escaped with another backslash
    (e.g. \\\d\\\w+).
    """
    page_text = self._read_all_screen()
    if not re.findall(regex_pattern, page_text, re.MULTILINE):
        raise Exception('No matches found for "' + regex_pattern + '" pattern')
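A small illustration of the findall plus re.MULTILINE combination used above (the screen text is invented):

import re

page_text = 'USER:   ALICE\nSTATUS: READY\n'
# with re.MULTILINE, ^ and $ also match at line boundaries inside the string
print(re.findall(r'^STATUS: (\w+)$', page_text, re.MULTILINE))  # ['READY']
print(bool(re.findall(r'^MISSING', page_text, re.MULTILINE)))   # False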