Python re.sub() Examples

The following are 30 code examples of re.sub(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re , or try the search function .
Example #1
Source File: tokenizer_udpipe_mod.py    From Turku-neural-parser-pipeline with Apache License 2.0 8 votes vote down vote up
def parse_text(self,txt):
        err=udpipe.ProcessingError()
        tokenized=""
        current_block=[]
        for line in txt.split("\n"):
            if re.match(comment_regex, line.lstrip()): # comment line
                if current_block:
                    tokenized+=self.pipeline.process("\n".join(current_block),err)
                    current_block=[]
                tokenized+=re.sub(comment_regex, "# ", line.lstrip()+"\n")
                continue
            # normal text line, save to current block to be tokenized
            current_block.append(line)
        if current_block:
            tokenized+=self.pipeline.process("\n".join(current_block),err)
        return tokenized 
Example #2
Source File: progress_bar.py    From clikit with MIT License 6 votes vote down vote up
def display(self):
        """
        Output the current progress string.
        """
        if self._io.is_quiet():
            return

        if self._format is None:
            self._set_real_format(
                self._internal_format or self._determine_best_format()
            )

        self._overwrite(
            re.sub(
                r"(?i)%([a-z\-_]+)(?::([^%]+))?%",
                self._overwrite_callback,
                self._format,
            )
        ) 
Example #3
Source File: __init__.py    From ALF with Apache License 2.0 6 votes vote down vote up
def test_quo9(self):
        #right: "<h5 id='id824837' onload='chat(\'id705147\',1,\' width=\\\'2pt\\\'\')'>"
        #                                                        ^  -- esc() --   ^
        #wrong: "<h5 id='id824837' onload='chat(\'id705147\',1,\\\' width=\\\'2pt\'\')'>"
        #                                                      ^  -- esc() --   ^
        w = Grammar("@id 8\n"
                    "root   \"<h5 id='\" id \"' onload='\" esc(func) \"'>\" #rclean\n"
                    "id     'id' [0-9]{6}\n"
                    "func   \"chat('\" id \"',\" [0-9] \",'\" esc(\" width='2pt'\") \"')\"\n"
                    , esc=lambda x:re.sub(r"('|\\)", r"\\\1", x))
        self.assertRegex(w.generate(), r"^<h5 id='id[0-9]{6}' onload='chat\(\\'id[0-9]{6}"
                                       r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$")
        # same grammar with '@id' in chat() instead of 'id'
        w = Grammar("@id 8\n"
                    "root   \"<h5 id='\" id \"' onload='\" esc(func) \"'>\" #rclean\n"
                    "id     'id' [0-9]{6}\n"
                    "func   \"chat('\" @id \"',\" [0-9] \",'\" esc(\" width='2pt'\") \"')\"\n"
                    , esc=lambda x:re.sub(r"('|\\)", r"\\\1", x))
        self.assertRegex(w.generate(), r"^<h5 id='(id[0-9]{6})' onload='chat\(\\'\1"
                                       r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$") 
Example #4
Source File: grammr2_test.py    From ALF with Apache License 2.0 6 votes vote down vote up
def test_quo9(self):
        #right: "<h5 id='id824837' onload='chat(\'id705147\',1,\' width=\\\'2pt\\\'\')'>"
        #                                                        ^  -- esc() --   ^
        #wrong: "<h5 id='id824837' onload='chat(\'id705147\',1,\\\' width=\\\'2pt\'\')'>"
        #                                                      ^  -- esc() --   ^
        w = Grammar("root   \"<h5 id='\" id \"' onload='\" esc(func) \"'>\"\n"
                    "id     'id' /[0-9]{6}/\n"
                    "func   \"chat('\" id \"',\" /[0-9]/ \",'\" esc(\" width='2pt'\") \"')\"\n"
                    , esc=lambda x: re.sub(r"('|\\)", r"\\\1", x))
        self.assertRegex(w.generate(), r"^<h5 id='id[0-9]{6}' onload='chat\(\\'id[0-9]{6}"
                                       r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$")
        # same grammar with '@id' in chat() instead of 'id'
        w = Grammar("root   \"<h5 id='\" id \"' onload='\" esc(func) \"'>\"\n"
                    "id     'id' /[0-9]{6}/\n"
                    "func   \"chat('\" @id \"',\" /[0-9]/ \",'\" esc(\" width='2pt'\") \"')\"\n"
                    , esc=lambda x: re.sub(r"('|\\)", r"\\\1", x))
        self.assertRegex(w.generate(), r"^<h5 id='(id[0-9]{6})' onload='chat\(\\'\1"
                                       r"\\',[0-9],\\' width=\\\\\\'2pt\\\\\\'\\'\)'>$") 
Example #5
Source File: views.py    From MPContribs with MIT License 6 votes vote down vote up
def add_comp_one(compstr):
    """
    Adds stoichiometries of 1 to compstr that don't have them
    :param compstr:  composition as a string
    :return:         compositon with stoichiometries of 1 added
    """
    sample = re.sub(r"([A-Z])", r" \1", compstr).split()
    sample = ["".join(g) for _, g in groupby(str(sample), str.isalpha)]
    samp_new = ""
    for k in range(len(sample)):
        spl_samp = re.sub(r"([A-Z])", r" \1", sample[k]).split()
        for l in range(len(spl_samp)):
            if spl_samp[l][-1].isalpha() and spl_samp[l][-1] != "x":
                spl_samp[l] = spl_samp[l] + "1"
            samp_new += spl_samp[l]

    return samp_new 
Example #6
Source File: pre_submission.py    From MPContribs with MIT License 6 votes vote down vote up
def add_comp_one(compstr):
    """
    Adds stoichiometries of 1 to compstr that don't have them
    :param compstr:  composition as a string
    :return:         compositon with stoichiometries of 1 added
    """
    sample = pd.np.array(re.sub(r"([A-Z])", r" \1", compstr).split()).astype(str)
    sample = ["".join(g) for _, g in groupby(sample, str.isalpha)]
    samp_new = ""
    for k in range(len(sample)):
        spl_samp = re.sub(r"([A-Z])", r" \1", sample[k]).split()
        for l in range(len(spl_samp)):
            if spl_samp[l][-1].isalpha() and spl_samp[l][-1] != "x":
                spl_samp[l] = spl_samp[l] + "1"
            samp_new += spl_samp[l]
    return samp_new 
Example #7
Source File: uninstall_distro.py    From multibootusb with GNU General Public License v2.0 6 votes vote down vote up
def update_sys_cfg_file(uninstall_distro_dir_name):
    """
    Main function to remove uninstall distro specific operations.
    :return:
    """

    sys_cfg_file = os.path.join(config.usb_mount, "multibootusb", "syslinux.cfg")
    if not os.path.exists(sys_cfg_file):
        gen.log("syslinux.cfg file not found for updating changes.")
    else:
        gen.log("Updating syslinux.cfg file...")
        string = open(sys_cfg_file).read()
        string = re.sub(r'#start ' + re.escape(uninstall_distro_dir_name)
                        + '.*?' + '#end '
                        + re.escape(uninstall_distro_dir_name)
                        + r'\s*', '', string, flags=re.DOTALL)
        config_file = open(sys_cfg_file, "w")
        config_file.write(string)
        config_file.close() 
Example #8
Source File: uninstall_distro.py    From multibootusb with GNU General Public License v2.0 6 votes vote down vote up
def update_grub_cfg_file(uninstall_distro_dir_name):
    """
    Main function to remove uninstall distro name from the grub.cfg file.
    :return:
    """

    grub_cfg_file = os.path.join(config.usb_mount, "multibootusb",
                                 "grub", "grub.cfg")
    if not os.path.exists(grub_cfg_file):
        gen.log("grub.cfg file not found for updating changes.")
    else:
        gen.log("Updating grub.cfg file...")
        string = open(grub_cfg_file).read()
        string = re.sub(r'#start ' + re.escape(uninstall_distro_dir_name)
                        + '.*?' + '#end '
                        + re.escape(uninstall_distro_dir_name)
                        + r'\s*', '', string, flags=re.DOTALL)
        config_file = open(grub_cfg_file, "w")
        config_file.write(string)
        config_file.close() 
Example #9
Source File: pagination.py    From grlc with MIT License 6 votes vote down vote up
def buildPaginationHeader(resultCount, resultsPerPage, pageArg, url):
    """Build link header for result pagination"""
    lastPage = resultCount / resultsPerPage

    if pageArg:
        page = int(pageArg)
        next_url = re.sub("page=[0-9]+", "page={}".format(page + 1), url)
        prev_url = re.sub("page=[0-9]+", "page={}".format(page - 1), url)
        first_url = re.sub("page=[0-9]+", "page=1", url)
        last_url = re.sub("page=[0-9]+", "page={}".format(lastPage), url)
    else:
        page = 1
        next_url = url + "?page=2"
        prev_url = ""
        first_url = url + "?page=1"
        last_url = url + "?page={}".format(lastPage)

    if page == 1:
        headerLink = "<{}>; rel=next, <{}>; rel=last".format(next_url, last_url)
    elif page == lastPage:
        headerLink = "<{}>; rel=prev, <{}>; rel=first".format(prev_url, first_url)
    else:
        headerLink = "<{}>; rel=next, <{}>; rel=prev, <{}>; rel=first, <{}>; rel=last".format(next_url, prev_url, first_url, last_url)
    return headerLink 
Example #10
Source File: gquery.py    From grlc with MIT License 6 votes vote down vote up
def paginate_query(query, results_per_page, get_args):
    """Modify the given query so that it can be paginated. The paginated query will 
    split display a maximum of `results_per_page`."""
    page = get_args.get('page', 1)

    glogger.info("Paginating query for page {}, {} results per page".format(page, results_per_page))

    # If contains LIMIT or OFFSET, remove them
    glogger.debug("Original query: " + query)
    no_limit_query = re.sub("((LIMIT|OFFSET)\s+[0-9]+)*", "", query)
    glogger.debug("No limit query: " + no_limit_query)

    # Append LIMIT results_per_page OFFSET (page-1)*results_per_page
    paginated_query = no_limit_query + " LIMIT {} OFFSET {}".format(results_per_page,
                                                                    (int(page) - 1) * results_per_page)
    glogger.debug("Paginated query: " + paginated_query)

    return paginated_query 
Example #11
Source File: mask_db.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def removeLines(mg):
    removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_ncbids.txt'
    #removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_accession_silva.txt'
    srcFilePath = str('/net/metagenomics/projects/PPSmg/data/markerGenes/db/' + mg + '_bact+arch_dnaV.tax')
    dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/markerGenes/db/' + mg + '_bact+arch_dnaV.tax')
    #srcFilePath = str('/net/metagenomics/projects/PPSmg/data/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.tax' )
    #dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.tax' )
    pattern = r'.*ncbid:([0-9]+)$'
    #pattern = r'^([^\-]+)\-.*$'

    removeSet = set(csv.getColumnAsList(removeListFilePath, colNum=0, comment='#'))
    col0 = csv.getColumnAsList(srcFilePath, colNum=0, sep='\t', comment='#')
    col1 = csv.getColumnAsList(srcFilePath, colNum=1, sep='\t', comment='#')
    out = csv.OutFileBuffer(dstFilePath)
    removed = 0
    for col0,col1 in zip(col0,col1):
        if re.sub(pattern, r'\1', col0) not in removeSet:
            out.writeText(str(col0 + '\t' + col1 + '\n'))
        else:
            removed += 1

    out.close()
    print mg, 'removeLines', removed 
Example #12
Source File: mask_db.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def removeSequences(mg):
    removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_ncbids.txt'
    #removeListFilePath = '/net/metagenomics/projects/PPSmg/data/V35/genome_accession_silva.txt'
    srcFilePath = str('/net/metagenomics/projects/PPSmg/data/markerGenes/db/' + mg + '_bact+arch_dnaV.noalign.fna')
    dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/markerGenes/db/' + mg + '_bact+arch_dnaV.noalign.fna')
    #srcFilePath = str('/net/metagenomics/projects/PPSmg/data/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.fna' )
    #dstFilePath = str('/net/metagenomics/projects/PPSmg/data/V35/genomesRemoved/silva/' + mg + '_silva106_ncbitax.bacteria+archaea.fna' )
    pattern = r'.*ncbid:([0-9]+)$'
    #pattern = r'^([^\-]+)\-.*$'

    removeSet = set(csv.getColumnAsList(removeListFilePath, colNum=0, comment='#'))
    seqIdToSeq = fas.fastaFileToDict(srcFilePath)
    out = csv.OutFileBuffer(dstFilePath)
    removed = 0
    for seqId in seqIdToSeq:
        if re.sub(pattern, r'\1', str(seqId)) not in removeSet:
            out.writeText(str('>' + str(seqId) + '\n' + str(seqIdToSeq[seqId]) + '\n'))
        else:
            removed += 1

    out.close()
    print mg, 'removeSequences', removed 
Example #13
Source File: analysis_mg.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def parse(self, line):
        lineArray = line.split()
        if len(lineArray) != 2:
            print '_MothurOutFileParser: wrong line', line
            return
        name = re.sub(r'^([0-9]+_[0-9]+)_[0-9]+_[0-9]+_[pr]+[0-2]$',r'\1', lineArray[0])
        tag = re.sub(r'^[0-9]+_[0-9]+_([0-9]+_[0-9]+_[pr]+[0-2])$',r'\1', lineArray[0])
        placementList = lineArray[1].replace('unclassified;', '').rsplit(';')
        if len(placementList) < 2:
            #print '_MothurOutFileParser: skip line', line
            return

        placement = placementList[-2]
        try:
            clade = int(re.sub('([0-9]+)\(.*', r'\1' , placement))
        except ValueError:
            return
        weight = float(re.sub('[0-9]+\(([0-9\.]+)\)', r'\1' , placement))

        entry = str(str(name) + '\t' + str(clade) + '\t' + str(weight) + '\t' + str(self.source) + '\t' + str(tag))
        if self.outBuffer.isEmpty():
            self.outBuffer.writeText(entry)
        else:
            self.outBuffer.writeText(str('\n' + entry)) 
Example #14
Source File: analysis_mg.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def parse(self, line):
        if line.strip() == '':
            return

        if re.match(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', line):
            scaffoldId = int(re.sub(r'^([0-9]+)_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$',r'\1' ,line))
            contigId = int(re.sub(r'^[0-9]+_([0-9]+)\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$',r'\1' ,line))
            ncbid = int(re.sub(r'^[0-9]+_[0-9]+\t([0-9]+)\t[0-9\.]+\t[^\t]+\t[^\t]+$',r'\1' ,line))
            weight = float(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t([0-9\.]+)\t[^\t]+\t[^\t]+$',r'\1' ,line))
            source = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t([^\t]+)\t[^\t]+$',r'\1' ,line))
            tag = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t([^\t]+)$',r'\1' ,line))

        if ncbid != 1:
            taxPathDict = self.taxonomy.getPathToRoot(ncbid)
            if taxPathDict is not None and taxPathDict.keys() >= 1:
                self.sequences.setCandidateTaxonomyPath(contigId, scaffoldId, taxPathDict, weight, source, tag)
                self.assignedIdList.append(contigId)
            else:
                sys.stderr.write(str('No taxonomic path found for ncbid: ' + str(ncbid))) 
Example #15
Source File: cluster.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def __init__(self, line):
        tokens = line.split(',')
        self._threshold = float(re.sub(r'^([^\t]+)\t[^\t]+\t.*', r'\1', tokens[0]))
        tokens[0] = re.sub(r'^[^\t]+\t[^\t]+\t(.*)', r'\1', tokens[0])
        self.groupIdCount = 0
        self.seqNameToGroupId = dict([])
        self.groupIdToSeqNameSet = dict([])
        for token in tokens:
            names = token.split('\t')
            self.groupIdToSeqNameSet[self.groupIdCount] = set([])
            for name in names:
                #print name
                if re.match(r'^[0-9]+_.*$', name):
                    seqName = re.sub(r'^([0-9]+_[0-9]+)_.*$',r'\1', name)
                    self.seqNameToGroupId[seqName] = self.groupIdCount
                    self.groupIdToSeqNameSet[self.groupIdCount].add(seqName)
            self.groupIdCount += 1 
Example #16
Source File: DataLoader_NER.py    From pytorch_NER_BiLSTM_CNN_CRF with Apache License 2.0 6 votes vote down vote up
def _clean_str(string):
        """
        Tokenization/string cleaning for all datasets except for SST.
        Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
        """
        string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
        string = re.sub(r"\'s", " \'s", string)
        string = re.sub(r"\'ve", " \'ve", string)
        string = re.sub(r"n\'t", " n\'t", string)
        string = re.sub(r"\'re", " \'re", string)
        string = re.sub(r"\'d", " \'d", string)
        string = re.sub(r"\'ll", " \'ll", string)
        string = re.sub(r",", " , ", string)
        string = re.sub(r"!", " ! ", string)
        string = re.sub(r"\(", " \( ", string)
        string = re.sub(r"\)", " \) ", string)
        string = re.sub(r"\?", " \? ", string)
        string = re.sub(r"\s{2,}", " ", string)
        return string.strip().lower() 
Example #17
Source File: data_helpers.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower() 
Example #18
Source File: data_helpers.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    string = re.sub(r"\'s", " \'s", string)
    string = re.sub(r"\'ve", " \'ve", string)
    string = re.sub(r"n\'t", " n\'t", string)
    string = re.sub(r"\'re", " \'re", string)
    string = re.sub(r"\'d", " \'d", string)
    string = re.sub(r"\'ll", " \'ll", string)
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    string = re.sub(r"\(", " \( ", string)
    string = re.sub(r"\)", " \) ", string)
    string = re.sub(r"\?", " \? ", string)
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower() 
Example #19
Source File: lint.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def get_header_guard_dmlc(filename):
    """Get Header Guard Convention for DMLC Projects.

    For headers in include, directly use the path
    For headers in src, use project name plus path

    Examples: with project-name = dmlc
        include/dmlc/timer.h -> DMLC_TIMTER_H_
        src/io/libsvm_parser.h -> DMLC_IO_LIBSVM_PARSER_H_
    """
    fileinfo = cpplint.FileInfo(filename)
    file_path_from_root = fileinfo.RepositoryName()
    inc_list = ['include', 'api', 'wrapper']

    if file_path_from_root.find('src/') != -1 and _HELPER.project_name is not None:
        idx = file_path_from_root.find('src/')
        file_path_from_root = _HELPER.project_name +  file_path_from_root[idx + 3:]
    else:
        for spath in inc_list:
            prefix = spath + os.sep
            if file_path_from_root.startswith(prefix):
                file_path_from_root = re.sub('^' + prefix, '', file_path_from_root)
                break
    return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_' 
Example #20
Source File: symbol_doc.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def _build_doc(func_name,
               desc,
               arg_names,
               arg_types,
               arg_desc,
               key_var_num_args=None,
               ret_type=None):
    """Build docstring for symbolic functions."""
    param_str = _build_param_doc(arg_names, arg_types, arg_desc)
    if key_var_num_args:
        desc += '\nThis function support variable length of positional input.'
    doc_str = ('%s\n\n' +
               '%s\n' +
               'name : string, optional.\n' +
               '    Name of the resulting symbol.\n\n' +
               'Returns\n' +
               '-------\n' +
               'Symbol\n' +
               '    The result symbol.')
    doc_str = doc_str % (desc, param_str)
    extra_doc = "\n" + '\n'.join([x.__doc__ for x in type.__subclasses__(SymbolDoc)
                                  if x.__name__ == '%sDoc' % func_name])
    doc_str += _re.sub(_re.compile("    "), "", extra_doc)
    doc_str = _re.sub('NDArray-or-Symbol', 'Symbol', doc_str)
    return doc_str 
Example #21
Source File: lint.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def get_header_guard_dmlc(filename):
    """Get Header Guard Convention for DMLC Projects.
    For headers in include, directly use the path
    For headers in src, use project name plus path
    Examples: with project-name = dmlc
        include/dmlc/timer.h -> DMLC_TIMTER_H_
        src/io/libsvm_parser.h -> DMLC_IO_LIBSVM_PARSER_H_
    """
    fileinfo = cpplint.FileInfo(filename)
    file_path_from_root = fileinfo.RepositoryName()
    inc_list = ['include', 'api', 'wrapper']

    if file_path_from_root.find('src/') != -1 and _HELPER.project_name is not None:
        idx = file_path_from_root.find('src/')
        file_path_from_root = _HELPER.project_name +  file_path_from_root[idx + 3:]
    else:
        for spath in inc_list:
            prefix = spath + os.sep
            if file_path_from_root.startswith(prefix):
                file_path_from_root = re.sub('^' + prefix, '', file_path_from_root)
                break
    return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_' 
Example #22
Source File: straight_dope_test_utils.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def _override_epochs(notebook):
    """Overrides the number of epochs in the notebook to 1 epoch. Note this operation is idempotent.

    Args:
        notebook : string
            notebook name in folder/notebook format
    """
    notebook_path = os.path.join(*([NOTEBOOKS_DIR] + notebook.split('/'))) + ".ipynb"

    # Read the notebook and set epochs to num_epochs.
    with io.open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = f.read()

    # Set number of epochs to 1.
    modified_notebook = re.sub(EPOCHS_REGEX, 'epochs = 1', notebook)

    # Replace the original notebook with the modified one.
    with io.open(notebook_path, 'w', encoding='utf-8') as f:
        f.write(modified_notebook) 
Example #23
Source File: straight_dope_test_utils.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def _override_relative_paths(notebook):
    """Overrides the relative path for the data and image directories to point
    to the right places. This is required as we run the notebooks in a different
    directory hierarchy more suitable for testing.

    Args:
        notebook : string
            notebook name in folder/notebook format
    """
    notebook_path = os.path.join(*([NOTEBOOKS_DIR] + notebook.split('/'))) + ".ipynb"

    # Read the notebook and set epochs to num_epochs.
    with io.open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = f.read()

    # Update the location for the data directory.
    modified_notebook = re.sub(RELATIVE_PATH_REGEX, NOTEBOOKS_DIR, notebook)

    # Replace the original notebook with the modified one.
    with io.open(notebook_path, 'w', encoding='utf-8') as f:
        f.write(modified_notebook) 
Example #24
Source File: test_supervisor_logging.py    From supervisor-logging with Apache License 2.0 5 votes vote down vote up
def strip_volatile(message):
    """
    Strip volatile parts (PID, datetime) from a logging message.
    """

    volatile = (
        (socket.gethostname(), 'HOST'),
        (r'\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{3}Z', 'DATE'),
    )

    for regexp, replacement in volatile:
        message = re.sub(regexp, replacement, message)

    return message 
Example #25
Source File: application_config.py    From clikit with MIT License 5 votes vote down vote up
def default_display_name(self):  # type: () -> Optional[str]
        if self._name is None:
            return

        return re.sub(r"[\s\-_]+", " ", self._name).title() 
Example #26
Source File: progress_indicator.py    From clikit with MIT License 5 votes vote down vote up
def _display(self):
        if self._io.is_quiet():
            return

        self._overwrite(
            re.sub(
                r"(?i){([a-z\-_]+)(?::([^}]+))?}", self._overwrite_callback, self._fmt
            )
        ) 
Example #27
Source File: paragraph.py    From clikit with MIT License 5 votes vote down vote up
def render(self, io, indentation=0):  # type: (IO, int) -> None
        line_prefix = " " * indentation
        text_width = io.terminal_dimensions.width - 1 - indentation

        text = re.sub(
            r"\n(?!\n)",
            "\n" + line_prefix,
            "\n".join(textwrap.wrap(self._text, text_width)),
        )

        io.write(line_prefix + text.rstrip() + "\n") 
Example #28
Source File: labeled_paragraph.py    From clikit with MIT License 5 votes vote down vote up
def render(self, io, indentation=0):  # type: (IO, int) -> None
        line_prefix = " " * indentation
        visible_label = io.remove_format(self._label)
        style_tag_length = len(self._label) - len(visible_label)

        if self._aligned and self._alignment:
            text_offset = self._alignment.text_offset - indentation
        else:
            text_offset = 0

        text_offset = max(text_offset, len(visible_label) + self._padding)
        text_prefix = " " * text_offset

        # 1 trailing space
        text_width = io.terminal_dimensions.width - 1 - text_offset - indentation
        text = re.sub(
            r"\n(?!\n)",
            "\n" + line_prefix + text_prefix,
            "\n".join(textwrap.wrap(self._text, text_width)),
        )

        # Add the total length of the style tags ("<b>", ...)
        label_width = text_offset + style_tag_length

        io.write(
            "{}{:<{}}{}".format(
                line_prefix, self._label, label_width, text.rstrip()
            ).rstrip()
            + "\n"
        ) 
Example #29
Source File: printing.py    From aegea with Apache License 2.0 5 votes vote down vote up
def strip_ansi_codes(i):
    return re.sub(ansi_pattern, "", i) 
Example #30
Source File: utils.py    From Att-ChemdNER with Apache License 2.0 5 votes vote down vote up
def zero_digits(s):
#{{{
    """
    Replace every digit in a string by a zero.
    """
    return re.sub('\d', '0', s)
#}}}