Python re.match() Examples

The following are 30 code examples of re.match(), collected from open-source
projects. You can vote up the ones you like or vote down the ones you don't
like, and go to the original project or source file by following the links
above each example. You may also want to check out all available functions and
classes of the re module, or try the search function.
Example #1
Source File: tokenizer_udpipe_mod.py From Turku-neural-parser-pipeline with Apache License 2.0 | 8 votes |
def parse_text(self, txt):
    """Tokenize *txt* with the UDPipe pipeline, passing comment lines through.

    Lines matching ``comment_regex`` flush the currently buffered text block
    through the pipeline and are then emitted rewritten with a "# " prefix;
    all other lines are buffered and tokenized together in one batch.
    """
    proc_err = udpipe.ProcessingError()
    out = ""
    pending = []
    for raw in txt.split("\n"):
        if re.match(comment_regex, raw.lstrip()):
            # comment line: flush buffered text first, then emit the comment
            if pending:
                out += self.pipeline.process("\n".join(pending), proc_err)
                pending = []
            out += re.sub(comment_regex, "# ", raw.lstrip() + "\n")
            continue
        # normal text line, saved to the current block to be tokenized
        pending.append(raw)
    if pending:
        # flush whatever is left after the last line
        out += self.pipeline.process("\n".join(pending), proc_err)
    return out
Example #2
Source File: avclass_common.py From BASS with GNU General Public License v2.0 | 6 votes |
# AV engines whose labels carry a '.'-separated suffix that should be dropped.
# (fixed: 'Avast' was listed twice in the original set literal, and the set
# was rebuilt on every call; it is now built once as a module-level frozenset)
_TRUNCATE_AFTER_DOT = frozenset([
    'Norman', 'Avast', 'Avira', 'McAffee-GW-Edition', 'McAffee',
    'Kaspersky', 'ESET-NOD32', 'Fortinet', 'Jiangmin', 'Comodo',
    'GData', 'Sophos', 'TrendMicro-HouseCall', 'TrendMicro',
    'NANO-Antivirus', 'Microsoft'])


def __remove_suffixes(av_name, label):
    '''Remove AV specific suffixes from given label
       Returns updated label'''
    # Truncate after last '.'
    if av_name in _TRUNCATE_AFTER_DOT:
        label = label.rsplit('.', 1)[0]

    # Truncate after last '.'
    # if suffix only contains digits or uppercase (no lowercase) chars
    if av_name == 'AVG':
        tokens = label.rsplit('.', 1)
        if len(tokens) > 1 and re.match("^[A-Z0-9]+$", tokens[1]):
            label = tokens[0]

    # Truncate after last '!'
    if av_name == 'Agnitum':
        label = label.rsplit('!', 1)[0]
    return label
Example #3
Source File: cluster.py From CAMISIM with Apache License 2.0 | 6 votes |
# Parses one line of clustering output.  The first token carries
# "<threshold>\t<..>\t" before the first group; groups are comma-separated and
# members within a group are tab-separated.  For members named like
# "<num>_<num>_...", the "<num>_<num>" prefix becomes the seqName recorded in
# seqNameToGroupId / groupIdToSeqNameSet under a running group id
# (groupIdCount is incremented once per group).
# NOTE(review): this example was collapsed to one line by the page scrape —
# the embedded "#print name" fragment comments out the remainder of the line,
# so this text is not runnable as-is; consult the original file for the real
# indentation.
def __init__(self, line): tokens = line.split(',') self._threshold = float(re.sub(r'^([^\t]+)\t[^\t]+\t.*', r'\1', tokens[0])) tokens[0] = re.sub(r'^[^\t]+\t[^\t]+\t(.*)', r'\1', tokens[0]) self.groupIdCount = 0 self.seqNameToGroupId = dict([]) self.groupIdToSeqNameSet = dict([]) for token in tokens: names = token.split('\t') self.groupIdToSeqNameSet[self.groupIdCount] = set([]) for name in names: #print name if re.match(r'^[0-9]+_.*$', name): seqName = re.sub(r'^([0-9]+_[0-9]+)_.*$',r'\1', name) self.seqNameToGroupId[seqName] = self.groupIdCount self.groupIdToSeqNameSet[self.groupIdCount].add(seqName) self.groupIdCount += 1
Example #4
Source File: analysis_mg.py From CAMISIM with Apache License 2.0 | 6 votes |
def parse(self, line):
    """Parse one assignment line and record the candidate taxonomy path.

    Expected tab-separated line format:
        <scaffoldId>_<contigId> \t <ncbid> \t <weight> \t <source> \t <tag>
    Blank lines and lines that do not match the format are ignored.
    """
    if line.strip() == '':
        return
    if re.match(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', line):
        scaffoldId = int(re.sub(r'^([0-9]+)_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        contigId = int(re.sub(r'^[0-9]+_([0-9]+)\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        ncbid = int(re.sub(r'^[0-9]+_[0-9]+\t([0-9]+)\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        weight = float(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t([0-9\.]+)\t[^\t]+\t[^\t]+$', r'\1', line))
        source = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t([^\t]+)\t[^\t]+$', r'\1', line))
        tag = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t([^\t]+)$', r'\1', line))
        if ncbid != 1:  # ncbid 1 is the taxonomy root - nothing to assign
            taxPathDict = self.taxonomy.getPathToRoot(ncbid)
            # fixed: the original tested `taxPathDict.keys() >= 1`, which
            # compares a keys() view with an int (a TypeError on Python 3)
            # instead of testing that the path is non-empty
            if taxPathDict is not None and len(taxPathDict) >= 1:
                self.sequences.setCandidateTaxonomyPath(contigId, scaffoldId, taxPathDict, weight, source, tag)
                self.assignedIdList.append(contigId)
            else:
                sys.stderr.write(str('No taxonomic path found for ncbid: ' + str(ncbid)))
Example #5
Source File: output_mod.py From Turku-neural-parser-pipeline with Apache License 2.0 | 6 votes |
def launch(args, q_in, q_out):
    """Drain parsed batches from q_in, print them, and report throughput.

    Terminates when the sentinel job id "FINAL" arrives.  Statistics go to
    stderr at most once every ten seconds.
    """
    started = time.time()
    trees_done = 0
    tokens_done = 0
    report_after = started + 10.0  # report every 10sec at most
    while True:
        job_id, batch = q_in.get()
        if job_id == "FINAL":
            print("Output exiting", file=sys.stderr, flush=True)
            return
        batch_lines = batch.split("\n")
        # a tree starts at its first token line ("1\t...")
        trees_done += sum(1 for ln in batch_lines if ln.startswith("1\t"))
        tokens_done += sum(1 for ln in batch_lines if re.match(token_regex, ln))
        if trees_done > 0 and time.time() > report_after:
            elapsed = time.time() - started
            print("Runtime: {}:{} [m:s] Parsed: {} [trees], {} [tokens] Speed: {} [trees/sec] {} [sec/tree] {} [tokens/sec]".format(
                int(elapsed) // 60, int(elapsed) % 60, trees_done, tokens_done,
                trees_done / elapsed, elapsed / trees_done,
                tokens_done / elapsed), file=sys.stderr, flush=True)
            report_after = time.time() + 10
        print(batch, end="", flush=True)
Example #6
Source File: abstract_option.py From clikit with MIT License | 6 votes |
def _validate_short_name(
    self, short_name, flags
):  # type: (Optional[str], int) -> None
    """Validate a short option name (must be exactly one ASCII letter).

    A missing name is acceptable unless PREFER_SHORT_NAME is set in *flags*.
    """
    if short_name is None:
        # Absent is fine unless the flags insist on a short form.
        if flags & self.PREFER_SHORT_NAME:
            raise ValueError(
                "The short option name must be given if the option flag PREFER_SHORT_NAME is selected."
            )
        return
    if not isinstance(short_name, basestring):
        raise ValueError(
            "The short option name must be a string. Got: {}".format(type(short_name))
        )
    if not short_name:
        raise ValueError("The short option name must not be empty.")
    if not re.match(r"^[a-zA-Z]$", short_name):
        raise ValueError("The short option name must be exactly one letter.")
Example #7
Source File: qaData.py From QA with GNU General Public License v3.0 | 6 votes |
def sentenceToIndex(sentence, word2idx, maxLen):
    """Segment a sentence with jieba and map each token to its embedding index.

    :param sentence: the sentence to encode
    :param word2idx: mapping from token to embedding-list index
    :param maxLen: maximum sentence length; output is padded/truncated to it
    :return: list of exactly maxLen word-vector indices
    """
    unknown = word2idx.get("UNKNOWN", 0)
    num = word2idx.get("NUM", len(word2idx))
    index = [unknown] * maxLen  # pre-padded with the UNKNOWN index
    i = 0
    for word in jieba.cut(sentence):
        if word in word2idx:
            index[i] = word2idx[word]
        else:
            # fixed: regex is now a raw string - "\d" is an invalid escape
            # sequence in an ordinary string literal (SyntaxWarning on
            # modern Python)
            if re.match(r"\d+", word):
                index[i] = num
            else:
                index[i] = unknown
        if i >= maxLen - 1:
            break
        i += 1
    return index
Example #8
Source File: rp.py From pywarp with Apache License 2.0 | 6 votes |
def verify(self, authenticator_data, client_data_json, signature, user_handle, raw_id, email):
    """Check an assertion produced by navigator.credentials.get() against the stored credential."""
    email = email.decode()
    if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
        raise Exception("Invalid email address")
    cd_hash = hashlib.sha256(client_data_json).digest()
    cd = json.loads(client_data_json)
    assert cd["type"] == "webauthn.get"
    # The challenge echoed by the client must be the one issued for authentication.
    expect_challenge = self.storage_backend.get_challenge_for_user(email=email, type="authentication")
    assert b64url_decode(cd["challenge"]) == expect_challenge
    print("expect RP ID:", self.rp_id)
    if self.rp_id:
        # Verify that the value of C.origin matches the Relying Party's origin.
        assert "https://" + self.rp_id == cd["origin"]
    # Verify that the RP ID hash in authData is indeed the SHA-256 hash of the RP ID expected by the RP.
    authenticator_data = AuthenticatorData(authenticator_data)
    assert authenticator_data.user_present
    credential = self.storage_backend.get_credential_by_email(email)
    credential.verify(signature, authenticator_data.raw_auth_data + cd_hash)
    # signature counter check
    return {"verified": True}
Example #9
Source File: _cpreqbody.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def process(self):
    """Execute the best-match processor for the given media type.

    Lookup order: exact "type/subtype" key first, then the bare top-level
    type; when neither is registered, fall back to the default processor.
    """
    ctype = self.content_type.value
    try:
        handler = self.processors[ctype]
    except KeyError:
        try:
            handler = self.processors[ctype.split('/', 1)[0]]
        except KeyError:
            handler = None
    if handler is None:
        self.default_proc()
    else:
        handler(self)
Example #10
Source File: encryption.py From everyclass-server with Mozilla Public License 2.0 | 6 votes |
def decrypt(data: str, encryption_key: str = None, resource_type: str = None):
    """Decrypt a resource identifier.

    :param data: the encrypted string
    :param encryption_key: optional key (falls back to the configured one)
    :param resource_type: expected resource type (student/teacher/klass/room)
    :return: tuple of (resource type, resource id)
    """
    key = encryption_key or get_config().RESOURCE_IDENTIFIER_ENCRYPTION_KEY
    plain = _aes_decrypt(key, data)
    # Regex validation guards against garbage produced by a wrong key.
    match = re.match(r'^(student|teacher|klass|room);([\s\S]+)$', plain)
    if match is None:
        raise ValueError('Decrypted data is invalid: %s' % plain)
    if resource_type and match.group(1) != resource_type:
        raise ValueError('Resource type not correspond')
    return match.group(1), match.group(2)
Example #11
Source File: update_cfg_file.py From multibootusb with GNU General Public License v2.0 | 5 votes |
def extract_distroinfo_from_fname(self, which_dir, regex, distro_group, version_group):
    """Scan a directory for a filename matching *regex* (case-insensitive).

    For the first matching filename, returns a tuple of
    (distro name, [version number components]) taken from the given match
    groups; returns None when no filename matches.
    """
    pattern = re.compile(regex, re.I)
    for entry in os.listdir(self.fullpath(which_dir)):
        match = pattern.match(entry)
        if not match:
            continue
        version = [int(part) for part in match.group(version_group).split('.')]
        return (match.group(distro_group), version)
    return None
Example #12
Source File: predicate.py From python-clean-architecture with MIT License | 5 votes |
def any(self, cond: t.Union[Predicate, t.Iterable]) -> Predicate:
    """Build a predicate that holds when any element of a list satisfies cond.

    When *cond* is callable (a Predicate), at least one element must match:

    >>> var('f1').any(var('f2').exists())

    matches ``{'f1': [{'f2': 1}, {'f2': 0}]}``.

    When *cond* is an iterable, at least one of its members must be contained
    in the tested value:

    >>> var('f1').any([1, 2, 3])

    matches ``{'f1': [1, 2]}`` and ``{'f1': [3, 4, 5]}``.

    :param cond: a Predicate at least one element has to match, or a list of
        which at least one element has to be contained in the tested element.
    """
    if callable(cond):
        def _check(value):
            return is_iterable(value) and any(cond(item) for item in value)
    else:
        def _check(value):
            return is_iterable(value) and any(item in cond for item in value)
    return self._build_predicate(
        lambda lhs, value: _check(lhs),
        Operation.ANY,
        (self._path, freeze(cond))
    )
Example #13
Source File: predicate.py From python-clean-architecture with MIT License | 5 votes |
def search(self, regex: str) -> Predicate:
    """Build a predicate testing the value with re.search
    (a matching substring anywhere in the string is enough).

    >>> var('f1').search(r'^\\w+$')

    :param regex: The regular expression to use for matching
    """
    return self._build_predicate(
        lambda lhs, value: bool(re.search(regex, lhs)),
        Operation.SEARCH,
        (self._path, regex),
    )
Example #14
Source File: predicate.py From python-clean-architecture with MIT License | 5 votes |
def matches(self, regex: str) -> Predicate:
    """Build a predicate testing the value with re.match
    (the match is anchored at the beginning of the string).

    >>> var('f1').matches(r'^\\w+$')

    :param regex: The regular expression to use for matching
    """
    return self._build_predicate(
        lambda lhs, value: bool(re.match(regex, lhs)),
        Operation.MATCHES,
        (self._path, regex),
    )
Example #15
Source File: predicate.py From python-clean-architecture with MIT License | 5 votes |
def all(self, cond: t.Union[Predicate, t.Iterable]) -> Predicate:
    """Build a predicate that holds when every element of a list satisfies cond.

    When *cond* is callable (a Predicate), every element must match it:

    >>> var('f1').all(var('f2').exists())

    matches ``{'f1': [{'f2': 1}, {'f2': 1}]}``.

    When *cond* is an iterable, every member of *cond* must be contained in
    the tested value:

    >>> var('f1').all([1, 2, 3])

    matches ``{'f1': [1, 2, 3, 4, 5]}``.

    :param cond: a Predicate all elements have to match, or a list which has
        to be contained in the tested element.
    """
    if callable(cond):
        def _check(value):
            return is_iterable(value) and all(cond(item) for item in value)
    else:
        def _check(value):
            return is_iterable(value) and all(item in value for item in cond)
    return self._build_predicate(
        lambda lhs, value: _check(lhs),
        Operation.ALL,
        (self._path, freeze(cond))
    )

# noinspection PyProtectedMember
Example #16
Source File: rp.py From pywarp with Apache License 2.0 | 5 votes |
def register(self, client_data_json, attestation_object, email):
    """Validate a navigator.credentials.create() attestation and persist the credential."""
    attestation_response = cbor2.loads(attestation_object)
    email = email.decode()
    if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
        raise Exception("Invalid email address")
    cd_hash = hashlib.sha256(client_data_json).digest()
    cd = json.loads(client_data_json)
    assert cd["type"] == "webauthn.create"
    print("client data", cd)
    # The client must echo back the registration challenge we issued.
    expect_challenge = self.storage_backend.get_challenge_for_user(email=email, type="registration")
    assert b64url_decode(cd["challenge"]) == expect_challenge
    print("expect RP ID:", self.rp_id)
    if self.rp_id:
        # Verify that the value of C.origin matches the Relying Party's origin.
        assert "https://" + self.rp_id == cd["origin"]
    # Verify that the RP ID hash in authData is indeed the SHA-256 hash of the RP ID expected by the RP.
    authenticator_data = AuthenticatorData(attestation_response["authData"])
    assert authenticator_data.user_present
    # If user verification is required for this registration,
    # verify that the User Verified bit of the flags in authData is set.
    assert attestation_response["fmt"] == "fido-u2f"
    att_stmt = FIDOU2FAttestationStatement(attestation_response['attStmt'])
    attestation = att_stmt.validate(authenticator_data,
                                    rp_id_hash=authenticator_data.rp_id_hash,
                                    client_data_hash=cd_hash)
    credential = attestation.credential
    # TODO: ascertain user identity here
    self.storage_backend.save_credential_for_user(email=email, credential=credential)
    return {"registered": True}

# https://www.w3.org/TR/webauthn/#verifying-assertion
Example #17
Source File: helper.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def setup_client():
    """Set up the WebCase classes to match the server's socket settings."""
    webtest.WebCase.PORT = cherrypy.server.socket_port
    webtest.WebCase.HOST = cherrypy.server.socket_host
    if cherrypy.server.ssl_certificate:
        # An SSL certificate implies the test client must speak HTTPS.
        CPWebCase.scheme = 'https'

# --------------------------- Spawning helpers --------------------------- #
Example #18
Source File: helper.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def assertErrorPage(self, status, message=None, pattern=''):
    """Compare the response body with a built in error page.

    The function will optionally look for the regexp pattern,
    within the exception embedded in the error page.
    """
    # Build the expected page; it will never contain a traceback.
    page = cherrypy._cperror.get_error_page(status, message=message)

    def esc(text):
        return re.escape(ntob(text))

    # First, test the response body without checking the traceback:
    # replace the empty traceback <pre> in the expected page with a
    # match-all group (.*) so the actual traceback text can be captured.
    epage = re.escape(page)
    epage = epage.replace(
        esc('<pre id="traceback"></pre>'),
        esc('<pre id="traceback">') + b'(.*)' + esc('</pre>'))
    body_match = re.match(epage, self.body, re.DOTALL)
    if not body_match:
        self._handlewebError(
            'Error page does not match; expected:\n' + page)
        return

    # Now test the pattern against the captured traceback.
    if pattern is None:
        # Special-case None to mean that there should be *no* traceback.
        if body_match and body_match.group(1):
            self._handlewebError('Error page contains traceback')
    elif (body_match is None) or (
            not re.search(ntob(re.escape(pattern), self.encoding),
                          body_match.group(1))):
        msg = 'Error page does not contain %s in traceback'
        self._handlewebError(msg % repr(pattern))
Example #19
Source File: fasta.py From CAMISIM with Apache License 2.0 | 5 votes |
# Reads a fasta file and returns a dict mapping the FULL header line (minus
# the leading '>') to its concatenated sequence; unlike typical fasta parsing,
# the name is NOT truncated at the first space.
# NOTE(review): Python 2 code (print statement); noNewLine() is defined
# elsewhere in the module.
# NOTE(review): this example was collapsed to one line by the page scrape;
# consult the original file for the real indentation.
def fastaFileToDictWholeNames(filePath): """ Reads a fasta file and returns mapping: seqName -> sequence the whole sequence name is used as seqName!!! (even if it contains space) """ seqIdToSeq = {} f = None try: f = open(os.path.normpath(filePath),'r') except Exception: print "Cannot open file:", filePath raise else: name = '' seq = '' for line in f: line = noNewLine(line) if re.match('>', line): if seq != '': assert name != '' seqIdToSeq[name] = seq seq = '' name = line.replace('>','') else: seq += line if seq != '': assert name != '' seqIdToSeq[name] = seq finally: if f is not None: f.close() return seqIdToSeq
Example #20
Source File: cptools.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def referer(pattern, accept=True, accept_missing=False, error=403,
            message='Forbidden Referer header.', debug=False):
    """Raise HTTPError if Referer header does/does not match the given pattern.

    pattern
        A regular expression pattern to test against the Referer.

    accept
        If True, the Referer must match the pattern; if False,
        the Referer must NOT match the pattern.

    accept_missing
        If True, permit requests with no Referer header.

    error
        The HTTP error code to return to the client on failure.

    message
        A string to include in the response body on failure.
    """
    try:
        referer_value = cherrypy.serving.request.headers['Referer']
        matched = bool(re.match(pattern, referer_value))
        if debug:
            cherrypy.log('Referer %r matches %r' % (referer_value, pattern),
                         'TOOLS.REFERER')
        if accept == matched:
            # The header agrees with the policy - let the request through.
            return
    except KeyError:
        # No Referer header at all.
        if debug:
            cherrypy.log('No Referer header', 'TOOLS.REFERER')
        if accept_missing:
            return
    raise cherrypy.HTTPError(error, message)
Example #21
Source File: sequences.py From CAMISIM with Apache License 2.0 | 5 votes |
def replaceIdsWithNames(outputFileContigSubPattern, nameToIDsFile, targetFile, outFile):
    """
    @deprecated: NOT IMPLEMENTED YET!!!
    Replace ids with names.

    @param nameToIDsFile: file that contains lines: contigName tab contigID
    @param targetFile: file that contain in the first column scaffoldID_contigID
        which will be replaced by its name
    @param outFile: file that contain the first column in the form
        scaffoldID_contigID with the name (that can be modified by substitution
        defined in the config file, according to outputFileContigSubPattern)
    """
    # fixed: the original read `idToName = dir([])`, an obvious typo for an
    # empty dict (dir() returns a list of attribute names)
    idToName = dict([])
    assert False, 'NOT IMPLEMENTED YET'
    # Draft implementation retained from the original author:
    #try:
    #    f = open(os.path.normpath(nameToIDsFile), 'r')
    #    for line in f:
    #        if re.match('^#', line):
    #            continue
    #        name = re.sub(outputFileContigSubPattern, r'\1' , noNewLine(re.sub(r'^([^ \t]+)\t[0-9]+$',r'\1', line)))
    #        id = int(noNewLine(re.sub(r'^[^ \t]+\t([0-9]+)$',r'\1', line)))
    #        idToName[id] = name
    #except Exception:
    #    print "Cannot create a file or write to it:", outFile
    #    raise
    #finally:
    #    f.close()
    #now: go through the targetFile and for each line do:
    # extract contigID and the rest of the line ^[0-9]+_[0-9]+([^0-9].*)$
    # write name + rest of the line + \n to the outFile !!!!!!!!!!

#compare two sequences according to their length
#def seqLenCmp(seq1, seq2):
#    return seq1.seqBp - seq2.seqBp
Example #22
Source File: _cpreqbody.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 5 votes |
def process_multipart(entity):
    """Read all multipart parts into entity.parts."""
    boundary = ''
    if 'boundary' in entity.content_type.params:
        # http://tools.ietf.org/html/rfc2046#section-5.1.1
        # "The grammar for parameters on the Content-type field is such that it
        # is often necessary to enclose the boundary parameter values in quotes
        # on the Content-type line"
        boundary = entity.content_type.params['boundary'].strip('"')

    # Boundary must be printable ASCII ending in a non-space character.
    if not re.match('^[ -~]{0,200}[!-~]$', boundary):
        raise ValueError('Invalid boundary in multipart form: %r' % (boundary,))
    delimiter = ('--' + boundary).encode('ascii')

    # Skip the preamble: read until the first boundary marker (or EOF).
    while True:
        line = entity.readline()
        if not line:
            return
        if line.strip() == delimiter:
            break

    # Read parts until one reports the enclosing stream is exhausted.
    while True:
        part = entity.part_class.from_fp(entity.fp, delimiter)
        entity.parts.append(part)
        part.process()
        if part.fp.done:
            break
Example #23
Source File: pps.py From CAMISIM with Apache License 2.0 | 5 votes |
# Rewrites a PPS output file (<inputIdsFastaFile>.PP.out), in which sequences
# are identified by "<scaffoldId>_<contigId>" ids, into a ".n" copy with each
# id replaced by the sequence's real name (looked up via
# sequences.getSequence()).  Lines that do not match the id pattern are copied
# through unchanged.
# NOTE(review): Python 2 code (print statements); `id` shadows the builtin.
# NOTE(review): this example was collapsed to one line by the page scrape;
# consult the original file for the real indentation.
def toRealNames(config, sequences): """ Transforms a PPS file fileName.fas.PP.out that names sequences according to their ids to their real names. """ outIdsPPSFile = str(config.get('inputIdsFastaFile') + '.PP.out') outNamesPPSFile = outIdsPPSFile + '.n' #os.path.normpath print outNamesPPSFile try: fr = open(os.path.normpath(outIdsPPSFile),'r') fw = open(os.path.normpath(outNamesPPSFile),'w') except Exception: print "Cannot open one of the files:", outIdsPPSFile, "or", outNamesPPSFile raise else: for line in fr: if re.match(r'^[0-9]+_[0-9]+[^0-9].*$', line): id = re.sub(r'^[0-9]+_([0-9]+)[^0-9].*$',r'\1' , line) rest = re.sub(r'^[0-9]+_[0-9]+([^0-9].*)$',r'\1' , line) seq = sequences.getSequence(int(id)) fw.write(seq.name + rest) # seq.scaffold.name else: fw.write(line) finally: fr.close() fw.close()
Example #24
Source File: semester.py From everyclass-server with Mozilla Public License 2.0 | 5 votes |
def __init__(self, para):
    """Build a semester from a tuple (2016, 2017, 2) or a string "2016-2017-2"."""
    if isinstance(para, str) and re.match(r'\d{4}-\d{4}-\d', para):
        # Semester("2016-2017-2")
        self.year1 = int(para[0:4])
        self.year2 = int(para[5:9])
        self.sem = int(para[10])
    elif isinstance(para, str) and re.match(r'\d{2}-\d{2}-\d', para):
        # Semester("16-17-2") - two-digit years are relative to 2000
        self.year1 = int(para[0:2]) + 2000
        self.year2 = int(para[3:5]) + 2000
        self.sem = int(para[6])
    elif isinstance(para, tuple):
        # Semester((2016, 2017, 2))
        self.year1 = int(para[0])
        self.year2 = int(para[1])
        self.sem = int(para[2])
    else:
        # Unrecognized input falls back to a fixed default semester.
        self.year1 = 2020
        self.year2 = 2021
        self.sem = 1
Example #25
Source File: twitter-export-image-fill.py From twitter-export-image-fill with The Unlicense | 5 votes |
# Decides how to download one media attachment of a tweet: native video,
# animated GIF (transcoded and served as an .mp4 video), or a plain image.
# Builds the download URL (":orig" is appended to request the original/best
# size) and the local target filename; returns (is_video, url, local_filename).
# NOTE(review): `id` shadows the builtin; and in this collapsed one-line form
# it is unclear whether "url = url + ':orig'" belongs only to the image branch
# or applies to all branches - the scrape dropped the original indentation;
# confirm against the upstream file.
def determine_image_or_video(medium, year_str, month_str, date, tweet, tweet_media_count): # Video if '/video/' in medium['expanded_url']: is_video = True separator = '-video' url = medium['expanded_url'] extension = '.mp4' # Animated GIF transcoded into a video elif 'tweet_video_thumb' in medium['media_url']: is_video = True separator = '-gif-video' id = re.match(r'(.*)tweet_video_thumb/(.*)\.', medium['media_url']).group(2) url = "https://video.twimg.com/tweet_video/%s.mp4" % id extension = os.path.splitext(url)[1] # Regular non-animated image else: is_video = False separator = '' url = medium['media_url_https'] extension = os.path.splitext(url)[1] # Download the original/best image size, rather than the default one url = url + ':orig' local_filename = 'data/js/tweets/%s_%s_media/%s-%s%s-%s%s%s' % \ (year_str, month_str, date, tweet['id'], separator, 'rt-' if is_retweet(tweet) else '', tweet_media_count, extension) return is_video, url, local_filename
Example #26
Source File: WinDBGTrace.py From ALF with Apache License 2.0 | 5 votes |
def get_hung_thread():
    """Return the index of the thread with the longest run time.

    Falls back to 0 when no thread line parses.
    """
    hung_idx = 0
    longest = 0
    for thread in get_thread_list():
        # NOTE(review): the pattern admits hex digits for the thread index,
        # but int() below parses base 10 and would raise on 'a'-'f' - confirm
        # whether WinDBG thread ordinals here are always decimal.
        header = re.match(r"\s+([0-9a-f]+):[0-9a-f]+\s+[0-9]+\sdays\s", thread)
        if not header:
            continue
        run_time = get_thread_run_time(thread)
        if run_time > longest:
            hung_idx = int(header.group(1))
            longest = run_time
    return hung_idx
Example #27
Source File: __init__.py From ALF with Apache License 2.0 | 5 votes |
# Parses a regex production from a grammar definition: character classes
# (with '-' ranges expanded and '^' negation against REGEX_ALPHABET, '.'
# meaning the whole alphabet) followed by an optional repeat modifier '*',
# '+', or '{m}' / '{m,n}'; unbounded repeats are capped at 5.  Each piece is
# registered on a fresh grammar symbol via self._grmr.new_symbol() /
# sym.add_regex(), and (symbol, remaining unparsed input) is returned.  The
# final assert demands the regex ends exactly at a stop character (or at the
# end of the input).
# NOTE(review): `got` is advanced inside the finditer loop, but the iterator
# was created once over refs[got:] with got == 0 (i.e. over `refs` itself);
# the `match.start(0) != got` guard is what enforces contiguous matches.
# NOTE(review): collapsed one-line form; the original indentation was lost in
# the page scrape.
def _parse_regex(self, refs, line_no, stopchars=""): got = 0 sym = self._grmr.new_symbol("[regex]", line_no) sym.define_regex(line_no) for match in RE_REGEX.finditer(refs[got:]): if match.start(0) != got: break count = match.group("mod") got = match.end(0) if match.group("outer") == ".": cls = REGEX_ALPHABET else: cls = match.group("class") match = RE_REGEX_RANGE.search(cls) while match: start, _, end = match.group(0) cls = cls[:match.start(0)] + "".join(chr(c) for c in range(ord(start), ord(end)+1)) + cls[match.end(0):] match = RE_REGEX_RANGE.search(cls) if cls.startswith("^"): cls = "".join(set(REGEX_ALPHABET) - set(cls[1:])) if not count: count = (1, 1) elif count == "*": count = (0, 5) elif count == "+": count = (1, 5) else: assert count.startswith("{") and count.endswith("}") count = count[1:-1] try: count = int(count) count = (count, count) except ValueError: count = [int(c) for c in count.split(",")] sym.add_regex(cls, count[0], count[1], line_no) stopchars = " %s" % stopchars assert got == len(refs) or refs[got] in stopchars, "invalid end for regex: ('%s') (%d)" % (refs[got], line_no) return sym, refs[got:].lstrip()
Example #28
Source File: __init__.py From ALF with Apache License 2.0 | 5 votes |
def _parse_cfg(self, cfgstr):
    """Apply "key=value" configuration pairs from a cfg directive to the grammar."""
    directive = RE_CFG.match(cfgstr)
    if not directive:
        return
    pairs = [_f for _f in ((part.strip() for part in item.split("="))
                           for item in directive.group(1).split(",")) if _f]
    for key, val in pairs:
        if key in ("star-depth", "max-size", "max-depth"):
            # Grammar attributes use underscores where the cfg syntax uses dashes.
            setattr(self._grmr, key.replace("-", "_"), int(val))
        else:
            raise RuntimeError("Unknown cfg item: %s" % key)
Example #29
Source File: pps.py From CAMISIM with Apache License 2.0 | 5 votes |
# Reads the PPS output file and, for each "<scaffoldId>_<contigId> ... <ncbid>"
# line, assigns the taxonomy path that PPS suggests.  With
# overwriteAllPlacements=True every previous placement is overridden;
# otherwise a sequence is only overridden when PPS suggests a more specific
# path than the current one.
# NOTE(review): Python 2 code (print statements); common.noNewLine() is
# defined elsewhere.
# NOTE(review): `taxPathDictPPS.keys() >= 1` and
# `taxPathDictPPS.keys() > taxPathDictCurrent.keys()` compare a keys() result
# with an int / another keys() result; the intent looks like len(...)
# comparisons - under Python 2 the first test is always True.  Confirm
# upstream before changing.
def readPPSOutput(sequences, taxonomy, inputFastaIdsPPSFile, overwriteAllPlacements=False): """ Reads the output file of PPS and for each sequence decides: if overwriteAllPlacements=True is, then the sequence is placed according to the PPS file regardless of its previous placement if overwriteAllPlacements=False then if a sequence is placed to a less specific rank, than PPS suggests then the sequence is placed according to the PPS file """ infile = str(inputFastaIdsPPSFile + '.out') try: f = open(os.path.normpath(infile),'r') except Exception: print "Cannot open file:", infile raise else: #i = 0 for line in f: line = common.noNewLine(line) if re.match(r'^[0-9]+_[0-9]+.*[^0-9]+[0-9]+[^0-9]*$', line): scaffoldId = int(re.sub(r'^([0-9]+)_[0-9]+.*[^0-9]+[0-9]+[^0-9]*$',r'\1' ,line)) contigId = int(re.sub(r'^[0-9]+_([0-9]+).*[^0-9]+[0-9]+[^0-9]*$',r'\1' ,line)) ncbid = int(re.sub(r'^[0-9]+_[0-9]+.*[^0-9]+([0-9]+)[^0-9]*$',r'\1' ,line)) weight = None # the weight is not yet defined !!! if ncbid != 1: #print line, ":", scaffoldId, contigId, ncbid taxPathDictPPS = taxonomy.getPathToRoot(ncbid) if taxPathDictPPS.keys() >= 1: taxPathDictCurrent = sequences.getSequence(contigId).getTaxonomyPath() if taxPathDictCurrent == None: sequences.setTaxonomyPath(contigId, scaffoldId, taxPathDictPPS, weight)#weight = None !!! #i += 1 else: if ((overwriteAllPlacements) or (taxPathDictPPS.keys() > taxPathDictCurrent.keys())): sequences.setTaxonomyPathOverride(contigId, scaffoldId, taxPathDictPPS, weight)#weight = None !!! #i += 1 #print "placed seq by PPS:", i finally: f.close()
Example #30
Source File: wsgi.py From MPContribs with MIT License | 5 votes |
def immutable_file_test(path, url):
    """Return a truthy match when *url* looks like an immutable asset.

    Truthy for URLs carrying a 20-hex-digit content hash before the final
    extension, or for named .jpg files; used as an immutable-file callback
    (presumably for WhiteNoise - confirm against the app setup).
    """
    # Match filename with 20 hex digits before the extension.
    hashed = re.match(r"^.+\.[0-9a-f]{20}\..+$", url)
    if hashed:
        return hashed
    # Otherwise accept named .jpg assets.
    return re.match(r"^.+[0-9a-zA-Z_]{3,31}\.jpg$", url)