Python re.match() Examples
The following are 30 code examples of re.match().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example.
You may also want to check out all available functions/classes of the module re, or try the search function.

Example #1
Source File: tokenizer_udpipe_mod.py From Turku-neural-parser-pipeline with Apache License 2.0 | 8 votes |
def parse_text(self, txt):
    """Tokenize text block by block while passing comment lines through.

    Consecutive non-comment lines are buffered and handed to
    ``self.pipeline.process`` as a single newline-joined block. A line whose
    left-stripped form matches ``comment_regex`` first flushes the buffer and
    is then copied to the output with every ``comment_regex`` match replaced
    by ``"# "``.

    :param txt: input text, split on newline characters.
    :return: the concatenated pipeline outputs and rewritten comment lines.
    """
    error = udpipe.ProcessingError()
    output = ""
    pending = []

    def flush(result):
        """Append the processed pending block (if any) and empty the buffer."""
        if pending:
            result += self.pipeline.process("\n".join(pending), error)
            del pending[:]
        return result

    for raw_line in txt.split("\n"):
        stripped = raw_line.lstrip()
        if not re.match(comment_regex, stripped):
            # Ordinary text line: keep it for the next pipeline call.
            pending.append(raw_line)
            continue
        # Comment line: emit whatever text preceded it, then the comment.
        output = flush(output)
        output += re.sub(comment_regex, "# ", stripped + "\n")
    return flush(output)
Example #2
Source File: abstract_option.py From clikit with MIT License | 6 votes |
def _validate_short_name(self, short_name, flags):
    # type: (Optional[str], int) -> None
    """Validate the short name of an option.

    :param short_name: the short option name, or None when not given.
    :param flags: option flags; checked against ``self.PREFER_SHORT_NAME``.
    :raises ValueError: if the name is missing while PREFER_SHORT_NAME is
        set, is not a string, is empty, or is not exactly one ASCII letter.
    """
    if short_name is None:
        if flags & self.PREFER_SHORT_NAME:
            raise ValueError(
                "The short option name must be given if the option flag PREFER_SHORT_NAME is selected."
            )
        return

    if not isinstance(short_name, basestring):
        raise ValueError(
            "The short option name must be a string. Got: {}".format(
                type(short_name)
            )
        )

    if not short_name:
        raise ValueError("The short option name must not be empty.")

    # Anchor with \Z rather than $: "$" also matches just before a trailing
    # newline, which would let a name such as "a\n" slip through.
    if not re.match(r"^[a-zA-Z]\Z", short_name):
        raise ValueError("The short option name must be exactly one letter.")
Example #3
Source File: avclass_common.py From BASS with GNU General Public License v2.0 | 6 votes |
def __remove_suffixes(av_name, label):
    """Strip the vendor-specific suffix from an AV label.

    :param av_name: name of the AV engine that produced the label.
    :param label: the label to clean up.
    :return: the label with the vendor's suffix removed (unchanged when no
        rule applies to ``av_name``).
    """
    dot_suffix_vendors = {
        'Norman', 'Avast', 'Avira', 'McAffee-GW-Edition', 'McAffee',
        'Kaspersky', 'ESET-NOD32', 'Fortinet', 'Jiangmin', 'Comodo',
        'GData', 'Sophos', 'TrendMicro-HouseCall', 'TrendMicro',
        'NANO-Antivirus', 'Microsoft',
    }

    # These vendors: drop everything after the last '.'
    if av_name in dot_suffix_vendors:
        head, dot, _ = label.rpartition('.')
        if dot:
            label = head

    # AVG: drop the part after the last '.' only when it consists solely of
    # digits and uppercase characters (no lowercase).
    if av_name == 'AVG':
        head, dot, tail = label.rpartition('.')
        if dot and re.match("^[A-Z0-9]+$", tail):
            label = head

    # Agnitum: drop everything after the last '!'
    if av_name == 'Agnitum':
        head, bang, _ = label.rpartition('!')
        if bang:
            label = head

    return label
Example #4
Source File: encryption.py From everyclass-server with Mozilla Public License 2.0 | 6 votes |
def decrypt(data: str, encryption_key: str = None, resource_type: str = None):
    """
    Decrypt a resource identifier.

    :param data: the encrypted string
    :param encryption_key: optional key; when falsy, the configured
        RESOURCE_IDENTIFIER_ENCRYPTION_KEY is used instead
    :param resource_type: resource type to verify against
        (student, teacher, klass, room)
    :return: the resource type and the resource ID
    :raises ValueError: if the decrypted payload is malformed or its type
        differs from ``resource_type``
    """
    key = encryption_key or get_config().RESOURCE_IDENTIFIER_ENCRYPTION_KEY
    plaintext = _aes_decrypt(key, data)

    # The regex check establishes that the decrypted data is well-formed.
    parsed = re.match(r'^(student|teacher|klass|room);([\s\S]+)$', plaintext)
    if parsed is None:
        raise ValueError('Decrypted data is invalid: %s' % plaintext)

    kind, identifier = parsed.group(1), parsed.group(2)
    if resource_type and kind != resource_type:
        raise ValueError('Resource type not correspond')
    return kind, identifier
Example #5
Source File: _cpreqbody.py From cherrypy with BSD 3-Clause "New" or "Revised" License | 6 votes |
def process(self):
    """Execute the best-match processor for the given media type.

    The full content type is looked up in ``self.processors`` first, then
    its top-level type (the part before the first '/'). When neither is
    registered, ``self.default_proc()`` runs instead.
    """
    media_type = self.content_type.value
    registry = self.processors
    try:
        handler = registry[media_type]
    except KeyError:
        major_type = media_type.split('/', 1)[0]
        try:
            handler = registry[major_type]
        except KeyError:
            handler = None

    if handler is not None:
        handler(self)
        return
    self.default_proc()
Example #6
Source File: rp.py From pywarp with Apache License 2.0 | 6 votes |
def verify(self, authenticator_data, client_data_json, signature, user_handle, raw_id, email):
    """Ascertain the validity of credentials supplied by the client user agent via navigator.credentials.get().

    :param authenticator_data: raw authenticator data, wrapped in
        ``AuthenticatorData`` here.
    :param client_data_json: serialized client data; hashed with SHA-256 and
        parsed as JSON.
    :param signature: signature passed to the stored credential's ``verify``.
    :param user_handle: not used by this method.
    :param raw_id: not used by this method.
    :param email: bytes holding the user's email address.
    :return: ``{"verified": True}`` when every check passes.
    :raises Exception: if the email address is malformed.
    :raises AssertionError: if a client-data or authenticator check fails.
    """
    email = email.decode()
    # fullmatch: re.match only anchors the start, so "a@b.c@evil" used to pass.
    if not re.fullmatch(r"[^@]+@[^@]+\.[^@]+", email):
        raise Exception("Invalid email address")

    client_data_hash = hashlib.sha256(client_data_json).digest()
    client_data = json.loads(client_data_json)

    # The checks below raise AssertionError explicitly instead of using the
    # `assert` statement: asserts are stripped under `python -O`, which would
    # silently disable authentication checks.
    if not client_data["type"] == "webauthn.get":
        raise AssertionError("unexpected client data type")

    expect_challenge = self.storage_backend.get_challenge_for_user(email=email, type="authentication")
    if not b64url_decode(client_data["challenge"]) == expect_challenge:
        raise AssertionError("challenge mismatch")

    print("expect RP ID:", self.rp_id)
    if self.rp_id:
        # Verify that the value of C.origin matches the Relying Party's origin.
        if not "https://" + self.rp_id == client_data["origin"]:
            raise AssertionError("origin mismatch")

    # NOTE(review): the original carries a comment about verifying that the
    # RP ID hash in authData is the SHA-256 hash of the expected RP ID, but
    # no such check is performed here.
    authenticator_data = AuthenticatorData(authenticator_data)
    if not authenticator_data.user_present:
        raise AssertionError("user not present")

    credential = self.storage_backend.get_credential_by_email(email)
    credential.verify(signature, authenticator_data.raw_auth_data + client_data_hash)
    # NOTE(review): the original mentions a signature counter check at this
    # point; none is implemented.
    return {"verified": True}
Example #7
Source File: output_mod.py From Turku-neural-parser-pipeline with Apache License 2.0 | 6 votes |
def launch(args, q_in, q_out):
    """Print parsed jobs from ``q_in`` to stdout and report throughput.

    Blocks on ``q_in.get()`` in a loop. Each ``(jobid, txt)`` item is printed
    to stdout; tree and token counts are accumulated and, at most every ten
    seconds, a speed report is written to stderr. A job id of ``"FINAL"``
    ends the loop. ``args`` and ``q_out`` are not used.
    """
    started = time.time()
    tree_count = 0
    token_count = 0
    report_due = started + 10.0  # report every 10sec at most

    while True:
        jobid, txt = q_in.get()
        if jobid == "FINAL":
            print("Output exiting", file=sys.stderr, flush=True)
            return

        rows = txt.split("\n")
        # A tree starts at the row whose first column is "1".
        tree_count += sum(1 for row in rows if row.startswith("1\t"))
        token_count += sum(1 for row in rows if re.match(token_regex, row))

        if tree_count > 0 and time.time() > report_due:
            elapsed = time.time() - started
            report = "Runtime: {}:{} [m:s] Parsed: {} [trees], {} [tokens] Speed: {} [trees/sec] {} [sec/tree] {} [tokens/sec]".format(
                int(elapsed) // 60,
                int(elapsed) % 60,
                tree_count,
                token_count,
                tree_count / elapsed,
                elapsed / tree_count,
                token_count / elapsed,
            )
            print(report, file=sys.stderr, flush=True)
            report_due = time.time() + 10

        print(txt, end="", flush=True)
Example #8
Source File: qaData.py From QA with GNU General Public License v3.0 | 6 votes |
def sentenceToIndex(sentence, word2idx, maxLen):
    """
    Segment a sentence into words and convert it to indices into the
    embeddings list.

    :param sentence: the sentence
    :param word2idx: mapping from word to index
    :param maxLen: maximum sentence length
    :return: list of ``maxLen`` indices representing the sentence; slots
        beyond the last word, and unrecognized words, hold the "UNKNOWN"
        index. An empty list is returned when ``maxLen`` is not positive.
    """
    unknown = word2idx.get("UNKNOWN", 0)
    num = word2idx.get("NUM", len(word2idx))
    index = [unknown] * maxLen
    if maxLen <= 0:
        # Nothing to fill; previously the first word raised IndexError here.
        return index

    # range() comes first so zip stops after maxLen words without pulling
    # further tokens from the segmenter.
    for i, word in zip(range(maxLen), jieba.cut(sentence)):
        if word in word2idx:
            index[i] = word2idx[word]
        elif re.match(r"\d+", word):
            # Words starting with digits share the "NUM" index.
            index[i] = num
        # Any other word keeps the pre-filled "UNKNOWN" index.
    return index
Example #9
Source File: analysis_mg.py From CAMISIM with Apache License 2.0 | 6 votes |
def parse(self, line):
    """Parse one tab-separated placement line and record its taxonomy path.

    Expected layout:
    ``<scaffoldId>_<contigId> TAB <ncbid> TAB <weight> TAB <source> TAB <tag>``.
    Blank lines and lines that do not match this layout are ignored, as are
    lines whose ncbid is 1.

    :param line: a single line of the input file.
    """
    if line.strip() == '':
        return

    # One match with capture groups replaces six separate regex passes.
    fields = re.match(
        r'^([0-9]+)_([0-9]+)\t([0-9]+)\t([0-9\.]+)\t([^\t]+)\t([^\t]+)$', line)
    if fields is None:
        return

    scaffoldId = int(fields.group(1))
    contigId = int(fields.group(2))
    ncbid = int(fields.group(3))
    weight = float(fields.group(4))
    source = str(fields.group(5))
    tag = str(fields.group(6))

    if ncbid != 1:
        taxPathDict = self.taxonomy.getPathToRoot(ncbid)
        # len() instead of `.keys() >= 1`: comparing a keys view with an int
        # raises TypeError on Python 3.
        if taxPathDict is not None and len(taxPathDict) >= 1:
            self.sequences.setCandidateTaxonomyPath(
                contigId, scaffoldId, taxPathDict, weight, source, tag)
            self.assignedIdList.append(contigId)
        else:
            sys.stderr.write(str('No taxonomic path found for ncbid: ' + str(ncbid)))
Example #10
Source File: cluster.py From CAMISIM with Apache License 2.0 | 6 votes |
def __init__(self, line):
    """Build the cluster groups described by one line.

    The line is split on ','. The first piece starts with two tab-separated
    leading fields; the first of them is stored as the float threshold and
    both are stripped off. Every piece then forms one group of tab-separated
    names; names that start with digits and an underscore are reduced to
    their ``<digits>_<digits>`` prefix and registered for that group.

    :param line: one line of the cluster file.
    """
    tokens = line.split(',')
    first = tokens[0]
    self._threshold = float(re.sub(r'^([^\t]+)\t[^\t]+\t.*', r'\1', first))
    tokens[0] = re.sub(r'^[^\t]+\t[^\t]+\t(.*)', r'\1', first)

    self.groupIdCount = 0
    self.seqNameToGroupId = {}
    self.groupIdToSeqNameSet = {}

    for group_id, token in enumerate(tokens):
        members = set()
        self.groupIdToSeqNameSet[group_id] = members
        for name in token.split('\t'):
            if not re.match(r'^[0-9]+_.*$', name):
                continue
            seq_name = re.sub(r'^([0-9]+_[0-9]+)_.*$', r'\1', name)
            self.seqNameToGroupId[seq_name] = group_id
            members.add(seq_name)
        # One group per comma-separated piece.
        self.groupIdCount = group_id + 1
Example #11
Source File: optimization.py From BERT-Classification-Tutorial with Apache License 2.0 | 5 votes |
def _get_variable_name(self, param_name):
    """Get the variable name from the tensor name.

    A trailing ``:<digits>`` suffix is removed; any other name is returned
    unchanged.
    """
    suffixed = re.match("^(.*):\\d+$", param_name)
    return param_name if suffixed is None else suffixed.group(1)
Example #12
Source File: modeling.py From BERT-Classification-Tutorial with Apache License 2.0 | 5 votes |
def get_assignment_map_from_checkpoint(tvars, init_checkpoint):
    """Map checkpoint variables onto the current variables by name.

    Args:
      tvars: iterable of variables; each exposes a ``name`` attribute.
      init_checkpoint: checkpoint passed to ``tf.train.list_variables``.

    Returns:
      A tuple ``(assignment_map, initialized_variable_names)``. The first is
      an ordered dict mapping every checkpoint variable name that also occurs
      among ``tvars`` to itself; the second is a dict whose keys are those
      names, both bare and with a ``":0"`` suffix.
    """
    # Index the current variables by name without the ":<n>" output suffix.
    name_to_variable = collections.OrderedDict()
    for variable in tvars:
        bare_name = variable.name
        suffixed = re.match("^(.*):\\d+$", bare_name)
        if suffixed is not None:
            bare_name = suffixed.group(1)
        name_to_variable[bare_name] = variable

    assignment_map = collections.OrderedDict()
    initialized_variable_names = {}
    for entry in tf.train.list_variables(init_checkpoint):
        ckpt_name, _unused = entry[0], entry[1]
        if ckpt_name in name_to_variable:
            assignment_map[ckpt_name] = ckpt_name
            initialized_variable_names[ckpt_name] = 1
            initialized_variable_names[ckpt_name + ":0"] = 1

    return (assignment_map, initialized_variable_names)
Example #13
Source File: modeling.py From BERT-Classification-Tutorial with Apache License 2.0 | 5 votes |
def assert_rank(tensor, expected_rank, name=None):
    """Raises an exception if the tensor rank is not of the expected rank.

    Args:
      tensor: A tf.Tensor to check the rank of.
      expected_rank: Python integer or list of integers, expected rank.
      name: Optional name of the tensor for the error message; defaults to
        `tensor.name`.

    Raises:
      ValueError: If the expected shape doesn't match the actual shape.
    """
    if name is None:
        name = tensor.name

    # Normalize a single rank and a collection of ranks to one lookup table.
    if isinstance(expected_rank, six.integer_types):
        allowed_ranks = {expected_rank: True}
    else:
        allowed_ranks = {rank: True for rank in expected_rank}

    actual_rank = tensor.shape.ndims
    if actual_rank in allowed_ranks:
        return

    scope_name = tf.get_variable_scope().name
    raise ValueError(
        "For the tensor `%s` in scope `%s`, the actual rank "
        "`%d` (shape = %s) is not equal to the expected rank `%s`" %
        (name, scope_name, actual_rank, str(tensor.shape), str(expected_rank)))
Example #14
Source File: gff.py From svviz with MIT License | 5 votes |
def getAnnotations(self, chrom, start, end, clip=False, extension=1000000):
    """Return the genes annotated around a genomic interval.

    Lines are fetched from ``self.tabix`` for the interval widened by
    ``extension`` on both sides, grouped by the transcript captured with
    ``RE_TRANSCRIPT``, and each group is turned into a ``GTFGene``.

    :param chrom: chromosome name; normalized with ``self.fixChromFormat``.
    :param start: interval start.
    :param end: interval end.
    :param clip: when true, every returned gene is clipped to [start, end].
    :param extension: padding added to both sides of the fetch window; when
        positive, genes not overlapping [start, end] are dropped afterwards.
    :return: list of ``GTFGene`` objects.
    """
    chrom = self.fixChromFormat(chrom)
    fetched = self.tabix.fetch(chrom, max(0, start - extension), end + extension)

    lines_by_transcript = collections.defaultdict(list)
    for position, record in enumerate(fetched):
        if len(record) < 2:
            continue
        found = re.match(RE_TRANSCRIPT, record)
        if found is None:
            # No transcript captured: give the record its own group.
            key = "anno{}".format(position)
        else:
            key = found.group(1)
        lines_by_transcript[key].append(record)

    genes = [GTFGene(records) for records in lines_by_transcript.values()]

    if extension > 0:
        # Keep only the genes that overlap the requested interval.
        genes = [gene for gene in genes
                 if not (end < gene.start or start > gene.end)]

    if clip:
        for gene in genes:
            gene.clip(start, end)

    return genes
Example #15
Source File: command_option.py From clikit with MIT License | 5 votes |
def _validate_long_alias(self, alias):
    # type: (str) -> None
    """Validate a long option alias.

    :param alias: the alias to check.
    :raises ValueError: if the alias does not start with a letter or
        contains anything other than letters, digits and hyphens.
    """
    if not alias[:1].isalpha():
        raise ValueError("A long option alias must start with a letter.")

    # Raw string avoids the invalid "\-" escape of a plain literal; \Z
    # (instead of $) also rejects an alias that ends with a newline.
    if not re.match(r"^[a-zA-Z0-9\-]+\Z", alias):
        raise ValueError(
            "A long option alias must contain letters, digits and hyphens only."
        )
Example #16
Source File: command_option.py From clikit with MIT License | 5 votes |
def _validate_short_alias(self, alias):
    # type: (str) -> None
    """Validate a short option alias.

    :param alias: the alias to check.
    :raises ValueError: if the alias is not exactly one ASCII letter.
    """
    # \Z instead of $: "$" also matches just before a trailing newline, so
    # "a\n" used to be accepted.
    if not re.match(r"^[a-zA-Z]\Z", alias):
        raise ValueError(
            'A short option alias must be exactly one letter. Got: "{}"'.format(
                alias
            )
        )
Example #17
Source File: abstract_option.py From clikit with MIT License | 5 votes |
def _validate_long_name(self, long_name):
    # type: (Optional[str]) -> None
    """Validate the long name of an option.

    :param long_name: the long option name.
    :raises ValueError: if the name is None, not a string, empty, shorter
        than two characters, does not start with a letter, or contains
        anything other than letters, digits and hyphens.
    """
    if long_name is None:
        raise ValueError("The long option name must not be null.")

    if not isinstance(long_name, basestring):
        raise ValueError(
            "The long option name must be a string. Got: {}".format(type(long_name))
        )

    if not long_name:
        raise ValueError("The long option name must not be empty.")

    if len(long_name) < 2:
        # NOTE(review): the message interpolates the length rather than the
        # name itself; confirm that this is intended.
        raise ValueError(
            'The long option name must contain more than one character. Got: "{}"'.format(
                len(long_name)
            )
        )

    if not long_name[:1].isalpha():
        raise ValueError("The long option name must start with a letter")

    # \Z instead of $: "$" also matches just before a trailing newline, so a
    # name such as "name\n" used to be accepted.
    if not re.match(r"^[a-zA-Z0-9\-]+\Z", long_name):
        raise ValueError(
            "The long option name must contain letters, digits and hyphens only."
        )
Example #18
Source File: test_exception_trace.py From clikit with MIT License | 5 votes |
# Renders the ExceptionTrace of the exception raised by fail() into a
# BufferedIO set to DEBUG verbosity, then asserts that the fetched output
# matches the `expected` regex. The regex template is formatted with the
# escaped relative path of this test file for both "{}" placeholders.
# NOTE(review): this copy is flattened onto one line, so the line breaks and
# column alignment inside the triple-quoted template are lost; restore them
# from the upstream file before relying on this test.
def test_render_debug_better_error_message(): io = BufferedIO() io.set_verbosity(DEBUG) try: fail() except Exception as e: # Exception trace = ExceptionTrace(e) trace.render(io) expected = r"""^ Stack trace: 1 {}:112 in test_render_debug_better_error_message 110\│ 111\│ try: → 112\│ fail\(\) 113\│ except Exception as e: # Exception 114\│ trace = ExceptionTrace\(e\) Exception Failed at {}:14 in fail 10\│ from clikit.utils._compat import PY38 11\│ 12\│ 13\│ def fail\(\): → 14\│ raise Exception\("Failed"\) 15\│ 16\│ 17\│ @pytest.mark.skipif\(PY36, reason="Legacy error messages are Python <3.6 only"\) 18\│ def test_render_legacy_error_message\(\): """.format( re.escape(trace._get_relative_file_path(__file__)), re.escape(trace._get_relative_file_path(__file__)), ) assert re.match(expected, io.fetch_output()) is not None
Example #19
Source File: test_exception_trace.py From clikit with MIT License | 5 votes |
# Renders the ExceptionTrace of the exception raised by fail() into a
# BufferedIO set to VERBOSE verbosity, then asserts that the fetched output
# matches the `expected` regex. The regex template is formatted with the
# escaped relative path of this test file for both "{}" placeholders.
# NOTE(review): this copy is flattened onto one line, so the line breaks and
# column alignment inside the triple-quoted template are lost; restore them
# from the upstream file before relying on this test.
def test_render_verbose_better_error_message(): io = BufferedIO() io.set_verbosity(VERBOSE) try: fail() except Exception as e: # Exception trace = ExceptionTrace(e) trace.render(io) expected = r"""^ Stack trace: 1 {}:218 in test_render_verbose_better_error_message fail\(\) Exception Failed at {}:14 in fail 10\│ from clikit.utils._compat import PY38 11\│ 12\│ 13\│ def fail\(\): → 14\│ raise Exception\("Failed"\) 15\│ 16\│ 17\│ @pytest.mark.skipif\(PY36, reason="Legacy error messages are Python <3.6 only"\) 18\│ def test_render_legacy_error_message\(\): """.format( re.escape(trace._get_relative_file_path(__file__)), re.escape(trace._get_relative_file_path(__file__)), ) assert re.match(expected, io.fetch_output()) is not None
Example #20
Source File: test_display.py From vergeml with MIT License | 5 votes |
# Drives a ProgressBar over range(100) that writes into a BufferOutput:
# starts it, updates it by 1, sleeps one millisecond, updates it by 2, and
# asserts that the buffer content matches the expected "3/100" progress line
# with an it/sec rate printed with two decimals.
# NOTE(review): runs of spaces inside the expected regex may have been
# collapsed in this flattened copy; verify against the upstream file.
def test_progress(): buffer = BufferOutput() progress = ProgressBar(range(100), file=buffer) progress.start() progress.update(1) time.sleep(0.001) progress.update(2) assert re.match(r' 3%\|█▏ \| 3/100 \[[0-9]+\.[0-9][0-9] it/sec\]', buffer.getvalue())
Example #21
Source File: __main__.py From vergeml with MIT License | 5 votes |
def _parsebase(argv):
    """Parse until the second part of the command.

    Returns a tuple ``(args, rest)``: ``args`` maps long option names
    (without leading dashes) to their values, and ``rest`` holds the
    arguments getopt left unparsed. Abbreviated long options are rejected
    with ``getopt.GetoptError``.
    """
    short_spec = 'vf:m:'  # version, file, model
    long_spec = ['version', 'file=', 'model=', 'samples-dir=', 'test-split=',
                 'val-split=', 'cache-dir=', 'random-seed=', 'trainings-dir=',
                 'project-dir=', 'cache=', 'device=', 'device-memory=']

    parsed, rest = getopt.getopt(argv, short_spec, long_spec)
    args = dict(parsed)

    # don't match prefix: getopt expands unique abbreviations of long
    # options, so an option that was parsed but never spelled out in argv
    # must have been abbreviated.
    for spec in long_spec:
        opt = spec.rstrip("=")
        flag = '--' + opt
        if flag not in args:
            continue
        if any(arg.startswith(flag) for arg in argv):
            continue
        # find the key that does not match
        given_keys = (arg.split("=")[0].lstrip("-") for arg in argv)
        offenders = [key for key in given_keys if key in opt]
        if offenders:
            raise getopt.GetoptError('Invalid key', opt='--' + offenders[0])
        raise getopt.GetoptError('Invalid key')

    # convert from short to long names
    for short, long_name in (('-v', '--version'), ('-m', '--model'), ('-f', '--file')):
        if short in args:
            args[long_name] = args.pop(short)

    return {key.strip('-'): value for key, value in args.items()}, rest
Example #22
Source File: utils.py From vergeml with MIT License | 5 votes |
def parse_trained_models(argv):
    """Parse @syntax for specifying trained models on the command line.

    Returns ``(names, rest)``: the leading ``@name`` arguments with the "@"
    removed, and the remaining arguments starting at the first one that is
    not in @syntax.
    """
    count = 0
    while count < len(argv) and re.match("^@[a-zA-Z0-9_-]+$", argv[count]):
        count += 1
    names = [arg[1:] for arg in argv[:count]]
    return names, argv[count:]
Example #23
Source File: option.py From vergeml with MIT License | 5 votes |
def validate_value(self, value):
    """Check ``value`` against this option's ``validate`` rule.

    The rule may be a tuple/list of allowed values, a callable invoked as
    ``rule(self, value)``, or a comma-separated string of numeric
    comparisons (``>``, ``<``, ``>=``, ``<=``) parsed with
    ``_VALIDATE_REGEX``. Nothing is checked when no rule is set, or when the
    option is not required and the value is None or a spelling of "null".

    Raises whatever ``self._invalid_value`` builds when the value is
    rejected.
    """
    rule = self.validate
    if not rule:
        return

    if not self.is_required() and value in (None, 'null', 'Null', 'NULL'):
        return

    if isinstance(rule, (tuple, list)) and value not in rule:
        suggestion = None
        if all(isinstance(item, str) for item in rule):
            suggestion = did_you_mean(rule, value)
        raise self._invalid_value(value, suggestion)

    if callable(rule):
        rule(self, value)
        return

    if not isinstance(rule, str):
        return

    for constraint in rule.split(","):
        constraint = constraint.strip()
        try:
            value = float(value)
        except ValueError:
            raise self._invalid_value(value)

        op, num_str = re.match(_VALIDATE_REGEX, constraint).group(1, 2)
        num = float(num_str)

        # The operators are mutually exclusive, so at most one guard fires.
        if op == '>' and not value > num:
            raise self._invalid_value(value, f"Must be greater than {num_str}")
        if op == '<' and not value < num:
            raise self._invalid_value(value, f"Must be less than {num_str}")
        if op == '>=' and not value >= num:
            raise self._invalid_value(value, f"Must be greater or equal to {num_str}")
        if op == '<=' and not value <= num:
            raise self._invalid_value(value, f"Must be less than or equal to {num_str}")
Example #24
Source File: config.py From vergeml with MIT License | 5 votes |
def _parse_device_id(res, section):
    """Parse the id option in the device section.

    Accepted values (after stripping whitespace) are ``gpu:<n>``, ``gpu``,
    ``cpu`` and ``auto``; a bare ``gpu`` is stored as ``gpu:0``. Nothing
    happens when the section has no ``id`` key.

    :param res: result dict; receives the parsed value under ``'id'``.
    :param section: the device section of the configuration.
    :raises: the error built by ``_invalid_option`` for any other value.
    """
    if 'id' not in section:
        return

    value = section['id'].strip()
    # fullmatch: the pattern previously had no end anchor, so values such as
    # "cpux" or "automatic" were accepted and stored verbatim.
    if not re.fullmatch(r"gpu:[0-9]+|gpu|cpu|auto", value):
        raise _invalid_option('device.id', 'device')

    if value == 'gpu':
        value = 'gpu:0'
    res['id'] = value
Example #25
Source File: config.py From vergeml with MIT License | 5 votes |
def _parse_device_memory(res, section):
    """Parse the memory option in the device section.

    The value may be a float fraction between 0 and 1, a percentage string
    such as ``"50%"`` (stored as a fraction), or ``"auto"`` (leaves ``res``
    untouched). Nothing happens when the section has no ``memory`` key.

    :param res: result dict; receives the fraction under ``'memory'``.
    :param section: the device section of the configuration.
    :raises: the error built by ``_invalid_option`` for any invalid value.
    """
    if 'memory' not in section:
        return

    value = section['memory']

    # Handle floats before touching string methods: calling .strip() first
    # made this branch unreachable and crashed on float input.
    if isinstance(value, float):
        if value < 0. or value > 1.:
            raise _invalid_option('device.memory', 'device')
        res['memory'] = value
        return

    value = value.strip()
    if value == 'auto':
        return

    if not re.match(r'^[0-9]+(\.[0-9]*)?%$', value):
        raise _invalid_option('device.memory', 'device')

    try:
        percent = float(value.rstrip('%'))
    except ValueError:
        raise _invalid_option('device.memory', 'device')

    if percent < 0. or percent > 100.:
        raise _invalid_option('device.memory', 'device')

    res['memory'] = percent / 100
Example #26
Source File: test_app.py From hydrus with MIT License | 5 votes |
def test_object_POST(self):
    """Test replace of a given object using ID."""
    index = self.client.get("/{}".format(self.API_NAME))
    assert index.status_code == 200
    endpoints = json.loads(index.data.decode('utf-8'))
    api_prefix = "/{}/".format(self.API_NAME)

    for endpoint in endpoints:
        url = endpoints[endpoint]
        collection_name = "/".join(url.split(api_prefix)[1:])
        if collection_name not in self.doc.collections:
            continue

        collection = self.doc.collections[collection_name]["collection"]
        class_ = self.doc.parsed_classes[collection.class_.title]["class"]
        class_methods = [op.method for op in class_.supportedOperation]

        # Create an object first so there is an ID to operate on.
        dummy_object = gen_dummy_object(collection.class_.title, self.doc)
        initial_put_response = self.client.put(url, data=json.dumps(dummy_object))
        assert initial_put_response.status_code == 201

        # The ID is extracted from the description of the PUT response.
        response = json.loads(initial_put_response.data.decode('utf-8'))
        id_match = re.match(r'(.*)ID (.{36})* (.*)', response["description"])
        assert id_match is not None
        id_ = id_match.group(2)

        if "POST" not in class_methods:
            continue
        replacement = gen_dummy_object(collection.class_.title, self.doc)
        post_replace_response = self.client.post(
            '{}/{}'.format(url, id_), data=json.dumps(replacement))
        assert post_replace_response.status_code == 200
Example #27
Source File: test_app.py From hydrus with MIT License | 5 votes |
def test_object_DELETE(self):
    """Test DELETE of a given object using ID."""
    index = self.client.get("/{}".format(self.API_NAME))
    assert index.status_code == 200
    endpoints = json.loads(index.data.decode('utf-8'))
    api_prefix = "/{}/".format(self.API_NAME)

    for endpoint in endpoints:
        url = endpoints[endpoint]
        collection_name = "/".join(url.split(api_prefix)[1:])
        if collection_name not in self.doc.collections:
            continue

        collection = self.doc.collections[collection_name]["collection"]
        class_ = self.doc.parsed_classes[collection.class_.title]["class"]
        class_methods = [op.method for op in class_.supportedOperation]

        # Create an object first so there is an ID to delete.
        dummy_object = gen_dummy_object(collection.class_.title, self.doc)
        initial_put_response = self.client.put(url, data=json.dumps(dummy_object))
        assert initial_put_response.status_code == 201

        # The ID is extracted from the description of the PUT response.
        response = json.loads(initial_put_response.data.decode('utf-8'))
        id_match = re.match(r'(.*)ID (.{36})* (.*)', response["description"])
        assert id_match is not None
        id_ = id_match.group(2)

        if "DELETE" not in class_methods:
            continue
        delete_response = self.client.delete('{}/{}'.format(url, id_))
        assert delete_response.status_code == 200
Example #28
Source File: test_app.py From hydrus with MIT License | 5 votes |
def test_bad_requests(self):
    """Checks if bad requests are handled or not."""
    index = self.client.get("/{}".format(self.API_NAME))
    assert index.status_code == 200
    endpoints = json.loads(index.data.decode('utf-8'))
    api_prefix = "/{}/".format(self.API_NAME)

    for endpoint in endpoints:
        url = endpoints[endpoint]
        collection_name = "/".join(url.split(api_prefix)[1:])
        if collection_name not in self.doc.collections:
            continue

        collection = self.doc.collections[collection_name]["collection"]
        class_ = self.doc.parsed_classes[collection.class_.title]["class"]
        class_methods = [op.method for op in class_.supportedOperation]

        # Create an object first so there is an ID to send bad requests to.
        dummy_object = gen_dummy_object(collection.class_.title, self.doc)
        initial_put_response = self.client.put(url, data=json.dumps(dummy_object))
        assert initial_put_response.status_code == 201

        # The ID is extracted from the description of the PUT response.
        response = json.loads(initial_put_response.data.decode('utf-8'))
        id_match = re.match(r'(.*)ID (.{36})* (.*)', response["description"])
        assert id_match is not None
        id_ = id_match.group(2)
        object_url = '{}/{}'.format(url, id_)

        # Methods the class does not support must answer 405.
        if "POST" not in class_methods:
            replacement = gen_dummy_object(collection.class_.title, self.doc)
            post_replace_response = self.client.post(
                object_url, data=json.dumps(replacement))
            assert post_replace_response.status_code == 405

        if "DELETE" not in class_methods:
            delete_response = self.client.delete(object_url)
            assert delete_response.status_code == 405
Example #29
Source File: setup.py From EDeN with MIT License | 5 votes |
def get_version():
    """Read the package version from ``eden/_version.py``.

    :return: the string assigned to ``__version__`` on the first line that
        starts with ``__version__ = '...'``, or None when the file cannot be
        opened or contains no such line.
    """
    try:
        version_file = open("eden/_version.py")
    except EnvironmentError:
        return None

    # The context manager closes the handle on every exit path; it was
    # previously left open.
    with version_file:
        for line in version_file:
            mo = re.match("__version__ = '([^']+)'", line)
            if mo:
                return mo.group(1)
    return None
Example #30
Source File: admin.py From cyberdisc-bot with MIT License | 5 votes |
def check_bad_name(nick):
    """Return True when ``nick`` matches any pattern in NICKNAME_PATTERNS.

    Patterns are matched case-insensitively from the start of the nickname;
    False is returned when none of them matches.
    """
    return any(
        re.match(pattern, nick, re.IGNORECASE) for pattern in NICKNAME_PATTERNS
    )