Python re.split() Examples
The following are 30 code examples of re.split(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re, or try the search function.
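As a quick refresher before the examples, here is a minimal sketch (not taken from any of the projects below) of the two re.split() behaviours that recur most often: splitting on a whitespace pattern, and keeping separators when the pattern contains a capturing group.

import re

# Split on runs of whitespace; maxsplit caps the number of splits.
re.split(r"\s+", "a  b\tc d", maxsplit=2)   # ['a', 'b', 'c d']

# A capturing group keeps the matched separators in the result
# (an empty string appears where a match touches the end of the string).
re.split(r"(\d+)", "ab12cd3")               # ['ab', '12', 'cd', '3', '']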

Example #1
Source File: __init__.py From aegea with Apache License 2.0 | 8 votes |
def validate_hostname(hostname):
    if len(hostname) > 255:
        raise Exception("Hostname {} is longer than 255 characters".format(hostname))
    if hostname[-1] == ".":
        hostname = hostname[:-1]
    allowed = re.compile(r"(?!-)[A-Z\d-]{1,63}(?<!-)$", re.IGNORECASE)
    if not all(allowed.match(x) for x in hostname.split(".")):
        raise Exception("Hostname {} is not RFC 1123 compliant".format(hostname))
Example #2
Source File: gitgot.py From GitGot with GNU Lesser General Public License v3.0 | 7 votes |
def gist_fetch(query, page_idx, total_items=1000):
    gist_url = "https://gist.github.com/search?utf8=%E2%9C%93&q={}&p={}"
    query = urllib.parse.quote(query)
    gists = []

    try:
        resp = requests.get(gist_url.format(query, page_idx))
        soup = bs4.BeautifulSoup(resp.text, 'html.parser')
        total_items = min(total_items, int(
            [x.text.split()[0] for x in soup.find_all('h3')
             if "gist results" in x.text][0].replace(',', '')))
        gists = [x.get("href") for x in soup.findAll(
            "a", class_="link-overlay")]
    except IndexError:
        return {"data": None, "total_items": 0}

    return {"data": gists, "total_items": total_items}
Example #3
Source File: setup.py From mmdetection with Apache License 2.0 | 6 votes |
def write_version_py():
    content = """# GENERATED VERSION FILE
# TIME: {}
__version__ = '{}'
short_version = '{}'
version_info = ({})
"""
    sha = get_hash()
    with open('mmdet/VERSION', 'r') as f:
        SHORT_VERSION = f.read().strip()
    VERSION_INFO = ', '.join(SHORT_VERSION.split('.'))
    VERSION = SHORT_VERSION + '+' + sha

    version_file_str = content.format(time.asctime(), VERSION, SHORT_VERSION,
                                      VERSION_INFO)
    with open(version_file, 'w') as f:
        f.write(version_file_str)
Example #4
Source File: setup.py From mmdetection with Apache License 2.0 | 6 votes |
def make_cuda_ext(name, module, sources, sources_cuda=[]):
    define_macros = []
    extra_compile_args = {'cxx': []}

    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
        define_macros += [('WITH_CUDA', None)]
        extension = CUDAExtension
        extra_compile_args['nvcc'] = [
            '-D__CUDA_NO_HALF_OPERATORS__',
            '-D__CUDA_NO_HALF_CONVERSIONS__',
            '-D__CUDA_NO_HALF2_OPERATORS__',
        ]
        sources += sources_cuda
    else:
        print(f'Compiling {name} without CUDA')
        extension = CppExtension
        # raise EnvironmentError('CUDA is required to compile MMDetection!')

    return extension(
        name=f'{module}.{name}',
        sources=[os.path.join(*module.split('.'), p) for p in sources],
        define_macros=define_macros,
        extra_compile_args=extra_compile_args)
Example #5
Source File: flakiness_checker.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 6 votes |
def find_test_path(test_file):
    """Searches for the test file and returns the path if found.

    As a default, the current working directory is the top of the search.
    If a directory was provided as part of the argument, the directory will be
    joined with cwd unless it was an absolute path, in which case, the absolute
    path will be used instead.
    """
    test_file += ".py"
    test_path = os.path.split(test_file)
    top = os.path.join(os.getcwd(), test_path[0])
    for (path, dirs, files) in os.walk(top):
        if test_path[1] in files:
            return os.path.join(path, test_path[1])
    raise FileNotFoundError("Could not find " + test_path[1] +
                            " in directory: " + top)
Example #6
Source File: wmt_utils.py From DOTA_models with Apache License 2.0 | 6 votes |
def basic_tokenizer(sentence):
    """Very basic tokenizer: split the sentence into a list of tokens."""
    words = []
    if old_style:
        for space_separated_fragment in sentence.strip().split():
            words.extend(re.split(_OLD_WORD_SPLIT, space_separated_fragment))
        return [w for w in words if w]
    for space_separated_fragment in sentence.strip().split():
        tokens = [t for t in re.split(_WORD_SPLIT, space_separated_fragment) if t]
        first_is_char = False
        for i, t in enumerate(tokens):
            if len(t) == 1 and t in _PUNCTUATION:
                tokens[i] = _CHAR_MARKER + t
                if i == 0:
                    first_is_char = True
        if words and words[-1] != _SPACE and (first_is_char or is_char(words[-1])):
            tokens = [_SPACE] + tokens
        spaced_tokens = []
        for i, tok in enumerate(tokens):
            spaced_tokens.append(tokens[i])
            if i < len(tokens) - 1:
                if tok != _SPACE and not (is_char(tok) or is_char(tokens[i + 1])):
                    spaced_tokens.append(_SPACE)
        words.extend(spaced_tokens)
    return words
Example #7
Source File: reportMetrics.py From InsightAgent with Apache License 2.0 | 6 votes |
def extract_fields_db2(obj, line, field_name_regex):
    line = '#'.join(re.split(r'\s*#', line))
    last_key = ''
    field_names = re.findall(field_name_regex, line)
    for field in reversed(field_names):
        split_at = line.find(field) + len(field)
        field_name = re.split(r'\s*:', field)[0]
        # don't overwrite existing fields
        if field_name in obj:
            continue
        else:
            obj[field_name] = ' '.join(line[split_at:].split())
            if not last_key:
                last_key = field_name
            line = line[:split_at - len(field)]
    return last_key
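The first line of this helper normalises the spacing in front of every '#' delimiter. A small sketch with a made-up DB2 snapshot line (not from the original project) shows the effect:

import re

line = "Buffer pool data physical reads   #  = 12"
'#'.join(re.split(r'\s*#', line))   # 'Buffer pool data physical reads#  = 12'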
Example #8
Source File: yamlparser.py From py2swagger with MIT License | 6 votes |
def _parse_docstring(self, docstring=''):
    """
    :param docstring:
    :return: (summary, description, schema)
    """
    summary, description, schema = None, None, dict()

    docstring = docstring.strip()
    if '---' in docstring:
        head, yml = re.split(r'\s*---+\s*\n', docstring)
        if yml:
            schema = self.yaml_load(yml) or dict()
    else:
        head = docstring

    if '\n' in head.strip():
        summary, description = map(lambda s: s.strip(), head.split('\n', 1))
    elif head:
        summary = head.strip()

    return summary, description, schema
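The split on r'\s*---+\s*\n' separates the prose part of a view docstring from the YAML block that follows a '---' line. A sketch with an invented docstring (not from the project):

import re

docstring = """Get a user.

More details here.
---
responses:
  200:
    description: OK
"""
head, yml = re.split(r'\s*---+\s*\n', docstring.strip())
# head -> 'Get a user.\n\nMore details here.'
# yml  -> 'responses:\n  200:\n    description: OK'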
Example #9
Source File: wireless.py From Paradrop with Apache License 2.0 | 6 votes |
def get_cipher_list(encryption_mode):
    """
    Get list of ciphers from encryption mode.

    Example:
    get_cipher_list("psk2+tkip+aes") -> ["TKIP", "CCMP"]
    """
    parts = encryption_mode.lower().split('+')

    ciphers = []
    if "tkip" in parts:
        ciphers.append("TKIP")
    if "ccmp" in parts or "aes" in parts:
        ciphers.append("CCMP")

    if len(ciphers) == 0:
        # We need to enable at least one cipher. Most modes default to CCMP
        # except for wpa.
        if parts[0] == "wpa":
            ciphers.append("TKIP")
        else:
            ciphers.append("CCMP")

    return ciphers
Example #10
Source File: preprocessor_plugins.py From SublimeKSP with GNU General Public License v3.0 | 6 votes |
def __init__(self, name, uiType, size, persistence, familyPrefix, uiParams,
             tableSize, prefixSymbol, line):
    self.name = name
    self.familyPrefix = familyPrefix or ""
    self.uiType = uiType
    self.prefixSymbol = prefixSymbol
    if self.uiType == "ui_text_edit":
        self.prefixSymbol = "@"
    self.uiParams = uiParams or ""
    self.numElements = size
    self.dimensionsString = size
    self.underscore = ""
    if "," in size:
        self.underscore = "_"
        self.numElements = "*".join(["(%s)" % dim for dim in size.split(",")])
    self.numElements = tryStringEval(self.numElements, line, "UI array size")
    self.persistence = persistence or ""
    self.tableSize = tableSize
Example #11
Source File: ksp_compiler.py From SublimeKSP with GNU General Public License v3.0 | 6 votes |
def prefix_with_ns(name, namespaces, function_parameter_names=None, force_prefixing=False):
    if not namespaces:
        return name
    function_parameter_names = function_parameter_names or []
    ##name = name.replace('.', '__') # replace . by __
    if name[0] in variable_prefixes:
        prefix, unprefixed_name = name[0], name[1:]
    else:
        prefix, unprefixed_name = '', name

    # if the name consists of multiple parts (eg. myfamily.myvariable),
    # extract the first part - myfamily in this example
    first_name_part = name.split('.')[0]

    # if built-in name or function parameter
    if (unprefixed_name in ksp_builtins.variables_unprefixed
            or name in ksp_builtins.functions
            or name in ksp_builtins.keywords
            or first_name_part in function_parameter_names) and not force_prefixing:
        return name  # don't add prefix

    # add namespace to name
    return prefix + '.'.join(namespaces + [unprefixed_name])
Example #12
Source File: collateAllUniqueDetections.py From EXOSIMS with BSD 3-Clause "New" or "Revised" License | 6 votes |
def multiRunPostProcessing(self, PPoutpath, folders):
    outtext = list()
    for folder in folders:
        lines = list()
        with open(os.path.join(folder, 'NEIDinfo.txt'), 'r') as g:  # read file
            lines = g.read().split('\n')[0:-1]
        lines2 = [line.split(',') for line in lines]
        try:
            lines2.remove([''])  # drop empty rows; list.remove() works in place
        except:
            pass
        lines3 = [','.join(line) for line in lines2
                  if float(line[1]) < 24764.0 / 6371.0]
        outtext.append('\n'.join(lines3))  # OUTTEXT contains a complete list of all sub-Neptune detections
    with open(os.path.join(PPoutpath, 'NEIDallSubNeptunes.txt'), 'w') as g:  # write to file
        g.write('\n'.join(outtext))

    #### Count number of surveys analyzed
    NumAnalyzed = 0
    for folder in folders:
        pklfiles = glob.glob(os.path.join(folder, '*.pkl'))
        NumAnalyzed += len(pklfiles)
    with open(os.path.join(PPoutpath, 'NEIDcountFilesAnalyzed.txt'), 'w') as g:  # write to file
        g.write(str(NumAnalyzed))
Example #13
Source File: string.py From clikit with MIT License | 5 votes |
def get_max_word_length(
    string, formatter=None
):  # type: (str, Optional[Formatter]) -> int
    if formatter is not None:
        string = formatter.remove_format(string)

    max_length = 0
    words = re.split(r"\s+", string)

    for word in words:
        max_length = max(max_length, get_string_length(word))

    return max_length
Example #14
Source File: string.py From clikit with MIT License | 5 votes |
def get_max_line_length(
    string, formatter=None
):  # type: (str, Optional[Formatter]) -> int
    if formatter is not None:
        string = formatter.remove_format(string)

    max_length = 0
    words = re.split("\n", string)

    for word in words:
        max_length = max(max_length, get_string_length(word))

    return max_length
Example #15
Source File: __init__.py From aegea with Apache License 2.0 | 5 votes |
def natural_sort(i):
    return sorted(i, key=lambda s: [int(t) if t.isdigit() else t.lower()
                                    for t in re.split(r"(\d+)", s)])
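Because the pattern r"(\d+)" contains a capturing group, the digit runs are kept in the sort key and compared as integers rather than character by character. With a hypothetical list of file names:

natural_sort(["file10.txt", "file2.txt", "file1.txt"])
# ['file1.txt', 'file2.txt', 'file10.txt']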
Example #16
Source File: config.py From keras_mixnets with MIT License | 5 votes |
def decode_block_string(self, block_string):
    """Gets a block through a string notation of arguments."""
    assert isinstance(block_string, str)
    ops = block_string.split('_')
    options = {}
    for op in ops:
        splits = re.split(r'(\d.*)', op)
        if len(splits) >= 2:
            key, value = splits[:2]
            options[key] = value

    if 's' not in options or len(options['s']) != 2:
        raise ValueError('Strides options should be a pair of integers.')

    self.input_filters = int(options['i'])
    self.output_filters = int(options['o'])
    self.dw_kernel_size = self._parse_ksize(options['k'])
    self.expand_kernel_size = self._parse_ksize(options['a'])
    self.project_kernel_size = self._parse_ksize(options['p'])
    self.num_repeat = int(options['r'])
    self.identity_skip = ('noskip' not in block_string)
    self.se_ratio = float(options['se']) if 'se' in options else None
    self.expand_ratio = int(options['e'])
    self.strides = [int(options['s'][0]), int(options['s'][1])]
    self.swish = 'sw' in block_string
    self.dilated = 'dilated' in block_string

    return self
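Each underscore-separated option token is split into its letter key and everything from the first digit onward; re.split() on the capturing group r'(\d.*)' does that in one call. The block-string grammar is the project's own; the tokens below are only illustrative.

import re

re.split(r'(\d.*)', 'k3.5')    # ['k', '3.5', '']
re.split(r'(\d.*)', 's22')     # ['s', '22', '']
re.split(r'(\d.*)', 'noskip')  # ['noskip']  -- fewer than 2 parts, so it is skipped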
Example #17
Source File: config.py From keras_mixnets with MIT License | 5 votes |
def _parse_ksize(self, ss):
    return [int(k) for k in ss.split('.')]
Example #18
Source File: avclass_common.py From BASS with GNU General Public License v2.0 | 5 votes |
def read_aliases(alfile):
    '''Read aliases map from given file'''
    if alfile is None:
        return {}
    almap = {}
    with open(alfile, 'r') as fd:
        for line in fd:
            alias, token = line.strip().split()[0:2]
            almap[alias] = token
    return almap
Example #19
Source File: avclass_common.py From BASS with GNU General Public License v2.0 | 5 votes |
def __norm_cat(self, label, hashes):
    if not label:
        return []

    # Initialize list of tokens to return
    ret = []

    # Split label into tokens and process each token
    for token in re.split("[^0-9a-zA-Z]", label):
        # Convert token to lowercase
        token = token.lower()

        # Remove digits at the end
        # FIXME: What if it is a hash, and removes digits at the end???
        end_len = len(re.findall(r"\d*$", token)[0])
        if end_len:
            token = token[:-end_len]

        # Ignore short token
        if len(token) < 4:
            continue

        # Ignore token if prefix of a hash of the sample
        # Most AVs use MD5 prefixes in labels,
        # but we check SHA1 and SHA256 as well
        hash_token = False
        for hash_str in hashes:
            if hash_str[0:len(token)] == token:
                hash_token = True
                break
        if hash_token:
            continue

        # Map token to its category and add it
        for keys, values in self.cat.iteritems():  # iteritems(): Python 2 code
            if token in values:
                token = keys
                ret.append(token)  # Add token
                break

    return ret
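The tokenisation step simply splits the label on every non-alphanumeric character. With a made-up antivirus label (not from the project's data):

import re

re.split("[^0-9a-zA-Z]", "Trojan.Win32/Agent.abc!12")
# ['Trojan', 'Win32', 'Agent', 'abc', '12']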
Example #20
Source File: gpt.py From comet-commonsense with Apache License 2.0 | 5 votes |
def forward(self, x, sequence_mask):
    x = self.c_attn(x)
    query, key, value = x.split(self.split_size, dim=2)
    query = self.split_heads(query)
    key = self.split_heads(key, k=True)
    value = self.split_heads(value)
    a = self._attn(query, key, value, sequence_mask)
    a = self.merge_heads(a)
    a = self.c_proj(a)
    a = self.resid_dropout(a)
    return a
Example #21
Source File: setup.py From mmdetection with Apache License 2.0 | 5 votes |
def get_hash():
    if os.path.exists('.git'):
        sha = get_git_hash()[:7]
    elif os.path.exists(version_file):
        try:
            from mmdet.version import __version__
            sha = __version__.split('+')[-1]
        except ImportError:
            raise ImportError('Unable to get git version')
    else:
        sha = 'unknown'

    return sha
Example #22
Source File: file_api.py From mlimages with MIT License | 5 votes |
def prepare_dir(self, relative):
    d = os.path.dirname(relative)
    if d and not os.path.exists(self.to_abs(d)):
        import re
        folders = re.split(r"\\|/", d)
        p = self.root
        for f in folders:
            p += os.path.sep + f
            if not os.path.exists(p):
                os.makedirs(p)
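Splitting on r"\\|/" accepts either path separator, so the same code handles Windows- and POSIX-style inputs. A small sketch with an illustrative path:

import re

re.split(r"\\|/", r"data\raw/images")   # ['data', 'raw', 'images']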
Example #23
Source File: utils.py From grlc with MIT License | 5 votes |
def dispatchTPFQuery(raw_tpf_query, loader, acceptHeader, content):
    """Executes the specified TPF query."""
    endpoint, auth = gquery.guess_endpoint_uri(raw_tpf_query, loader)
    glogger.debug("=====================================================")
    glogger.debug("Sending query to TPF endpoint: {}".format(endpoint))
    glogger.debug("=====================================================")

    # TODO: pagination for TPF

    # Prepare HTTP request
    reqHeaders = {'Accept': acceptHeader,
                  'Authorization': 'token {}'.format(static.ACCESS_TOKEN)}
    if content:
        reqHeaders = {'Accept': static.mimetypes[content],
                      'Authorization': 'token {}'.format(static.ACCESS_TOKEN)}
    tpf_list = re.split('\n|=', raw_tpf_query)
    subject = tpf_list[tpf_list.index('subject') + 1]
    predicate = tpf_list[tpf_list.index('predicate') + 1]
    object = tpf_list[tpf_list.index('object') + 1]
    data = {'subject': subject, 'predicate': predicate, 'object': object}

    response = requests.get(endpoint, params=data, headers=reqHeaders, auth=auth)
    glogger.debug('Response header from endpoint: ' + response.headers['Content-Type'])

    # Response headers
    resp = response.text
    headers = {}
    headers['Content-Type'] = response.headers['Content-Type']
    headers['Server'] = 'grlc/' + grlc_version

    return resp, 200, headers
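The query text is flattened into alternating keys and values by splitting on newlines and '=' signs; the exact .tpf file format is only inferred here from the index lookups above, so the sample contents are an assumption:

import re

raw_tpf_query = "subject=?s\npredicate=rdf:type\nobject=?o"   # hypothetical contents
tpf_list = re.split('\n|=', raw_tpf_query)
# ['subject', '?s', 'predicate', 'rdf:type', 'object', '?o']
tpf_list[tpf_list.index('predicate') + 1]                      # 'rdf:type'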
Example #24
Source File: sam_from_reads.py From CAMISIM with Apache License 2.0 | 5 votes |
def read_reference(reference_path):
    prefixes = []
    refseq = {}
    with open(reference_path, 'r') as ref:
        for line in ref:
            if not line.startswith('>'):
                refseq[prefix] += line.strip()  # dont count newlines
            else:  # seq name
                prefix = line[1:].strip().split()[0]
                if prefix not in prefixes:
                    prefixes.append(prefix)
                    refseq[prefix] = ""
    return refseq, prefixes
Example #25
Source File: sam_from_reads.py From CAMISIM with Apache License 2.0 | 5 votes |
def write_sam(read_file, id_to_cigar_map, reference_path, orig_prefix):
    references, prefixes = read_reference(reference_path)
    # orig_prefix is prefix without _ in name
    write_sam = os.path.join(read_file.rsplit("/", 1)[0], orig_prefix) + ".sam"
    write_header(write_sam, references, prefixes)
    with open(read_file, 'r') as reads:
        for line in reads:
            if line.startswith('>'):
                name, start, align_status, index, strand, soffset, align_length, eoffset = line.strip().split('_')
                ref_name = name[1:]  # first sign of name is ">"
                QNAME = ref_name + "-" + index
                query = ref_name + "-" + index
                if strand == 'R':
                    FLAG = str(16)
                else:
                    FLAG = str(0)
                if align_status == "unaligned":  # special cigar/no pos for non-mapping reads
                    POS = str(0)
                    CIGAR = "*"
                    RNAME = "*"  # treated as unmapped
                else:
                    POS = start
                    RNAME = ref_name
                    try:
                        CIGAR, pos = id_to_cigar_map[query]
                    except KeyError:  # sequence did not have any errors
                        CIGAR, pos = "%sM" % align_length, align_length
                MAPQ = str(255)
                RNEXT = '*'
                PNEXT = '0'
                QUAL = '*'  # no quality for nanosim
            else:
                SEQ = line.strip()
                TLEN = str(len(SEQ))
                if CIGAR != '*':
                    # unmapped bases counted as insertions in read
                    CIGAR = soffset + "I" + CIGAR + str(int(align_length) - int(pos)) + "M" + eoffset + "I"
                sam_line = [QNAME, FLAG, RNAME, POS, MAPQ, CIGAR, RNEXT, PNEXT, TLEN, SEQ, QUAL]
                clen = get_cigar_length(CIGAR)
                with open(write_sam, 'a+') as samfile:
                    samfile.write("\t".join(sam_line) + "\n")
    return prefixes
Example #26
Source File: sam_from_reads.py From CAMISIM with Apache License 2.0 | 5 votes |
def get_cigars_nanosim(error_profile):
    errors = {}
    slen = {}
    with open(error_profile, 'r') as ep:
        for line in ep:
            if line.startswith("Seq"):
                continue  # header
            # split at tab and multiple whitespace
            name, pos, error_type, length, refseq, qseq = re.split(r'\t|\s{2,}', line)
            if error_type == "mis":
                continue  # this version ignores mismatches
            seqname = name.split("_")
            seqname = seqname[0] + "-" + seqname[-1]  # later on used as sequence name
            if seqname in errors:
                errors[seqname].append((int(pos), error_type, int(length)))
            else:
                errors[seqname] = [(int(pos), error_type, int(length))]
    cigars = {}
    # if ins at pos x with length y, then that ins started at x-y! (see test_error_profile)
    for sequence in errors:
        sorted_errors = sorted(errors[sequence], key=lambda x: x[0])
        ref_len = 0
        CIGAR = ""
        for pos, etype, length in sorted_errors:
            if etype == 'ins':
                if (int(pos) - ref_len > 0):
                    CIGAR += str(int(pos) - ref_len) + "M"
                CIGAR += str(length) + "I"
                ref_len = int(pos)
                length = 0  # only relevant if insertion at the end
            elif etype == 'del':
                if (int(pos) - ref_len > 0):
                    CIGAR += str(int(pos) - ref_len) + "M"
                CIGAR += str(length) + "D"
                ref_len = int(pos) + int(length)
                # if deletion at the end, the number of matches has to be reduced
        cigars[sequence] = (CIGAR, int(pos) + int(length))
    return cigars
Example #27
Source File: sam_from_reads.py From CAMISIM with Apache License 2.0 | 5 votes |
def convert_fasta(fasta_reads):
    out_name = fasta_reads.rsplit("_", 1)[0] + ".fq"  # /path/to/genomeid_reads.fasta
    with open(fasta_reads, 'r') as reads:
        with open(out_name, 'w') as out:
            for line in reads:
                if line.startswith(">"):
                    spl = line.strip().split("_")
                    name = spl[0][1:]  # without >
                    index = spl[3]
                    out.write("@" + name + "-" + index + '\n')
                else:
                    out.write(line)
                    out.write("+\n")
                    out.write("I" * (len(line) - 1) + '\n')  # no quality information is available
Example #28
Source File: atlas.py From neuropythy with GNU Affero General Public License v3.0 | 5 votes |
def calc_cortices(subject, atlas_subject, worklog, hemis=None):
    '''
    calc_cortices extracts the hemisphere objects (of the subject) to which the atlas is being
    applied. By default these are 'lh' and 'rh', but for HCP subjects other hemispheres may be
    desired.

    Afferent parameters:
      @ hemis
        The hemispheres onto which to put the atlas; this may take a number of forms: 'lh' or
        'rh' (or --hemis=lh / --hemis=rh) applies the atlas to the given hemisphere only;
        otherwise a list of hemispheres may be specified as a python object
        (e.g. --hemis='("lh_LR32k","rh_LR32k")') or as a comma or whitespace separated string
        (such as 'lh_LR32k,lh_LR59k' or 'rh rh_LR164k'). 'lr', 'both', and 'all' are equivalent
        to 'lh rh'; this is the default behavior if hemis is not provided explicitly.
      @ subject
        The neuropythy subject object onto which the atlas is being projected.
      @ atlas_subject
        The neuropythy subject object from which the atlas is being projected.
    '''
    if hemis is None or hemis is Ellipsis:
        hemis = 'lr'
    if pimms.is_str(hemis):
        if hemis.lower() in ['lr', 'both', 'all']:
            hemis = ('lh', 'rh')
        else:
            hemis = re.split(r'([,;:]|\s)+', hemis)[::2]
    if atlas_subject.name == 'fsaverage_sym':
        hemis = ['rhx' if h == 'rh' else h for h in hemis]
    sctcs = {}
    actcs = {}
    worklog('Preparing Hemispheres...')
    for h in hemis:
        if h not in subject.hemis:
            raise ValueError('Subject %s does not have requested hemi %s' % (subject.name, h))
        sctcs[h] = curry(lambda sub, h: sub.hemis[h], subject, h)
        h = 'lh' if h == 'rhx' else h
        if h not in atlas_subject.hemis:
            raise ValueError('Atlas subject %s does not have requested hemi %s' % (
                atlas_subject.name, h))
        actcs[h] = curry(lambda sub, h: sub.hemis[h], atlas_subject, h)
    return {'subject_cortices': pimms.lazy_map(sctcs),
            'atlas_cortices': pimms.lazy_map(actcs)}
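The hemisphere string is split on runs of commas, semicolons, colons, or whitespace; because the separator is a capturing group, every other element of the result is a separator, which the [::2] slice discards:

import re

re.split(r'([,;:]|\s)+', 'lh_LR32k, rh_LR32k')        # ['lh_LR32k', ' ', 'rh_LR32k']
re.split(r'([,;:]|\s)+', 'lh_LR32k, rh_LR32k')[::2]   # ['lh_LR32k', 'rh_LR32k']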
Example #29
Source File: atlas.py From neuropythy with GNU Affero General Public License v3.0 | 5 votes |
def _format_afferent_doc(docstr, abbrevs=None, cols=80):
    try:
        (ln1, docs) = docstr.split('\n\n')
    except Exception:
        return ''
    anm0 = ln1.split(' (')[0]
    anm = anm0.replace('_', '-')
    header = ' --' + anm
    if abbrevs and anm0 in abbrevs:
        header = header + ' | -' + abbrevs[anm0]
    docs = ['\n '.join(
                textwrap.wrap(
                    ' '.join([s.strip() for s in ss.split('\n')[1:]]),
                    cols - 6))
            for ss in docs.split('\n)')]
    return header + '\n' + ''.join([' * ' + d for d in docs])
Example #30
Source File: config_util.py From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 | 5 votes |
def parse_contexts(args):
    # parse context into Context objects
    contexts = re.split(r'\W+', args.config.get('common', 'context'))
    for i, ctx in enumerate(contexts):
        if ctx[:3] == 'gpu':
            contexts[i] = mx.context.gpu(int(ctx[3:]))
        else:
            contexts[i] = mx.context.cpu(int(ctx[3:]))
    return contexts
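The context option is split on any run of non-word characters, so commas, spaces, or semicolons in the config value all work; a hypothetical value:

import re

re.split(r'\W+', 'gpu0,gpu1 cpu0')   # ['gpu0', 'gpu1', 'cpu0']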