Python re.match() Examples

The following are 30 code examples of re.match(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: tokenizer_udpipe_mod.py    From Turku-neural-parser-pipeline with Apache License 2.0 8 votes vote down vote up
def parse_text(self,txt):
        """Tokenize raw text with the UDPipe pipeline, passing comment lines through.

        :param txt: plain input text, possibly containing comment lines
        :return: the pipeline's tokenized output, with comments interleaved
            in their original positions

        NOTE(review): ``comment_regex`` is defined at module level and is not
        visible here - presumably it matches the pipeline's comment marker;
        confirm against the module header.
        """
        err=udpipe.ProcessingError()
        tokenized=""
        current_block=[]
        for line in txt.split("\n"):
            if re.match(comment_regex, line.lstrip()): # comment line
                # Flush the accumulated text before emitting the comment so
                # output order matches input order.
                if current_block:
                    tokenized+=self.pipeline.process("\n".join(current_block),err)
                    current_block=[]
                # Normalize the comment marker to "# " and pass the line through.
                tokenized+=re.sub(comment_regex, "# ", line.lstrip()+"\n")
                continue
            # normal text line, save to current block to be tokenized
            current_block.append(line)
        # Flush whatever text followed the last comment line.
        if current_block:
            tokenized+=self.pipeline.process("\n".join(current_block),err)
        return tokenized
Example #2
Source File: avclass_common.py    From BASS with GNU General Public License v2.0 6 votes vote down vote up
def __remove_suffixes(av_name, label):
        '''Remove AV specific suffixes from given label
           Returns updated label

           Fix vs. original: 'Avast' appeared twice in the set literal below;
           the duplicate is dropped (set semantics are unchanged).
           NOTE(review): the 'McAffee' spelling is kept as-is - it must match
           the engine names produced upstream, so verify before "fixing" it.'''

        # Truncate after last '.' for engines whose labels carry a
        # ".variant" style suffix.
        if av_name in set(['Norman', 'Avast', 'Avira',
                           'McAffee-GW-Edition', 'McAffee', 'Kaspersky',
                           'ESET-NOD32', 'Fortinet', 'Jiangmin', 'Comodo',
                           'GData', 'Sophos',
                           'TrendMicro-HouseCall', 'TrendMicro',
                           'NANO-Antivirus', 'Microsoft']):
            label = label.rsplit('.', 1)[0]

        # Truncate after last '.'
        # if suffix only contains digits or uppercase (no lowercase) chars
        if av_name == 'AVG':
            tokens = label.rsplit('.', 1)
            if len(tokens) > 1 and re.match("^[A-Z0-9]+$", tokens[1]):
                label = tokens[0]

        # Truncate after last '!'
        if av_name == 'Agnitum':
            label = label.rsplit('!', 1)[0]

        return label
Example #3
Source File: cluster.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def __init__(self, line):
        """Parse one clustering line into group-membership indices.

        The first comma-separated token carries "<threshold><TAB><x><TAB>...";
        groups are comma separated and sequence names within a group are tab
        separated.

        NOTE(review): this input format is inferred from the regexes below -
        confirm against the tool that produces the clustering file.
        """
        tokens = line.split(',')
        # Pull the numeric threshold out of the first token, then strip the
        # two leading tab-separated fields so tokens[0] holds only names.
        self._threshold = float(re.sub(r'^([^\t]+)\t[^\t]+\t.*', r'\1', tokens[0]))
        tokens[0] = re.sub(r'^[^\t]+\t[^\t]+\t(.*)', r'\1', tokens[0])
        self.groupIdCount = 0  # next group id to assign
        self.seqNameToGroupId = dict([])  # "scaffold_contig" -> group id
        self.groupIdToSeqNameSet = dict([])  # group id -> set of seq names
        for token in tokens:
            names = token.split('\t')
            self.groupIdToSeqNameSet[self.groupIdCount] = set([])
            for name in names:
                #print name
                # Only names of the form "<digits>_..." are indexed; the key
                # is truncated to "<digits>_<digits>" (scaffoldId_contigId).
                if re.match(r'^[0-9]+_.*$', name):
                    seqName = re.sub(r'^([0-9]+_[0-9]+)_.*$',r'\1', name)
                    self.seqNameToGroupId[seqName] = self.groupIdCount
                    self.groupIdToSeqNameSet[self.groupIdCount].add(seqName)
            self.groupIdCount += 1
Example #4
Source File: analysis_mg.py    From CAMISIM with Apache License 2.0 6 votes vote down vote up
def parse(self, line):
        """Parse one binning line and record its taxonomic assignment.

        Expected tab-separated fields: "scaffoldId_contigId", ncbi taxon id,
        weight, source, tag.  Blank or malformed lines are ignored.

        Bug fixes vs. the original:
        * a non-matching, non-empty line used to fall through to the
          ``ncbid`` check below and raise NameError on the unbound name -
          it now returns early;
        * ``taxPathDict.keys() >= 1`` compared a key view/list to an int
          (a Python 2 artifact) - the dict length is tested explicitly now.
        """
        if line.strip() == '':
            return

        if not re.match(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', line):
            return  # malformed line: nothing to record

        scaffoldId = int(re.sub(r'^([0-9]+)_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        contigId = int(re.sub(r'^[0-9]+_([0-9]+)\t[0-9]+\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        ncbid = int(re.sub(r'^[0-9]+_[0-9]+\t([0-9]+)\t[0-9\.]+\t[^\t]+\t[^\t]+$', r'\1', line))
        weight = float(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t([0-9\.]+)\t[^\t]+\t[^\t]+$', r'\1', line))
        source = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t([^\t]+)\t[^\t]+$', r'\1', line))
        tag = str(re.sub(r'^[0-9]+_[0-9]+\t[0-9]+\t[0-9\.]+\t[^\t]+\t([^\t]+)$', r'\1', line))

        if ncbid != 1:  # taxon id 1 is the root: carries no information
            taxPathDict = self.taxonomy.getPathToRoot(ncbid)
            if taxPathDict is not None and len(taxPathDict) >= 1:
                self.sequences.setCandidateTaxonomyPath(contigId, scaffoldId, taxPathDict, weight, source, tag)
                self.assignedIdList.append(contigId)
            else:
                sys.stderr.write(str('No taxonomic path found for ncbid: ' + str(ncbid)))
Example #5
Source File: output_mod.py    From Turku-neural-parser-pipeline with Apache License 2.0 6 votes vote down vote up
def launch(args,q_in,q_out):
    """Output-module main loop: drain parsed batches from *q_in*, print them
    to stdout, and report throughput to stderr at most every 10 seconds.

    :param args: parsed command-line arguments (unused here)
    :param q_in: queue yielding (jobid, text) pairs; jobid "FINAL" terminates
    :param q_out: unused; kept to satisfy the module-launcher signature
    """
    start=time.time()
    total_parsed_trees=0
    total_parsed_tokens=0
    next_report=start+10.0 #report every 10sec at most
    while True:
        jobid,txt=q_in.get()
        if jobid=="FINAL":
            # Sentinel from upstream: everything has been processed.
            print("Output exiting",file=sys.stderr,flush=True)
            return
        # A tree (sentence) starts with the token line whose id is "1".
        total_parsed_trees+=sum(1 for line in txt.split("\n") if line.startswith("1\t"))
        # NOTE(review): token_regex is module-level and not visible here -
        # presumably it matches any CoNLL-U token line; confirm.
        total_parsed_tokens+=sum(1 for line in txt.split("\n") if re.match(token_regex, line))
        if total_parsed_trees>0 and time.time()>next_report:
            time_spent=time.time()-start
            print("Runtime: {}:{} [m:s]  Parsed: {} [trees], {} [tokens]  Speed: {} [trees/sec]  {} [sec/tree] {} [tokens/sec]".format(int(time_spent)//60,int(time_spent)%60,total_parsed_trees,total_parsed_tokens, total_parsed_trees/time_spent,time_spent/total_parsed_trees, total_parsed_tokens/time_spent) ,file=sys.stderr,flush=True)
            next_report=time.time()+10
        print(txt,end="",flush=True)
Example #6
Source File: abstract_option.py    From clikit with MIT License 6 votes vote down vote up
def _validate_short_name(
        self, short_name, flags
    ):  # type: (Optional[str], int) -> None
        """Reject any short option name that is not exactly one ASCII letter.

        A missing name (None) is acceptable unless PREFER_SHORT_NAME was
        requested in *flags*; every violation raises ValueError.
        """
        if short_name is None:
            if not (flags & self.PREFER_SHORT_NAME):
                # Nothing to validate and nothing was demanded.
                return
            raise ValueError(
                "The short option name must be given if the option flag PREFER_SHORT_NAME is selected."
            )

        if not isinstance(short_name, basestring):
            raise ValueError(
                "The short option name must be a string. Got: {}".format(
                    type(short_name)
                )
            )

        if not short_name:
            raise ValueError("The short option name must not be empty.")

        if re.match(r"^[a-zA-Z]$", short_name) is None:
            raise ValueError("The short option name must be exactly one letter.")
Example #7
Source File: qaData.py    From QA with GNU General Public License v3.0 6 votes vote down vote up
def sentenceToIndex(sentence, word2idx, maxLen):
    """Tokenize *sentence* with jieba and map each token to its embedding index.

    Unknown tokens map to the "UNKNOWN" index, purely numeric tokens to the
    "NUM" index; the output is padded/truncated to exactly *maxLen* entries.

    :param sentence: raw sentence text
    :param word2idx: mapping from token to embedding index
    :param maxLen: fixed output length
    :return: list of maxLen indices

    Fixes vs. original: raw-string regex (``"\\d+"`` was an invalid escape
    sequence), and ``maxLen <= 0`` no longer raises IndexError on the first
    assignment.
    """
    unknown = word2idx.get("UNKNOWN", 0)
    num = word2idx.get("NUM", len(word2idx))
    index = [unknown] * maxLen
    if maxLen <= 0:
        # Nothing to fill; avoid index[0] on an empty list below.
        return index
    i = 0
    for word in jieba.cut(sentence):
        if word in word2idx:
            index[i] = word2idx[word]
        elif re.match(r"\d+", word):
            # Token starts with digits: bucket it as a number.
            index[i] = num
        else:
            index[i] = unknown
        if i >= maxLen - 1:
            break
        i += 1
    return index
Example #8
Source File: rp.py    From pywarp with Apache License 2.0 6 votes vote down vote up
def verify(self, authenticator_data, client_data_json, signature, user_handle, raw_id, email):
        """Ascertain the validity of credentials supplied by the client user
        agent via navigator.credentials.get().

        :param authenticator_data: raw authenticator data bytes from the client
        :param client_data_json: raw clientDataJSON bytes from the client
        :param signature: assertion signature to verify
        :param user_handle: unused here
        :param raw_id: unused here
        :param email: user identifier as bytes
        :return: {"verified": True} on success (assertions raise otherwise)
        """
        email = email.decode()
        # Cheap sanity check only - not a full RFC 5322 validation.
        if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
            raise Exception("Invalid email address")
        client_data_hash = hashlib.sha256(client_data_json).digest()
        client_data = json.loads(client_data_json)
        assert client_data["type"] == "webauthn.get"
        # The challenge echoed by the client must be the one we issued.
        expect_challenge = self.storage_backend.get_challenge_for_user(email=email, type="authentication")
        assert b64url_decode(client_data["challenge"]) == expect_challenge
        print("expect RP ID:", self.rp_id)
        if self.rp_id:
            # NOTE(review): assumes the RP is always served over https - confirm.
            assert "https://" + self.rp_id == client_data["origin"]
        # Verify that the value of C.origin matches the Relying Party's origin.
        # Verify that the RP ID hash in authData is indeed the SHA-256 hash of the RP ID expected by the RP.
        authenticator_data = AuthenticatorData(authenticator_data)
        assert authenticator_data.user_present
        credential = self.storage_backend.get_credential_by_email(email)
        # Raises if the signature does not match the stored public key.
        credential.verify(signature, authenticator_data.raw_auth_data + client_data_hash)
        # signature counter check
        return {"verified": True}
Example #9
Source File: _cpreqbody.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def process(self):
        """Execute the best-match processor for the given media type."""
        ct = self.content_type.value
        # Prefer an exact content-type processor; otherwise fall back to the
        # major type (e.g. "multipart" for "multipart/form-data").
        handler = self.processors.get(ct)
        if handler is None:
            handler = self.processors.get(ct.split('/', 1)[0])
        if handler is None:
            self.default_proc()
        else:
            handler(self)
Example #10
Source File: encryption.py    From everyclass-server with Mozilla Public License 2.0 6 votes vote down vote up
def decrypt(data: str, encryption_key: str = None, resource_type: str = None):
    """Decrypt a resource identifier.

    :param data: the encrypted string
    :param encryption_key: optional key; falls back to the configured
        RESOURCE_IDENTIFIER_ENCRYPTION_KEY
    :param resource_type: when given, the decrypted type must equal it
        (one of student/teacher/klass/room)
    :return: (resource type, resource id) tuple
    :raises ValueError: on malformed plaintext or a type mismatch
    """
    key = encryption_key or get_config().RESOURCE_IDENTIFIER_ENCRYPTION_KEY

    plaintext = _aes_decrypt(key, data)

    # The plaintext must look like "<type>;<identifier>"; the regex doubles
    # as an integrity check on the decryption.
    matched = re.match(r'^(student|teacher|klass|room);([\s\S]+)$', plaintext)
    if matched is None:
        raise ValueError('Decrypted data is invalid: %s' % plaintext)
    if resource_type and matched.group(1) != resource_type:
        raise ValueError('Resource type not correspond')
    return matched.group(1), matched.group(2)
Example #11
Source File: update_cfg_file.py    From multibootusb with GNU General Public License v2.0 5 votes vote down vote up
def extract_distroinfo_from_fname(self, which_dir, regex, distro_group,
                                      version_group):
        """Scan *which_dir* for the first filename matching *regex* and return
        the distro name and version extracted from the given match groups.

        :param which_dir: directory (resolved via self.fullpath) to scan
        :param regex: pattern with groups for the distro name and version
        :param distro_group: match group holding the distro string
        :param version_group: match group holding a dotted version string
        :return: (distro, [int, ...]) for the first match, else None
        """
        p = re.compile(regex, re.I)  # case-insensitive: ISO names vary in case
        for fname in os.listdir(self.fullpath(which_dir)):
            m = p.match(fname)
            if m:
                # "12.04.1" -> [12, 4, 1]
                return (m.group(distro_group),
                        [int(x) for x in m.group(version_group).split('.')])
        return None
Example #12
Source File: predicate.py    From python-clean-architecture with MIT License 5 votes vote down vote up
def any(self, cond: t.Union[Predicate, t.Iterable]) -> Predicate:
        """
        Checks if a condition is met by any element in a list,
        where a condition can also be a sequence (e.g. list).
        >>> var('f1').any(var('f2').exists())
        Matches::
            {'f1': [{'f2': 1}, {'f2': 0}]}
        >>> var('f1').any([1, 2, 3])
        # Match f1 that contains any element from [1, 2, 3]
        Matches::
            {'f1': [1, 2]}
            {'f1': [3, 4, 5]}

        :param cond: Either a Predicate that at least one element has to match
         or a list of which at least one element has to be contained
         in the tested element.
        """
        if callable(cond):
            # Predicate case: succeed if any element satisfies it.
            def _cmp(value):
                return is_iterable(value) and any(cond(e) for e in value)

        else:
            # Membership case: succeed if any element of ``cond`` is present.
            def _cmp(value):
                return is_iterable(value) and any(e in cond for e in value)

        return self._build_predicate(
            lambda lhs, value: _cmp(lhs),
            Operation.ANY,
            (self._path, freeze(cond))
        )
Example #13
Source File: predicate.py    From python-clean-architecture with MIT License 5 votes vote down vote up
def search(self, regex: str) -> Predicate:
        """
        Run a regex test against the value (only a substring has to
        match, ``re.search`` semantics).
        >>> var('f1').search(r'^\\w+$')

        :param regex: The regular expression to use for matching
        """
        def _checker(lhs, value):
            return bool(re.search(regex, lhs))

        return self._build_predicate(
            _checker, Operation.SEARCH, (self._path, regex)
        )
Example #14
Source File: predicate.py    From python-clean-architecture with MIT License 5 votes vote down vote up
def matches(self, regex: str) -> Predicate:
        """
        Run a regex test against a dict value (the match is anchored at
        the start of the string, ``re.match`` semantics).
        >>> var('f1').matches(r'^\\w+$')

        :param regex: The regular expression to use for matching
        """
        def _checker(lhs, value):
            return bool(re.match(regex, lhs))

        return self._build_predicate(
            _checker, Operation.MATCHES, (self._path, regex)
        )
Example #15
Source File: predicate.py    From python-clean-architecture with MIT License 5 votes vote down vote up
def all(self, cond: t.Union[Predicate, t.Iterable]) -> Predicate:
        """
        Checks if a condition is met by all elements in a list,
        where a condition can also be a sequence (e.g. list).
        >>> var('f1').all(var('f2').exists())
        Matches::
            {'f1': [{'f2': 1}, {'f2': 1}]}
        >>> var('f1').all([1, 2, 3])
        # Match f1 that contains all elements from [1, 2, 3]
        Matches::
            {'f1': [1, 2, 3, 4, 5]}

        :param cond: Either a Predicate that all elements have to match or
         a list which has to be contained in the tested element.
        """
        if callable(cond):
            # Predicate case: every element must satisfy it.
            def _cmp(value):
                return is_iterable(value) and all(cond(e) for e in value)

        else:
            # Containment case: every element of ``cond`` must be present.
            def _cmp(value):
                return is_iterable(value) and all(e in value for e in cond)

        return self._build_predicate(
            lambda lhs, value: _cmp(lhs),
            Operation.ALL,
            (self._path, freeze(cond))
        )


# noinspection PyProtectedMember 
Example #16
Source File: rp.py    From pywarp with Apache License 2.0 5 votes vote down vote up
def register(self, client_data_json, attestation_object, email):
        """Store the credential public key and related metadata on the server
        using the associated storage backend.

        :param client_data_json: raw clientDataJSON bytes from the client
        :param attestation_object: CBOR-encoded attestation from the authenticator
        :param email: user identifier as bytes; must look like an email address
        :return: {"registered": True} on success (assertions raise otherwise)
        """
        authenticator_attestation_response = cbor2.loads(attestation_object)
        email = email.decode()
        # Cheap sanity check only - not a full RFC 5322 validation.
        if not re.match(r"[^@]+@[^@]+\.[^@]+", email):
            raise Exception("Invalid email address")
        client_data_hash = hashlib.sha256(client_data_json).digest()
        client_data = json.loads(client_data_json)
        assert client_data["type"] == "webauthn.create"
        print("client data", client_data)
        # The challenge echoed by the client must be the one we issued for
        # this registration.
        expect_challenge = self.storage_backend.get_challenge_for_user(email=email, type="registration")
        assert b64url_decode(client_data["challenge"]) == expect_challenge
        print("expect RP ID:", self.rp_id)
        if self.rp_id:
            # NOTE(review): assumes the RP origin is always https - confirm.
            assert "https://" + self.rp_id == client_data["origin"]
        # Verify that the value of C.origin matches the Relying Party's origin.
        # Verify that the RP ID hash in authData is indeed the SHA-256 hash of the RP ID expected by the RP.
        authenticator_data = AuthenticatorData(authenticator_attestation_response["authData"])
        assert authenticator_data.user_present
        # If user verification is required for this registration,
        # verify that the User Verified bit of the flags in authData is set.
        assert authenticator_attestation_response["fmt"] == "fido-u2f"
        att_stmt = FIDOU2FAttestationStatement(authenticator_attestation_response['attStmt'])
        attestation = att_stmt.validate(authenticator_data,
                                        rp_id_hash=authenticator_data.rp_id_hash,
                                        client_data_hash=client_data_hash)
        credential = attestation.credential
        # TODO: ascertain user identity here
        self.storage_backend.save_credential_for_user(email=email, credential=credential)
        return {"registered": True}

    # https://www.w3.org/TR/webauthn/#verifying-assertion
Example #17
Source File: helper.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def setup_client():
    """Set up the WebCase classes to match the server's socket settings."""
    # Mirror the running cherrypy server's bind address into the test client.
    webtest.WebCase.PORT = cherrypy.server.socket_port
    webtest.WebCase.HOST = cherrypy.server.socket_host
    if cherrypy.server.ssl_certificate:
        # TLS is configured: exercise the server over https.
        CPWebCase.scheme = 'https'

# --------------------------- Spawning helpers --------------------------- #
Example #18
Source File: helper.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def assertErrorPage(self, status, message=None, pattern=''):
        """Compare the response body with a built in error page.

        The function will optionally look for the regexp pattern,
        within the exception embedded in the error page.

        :param status: HTTP status the error page is generated for
        :param message: optional message forwarded to the error-page builder
        :param pattern: regex to look for inside the traceback; '' matches
            anything, None asserts there is *no* traceback at all
        """

        # This will never contain a traceback
        page = cherrypy._cperror.get_error_page(status, message=message)

        # First, test the response body without checking the traceback.
        # Stick a match-all group (.*) in to grab the traceback.
        def esc(text):
            # Escape and convert to bytes: self.body is a bytes response.
            return re.escape(ntob(text))
        epage = re.escape(page)
        epage = epage.replace(
            esc('<pre id="traceback"></pre>'),
            esc('<pre id="traceback">') + b'(.*)' + esc('</pre>'))
        m = re.match(epage, self.body, re.DOTALL)
        if not m:
            self._handlewebError(
                'Error page does not match; expected:\n' + page)
            return

        # Now test the pattern against the traceback
        if pattern is None:
            # Special-case None to mean that there should be *no* traceback.
            if m and m.group(1):
                self._handlewebError('Error page contains traceback')
        else:
            if (m is None) or (
                not re.search(ntob(re.escape(pattern), self.encoding),
                              m.group(1))):
                msg = 'Error page does not contain %s in traceback'
                self._handlewebError(msg % repr(pattern))
Example #19
Source File: fasta.py    From CAMISIM with Apache License 2.0 5 votes vote down vote up
def fastaFileToDictWholeNames(filePath):
    """
        Reads a fasta file and returns mapping: seqName -> sequence the whole sequence name is used
        as seqName!!! (even if it contains space)
    """
    seqIdToSeq = {}
    f = None
    try:
        f = open(os.path.normpath(filePath),'r')
    except Exception:
        print "Cannot open file:", filePath
        raise
    else:
        name = ''
        seq = ''
        for line in f:
            line = noNewLine(line)
            if re.match('>', line):
                if seq != '':
                    assert name != ''
                    seqIdToSeq[name] = seq
                    seq = ''
                name = line.replace('>','')
            else:
                seq += line
        if seq != '':
            assert name != ''
            seqIdToSeq[name] = seq
    finally:
        if f is not None:
            f.close()
    return seqIdToSeq 
Example #20
Source File: cptools.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def referer(pattern, accept=True, accept_missing=False, error=403,
            message='Forbidden Referer header.', debug=False):
    """Raise HTTPError if Referer header does/does not match the given pattern.

    pattern
        A regular expression pattern to test against the Referer.

    accept
        If True, the Referer must match the pattern; if False,
        the Referer must NOT match the pattern.

    accept_missing
        If True, permit requests with no Referer header.

    error
        The HTTP error code to return to the client on failure.

    message
        A string to include in the response body on failure.

    debug
        If True, log the match result (or the missing header) via cherrypy.log.
    """
    try:
        ref = cherrypy.serving.request.headers['Referer']
        match = bool(re.match(pattern, ref))
        if debug:
            cherrypy.log('Referer %r matches %r' % (ref, pattern),
                         'TOOLS.REFERER')
        # accept=True requires a match; accept=False requires a non-match.
        if accept == match:
            return
    except KeyError:
        # No Referer header at all.
        if debug:
            cherrypy.log('No Referer header', 'TOOLS.REFERER')
        if accept_missing:
            return

    raise cherrypy.HTTPError(error, message)
Example #21
Source File: sequences.py    From CAMISIM with Apache License 2.0 5 votes vote down vote up
def replaceIdsWithNames(outputFileContigSubPattern, nameToIDsFile, targetFile, outFile):
    """
        @deprecated: NOT IMPLEMENTED YET!!!
        replace ids with names
        @param nameToIdsFile: file that contains lines: contigName tab contigID
        @param targetFile: file that contain in the first column scaffoldID_contigID which will be replaced by its name
        @param outFile: file that contain the first column in the form scaffoldID_contigID with the name
        (that can be modified by substitution defined in the config file .. according to outputFileContigSubPattern)
    """
    idToName = dict()  # fix: original read `dir([])`, a typo for an empty dict
    assert False, 'NOT IMPLEMENTED YET'
    #try:
    #    f = open(os.path.normpath(nameToIDsFile), 'r')
    #    for line in f:
    #        if re.match('^#', line):
    #            continue
    #        name = re.sub(outputFileContigSubPattern, r'\1' , noNewLine(re.sub(r'^([^ \t]+)\t[0-9]+$',r'\1', line)))
    #        id = int(noNewLine(re.sub(r'^[^ \t]+\t([0-9]+)$',r'\1', line)))
    #        idToName[id] = name
    #except Exception:
    #    print "Cannot create a file or write to it:", outFile
    #    raise
    #finally:
    #    f.close()

    #now: go through the targetFile and for each line do:
    #    extract contigID and the rest of the line ^[0-9]+_[0-9]+([^0-9].*)$
    #    write name + rest of the line + \n to the outFile !!!!!!!!!!



#compare two sequences according to their length
#def seqLenCmp(seq1, seq2):
#    return seq1.seqBp - seq2.seqBp 
Example #22
Source File: _cpreqbody.py    From cherrypy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def process_multipart(entity):
    """Read all multipart parts into entity.parts.

    :param entity: request-body entity; its content-type params carry the
        multipart boundary
    :raises ValueError: if the boundary is missing or malformed
    """
    ib = ''
    if 'boundary' in entity.content_type.params:
        # http://tools.ietf.org/html/rfc2046#section-5.1.1
        # "The grammar for parameters on the Content-type field is such that it
        # is often necessary to enclose the boundary parameter values in quotes
        # on the Content-type line"
        ib = entity.content_type.params['boundary'].strip('"')

    # Printable ASCII, not ending in a space, at most ~200 chars; an empty
    # boundary fails here too.
    if not re.match('^[ -~]{0,200}[!-~]$', ib):
        raise ValueError('Invalid boundary in multipart form: %r' % (ib,))

    ib = ('--' + ib).encode('ascii')

    # Find the first marker
    while True:
        b = entity.readline()
        if not b:
            # Body ended before the first boundary: nothing to parse.
            return

        b = b.strip()
        if b == ib:
            break

    # Read all parts
    while True:
        part = entity.part_class.from_fp(entity.fp, ib)
        entity.parts.append(part)
        part.process()
        if part.fp.done:
            break
Example #23
Source File: pps.py    From CAMISIM with Apache License 2.0 5 votes vote down vote up
def toRealNames(config, sequences):
    """
        Transforms a PPS file fileName.fas.PP.out that names sequences according to their ids to their real names.

        Reads <inputIdsFastaFile>.PP.out and writes <inputIdsFastaFile>.PP.out.n
        with the leading "scaffoldId_contigId" of each line replaced by the
        real sequence name looked up in *sequences*.
    """
    outIdsPPSFile = str(config.get('inputIdsFastaFile') + '.PP.out')
    outNamesPPSFile = outIdsPPSFile + '.n'
    #os.path.normpath
    print outNamesPPSFile

    # NOTE(review): if opening fr succeeds but fw fails, the finally clause
    # below raises NameError on the unbound fw - pre-existing behaviour.
    try:
        fr = open(os.path.normpath(outIdsPPSFile),'r')
        fw = open(os.path.normpath(outNamesPPSFile),'w')
    except Exception:
        print "Cannot open one of the files:", outIdsPPSFile, "or", outNamesPPSFile
        raise
    else:
        for line in fr:
            # Assignment lines start with "<scaffoldId>_<contigId>"; anything
            # else is copied through untouched.
            if re.match(r'^[0-9]+_[0-9]+[^0-9].*$', line):
                id = re.sub(r'^[0-9]+_([0-9]+)[^0-9].*$',r'\1' , line)
                rest = re.sub(r'^[0-9]+_[0-9]+([^0-9].*)$',r'\1' , line)
                seq = sequences.getSequence(int(id))
                fw.write(seq.name + rest) # seq.scaffold.name
            else:
                fw.write(line)
    finally:
        fr.close()
        fw.close()
Example #24
Source File: semester.py    From everyclass-server with Mozilla Public License 2.0 5 votes vote down vote up
def __init__(self, para):
        """
        Build a semester from a tuple such as (2016, 2017, 2) or a semester
        string such as "2016-2017-2".
        """
        # Semester("2016-2017-2")
        if isinstance(para, str) and re.match(r'\d{4}-\d{4}-\d', para):
            self.year1 = int(para[0:4])
            self.year2 = int(para[5:9])
            self.sem = int(para[10])

        # Semester("16-17-2") - two-digit years are assumed to be 20xx
        elif isinstance(para, str) and re.match(r'\d{2}-\d{2}-\d', para):
            self.year1 = int(para[0:2]) + 2000
            self.year2 = int(para[3:5]) + 2000
            self.sem = int(para[6])

        # Semester((2016,2017,2))
        elif isinstance(para, tuple):
            self.year1 = int(para[0])
            self.year2 = int(para[1])
            self.sem = int(para[2])

        # illegal
        else:
            # NOTE(review): invalid input silently falls back to a hard-coded
            # default semester instead of raising - confirm this is intended.
            self.year1 = 2020
            self.year2 = 2021
            self.sem = 1
Example #25
Source File: twitter-export-image-fill.py    From twitter-export-image-fill with The Unlicense 5 votes vote down vote up
def determine_image_or_video(medium, year_str, month_str, date, tweet, tweet_media_count):
  """Classify a tweet media entity and build its download URL and local path.

  :param medium: one entry of the tweet's media list (dict from the export)
  :param year_str: year string used in the archive directory name
  :param month_str: month string used in the archive directory name
  :param date: day component used in the local filename
  :param tweet: the enclosing tweet dict (supplies id and retweet status)
  :param tweet_media_count: index of this medium within the tweet
  :return: (is_video, url, local_filename)

  Fix vs. original: the local holding the GIF id no longer shadows the
  builtin ``id``.
  """
  # Video
  if '/video/' in medium['expanded_url']:
    is_video = True
    separator = '-video'
    url = medium['expanded_url']
    extension = '.mp4'
  # Animated GIF transcoded into a video
  elif 'tweet_video_thumb' in medium['media_url']:
    is_video = True
    separator = '-gif-video'
    gif_id = re.match(r'(.*)tweet_video_thumb/(.*)\.', medium['media_url']).group(2)
    url = "https://video.twimg.com/tweet_video/%s.mp4" % gif_id
    extension = os.path.splitext(url)[1]
  # Regular non-animated image
  else:
    is_video = False
    separator = ''
    url = medium['media_url_https']
    extension = os.path.splitext(url)[1]
    # Download the original/best image size, rather than the default one
    url = url + ':orig'

  local_filename = 'data/js/tweets/%s_%s_media/%s-%s%s-%s%s%s' % \
      (year_str, month_str, date, tweet['id'], separator,
      'rt-' if is_retweet(tweet) else '', tweet_media_count, extension)

  return is_video, url, local_filename
Example #26
Source File: WinDBGTrace.py    From ALF with Apache License 2.0 5 votes vote down vote up
def get_hung_thread():
    """Return the index of the thread with the longest run time (heuristic for
    the hung thread); 0 if no thread line parses.

    NOTE(review): the regex admits hex digits for the thread index but the
    result is parsed with base-10 int() - an index like "a" would raise
    ValueError; confirm the debugger emits decimal indices here.
    """
    hung_thread = 0
    cur_max = 0
    for thread in get_thread_list():
        # Expected shape: "  <idx>:<tid>  <n> days <h:m:s>..."; skip others.
        m = re.match(r"\s+([0-9a-f]+):[0-9a-f]+\s+[0-9]+\sdays\s", thread)
        if not m:
            continue
        run_time = get_thread_run_time(thread)
        if run_time > cur_max:
            hung_thread = int(m.group(1))
            cur_max = run_time
    return hung_thread
Example #27
Source File: __init__.py    From ALF with Apache License 2.0 5 votes vote down vote up
def _parse_regex(self, refs, line_no, stopchars=""):
        """Parse a leading regex literal out of *refs* and build a grammar symbol.

        :param refs: remainder of the grammar line, starting at the regex
        :param line_no: current line number (for error reporting)
        :param stopchars: characters (besides space) allowed to terminate the regex
        :return: (symbol, rest_of_line_lstripped)

        NOTE(review): RE_REGEX, RE_REGEX_RANGE and REGEX_ALPHABET are module
        level - the exact accepted token syntax is defined by those patterns.
        """
        got = 0  # index just past the last consumed regex atom
        sym = self._grmr.new_symbol("[regex]", line_no)
        sym.define_regex(line_no)
        for match in RE_REGEX.finditer(refs[got:]):
            if match.start(0) != got:
                # Gap before this match: the regex literal ended earlier.
                break
            count = match.group("mod")
            got = match.end(0)
            if match.group("outer") == ".":
                # "." stands for any character of the alphabet.
                cls = REGEX_ALPHABET
            else:
                cls = match.group("class")
                # Expand "a-z" style ranges into explicit character runs.
                match = RE_REGEX_RANGE.search(cls)
                while match:
                    start, _, end = match.group(0)
                    cls = cls[:match.start(0)] + "".join(chr(c) for c in range(ord(start), ord(end)+1)) + cls[match.end(0):]
                    match = RE_REGEX_RANGE.search(cls)
                if cls.startswith("^"):
                    # Negated class: complement against the full alphabet.
                    cls = "".join(set(REGEX_ALPHABET) - set(cls[1:]))
            # Translate the repeat modifier into an inclusive (min, max) pair;
            # unbounded "*" / "+" are capped at 5 repetitions for generation.
            if not count:
                count = (1, 1)
            elif count == "*":
                count = (0, 5)
            elif count == "+":
                count = (1, 5)
            else:
                assert count.startswith("{") and count.endswith("}")
                count = count[1:-1]
                try:
                    count = int(count)
                    count = (count, count)
                except ValueError:
                    # "{m,n}" form: two comma-separated bounds.
                    count = [int(c) for c in count.split(",")]
            sym.add_regex(cls, count[0], count[1], line_no)
        stopchars = " %s" % stopchars
        assert got == len(refs) or refs[got] in stopchars, "invalid end for regex: ('%s') (%d)" % (refs[got], line_no)
        return sym, refs[got:].lstrip()
Example #28
Source File: __init__.py    From ALF with Apache License 2.0 5 votes vote down vote up
def _parse_cfg(self, cfgstr):
        """Parse an inline grammar-config string and apply it to the grammar.

        Recognized keys: star-depth, max-size, max-depth.  Unknown keys raise
        RuntimeError; a string RE_CFG does not match is silently ignored.
        """
        match = RE_CFG.match(cfgstr)
        if not match:
            return
        # "a=1, b=2" -> [("a", "1"), ("b", "2")], whitespace stripped.
        cfgs = [_f for _f in ((m.strip() for m in l.split("=")) for l in match.group(1).split(",")) if _f]
        for cfg, val in cfgs:
            if cfg in ("star-depth", "max-size", "max-depth"):
                # Stored on the grammar as star_depth / max_size / max_depth.
                setattr(self._grmr, cfg.replace("-", "_"), int(val))
            else:
                raise RuntimeError("Unknown cfg item: %s" % cfg)
Example #29
Source File: pps.py    From CAMISIM with Apache License 2.0 5 votes vote down vote up
def readPPSOutput(sequences, taxonomy, inputFastaIdsPPSFile, overwriteAllPlacements=False):
    """
        Reads the output file of PPS and for each sequence decides:
        if overwriteAllPlacements=True is, then the sequence is placed according to the PPS file regardless of its
        previous placement
        if overwriteAllPlacements=False then if a sequence is placed to a less specific rank, than PPS suggests then
        the sequence is placed according to the PPS file
    """

    infile = str(inputFastaIdsPPSFile + '.out')
    try:
        f = open(os.path.normpath(infile),'r')
    except Exception:
            print "Cannot open file:", infile
            raise
    else:
        #i = 0
        for line in f:
            line = common.noNewLine(line)
            if re.match(r'^[0-9]+_[0-9]+.*[^0-9]+[0-9]+[^0-9]*$', line):
                scaffoldId = int(re.sub(r'^([0-9]+)_[0-9]+.*[^0-9]+[0-9]+[^0-9]*$',r'\1' ,line))
                contigId = int(re.sub(r'^[0-9]+_([0-9]+).*[^0-9]+[0-9]+[^0-9]*$',r'\1' ,line))
                ncbid = int(re.sub(r'^[0-9]+_[0-9]+.*[^0-9]+([0-9]+)[^0-9]*$',r'\1' ,line))
                weight = None # the weight is not yet defined !!!
                if ncbid != 1:
                    #print line, ":", scaffoldId, contigId, ncbid
                    taxPathDictPPS = taxonomy.getPathToRoot(ncbid)
                    if taxPathDictPPS.keys() >= 1:
                        taxPathDictCurrent = sequences.getSequence(contigId).getTaxonomyPath()
                        if taxPathDictCurrent == None:
                            sequences.setTaxonomyPath(contigId, scaffoldId, taxPathDictPPS, weight)#weight = None !!!
                            #i += 1
                        else:
                            if ((overwriteAllPlacements) or (taxPathDictPPS.keys() > taxPathDictCurrent.keys())):
                                sequences.setTaxonomyPathOverride(contigId, scaffoldId, taxPathDictPPS, weight)#weight = None !!!
                                #i += 1
        #print "placed seq by PPS:", i

    finally:
        f.close() 
Example #30
Source File: wsgi.py    From MPContribs with MIT License 5 votes vote down vote up
def immutable_file_test(path, url):
    """Decide whether *url* may be served with immutable caching headers.

    Truthy when the filename embeds a 20-hex-digit content hash before its
    extension, or when it is a suffixed .jpg name; falsy otherwise.  The
    *path* argument is unused (kept for the caller's callback signature).
    """
    hashed = re.match(r"^.+\.[0-9a-f]{20}\..+$", url)
    if hashed:
        return hashed
    # Fallback: .jpg files whose stem ends in a 3-31 char word-ish suffix.
    return re.match(r"^.+[0-9a-zA-Z_]{3,31}\.jpg$", url)