Python re.findall() Examples

The following are 30 code examples of re.findall(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module re, or try the search function.
Example #1
Source File: utils.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def parse_labelme_poly(filename):
    """Read a LabelMe XML annotation file and return its objects.

    Every ``<object>`` element becomes one dict with the keys ``name``,
    ``deleted``, ``occluded`` and ``attributes`` (element text),
    ``verified`` (element text converted to ``int``) and ``polygon`` -- a
    flat list ``[x0, y0, x1, y1, ...]`` holding the *text* of each ``<pt>``'s
    ``<x>`` and ``<y>`` children, in document order.
    """
    def text_of(node, tag):
        return node.find(tag).text

    parsed = []
    for node in ET.parse(filename).findall('object'):
        record = {
            'name': text_of(node, 'name'),
            'deleted': text_of(node, 'deleted'),
            'verified': int(text_of(node, 'verified')),
            'occluded': text_of(node, 'occluded'),
            'attributes': text_of(node, 'attributes'),
        }
        flat_coords = []
        for vertex in node.find('polygon').findall('pt'):
            flat_coords.extend((text_of(vertex, 'x'), text_of(vertex, 'y')))
        record['polygon'] = flat_coords
        parsed.append(record)
    return parsed
Example #2
Source File: reportMetrics.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def extract_fields_db2(obj, line, field_name_regex):
    """Split ``line`` into ``field: value`` pairs and store them in ``obj``.

    The line is scanned for every match of ``field_name_regex``; matches are
    processed from the last to the first so that each value is simply "the
    rest of the (already shortened) line" after its field name.

    :param obj: dict that receives the parsed fields; keys already present
        are never overwritten.
    :param line: raw text line; whitespace in front of every ``#`` is removed
        before parsing.
    :param field_name_regex: pattern matching a field name together with its
        trailing colon.
    :return: the name of the first field stored by this call (i.e. the last
        new field on the line), or ``''`` when nothing was stored.
    """
    # Raw strings: '\s' in a normal literal is an invalid escape sequence.
    line = '#'.join(re.split(r'\s*#', line))

    last_key = ''
    for field in reversed(re.findall(field_name_regex, line)):
        # Matches are consumed back to front, so the occurrence that belongs
        # to this match is the *last* one left in the line; ``find`` would
        # pick an earlier duplicate of the same field text.
        field_start = line.rfind(field)
        split_at = field_start + len(field)
        field_name = re.split(r'\s*:', field)[0]
        # don't overwrite existing fields (and leave the line untouched so the
        # preceding field keeps this text as part of its value)
        if field_name in obj:
            continue
        # collapse runs of whitespace inside the value
        obj[field_name] = ' '.join(line[split_at:].split())
        if not last_key:
            last_key = field_name
        line = line[:field_start]
    return last_key
Example #3
Source File: helpers.py    From tensortrade with Apache License 2.0 6 votes vote down vote up
def scale_times_to_generate(times_to_generate: int, time_frame: str):
    """Scale ``times_to_generate`` by the length of ``time_frame`` in minutes.

    ``time_frame`` is a number followed by a unit, e.g. ``'5min'``, ``'1H'``,
    ``'2D'``, ``'1W'`` or ``'3M'`` (a month is counted as 30 days).  Unit
    detection is case-insensitive.

    :return: ``times_to_generate * <number> * <minutes per unit>``.
    :raises ValueError: if no known unit letter is present.
    :raises IndexError: if ``time_frame`` contains a unit but no digits.
    """
    unit = time_frame.upper()

    # Order matters: 'MIN' has to be recognised before the bare 'M' (month).
    # NOTE(review): detection is substring based, so a spelled-out 'month'
    # would hit the 'H' branch first -- confirm callers only pass short codes.
    if 'MIN' in unit:
        minutes_per_unit = 1
    elif 'H' in unit:
        minutes_per_unit = 60
    elif 'D' in unit:
        minutes_per_unit = 60 * 24
    elif 'W' in unit:
        minutes_per_unit = 60 * 24 * 7
    elif 'M' in unit:
        # 30 days per month (the previous factor 60*24*7*30 was 30 *weeks*)
        minutes_per_unit = 60 * 24 * 30
    else:
        raise ValueError('Timeframe must be either in minutes (min), hours (H), days (D), weeks (W), or months (M)')

    amount = int(re.findall(r'\d+', time_frame)[0])
    return times_to_generate * amount * minutes_per_unit
Example #4
Source File: adventure.py    From Dumb-Cogs with MIT License 6 votes vote down vote up
async def adventure_command(self, ctx, *, text):
    "Do something in your adventure"
    # The body awaits, so this has to be a coroutine function; a plain
    # ``def`` containing ``await`` does not compile.
    words = re.findall(r'\w+', text)
    if not words:
        # nothing but punctuation/whitespace was typed -- nothing to do
        return

    channel = ctx.message.channel
    server = ctx.message.server
    author = ctx.message.author
    try:
        # the team the author is playing with in this channel
        team = self.players[server.id][channel.id][author.id]
    except Exception:
        # Any lookup failure is answered with the same hint.  ``Exception``
        # (not a bare except) so task cancellation and interpreter exit are
        # not swallowed.
        await self.bot.reply('You are not in an adventure. If your team has embarked on one, join them using `{}adventure join`, otherwise embark on your own adventure.'.format(ctx.prefix))
        return

    # hand the tokenised command to the team's running game and print its output
    await self.baudout(ctx, self.game_loops[server.id][team][channel.id]["GAME"].do_command(words, ctx, self))


    # edited - irdumbs 
Example #5
Source File: test_sanity_tutorials.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 6 votes vote down vote up
def test_tutorial_tested():
    """
    Make sure that every tutorial that isn't in the whitelist
    has been added to the tutorial test file
    """
    here = os.path.dirname(__file__)
    tutorial_test_file = os.path.join(here, 'test_tutorials.py')
    # context manager so the handle is closed even if reading fails
    with open(tutorial_test_file, 'r') as f:
        tutorial_test_text = f.read()

    # tutorials live one directory below docs/tutorials
    tutorial_path = os.path.join(here, '..', '..', 'docs', 'tutorials')
    tutorials = glob.glob(os.path.join(tutorial_path, '**', '*.md'))

    # names passed to _test_tutorial_nb('<dir>/<name>') in the test file
    tested_tutorials = set(re.findall(r"assert _test_tutorial_nb\('(.*)'\)", tutorial_test_text))
    for tutorial in tutorials:
        # "<parent dir>/<file name without extension>"
        friendly_name = '/'.join(tutorial.split('/')[-2:]).split('.')[0]
        if friendly_name not in tested_tutorials and friendly_name+".md" not in whitelist_set:
            assert False, "{} has not been added to the tests/tutorials/test_tutorials.py test_suite".format(friendly_name)
Example #6
Source File: method.py    From py2swagger with MIT License 6 votes vote down vote up
def _get_path_parameters(self):
    """
    Build one parameter description for every ``/{name}`` placeholder
    found in the introspector's URL path.

    :return: list of parameters (each a required string ``path`` parameter)
    :rtype: list
    """
    placeholders = re.findall(r'/{(.+?)}', self.introspector.path)
    return [
        {'name': placeholder, 'type': 'string', 'in': 'path', 'required': True}
        for placeholder in placeholders
    ]
Example #7
Source File: utils.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def parse_rec(filename):
    """Read a PASCAL VOC XML annotation file and return its objects.

    Every ``<object>`` element becomes a dict with ``name`` and ``pose``
    (element text), ``truncated`` and ``difficult`` (as ``int``) and ``bbox``
    -- the list ``[xmin, ymin, xmax, ymax]`` of ints read from ``<bndbox>``.
    """
    def int_field(node, tag):
        return int(node.find(tag).text)

    records = []
    for node in ET.parse(filename).findall('object'):
        box = node.find('bndbox')
        records.append({
            'name': node.find('name').text,
            'pose': node.find('pose').text,
            'truncated': int_field(node, 'truncated'),
            'difficult': int_field(node, 'difficult'),
            'bbox': [int_field(box, corner)
                     for corner in ('xmin', 'ymin', 'xmax', 'ymax')],
        })
    return records
Example #8
Source File: simplesam.py    From simplesam with MIT License 6 votes vote down vote up
def parse_md(self):
        """ Return the ungapped reference sequence from the MD tag, if present.

        The result is a list of single characters.  It starts as
        ``list(self.gapped('seq'))`` and every base letter listed in the MD
        tag is written over the position the MD counters point at.  The list
        is stored in ``self._cache['parse_md']`` and returned directly on
        later calls.

        Raises:
            KeyError: the record has no ``MD`` tag.
            IndexError: the MD tag points past the end of the sequence; the
                exception argument is the ``locals()`` dict of this call.
        """
        # Fast path: a previous call already computed the result.
        try:
            return self._cache['parse_md']
        except KeyError:
            pass
        try:
            md = self['MD']
        except KeyError:
            raise KeyError('MD tag not found in SAM record.')
        ref_seq = list(self.gapped('seq'))
        # Each match is (count, letters): a run length followed by an optional
        # block of upper-case letters (a '^' between them is skipped).
        # ``letters`` is '' when the optional group did not take part.
        md_match = re.findall(r"([0-9]+)\^?([A-Z]+)?", md)
        ref_seq_i = 0  # write position inside ref_seq
        for i, b in md_match:
            # skip over the run of positions counted by this MD element
            ref_seq_i += int(i)
            for mismatch in b:
                try:
                    ref_seq[ref_seq_i] = mismatch
                except IndexError:
                    # NOTE(review): the exception carries the whole local
                    # namespace (including ``self``) as a debugging aid.
                    raise IndexError(locals())
                ref_seq_i += 1
        self._cache['parse_md'] = ref_seq
        return ref_seq
Example #9
Source File: utils.py    From DOTA_models with Apache License 2.0 6 votes vote down vote up
def reWriteImgWithMask(srcpath, dstpath, gtpath, srcform, dstform):
    """Write masked copies of the images that have 'area'/'mask' annotations.

    For every annotation file found under ``gtpath`` the objects whose class
    name contains ``area`` or ``mask`` are turned into polygons.  The image
    ``<srcpath>/<basename><srcform>`` is read, and when at least one such
    polygon exists it is saved with the mask applied as
    ``<dstpath>/<basename><dstform>``.
    """
    for gt_file in GetFileFromThisRootDir(gtpath):
        # 'area' and 'mask' objects are handled identically, so any hit of
        # the alternation qualifies the object
        polygons = [
            shgeo.Polygon(item['poly'])
            for item in parse_bod_poly(gt_file)
            if re.findall('area|mask', item['name'])
        ]
        stem = mybasename(gt_file)
        image = cv2.imread(os.path.join(srcpath, stem + srcform))
        target = os.path.join(dstpath, stem + dstform)
        if polygons:
            saveimageWithMask(image, target, polygons)
Example #10
Source File: compiler.py    From PyOptiX with MIT License 6 votes vote down vote up
def _has_modified_includes(cls, file_path, modified_after, depth=4):
    """Tell whether any file ``#include``d by ``file_path`` changed recently.

    Quoted includes (``#include "name"``) are looked up in every directory
    of ``cls._program_directories``; existing candidates are checked and
    then searched recursively.

    :param file_path: source file to scan.
    :param modified_after: modification timestamp to compare against.
    :param depth: remaining recursion levels; ``0`` stops the search.
    :return: ``True`` if an included file (direct or nested, within
        ``depth`` levels) has an mtime greater than ``modified_after``.
    """
    if depth == 0:
        return False

    # raw string: '\s' is an invalid escape in a normal string literal
    include_pattern = r'#include\s*"(.*)"'

    # read everything first so the handle is not held open while recursing
    with open(file_path) as f:
        content = f.read()

    for included_path in re.findall(include_pattern, content):
        for compiler_include_path in cls._program_directories:
            included_file_path = os.path.join(compiler_include_path, included_path)
            if not os.path.exists(included_file_path):
                continue

            if os.path.getmtime(included_file_path) > modified_after:
                return True
            if cls._has_modified_includes(included_file_path, modified_after, depth=depth - 1):
                return True

    return False
Example #11
Source File: huaban.py    From PickTrue with MIT License 6 votes vote down vote up
def __init__(self, board_url_or_id):
    """Set up a board from either its numeric id or its full URL.

    When the argument contains ``http`` the id is taken from the
    ``boards/<digits>/`` part of the URL.  The board URL and the template
    for follow-up pin requests are derived from ``BASE_URL``, the remaining
    attributes start empty and ``_init_board()`` is called last.
    """
    identifier = str(board_url_or_id)
    self.fetcher = HuaBanFetcher()
    if "http" in identifier:
        # a URL was given -- keep only the numeric board id
        identifier = re.findall(r'boards/(\d+)/', identifier)[0]
    self.id = identifier

    board_path = "/boards/{board_id}/".format(board_id=identifier)
    self.base_url = urljoin(BASE_URL, board_path)

    # query string with placeholders that are filled in for each further page
    paging_query = "?{random_string}&max={pin_id}&limit=20&wfl=1"
    self.further_pin_url_tpl = urljoin(self.base_url, paging_query)

    # uninitialized properties
    self.pin_count = None
    self.title = None
    self.description = None
    self._pins = []
    self._init_board()
Example #12
Source File: vcc_utils.py    From VEX_Syntax with MIT License 6 votes vote down vote up
def context_function_signatures(context, vcc_path=VCC_PATH):
    """List the function signatures ``vcc`` reports for a context.

    Runs ``<vcc_path> -X <context>``, decodes the output as ASCII and parses
    every ``<type> <name>(<args>)`` occurrence.

    :param context: context name passed to ``vcc -X``.
    :param vcc_path: path of the ``vcc`` executable.
    :return: list of dicts with the keys ``returns`` (type, including a
        ``[]`` suffix if present), ``name``, ``ctx``, ``args`` (argument
        text split on ``;`` and stripped), ``str`` (``"type name(args)"``)
        and ``hint`` (empty for ``void`` argument lists).
    :raises subprocess.CalledProcessError: if ``vcc`` exits with an error.
    """
    ctx_info = subprocess.check_output([vcc_path, '-X', context])
    ctx_info = ctx_info.decode('ascii')

    # raw string: '\w', '\[' and '\(' are invalid escapes in a normal literal
    signature_re = r'(\w+(\[\])?) (\w+)\((.*)\)'

    sigs = []
    # groups: full return type, optional '[]' suffix, name, raw argument text
    for returns, _array_suffix, name, raw_args in re.findall(signature_re, ctx_info):
        sig_str = '%s %s(%s)' % (returns, name, raw_args)
        if raw_args == 'void':
            hint_str = ''
        else:
            hint_str = '%s\n(%s)' % (returns, raw_args.strip().rstrip(';'))
        args = [x.strip() for x in raw_args.split(';')]
        sigs.append({'returns': returns, 'name': name, 'ctx': context,
                     'args': args, 'str': sig_str, 'hint': hint_str})

    return sigs
Example #13
Source File: tnslsnr-ping.py    From zbxdb with GNU General Public License v3.0 6 votes vote down vote up
def ParseNestedParen(string, level):
    """
    Generate strings contained in nested (), indexing i = level

    Unbalanced input is repaired first: missing ``)`` are appended and
    missing ``(`` are prepended.  The n-th ``(`` is then paired with the
    n-th ``)`` counted from the end, which is the correct pairing for purely
    nested parentheses.

    :param string: text containing parentheses.
    :param level: nesting depth to extract, ``0`` being the outermost pair.
    :return: a one-element list with the text inside the chosen pair.
    :raises IndexError: if ``level`` is not smaller than the number of pairs.
    """
    opened = string.count('(')
    closed = string.count(')')
    if opened > closed:
        string = string + ')' * (opened - closed)
    elif closed > opened:
        string = '(' * (closed - opened) + string

    # start index = character after each '(' ; end index = each ')'
    starts = [left.start() + 1 for left in re.finditer(r'\(', string)]
    ends = [right.start() for right in re.finditer(r'\)', string)]
    spans = list(zip(starts, reversed(ends)))

    start, end = spans[level]
    return [string[start:end]]
Example #14
Source File: test_re.py    From jawfish with MIT License 6 votes vote down vote up
def test_string_boundaries(self):
        # See http://bugs.python.org/issue10713
        self.assertEqual(re.search(r"\b(abc)\b", "abc").group(1),
                         "abc")
        # There's a word boundary at the start of a string.
        self.assertTrue(re.match(r"\b", "abc"))
        # A non-empty string includes a non-boundary zero-length match.
        self.assertTrue(re.search(r"\B", "abc"))
        # There is no non-boundary match at the start of a string.
        self.assertFalse(re.match(r"\B", "abc"))
        # However, an empty string contains no word boundaries, and also no
        # non-boundaries.
        self.assertEqual(re.search(r"\B", ""), None)
        # This one is questionable and different from the perlre behaviour,
        # but describes current behavior.
        self.assertEqual(re.search(r"\b", ""), None)
        # A single word-character string has two boundaries, but no
        # non-boundary gaps.
        self.assertEqual(len(re.findall(r"\b", "a")), 2)
        self.assertEqual(len(re.findall(r"\B", "a")), 0)
        # If there are no words, there are no boundaries
        self.assertEqual(len(re.findall(r"\b", " ")), 0)
        self.assertEqual(len(re.findall(r"\b", "   ")), 0)
        # Can match around the whitespace.
        self.assertEqual(len(re.findall(r"\B", " ")), 2) 
Example #15
Source File: conftest.py    From sanic with MIT License 5 votes vote down vote up
def generate_url_for_template(template):
    """Fill every typed ``<name:type>`` placeholder of ``template``.

    For each placeholder run the generator registered for its type in
    ``TYPE_TO_GENERATOR_MAP`` is called and its ``str()`` value replaces all
    occurrences of that placeholder text.
    """
    placeholder_re = re.compile(r"((?:<\w+:(string|int|number|alpha|uuid)>)+)")
    url = template
    for placeholder, kind in placeholder_re.findall(template):
        make_value = TYPE_TO_GENERATOR_MAP.get(kind)
        url = url.replace(placeholder, str(make_value()))
    return url
Example #16
Source File: data_process.py    From nlp-tensorflow with MIT License 5 votes vote down vote up
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    Runs of word characters form one token each; every other
    non-whitespace character is a token of its own.
    """
    return re.findall(r"[\w]+|[^\s\w]", sentence)
Example #17
Source File: ksp_plugin.py    From SublimeKSP with GNU General Public License v3.0 5 votes vote down vote up
def _extract_completions(self, view, prefix, point):
    """Return completion candidates for ``prefix``.

    Without a '.' in the prefix the view's own ``extract_completions`` is
    used.  With a '.', the whole buffer is scanned instead because the
    built-in implementation does not seem to allow '.' inside a prefix,
    whatever "word_separators" is set to.
    """
    if '.' not in prefix:
        return view.extract_completions(prefix, point)

    # potentially slow workaround: regex search over the complete buffer text
    whole_buffer = view.substr(sublime.Region(0, view.size()))
    hits = re.findall(re.escape(prefix) + r'[a-zA-Z0-9_.]+', whole_buffer)
    hits.sort()
    return hits
Example #18
Source File: data_process.py    From nlp-tensorflow with MIT License 5 votes vote down vote up
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    Runs of word characters form one token each; every other
    non-whitespace character is a token of its own.
    """
    return re.findall(r"[\w]+|[^\s\w]", sentence)
Example #19
Source File: mailslurper_import.py    From sarlacc with MIT License 5 votes vote down vote up
def main():
    """Copy all mails from a MailSlurper MySQL database into the store.

    Reads ``./smtpd.cfg``, opens the storage backend, then walks the
    ``mailitem`` table and passes each row to ``store.store_email``.
    """
    config = ConfigParser()
    config.read("./smtpd.cfg")

    store = storage.StorageControl(config)

    # NOTE(review): connection settings are hard-coded
    cnx = mysql.connector.connect(
            user="root", password="root",
            host="localhost",
            database="sarlacc")
    # try/finally so cursor and connection are released even when a row
    # cannot be processed
    try:
        mysql_cursor = cnx.cursor()
        try:
            mysql_cursor.execute("SELECT dateSent, fromAddress, toAddressList, subject, body FROM mailitem;")

            for (dateSent, fromAddress, toAddressList, subject, body) in mysql_cursor:
                # tidy up fromAddress: first address between angle brackets
                fromAddress = cleanupAddress(re.findall(r"<(.*?)>", fromAddress)[0])

                # tidy up toAddressList: every address between angle brackets
                toAddressList = re.findall(r"<(.*?)>", toAddressList)

                # NOTE(review): on Python 3 str() of bytes yields the
                # "b'...'" repr, not decoded text -- confirm this is intended
                body = str(b64decode(body))

                store.store_email(subject, toAddressList, fromAddress, body, dateSent, [])
        finally:
            mysql_cursor.close()
    finally:
        cnx.close()
Example #20
Source File: webfinger.py    From Webfinger with GNU General Public License v3.0 5 votes vote down vote up
def check_rule(self, key, header, body, title):
    """Fingerprint matching: test one rule against a response.

    The rule ``key`` selects what is checked: a ``title="..."`` rule is
    compared case-insensitively with ``title``, a ``body="..."`` rule with
    ``body`` and anything else with ``header``.  Returns ``True`` on a
    match; otherwise (including any error while evaluating the rule) the
    function falls through and returns ``None``.
    """
    try:
        if 'title="' in key:
            wanted = re.findall(rtitle, key)[0].lower()
            if wanted in title.lower():
                return True
        elif 'body="' in key:
            if re.findall(rbody, key)[0] in body:
                return True
        else:
            if re.findall(rheader, key)[0] in header:
                return True
    except Exception:
        # a malformed rule or missing response part counts as "no match"
        pass
Example #21
Source File: tumblrdownloader.py    From TumblrDownloader with MIT License 5 votes vote down vote up
def _getimages(self):
    '''
        Get all images returned by Tumblr API

        Fetches ``self.api_url`` with ``#start#`` replaced by the current
        start offset and returns the text of every ``<photo-url>`` element
        whose ``max-width`` equals ``self._resolution``.
    '''
    site = self.api_url.replace("#start#", str(self._start))

    response = urlopen(site)
    # try/finally so the connection is closed even if reading/decoding fails
    try:
        data = response.read().decode('utf8')
    finally:
        response.close()

    regex = r"<photo-url max-width=\"" + str(self._resolution) + "\">(.+?)</photo-url>"
    return re.findall(regex, data)
Example #22
Source File: inputs.py    From NGU-scripts with GNU Lesser General Public License v3.0 5 votes vote down vote up
def get_numbers(s: str) -> Iterable[int]:
    """Finds all numbers in a string

    Spaces and number separators are removed first.  Plain digit runs and
    ``<digits>.<digits>E+<digits>`` notation are recognised; every hit is
    converted through ``float`` to ``int``.
    """
    cleaned = Inputs.remove_number_separators(Inputs.remove_spaces(s))
    # each hit is (whole number text, optional exponent part)
    hits = re.findall(r"(\d+(\.\d+E\+\d+)?)", cleaned)
    return [int(float(whole)) for whole, _exponent in hits]
Example #23
Source File: data_process.py    From nlp-tensorflow with MIT License 5 votes vote down vote up
def tokenizer(sentence):
    """Split ``sentence`` into word tokens and single punctuation tokens.

    Runs of word characters form one token each; every other
    non-whitespace character is a token of its own.
    """
    return re.findall(r"[\w]+|[^\s\w]", sentence)
Example #24
Source File: dvrlogin.py    From hkdvr_login with MIT License 5 votes vote down vote up
def getinfo(host):
    """Try the built-in credentials against ``host`` on ports 80-99.

    For every port the ``/ISAPI/Security/userCheck`` endpoint is requested
    with the credentials embedded in the URL; a success line is printed when
    the response contains ``<statusValue>200</statusValue>``.  Ports that
    cannot be reached are skipped silently.
    """
    username = "admin"
    password = "12345"
    timeout = 5

    for port in range(80, 100):
        url = 'http://'+ username +':'+ password +'@'+ host +':'+ str(port) +'/ISAPI/Security/userCheck'
        try:
            req = requests.get(url=url, timeout=timeout)
        except requests.RequestException:
            # closed port, timeout, refused connection ... -> try next port
            continue
        status = re.findall(r'<statusValue>(.*)</statusValue>', req.text)
        # guard against responses without a <statusValue> element
        if status and status[0] == '200':
            # function-call form works on both Python 2 and Python 3
            print('[√] Host http://'+ host +':'+ str(port) +' Login Success!')
Example #25
Source File: adventure.py    From Dumb-Cogs with MIT License 5 votes vote down vote up
async def team_saves(self, ctx, team=None):
    # Reply with the list of save files of a team, newest first.
    # Without an explicit ``team`` the author's team in this channel is used,
    # falling back to the single team the author is a member of.
    # (Coroutine function: the body awaits, which a plain ``def`` cannot.)
    # TeamNebNeb didn't show saves also !advernture embark didn't load save
    author = ctx.message.author
    server = ctx.message.server
    channel = ctx.message.channel

    if team is None:
        try:
            team = self.players[server.id][channel.id][author.id]
        except Exception:
            # not playing in this channel -- look at team membership instead
            try:
                teams = self.teams[server.id]["MEMBERS"][author.id]
            except Exception:
                teams = []
            if not teams:
                # covers both "no membership entry" and an empty entry, which
                # used to be reported as "more than one team"
                await self.bot.reply('You are not in any team. Find one that will recruit you or create you own with `{}team new`'.format(ctx.prefix))
                return
            if len(teams) != 1:
                await self.bot.reply('You are in more than one team. Please specify which team to see the saves for.')
                return
            team = teams[0]
    team = self._safe_path(team).lower()
    tname = self._team_name(server, team)
    try:
        # http://stackoverflow.com/questions/168409/how-do-you-get-a-directory-listing-sorted-by-creation-date-in-python
        files = list(filter(os.path.isfile, glob.glob('data/adventure/saves/{}/{}/*.save'.format(server.id, team))))
        files.sort(key=os.path.getmtime, reverse=True)
        if not files:
            raise NoSave
        msg = tname+"'s save"
        if len(files) > 1:
            msg += 's'
        # captures the save name, i.e. the file name without the extension
        reg = re.compile('data/adventure/saves/{}/{}/([^/]*).save'.format(server.id,team)) # just bein verbose
        msg += ':\n' + '\n'.join([str(num+1) + ". " + re.findall(reg, sv)[0] for num,sv in enumerate(files)])

        await self.bot.reply(msg)
    except Exception as e:
        # NoSave and any listing problem end up here
        print(e)
        await self.bot.reply('The {} team does not have any saves'.format(tname))


    # only leaders can recruit? 
Example #26
Source File: alot.py    From Dumb-Cogs with MIT License 5 votes vote down vote up
async def alot_of_checks(self, message):
    """React to a message that mentions an "alot" by posting a picture URL.

    Ignores the bot's own messages, servers where the feature is switched
    off (unknown servers are registered as off) and single-word messages.
    When the text matches ``self.alotRegex`` the remainder is searched for
    keys; a random alot carrying one of the matching tags is sent, or any
    random alot when no key matched.
    """
    if message.author.id == self.bot.user.id:
        return

    server = message.server
    # let PMs through: they have no server and therefore no on/off setting
    if server is not None:
        if server.id not in self.settings["SERVERS"]:
            # default off
            self.settings["SERVERS"][server.id] = False
        if not self.settings["SERVERS"][server.id]:
            return

    lower = message.content.lower()
    if ' ' not in lower:
        return

    if lower == "what's an alot?":
        await self.bot.send_message(message.channel, "This is an alot: http://hyperboleandahalf.blogspot.com/2010/04/alot-is-better-than-you-at-everything.html")
        return

    # remove the first "alot" match; an unchanged text means there was none
    lowerm = re.sub(self.alotRegex, "", lower, count=1)
    if lowerm == lower:
        return

    # collect the tags of every key found, without duplicates, in order
    matched_tags = []
    for key in re.findall(self.keyRegex, lowerm):
        for tag in self.alotTags[key]:
            if tag not in matched_tags:
                matched_tags.append(tag)

    if matched_tags:
        url = self.alots[randchoice(matched_tags)]
    else:
        url = randchoice(list(self.alots.values()))
    await self.bot.send_message(message.channel, url)
Example #27
Source File: lolz.py    From Dumb-Cogs with MIT License 5 votes vote down vote up
def translate_sentence(self, sentence):
    """Translate ``sentence`` word by word, keeping the separators.

    Sentences containing a link (``self.regex['link']``) are returned
    unchanged.  Otherwise the text is cut into (word, separator) pairs --
    a word being a run of word characters and ':' -- and every word is
    passed through ``self.translate_word``.
    """
    # no links
    if re.findall(self.regex['link'], sentence):
        return sentence

    # ([:\w]*)  - 0 or more word characters or ':'
    # ([^:\w]*) - 0 or more of everything else
    # (raw string: '\w' is an invalid escape in a normal string literal)
    pairs = re.findall(r"([:\w]*)([^:\w]*)", sentence)
    # the trailing empty match yields ('', ''), which translate_word also sees
    return ''.join(self.translate_word(word) + space for word, space in pairs)
Example #28
Source File: avclass_common.py    From BASS with GNU General Public License v2.0 5 votes vote down vote up
def __norm_cat(self, label, hashes):
    """Map an AV label to the list of categories its tokens belong to.

    :param label: raw label string; falsy labels give an empty list.
    :param hashes: hash strings of the sample; a token that is a prefix of
        one of them is ignored.
    :return: category names (keys of ``self.cat``), one per token that is
        listed in a category, in token order.
    """
    if not label:
        return []

    # Initialize list of tokens to return
    ret = []

    # Split label into tokens and process each token
    for token in re.split(r"[^0-9a-zA-Z]", label):
        # Convert to lowercase and remove digits at the end
        # FIXME: What if it is a hash, and removes digits at the end???
        token = token.lower().rstrip('0123456789')

        # Ignore short token
        if len(token) < 4:
            continue

        # Ignore token if prefix of a hash of the sample
        # Most AVs use MD5 prefixes in labels,
        # but we check SHA1 and SHA256 as well
        if any(hash_str.startswith(token) for hash_str in hashes):
            continue

        # Replace the token by the first category that lists it;
        # tokens without a category are dropped.
        # (.items() instead of the Python-2-only .iteritems())
        for category, members in self.cat.items():
            if token in members:
                ret.append(category)
                break
    return ret
Example #29
Source File: interval.py    From rate.sx with MIT License 5 votes vote down vote up
def parse_length(length):
    """
    Parse ``length`` and return the parsed interval length (in seconds),
    or None if ``length`` can't be parsed.

    >>> parse_length('1m')
    60
    >>> parse_length('1h1m')
    3660
    >>> parse_length('1')
    >>> parse_length('1hX1m')
    >>> parse_length('1d')
    86400
    >>> parse_length('2M')
    5184000
    """
    unit_chars = "".join(INTERVAL_LENGTH.keys())
    pairs = re.findall('([0-9]+)([%s])' % unit_chars, length)

    total = 0
    for amount, unit in pairs:
        try:
            total += int(amount) * INTERVAL_LENGTH[unit]
        except KeyError:
            return None

    # Re-assembling the recognised pieces must give back the input exactly;
    # otherwise some characters were skipped and the specification is invalid.
    consumed = "".join(amount + unit for amount, unit in pairs)
    if consumed != length:
        return None

    return total
Example #30
Source File: tnslsnr-ping.py    From zbxdb with GNU General Public License v3.0 5 votes vote down vote up
def getVersion(cmd):
    """Send the "get version" command and parse the listener's reply.

    The packet length (header plus command) is written into bytes 0-1 of
    the module-level ``TNSPacket`` and the command length into bytes 24-25,
    both big-endian.  The packet followed by ``cmd`` is sent to
    ``(HOST, PORT)`` and up to 1024 bytes of the reply are read.

    :param cmd: command text appended to the packet header.
    :return: ``(vsnnum, err, version, elapsed_ms)`` on success, where
        ``elapsed_ms`` covers connect, send and receive;
        ``(0, "12541", "notfound")`` on any failure.
    """
    cmdl = len(cmd).to_bytes(2, byteorder='big')
    pckl = (len(cmd)+len(TNSPacket)).to_bytes(2, byteorder='big')
    TNSPacket[0] = pckl[0]
    TNSPacket[1] = pckl[1]
    TNSPacket[24] = cmdl[0]
    TNSPacket[25] = cmdl[1]

    try:
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(3)
            _start = timer()
            s.connect((HOST, PORT))
            scmd = TNSPacket + bytes(cmd, 'utf-8')
            s.sendall(scmd)
            data = s.recv(1024)
            ela = round((timer() - _start)*1000)

            # text inside the outermost parentheses of the reply
            rectxt = (ParseNestedParen(str(data), 0))
            # values following "VSNNUM=" / "ERR=" up to the next ')'
            vsnnum = re.findall(r'(?<=VSNNUM=).+?(?=\))',
                                str(rectxt), flags=re.IGNORECASE)
            err = re.findall(r'(?<=ERR=).+?(?=\))',
                             str(rectxt), flags=re.IGNORECASE)
            version = vsnnumToVersion(vsnnum[0])

            return vsnnum[0], err[0], version, ela
    except Exception:
        # ``Exception`` rather than a bare except so Ctrl-C / SystemExit
        # still propagate.
        # NOTE(review): this fallback has 3 items while the success path
        # returns 4 -- confirm how callers unpack the result.
        return 0, "12541", "notfound"