Python re.finditer() Examples

The following are code examples showing how to use re.finditer(). They are extracted from open source Python projects.
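For quick orientation, here is a minimal, self-contained demonstration of re.finditer(), adapted from the standard library documentation: it returns an iterator yielding one match object per non-overlapping match, scanning left to right, and each match object exposes start(), end() and group().

import re

text = "He was carefully disguised but captured quickly by police."
# finditer() yields a match object for every non-overlapping match
for m in re.finditer(r"\w+ly", text):
    print('%02d-%02d: %s' % (m.start(), m.end(), m.group(0)))
# 07-16: carefully
# 40-47: quickly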

Example 1
Project: malware   Author: JustF0rWork   File: voipbl.py    (license)
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(\d+\.\d+\.\d+\.\d+)/(\d+)", content):
            prefix, mask = match.groups()
            mask = int(mask)
            start_int = addr_to_int(prefix) & make_mask(mask)
            end_int = start_int | ((1 << 32 - mask) - 1)
            if 0 <= end_int - start_int <= 1024:
                address = start_int
                while start_int <= address <= end_int:
                    retval[int_to_addr(address)] = (__info__, __reference__)
                    address += 1

    return retval 
Example 2
Project: Sci-Finder   Author: snverse   File: parser.py    (license)
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 3
Project: PlasoScaffolder   Author: ClaudiaSaxer   File: sqlite_type_helper.py    (license)
def _GetEndOfTableIfNotAlias(self, query: str, column_name: str) -> int:
    """Gets the starting position of the column if it is not an alias column.

    Args:
      query (str): the query to be searched
      column_name (str): the name to be searched for

    Returns:
      int: the starting position of the column, or 0 if no column could
          be found
    """
    wrong_positions = [name.start() for name in
                       re.finditer('.{0} as'.format(column_name), query)]
    found_positions = []
    for space in self._POSSIBLEQUERYSEPERATOR:
      found_positions += [name.start() for name in
                          re.finditer('.{0}{1}'.format(column_name, space),
                                      query)]

    position = set(found_positions) - set(wrong_positions)

    if position:
      return position.pop()
    else:
      return 0 
Example 4
Project: whatstyle   Author: mikr   File: whatstyle.py    (MIT License)
def register_options(self):
        # type: () -> None
        """Parse options from text like this:
        Preferences:
          [+|-]alignArguments                                        Enable/disable ...
          ...
          [+|-]spacesWithinPatternBinders                            Enable/disable ...
          -alignSingleLineCaseStatements.maxArrowIndent=[1-100]      Set Maximum number ...
          -indentSpaces=[1-10]                                       Set Number of spaces ...
        """
        exeresult = run_executable(self.exe, ['--help'], cache=self.cache)
        options = []
        text = unistr(exeresult.stdout)
        for m in re.finditer(r'^  (\[\+\|-\]|-)([a-z][a-zA-Z.]+)(?:=\[(\d+)-(\d+)\])?', text,
                             re.MULTILINE):
            optionprefix, optionname, start, end = m.groups()
            if start is None:
                optiontype = 'bool'
                configs = [True, False]  # type: List[OptionValue]
            else:
                optiontype = 'int'
                configs = list(inclusiverange(int(start), int(end)))
            options.append(option_make(optionname, optiontype, configs))
        self.styledefinition = styledef_make(options) 
Example 5
Project: nettools   Author: germandutchwindtunnels   File: Cisco.py    (GNU General Public License v2.0)
def filter_output(self, output, regex):
        """ Filter output from a command """
        result = {}
        result_list = []
        if isinstance(output, str):
            lines = [output]
        else:
            lines = output

        for line in lines:
            # iterate directly over the match objects for this line
            for cur in re.finditer(regex, line):
                result = cur.groupdict()
                result['hostname'] = self.host
                result_list.append(result)

        return result_list 
Example 6
Project: swjtu-pyscraper   Author: Desgard   File: parser.py    (MIT License)
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 7
Project: SerpScrap   Author: ecoron   File: urlscrape.py    (MIT License)
def split_into_sentences(text):
    potential_end_pat = re.compile(r"".join([
        r"([\w\.'’&\]\)]+[\.\?!])",  # A word that ends with punctuation
        r"([‘’“”'\"\)\]]*)",  # Followed by optional quote/parens/etc
        r"(\s+(?![a-z\-–—]))",  # Followed by whitespace + non-(lowercase or dash)
        ]),
        re.U
    )
    dot_iter = re.finditer(potential_end_pat, text)
    end_indices = [
        (x.start() + len(x.group(1)) + len(x.group(2)))
        for x in dot_iter
        if is_sentence_ender(x.group(1))
    ]
    spans = zip([None] + end_indices, end_indices + [None])
    sentences = [
        text[start:end].strip() for start, end in spans
    ]
    return sentences 
Example 8
Project: infi.clickhouse_orm   Author: Infinidat   File: fields.py    (license)
def create_ad_hoc_field(cls, db_type):
        '''
        Give an SQL column description such as "Enum8('apple' = 1, 'banana' = 2, 'orange' = 3)"
        this method returns a matching enum field.
        '''
        import re
        try:
            Enum # exists in Python 3.4+
        except NameError:
            from enum import Enum # use the enum34 library instead
        members = {}
        for match in re.finditer("'(\w+)' = (\d+)", db_type):
            members[match.group(1)] = int(match.group(2))
        enum_cls = Enum('AdHocEnum', members)
        field_class = Enum8Field if db_type.startswith('Enum8') else Enum16Field
        return field_class(enum_cls) 
Example 9
Project: routersploit   Author: reverse-shell   File: __init__.py    (license)
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])

    token_specification.extend((
        ('NEWLINE', r'\n'),          # Line endings
        ('SKIP', r'.'),              # Any other character
    ))

    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        value = [group for group in mo.groups() if group is not None]
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo) 
Example 10
Project: malware   Author: JustF0rWork   File: deepviz.py    (license)
def fetch():
    retval = {}
    content = retrieve_content(__url__)

    if __check__ in content:
        for match in re.finditer(r"(?m)^([\d.]+),IP used by ([^,]+) C&C", content):
            retval[match.group(1)] = ("%s (malware)" % match.group(2).lower(), __reference__)

    for row in re.finditer(r"(?s)<tr>(.+?)</tr>", content):
        if "<span>100%</span>" in row.group(1):
            domain = re.search(r"get_data_domain\('([^']+)", row.group(1))
            if domain:
                tag = re.search(r">(trojan|spyware|adware)\.([^<]+)", row.group(1))
                retval[domain.group(1)] = (("%s (malware)" % tag.group(2)) if tag else "malware", __reference__)

    return retval 
Example 11
Project: BITSInject   Author: SafeBreach-Labs   File: BITSInject.py    (license)
def remove_job(self, job_guid):
        """
        Remove a job given its GUID, including decreasing the job counter of the queue
        :param job_guid: the GUID of the job to remove from queue
        :return: void
        """
        global _os_ver
        occurs = [(oc.start(), oc.end()) for oc in
                  list(re.finditer('%s' % (BITSStateFile.JOB_HEADER_FOOTER_HEX[_os_ver].decode('hex')),
                                   self._original_data))
                  ]
        if occurs:
            self.set_jobs_counter(self.get_jobs_counter() - 1)
            state_off = self._get_job_state_off(job_guid)
            new_data_list = list(self._new_data)
            job_start_off, job_end_off = BITSStateFile._get_job_limits_by_index_in_between(occurs, state_off)
            new_data_list = new_data_list[:job_start_off + 1] + new_data_list[job_end_off + 1:]
            self._update_new_data(0, "".join(new_data_list))
            self.commit() 
Example 12
Project: Sci-Finder   Author: snverse   File: parser.py    (license)
def split_arg_string(string):
    """Given an argument string this attempts to split it into small parts."""
    rv = []
    for match in re.finditer(r"('([^'\\]*(?:\\.[^'\\]*)*)'"
                             r'|"([^"\\]*(?:\\.[^"\\]*)*)"'
                             r'|\S+)\s*', string, re.S):
        arg = match.group().strip()
        if arg[:1] == arg[-1:] and arg[:1] in '"\'':
            arg = arg[1:-1].encode('ascii', 'backslashreplace') \
                .decode('unicode-escape')
        try:
            arg = type(string)(arg)
        except UnicodeError:
            pass
        rv.append(arg)
    return rv 
Example 13
Project: CoBL-public   Author: lingdb   File: views.py    (license)
def post(self, request, pk, **kwargs):
        instance = CognateClassCitation.objects.get(id=pk)
        form = EditCognateClassCitationForm(request.POST, instance=instance)
        try:
            # validate {ref foo ...}
            s = Source.objects.all().filter(deprecated=False)
            pattern = re.compile(r'(\{ref +([^\{]+?)(:[^\{]+?)? *\})')
            for m in re.finditer(pattern, form.data['comment']):
                foundSet = s.filter(shorthand=m.group(2))
                if not foundSet.count() == 1:
                    raise ValidationError(
                        'In field “Comment” source shorthand “%(name)s” is unknown.',
                        params={'name': m.group(2)})
            form.save()
        except ValidationError as e:
            messages.error(
                request,
                'Sorry, the server had problems updating the cognate citation. %s' % e)
            return self.render_to_response({"form": form})
        return HttpResponseRedirect(reverse('cognate-class-citation-detail', args=[pk])) 
Example 14
Project: purelove   Author: hucmosin   File: __init__.py    (license)
def tokenize(token_specification, text):
    Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column', 'mo'])

    token_specification.extend((
        ('NEWLINE', r'\n'),          # Line endings
        ('SKIP', r'.'),              # Any other character
    ))

    tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
    line_num = 1
    line_start = 0
    for mo in re.finditer(tok_regex, text):
        kind = mo.lastgroup
        value = [group for group in mo.groups() if group is not None]
        if kind == 'NEWLINE':
            line_start = mo.end()
            line_num += 1
        elif kind == 'SKIP':
            pass
        else:
            column = mo.start() - line_start
            yield Token(kind, value, line_num, column, mo) 
Example 15
Project: annotated-py-sqlalchemy   Author: hhstore   File: ansisql.py    (license)
def after_compile(self):
        if self.engine.positional:
            self.positiontup = []
            match = r'%\(([\w_]+)\)s'
            params = re.finditer(match, self.strings[self.statement])
            for p in params:
                self.positiontup.append(p.group(1))
            if self.engine.paramstyle=='qmark':
                self.strings[self.statement] = re.sub(match, '?', self.strings[self.statement])
            elif self.engine.paramstyle=='format':
                self.strings[self.statement] = re.sub(match, '%s', self.strings[self.statement])
            elif self.engine.paramstyle=='numeric':
                i = [0]
                def getnum(x):
                    i[0] += 1
                    return str(i[0])
                self.strings[self.statement] = re.sub(match, getnum, self.strings[self.statement]) 
Example 16
Project: pymotw3   Author: reingart   File: re_test_patterns.py    (license)
def test_patterns(text, patterns):
    """Given source text and a list of patterns, look for
    matches for each pattern within the text and print
    them to stdout.
    """
    # Look for each pattern in the text and print the results
    for pattern, desc in patterns:
        print("'{}' ({})\n".format(pattern, desc))
        print("  '{}'".format(text))
        for match in re.finditer(pattern, text):
            s = match.start()
            e = match.end()
            substr = text[s:e]
            n_backslashes = text[:s].count('\\')
            prefix = '.' * (s + n_backslashes)
            print("  {}'{}'".format(prefix, substr))
        print()
    return 
Example 17
Project: Sparcli   Author: 4Kaylum   File: Internet.py    (license)
def pun(self, ctx):
        '''
        Gives a random pun from the depths of the internet
        '''

        # Read from page
        async with self.session.get('http://www.punoftheday.com/cgi-bin/randompun.pl') as r:
            page = await r.text()

        # Scrape the raw HTML
        r = r'(<div class=\"dropshadow1\">\n<p>).*(</p>\n</div>)'
        foundPun = [i for i in finditer(r, page)][0].group()

        # Filter out the pun
        r = r'(>).*(<)'
        filteredPun = [i for i in finditer(r, foundPun)][0].group()

        # Boop it out
        fullPun = filteredPun[1:-1]
        await self.sparcli.say(fullPun) 
Example 18
Project: Sparcli   Author: 4Kaylum   File: Steam.py    (license)
def steamid(self, ctx, *, gameURL:str):
        '''
        Gets the information of a game from Steam URL
        '''

        await self.sparcli.send_typing(ctx.message.channel)

        # Grab the game ID from the user input
        regexMatches = finditer(r'\d+', gameURL)
        regexList = [i for i in regexMatches]

        # Parse it as a group
        if len(regexList) == 0:
            await self.sparcli.say('I was unable to find the ID of that game on the Steam API.')
            return
        else:
            await self.getSteamGameInfo(regexList[0].group()) 
Example 19
Project: Sparcli   Author: 4Kaylum   File: Messages.py    (license)
def messageToEmbed(message):

    # Get some default values that'll be in the embed
    author = message.author 
    description = message.content
    image = False

    # Check to see if any images were added
    regexMatch = r'.+(\.png)|.+(\.jpg)|.+(\.jpeg)|.+(\.gif)'
    if len(message.attachments) > 0:
        attachment = message.attachments[0]
        matchList = [i for i in finditer(regexMatch, attachment['filename'])]
        if len(matchList) > 0:
            image = attachment['url']

    # Get the time the message was created
    createdTime = '.'.join(str(message.timestamp).split('.')[:-1])

    # Make and return the embed
    return makeEmbed(user=author, description=description, image=image, footer=createdTime) 
Example 20
Project: deep_throat   Author: wdbm   File: deep_throat.py    (license)
def match_and_replace(
    text    = None,
    rule    = None,
    phoneme = None
    ):
    """
    Replace found text from a single rule.
    """
    # Find all rule matches.
    matches = [(match.start(), match.end()) for \
        match in re.finditer(rule, text)]
    # Start from behind, so replace in-place.
    matches.reverse()
    # Convert to characters because strings are immutable.
    characters = list(text)
    for start, end in matches:
        characters[start:end] = phoneme
    # Convert back to string.
    return "".join(characters) 
Example 21
Project: expdevBadChars   Author: mgeeky   File: badchars.py    (license)
def unpack_dword(line):
        outs = ''
        i = 0
        for m in re.finditer(r'((?:0x[0-9a-f]{8},?\s*))', line):
            l = m.group(0)
            l = l.replace(',', '')
            l = l.replace(' ', '')
            dword = int(l, 16)
            unpack = reversed([
                (dword & 0xff000000) >> 24,
                (dword & 0x00ff0000) >> 16,
                (dword & 0x0000ff00) >>  8,
                (dword & 0x000000ff)
            ])
            i += 4
            for b in unpack:
                outs += '%02x' % b

        out(dbg("After callback ('%s')" % outs))
        return BytesParser.formats_compiled['hexstring'].match(outs) 
Example 22
Project: Qyoutube-dl   Author: lzambella   File: youtube.py    (license)
def extract_videos_from_page(self, page):
        ids_in_page = []
        titles_in_page = []
        for mobj in re.finditer(self._VIDEO_RE, page):
            # The link with index 0 is not the first video of the playlist (not sure if still actual)
            if 'index' in mobj.groupdict() and mobj.group('id') == '0':
                continue
            video_id = mobj.group('id')
            video_title = unescapeHTML(mobj.group('title'))
            if video_title:
                video_title = video_title.strip()
            try:
                idx = ids_in_page.index(video_id)
                if video_title and not titles_in_page[idx]:
                    titles_in_page[idx] = video_title
            except ValueError:
                ids_in_page.append(video_id)
                titles_in_page.append(video_title)
        return zip(ids_in_page, titles_in_page) 
Example 23
Project: Qyoutube-dl   Author: lzambella   File: metadatafromtitle.py    (license)
def format_to_regex(self, fmt):
        """
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'
        """
        lastpos = 0
        regex = ""
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<' + match.group(1) + '>.+)'
            lastpos = match.end()
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:len(fmt)])
        return regex 
Example 24
Project: Qyoutube-dl   Author: lzambella   File: jsinterp.py    (license)
def extract_object(self, objname):
        obj = {}
        obj_m = re.search(
            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
            r'\s*(?P<fields>([a-zA-Z$0-9]+\s*:\s*function\(.*?\)\s*\{.*?\}(?:,\s*)?)*)' +
            r'\}\s*;',
            self.code)
        fields = obj_m.group('fields')
        # Currently, it only supports function definitions
        fields_m = re.finditer(
            r'(?P<key>[a-zA-Z$0-9]+)\s*:\s*function'
            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
            fields)
        for f in fields_m:
            argnames = f.group('args').split(',')
            obj[f.group('key')] = self.build_function(argnames, f.group('code'))

        return obj 
Example 25
Project: python-tutorial   Author: Akuli   File: common.py    (license)
def find_links(file):
    """Find all markdown links in a file object.

    Yield (match, lineno) tuples.
    """
    # don't yield same link twice
    seen = set()

    # we need to loop over the file two lines at a time to support
    # multi-line (actually two-line) links, so this is kind of a mess
    firsts, seconds = itertools.tee(file)
    next(seconds)  # first line is never second line

    # we want 1-based indexing instead of 0-based and one-line links get
    # caught from linepair[1], so we need to start at two
    for lineno, linepair in enumerate(zip(firsts, seconds), start=2):
        lines = linepair[0] + linepair[1]
        for match in re.finditer(_LINK_REGEX, lines, flags=re.DOTALL):
            if match.group(0) not in seen:
                seen.add(match.group(0))
                yield match, lineno 
Example 26
Project: kingpin   Author: pinterest   File: zk_util.py    (license)
def split_problematic_endpoints_line(line):
    """
    If a host line contains more than one ":",
    for example: 10.99.184.69:900010.37.170.125:9006
    this splits the line and returns a list of correct endpoints.

    Args:
        ``line``: the problematic line which contains more than one endpoint string.

    Returns:
        the split list of the problematic line with correct endpoint strings.
    """

    colon_parts = line.strip().split(":")
    offset = len(colon_parts[-1])
    colon_positions = [m.start() for m in re.finditer(':', line)]
    start = 0
    split_parts = []
    for colon_position in colon_positions:
        end = colon_position + offset + 1
        split_part = line[start:end]
        split_parts.append(split_part)
        start = end
    return split_parts 
Example 27
Project: gwot-physical   Author: JanVan01   File: utils.py    (license)
def get_classes(self, folder, class_suffix, selected = None):
		classes = []
		for file in glob.glob(folder + "/*.py"):
			handle = open(file, "r")
			content = handle.read()
			handle.close()
			
			module = folder.replace('/', '.').replace('\\', '.') + '.' + os.path.basename(file).replace('.py', '')

			regexp = "\sclass\s+([\w\d]+"+class_suffix+")\s*\(([\w\d]*)\)\s*:\s"
			for m in re.finditer(regexp, content):
				parent_class = m.group(2)
				if len(parent_class) == 0 or parent_class == 'object':
					continue
				class_name = m.group(1)
				classes.append(module + '.' + class_name)
				
		return classes 
Example 28
Project: thesis-check   Author: albalitz   File: weasel_words.py    (license)
def analyze(line, linenum, lang):
    annotations = []

    if lang is Language.en_EN or lang is None:
        weasel_words = WEASEL_WORDS_EN
    elif lang is Language.de_DE:
        weasel_words = WEASEL_WORDS_DE

    for weasel_pattern in weasel_words:
        for match in re.finditer(weasel_pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue

            index = match.start()
            annotation = WeaselWord(linenum, line, index, word=match.group(0))
            annotations.append(annotation)

    return annotations 
Example 29
Project: thesis-check   Author: albalitz   File: contractions.py    (license)
def analyze(line, linenum, lang):
    annotations = []

    if lang is Language.en_EN or lang is None:
        contractions = CONTRACTIONS_EN
    elif lang is Language.de_DE:
        contractions = CONTRACTIONS_DE

    for pattern in contractions:
        for match in re.finditer(pattern, line, flags=re.IGNORECASE):
            if (not verify_match(match, line)) or (not matches_whole_words(match, line)):
                continue

            index = match.start()
            replaced_contraction = re.sub(pattern, contractions[pattern], match.group(0), flags=re.IGNORECASE)
            annotation = Contraction(linenum, line, index, word=match.group(0), contraction=replaced_contraction)
            annotations.append(annotation)

    return annotations 
Example 30
Project: astropy-bot   Author: astropy   File: github_api.py    (license)
def paged_github_json_request(url, headers=None):

    response = requests.get(url, headers=headers)
    assert response.ok, response.content
    results = response.json()

    if 'Link' in response.headers:

        links = response.headers['Link']

        # There are likely better ways to parse/extract the link information
        # but here we just find the last page number mentioned in the header
        # 'Link' section and then loop over all pages to get the comments
        last_match = list(re.finditer('page=[0-9]+', links))[-1]
        last_page = int(links[last_match.start():last_match.end()].split('=')[1])

        # If there are other pages, just loop over them and get all the
        # comments
        if last_page > 1:
            for page in range(2, last_page + 1):
                response = requests.get(url + '?page={0}'.format(page), headers=headers)
                assert response.ok, response.content
                results += response.json()

    return results 
Example 31
Project: Python-GoogleDrive-VideoStream   Author: ddurdle   File: gdrive_api2.py    (license)
def getOfflineMediaList(self, folderName=False, title=False, contentType=7):

        mediaFiles = []
        for r1 in re.finditer(r'\{(.*?)"spaces":', entryS, re.DOTALL):
            entry = r1.group(1)
            media = self.getMediaPackage(entry, folderName=folderName, contentType=contentType, fanart=folderFanart, icon=folderIcon)
            if media is not None:
                mediaFiles.append(media)



        return mediaFiles
Example 32
Project: sequana   Author: sequana   File: sequence.py    (license)
def get_occurences(self, pattern, overlap=False):
        """Return position of the input pattern in the sequence

        ::

            >>> from sequana import Sequence
            >>> s = Sequence('ACGTTTTACGT')
            >>> s.get_occurences("ACGT")
            [0, 7]

        """
        if overlap is False:
            res = [m.start() for m in re.finditer(pattern, self.sequence)]
        elif overlap is True:
            res = [m.start() for m in re.finditer('(?=%s)'%pattern, self.sequence)]
        return res

        # reverse find-all without overlaps, you can combine positive and
        # negative lookahead into an expression like this:
        #res = [m.start() for m in re.finditer('(?=%s)(?!.{1,%d}%s)' % (search,
        #    len(pattern)-1, pattern), 'ttt')] 
Example 33
Project: core-python   Author: yidao620c   File: lover.py    (license)
def find_cute(url):
    # fetch the page
    r = requests.get(url)
    # read the declared charset and set r.encoding accordingly
    encoding = re.search(r'content="text/html;\s*charset=(.*?)"', r.text).group(1)
    r.encoding = encoding
    # print(r.text)
    # pick a random paragraph; counting exhausts the iterator, so rebuild it
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    i = random.randint(0, sum(1 for _ in finds) - 1)
    start = 0
    finds = re.finditer(r'<p>\s*([^>]*?)\s*\n', r.text)
    for f in finds:
        if start == i:
            print(f.group(1))
            break
        start += 1
Example 34
Project: python-   Author: secondtonone1   File: tarfile.py    (license)
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 35
Project: python-   Author: secondtonone1   File: tarfile.py    (license)
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 36
Project: my-first-blog   Author: AnkurBegining   File: tarfile.py    (license)
def _proc_gnusparse_00(self, next, pax_headers, buf):
        """Process a GNU tar extended sparse header, version 0.0.
        """
        offsets = []
        for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
            offsets.append(int(match.group(1)))
        numbytes = []
        for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
            numbytes.append(int(match.group(1)))
        next.sparse = list(zip(offsets, numbytes)) 
Example 37
Project: mysql-er   Author: StefanLim0   File: SqlExtractor.py    (MIT License)
def get_selects_from_text(content):
        sqls = []
        select_keyword = r'@Select\s*\('
        for m in re.finditer(select_keyword, content):
            rparen_pos = MybatisInlineSqlExtractor.find_right_paren_pos(content[m.end():])
            if rparen_pos < 0:
                continue
            sqls.append(SQL('', eval(content[m.end():m.end() + rparen_pos].replace('\r', '').replace('\n', '')).strip()))
        return sqls 
Example 38
Project: cellranger   Author: 10XGenomics   File: reference.py    (license)
def get_properties_dict(self, properties_str):
        if isinstance(properties_str, dict):
            return properties_str

        properties = collections.OrderedDict()
        pattern = re.compile(r'(\S+?)\s*"(.*?)"')
        for m in re.finditer(pattern, properties_str):
            key = m.group(1)
            value = m.group(2)
            properties[key] = value
        return properties 
Example 39
Project: cellranger   Author: 10XGenomics   File: reference.py    (license)
def get_unambiguous_regions(reference_path):
    '''Calculate regions corresponding to unambiguous bases'''
    chrom_map = {}
    for chrom, seq in open_reference(reference_path).items():
        regions = [(m.start(), m.end()) for m in re.finditer('[acgtACGT]+', seq[:])]
        chrom_map[chrom] = Regions(regions=regions)
    return chrom_map 
Example 40
Project: tokenize-uk   Author: lang-uk   File: tokenize_uk.py    (MIT License)
def tokenize_sents(string):
    """
    Tokenize input text to sentences.

    :param string: Text to tokenize
    :type string: str or unicode
    :return: sentences
    :rtype: list of strings
    """
    string = six.text_type(string)

    spans = []
    for match in re.finditer(r'[^\s]+', string):
        spans.append(match)
    spans_count = len(spans)

    rez = []
    off = 0

    for i in range(spans_count):
        tok = string[spans[i].start():spans[i].end()]
        if i == spans_count - 1:
            rez.append(string[off:spans[i].end()])
        elif tok[-1] in ['.', '!', '?', '…', '»']:
            tok1 = tok[re.search('[.!?…»]', tok).start()-1]
            next_tok = string[spans[i + 1].start():spans[i + 1].end()]
            if (next_tok[0].isupper()
                and not tok1.isupper()
                and not (tok[-1] != '.'
                         or tok1[0] == '('
                         or tok in ABBRS)):
                rez.append(string[off:spans[i].end()])
                off = spans[i + 1].start()

    return rez 
Example 41
Project: whatstyle   Author: mikr   File: whatstyle.py    (MIT License)
def register_options(self):
        # type: () -> None
        """Parse options from text like this:
        # Uncrustify 0.63
        #
        # General options
        #

        newlines                                  { Auto, LF, CR, CRLF }
          The type of line endings

        input_tab_size                            Number
          The original size of tabs in the input

        indent_align_string                       { False, True }
          Whether to indent strings broken by '\' so that they line up
        """
        exeresult = run_executable(self.exe, ['--show-config'], cache=self.cache)
        options = []
        text = unistr(exeresult.stdout)
        for m in re.finditer(r'^(\w+)\s+(.*?)\s*$', text, re.MULTILINE):
            optionname, optiondesc = m.group(1), m.group(2)
            if optiondesc.startswith('{'):
                optiontype = 'Enum'
                configs = optiondesc[1:-1].strip().split(', ')
                configs = [c.lower() for c in configs]
            else:
                optiontype = optiondesc
                configs = []
            options.append(option_make(optionname, optiontype, configs))
        self.styledefinition = styledef_make(options) 
Example 42
Project: plugin.video.exodus   Author: lastship   File: jsunfuck.py    (license)
def __handle_tostring(self):
        for match in re.finditer(r'(\d+)\[t\+o\+S\+t\+r\+i\+n\+g\](\d+)', self.js):
            repl = to_base(match.group(1), match.group(2))
            self.js = self.js.replace(match.group(0), repl) 
Example 43
Project: plugin.video.exodus   Author: lastship   File: dom_parser.py    (license)
def __get_attribs(element):
    attribs = {}
    for match in re.finditer(r'''\s+(?P<key>[^=]+)=\s*(?:(?P<delim>["'])(?P<value1>.*?)(?P=delim)|(?P<value2>[^"'][^>\s]*))''', element):
        match = match.groupdict()
        value1 = match.get('value1')
        value2 = match.get('value2')
        value = value1 if value1 is not None else value2
        if value is None: continue
        attribs[match['key'].lower().strip()] = value
    return attribs 
Example 44
Project: Radiojavan   Author: nimasaj   File: RJ_3.5.py    (license)
def album(URL):
    track_list=[]
    if (URL.find('?index=')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?index=')]
        current_track_no=int(URL[len(a1)+len('?index='):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    elif (URL.find('?start')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?start')]
        current_track_no=int(URL[len(a1)+len('?start'):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    else:
        all_track_nr=(html.count('?index='))//2
        a1=URL
        current_track_no='null'
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    i=0
    b=html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
    b_len=len('<span class="song_name">')
    matches = re.finditer(r'<span class="song_name">', b)
    indices = [m.start(0) for m in matches]
    while i<all_track_nr:
        track_list.append('%s?index=%d'%(a1,i))
        d=(b[indices[i]:].find('</span>'))
        track_name=b[indices[i]+b_len:indices[i]+d]
        track_list.append(track_name)
        i+=1
    return(track_list) 
Example 45
Project: Radiojavan   Author: nimasaj   File: RJ_3.py    (license)
def album(URL):
    track_list=[]
    if (URL.find('?index=')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?index=')]
        current_track_no=int(URL[len(a1)+len('?index='):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    elif (URL.find('?start')>0):
        all_track_nr=((html.count('?index='))//2)-1
        a1=URL[:URL.find('?start')]
        current_track_no=int(URL[len(a1)+len('?start'):])
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    else:
        all_track_nr=(html.count('?index='))//2
        a1=URL
        current_track_no='null'
        ID=a1[a1.find('/album/')+len('/album/'):]
        track_list.append('%s'%current_track_no)
    i=0
    b=html[html.find('<span class="song_name">'):html.rfind('<span class="song_name">')]
    b_len=len('<span class="song_name">')
    matches = re.finditer(r'<span class="song_name">', b)
    indices = [m.start(0) for m in matches]
    while i<all_track_nr:
        track_list.append('%s?index=%d'%(a1,i))
        d=(b[indices[i]:].find('</span>'))
        track_name=b[indices[i]+b_len:indices[i]+d]
        track_list.append(track_name)
        i+=1
    return(track_list) 
Example 46
Project: googletranslate.popclipext   Author: wizyoung   File: utils.py    (MIT License)
def legacy_format_json(original):
    # save state
    states = []
    text = original
    
    # save position for double-quoted texts
    for i, pos in enumerate(re.finditer('"', text)):
        # pos.start() is a double-quote
        p = pos.start() + 1
        if i % 2 == 0:
            nxt = text.find('"', p)
            states.append((p, text[p:nxt]))

    # replace all weird characters in text
    while text.find(',,') > -1:
        text = text.replace(',,', ',null,')
    while text.find('[,') > -1:
        text = text.replace('[,', '[null,')

    # recover state
    for i, pos in enumerate(re.finditer('"', text)):
        p = pos.start() + 1
        if i % 2 == 0:
            j = int(i / 2)
            nxt = text.find('"', p)
            # replacing a portion of a string
            # use slicing to extract those parts of the original string to be kept
            text = text[:p] + states[j][1] + text[nxt:]

    converted = json.loads(text)
    return converted 
Example 47
Project: BioNanoAnalyst   Author: AppliedBioinformatics   File: Analysis.py    (GNU General Public License v3.0)
def make_RefCmap(fasta_file, enz=None, min_len=20, min_nsite=5, path=None):
    name = fasta_file.rsplit('.',1)[0].split('/')[-1]
    index = 0
    enzymes = {'BspQI':'GCTCTTC',
                'BbvCI':'CCTCAGC',
                'Bsml':'GAATGC',
                'BsrDI':'GCAATG',
                'bseCI':'ATCGAT',
                'BssSI':'CACGAG'}
    try:
        cmap_file='%s/%s_%s.cmap'%(path,name,enz)
        forwards = enzymes[enz]
        reverse = str(Seq(forwards).reverse_complement())
        with open (cmap_file,'a') as ref_cmap:
            ref_cmap.write('# CMAP File Version:\t0.1\n')
            ref_cmap.write('# Label Channels:\t1\n')
            ref_cmap.write('# Nickase Recognition Site 1:\t%s\n'%forwards)
            ref_cmap.write('# Enzyme1:\tNt.%s\n'%enz)
            ref_cmap.write('# Number of Consensus Nanomaps:\tN/A\n')
            ref_cmap.write('#h CMapId\tContigLength\tNumSites\tSiteID\tLabelChannel\tPosition\tStdDev\tCoverage\tOccurrence\n')
            ref_cmap.write('#f int\tfloat\tint\tint\tint\tfloat\tfloat\tint\tint\n')
            for seqs in SeqIO.parse(fasta_file,'fasta'):
                seq = str(seqs.seq.upper())
                seq_len = len(seq)
                index+=1
                if seq_len >= min_len*1000:
                    nsites = len(re.findall('%s|%s'%(forwards,reverse),seq))
                    if nsites >= min_nsite:
                        j=1
                        for o in re.finditer('%s|%s'%(forwards,reverse),seq):
                            ref_cmap.write('%s\t%.1f\t%d\t%d\t1\t%.1f\t1.0\t1\t1\n'%(index,seq_len,nsites,j,o.start()+1))
                            j+=1
                        ref_cmap.write('%s\t%.1f\t%d\t%d\t0\t%.1f\t0.0\t1\t0\n'%(index,seq_len,nsites,j,seq_len))
    except:
        pass 
Example 48
Project: mod_stat   Author: DadoZe   File: mod_stat.py    (license)
def formatString(self, text, stats, not_found_replacement = None):
        #try:
        values = stats['values']
        for m in re.finditer("{{([gc]:)?([^}:]*)((:d)|(:1f)|:(\d+)|:(\d+)\.(\d+)f|(:\+d)|(:\+1f))?}}", text):
            g, g1, key, g2, sg1, sg2, sg3, sg4a, sg4b, sg5, sg6 = m.group(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
            if not key in values:
                if not_found_replacement is None:
                    if d: LOG_NOTE('No key in values of %s (%s)' % (stats.get('_type', 'unknown'), key))
                else:
                    text = text.replace('%s' % g, not_found_replacement)
            elif g1 is None:
                if g2 is None:
                    text = text.replace('{{%s}}' % key, self.applyMacros(values[key]))
                elif sg1:
                    text = text.replace('{{%s:d}}' % key, self.applyMacros(values[key], 0))
                elif sg2:
                    text = text.replace('{{%s:1f}}' % key, self.applyMacros(values[key], 1))
                elif sg3:
                    xx = int(sg3)
                    text = text.replace('{{%s:%d}}' % (key, xx), self.applyMacros2(values[key], xx))
                elif sg4a:
                    xx, yy = int(sg4a), int(sg4b)
                    text = text.replace('{{%s:%d.%df}}' % (key, xx, yy), self.applyMacros2(values[key], xx, yy))
                elif sg5:
                    text = text.replace('{{%s:+d}}' % key, self.applyMacros(values[key], 0, '+'))
                elif sg6:
                    text = text.replace('{{%s:+1f}}' % key, self.applyMacros(values[key], 1, '+'))
            elif g1=="g:":
                text = text.replace('{{g:%s}}' % key, stats['gradient'][key])
            elif g1=="c:":
                text = text.replace('{{c:%s}}' % key, stats['palette'][key])
        #except:
        #  LOG_CURRENT_EXCEPTION()
        #finally:
        return text 
Example 49
Project: yargy   Author: natasha   File: tokenizer.py    (MIT License)
def __call__(self, text):
        for match in re.finditer(self.regexp, text):
            name = match.lastgroup
            value = match.group(0)
            span = match.span()
            rule = self.mapping[name]
            token = rule(value, span)
            yield token 
Example 50
Project: aapm_thoracic_challenge   Author: xf4j   File: model.py    (MIT License)
def load(self, model_name='main'):
        checkpoint_dir = os.path.join(self.checkpoint_dir, self.model_dir)
        
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
            self.saver.restore(self.sess, os.path.join(checkpoint_dir, ckpt_name))
            counter = int(next(re.finditer(r"(\d+)(?!.*\d)", ckpt_name)).group(0))
            return True, counter
        else:
            print("Failed to find a checkpoint")
            return False, 0