Python re.I Examples

The following are code examples showing how to use re.I, the short alias for re.IGNORECASE that makes regular-expression matching case-insensitive. They are extracted from open source Python projects.
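
For orientation, here is a minimal sketch (not taken from any project below) of what the flag does:

import re

# re.I is an alias of re.IGNORECASE: it makes matching case-insensitive.
print(re.search(r'hello', 'Say HELLO', re.I))   # matches 'HELLO'
print(re.search(r'hello', 'Say HELLO'))         # None without the flag

# The flag can also be baked into a compiled pattern:
pattern = re.compile(r'hello', re.I)
print(bool(pattern.search('Say HELLO')))        # True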

Example 1
Project: python-   Author: secondtonone1   File: locators.py
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock() 
Example 2
Project: python-   Author: secondtonone1   File: utils.py
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
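
A quick usage sketch (the HTML string is invented for illustration). Note that although the docstring says "bytestring", the patterns above are compiled from str literals, so content must be text:

html = '<html><head><meta charset="utf-8"></head><body></body></html>'
print(get_encodings_from_content(html))  # ['utf-8']

Because the two meta patterns carry re.I, <META CHARSET=...> and <meta charset=...> are matched alike; the XML pattern omits the flag since an XML declaration is lowercase '<?xml' by specification.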
Example 3
Project: python-   Author: secondtonone1   File: warnings.py
def filterwarnings(action, message="", category=Warning, module="", lineno=0,
                   append=False):
    """Insert an entry into the list of warnings filters (at the front).

    'action' -- one of "error", "ignore", "always", "default", "module",
                or "once"
    'message' -- a regex that the warning message must match
    'category' -- a class that the warning must be a subclass of
    'module' -- a regex that the module name must match
    'lineno' -- an integer line number, 0 matches all warnings
    'append' -- if true, append to the list of filters
    """
    import re
    assert action in ("error", "ignore", "always", "default", "module",
                      "once"), "invalid action: %r" % (action,)
    assert isinstance(message, str), "message must be a string"
    assert isinstance(category, type), "category must be a class"
    assert issubclass(category, Warning), "category must be a Warning subclass"
    assert isinstance(module, str), "module must be a string"
    assert isinstance(lineno, int) and lineno >= 0, \
           "lineno must be an int >= 0"
    _add_filter(action, re.compile(message, re.I), category,
            re.compile(module), lineno, append=append) 
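
Because the message argument is compiled with re.I, installed filters match warning text case-insensitively. A small usage sketch against the standard-library API:

import warnings

warnings.filterwarnings('ignore', message='DEPRECATED', category=UserWarning)
warnings.warn('deprecated interface', UserWarning)  # silenced despite the case difference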
Example 4
Project: my-first-blog   Author: AnkurBegining   File: locators.py
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock() 
Example 5
Project: my-first-blog   Author: AnkurBegining   File: utils.py
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
Example 6
Project: kinect-2-libras   Author: inessadl   File: urllib.py    (Apache License 2.0)
def __init__(self, proxies=None, **x509):
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve().  This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah. 
Example 7
Project: conv2mp4-py   Author: Kameecoding   File: conv2mp4-server.py    (GNU General Public License v3.0)
def find_media_files(media_path):
	unconverted = []

	for dirname, directories, files in os.walk(media_path):
		for file in files:
			#skip hidden files
			if file.startswith('.'):
				continue

			if is_video(file) or is_subtitle(file):
				file = os.path.join(dirname, file)
				#Skip Sample files			   
				if re.search(".sample.", file, re.I):
					continue

				unconverted.append(file)

	sorted_unconverted = sorted(unconverted)

	return sorted_unconverted
Example 8
Project: conv2mp4-py   Author: Kameecoding   File: conv2mp4-py.py    (GNU General Public License v3.0)
def add_targets(self):
    
        self.target_dir = os.path.dirname(self.input_video)
        self.hard_link = ''
        
        if MOVE_FILES:
            #If we care about foreign languages execute this part
            if FOREIGN:
                audiostreams = self.get_audio_streams()
                #if we want to create hard links and there is both english and locale audio stream in the file or in the name
                if HARD_LINK and ((LOCALE in audiostreams and 'eng' in audiostreams) or (re.search('.{}.'.format(LOCALE),self.input_video,re.I) and re.search('.eng.',self.input_video,re.I))):
                    self.target_dir = TVSHOW_TARGET if self.is_show else MOVIE_TARGET
                    self.hard_link = LANG_TVSHOW_TARGET if self.is_show else LANG_MOVIE_TARGET          
                else:
                    #If the input matches LOCALE, put it in the lang folders
                    if re.search(LANG_PATTERN,self.input_video,re.I | re.M):
                        self.target_dir = LANG_TVSHOW_TARGET if self.is_show else LANG_MOVIE_TARGET
                    #Else put them in the main folder
                    else:
                        self.target_dir = TVSHOW_TARGET if self.is_show else MOVIE_TARGET
            #if we don't give a shit about multiple languages simply determine if tvshow or movie
            else:
                self.target_dir = TVSHOW_TARGET if self.is_show else MOVIE_TARGET 
Example 9
Project: conv2mp4-py   Author: Kameecoding   File: conv2mp4-py.py    (GNU General Public License v3.0)
def append_folder(self):

        if (CREATE_TVSHOW_DIRS and self.is_show):
            sub_folder=os.path.basename(self.input_video)[:os.path.basename(self.input_video).find('-')-1]
            if CREATE_SEASON_DIRS: 
                match = re.search(TV_SHOW_PATTERNS[2],self.input_video,re.I)
                if match:
                    season = match.group(1)
                if 'season' in locals():
                    if len(season) == 1:
                        season = ' 0' + season
                    else:
                        season = ' ' + season
                else:
                    Logger.info('Failed to match season pattern in {new}'.format(new=self.input_video))
                    sys.exit(0)
                sub_folder = os.path.join(sub_folder,'Season' + season)
        elif (CREATE_MOVIE_DIRS and not self.is_show):
            sub_folder=os.path.basename(self.input_video)[:-4]
        if 'sub_folder' in locals():
            self.target_dir = os.path.join(self.target_dir,sub_folder)
            if self.hard_link:
                self.hard_link = os.path.join(self.hard_link,sub_folder) 
Example 10
Project: games_nebula   Author: yancharkin   File: games_nebula.py
def goglib_search_filter(self, search_bar):

        self.goglib_search_filter_list = []

        filter = search_bar.get_text()

        for game_name in self.goglib_games_list:

            # Match a sequence of characters at the beginning of the title
            if re.match(filter, self.goglib_dict_name_to_title[game_name], re.I):
                self.goglib_search_filter_list.append(game_name)
            # Otherwise look for the sequence of characters anywhere in the title
            elif len(filter) > 1:
                if filter.lower() in self.goglib_dict_name_to_title[game_name].lower():
                    self.goglib_search_filter_list.append(game_name)

        self.goglib_apply_filters() 
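
The prefix behaviour in isolation (the titles are invented): re.match only matches at the start of the string, and re.I makes the typed filter case-insensitive:

import re

titles = ['The Witcher', 'Witcher 2', 'Banished']
print([t for t in titles if re.match('wit', t, re.I)])  # ['Witcher 2']

Since the filter text is used as a regular expression, input such as '(' would raise re.error; wrapping it in re.escape() would make the match purely literal.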
Example 11
Project: games_nebula   Author: yancharkin   File: games_nebula.py
def mylib_search_filter(self, search_bar):

        self.mylib_search_filter_list = []

        filter = search_bar.get_text()

        for game_name in self.mylib_games_list:

            # Match a sequence of characters at the beginning of the title
            if re.match(filter, self.mylib_dict_name_to_title[game_name], re.I):
                self.mylib_search_filter_list.append(game_name)
            # Otherwise look for the sequence of characters anywhere in the title
            elif len(filter) > 1:
                if filter.lower() in self.mylib_dict_name_to_title[game_name].lower():
                    self.mylib_search_filter_list.append(game_name)

        self.mylib_apply_filters() 
Example 12
Project: Bahubali---DDOS-Toolkit   Author: navanchauhan   File: BeautifulSoup.py    (Apache License 2.0)
def search(self, markup):
        #print 'looking for %s in %s' % (self, markup)
        found = None
        # If given a list of items, scan it for a text element that
        # matches.
        if isList(markup) and not isinstance(markup, Tag):
            for element in markup:
                if isinstance(element, NavigableString) \
                       and self.search(element):
                    found = element
                    break
        # If it's a Tag, make sure its name or attributes match.
        # Don't bother with Tags if we're searching for text.
        elif isinstance(markup, Tag):
            if not self.text:
                found = self.searchTag(markup)
        # If it's text, make sure the text matches.
        elif isinstance(markup, NavigableString) or \
                 isString(markup):
            if self._matches(markup, self.text):
                found = markup
        else:
            raise Exception, "I don't know how to match against a %s" \
                  % markup.__class__
        return found 
Example 13
Project: plugin.video.exodus   Author: lastship   File: 1movies.py
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            url = self.__search([data['tvshowtitle']] + source_utils.aliases_to_array(eval(data['aliases'])), data['year'], season)
            if not url: return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ep_link'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r if i]
            r = [(i[0], re.findall("^(?:episode)\s*(\d+)$", i[1], re.I)) for i in r]
            r = [(i[0], i[1][0] if i[1] else '0') for i in r]
            r = [i[0] for i in r if int(i[1]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return 
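
The episode pattern in isolation (sample strings invented): with re.I, 'Episode', 'EPISODE' and 'episode' all match, and the ^...$ anchors require the whole string to be an episode label:

import re

print(re.findall(r'^(?:episode)\s*(\d+)$', 'Episode 5', re.I))     # ['5']
print(re.findall(r'^(?:episode)\s*(\d+)$', 'Episode 5 HD', re.I))  # []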
Example 14
Project: plugin.video.exodus   Author: lastship   File: fullstream.py
def __search(self, titles, year, season='0'):
        try:
            query = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            r = client.request(query, post={'do': 'search', 'subaction': 'search', 'search_start': 0, 'full_search': 0, 'result_from': 1, 'story': cleantitle.query(titles[0])})

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'fullstream'})
            r = [(dom_parser.parse_dom(i, 'h3', attrs={'class': 'mov-title'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'fullmask'})) for i in r]
            r = [(dom_parser.parse_dom(i[0], 'a', req='href'), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*/year/\d+')})) for i in r]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0].content if i[1] else '0') for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-\s*saison)\s+(\d+)', i[1], re.I)) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return 
Example 15
Project: plugin.video.exodus   Author: lastship   File: pureanime.py
def __search(self, titles, episode):
        try:
            query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]) + ' ' + str(episode))
            query = urlparse.urljoin(self.base_link, query)

            t = [cleantitle.get(i) + str(episode) for i in set(titles) if i]

            r = client.request(query)
            r = r.split('</style>')[-1].strip()
            r = json.loads(r)

            r = [(i.get('title', {}).get('rendered'), i.get('content', {}).get('rendered')) for i in r]
            r = [(re.sub('ger (?:sub|dub)', '', i[0], flags=re.I).strip(), i[1]) for i in r if i[0] and i[1]]
            r = [(i[0], re.findall('(.+?) (\d*)$', i[0]), i[1]) for i in r]
            r = [(i[0] if not i[1] else i[1][0][0] + ' ' + str(int(i[1][0][1])), i[2]) for i in r]
            r = [dom_parser.parse_dom(i[1], 'div') for i in r if cleantitle.get(i[0]) in t]
            r = [[x.attrs['href'] for x in dom_parser.parse_dom(i, 'a', req='href')] + [x.attrs['src'] for x in dom_parser.parse_dom(i, 'iframe', req='src')] for i in r]
            return r[0]
        except:
            return 
Example 16
Project: googletranslate.popclipext   Author: wizyoung   File: utils.py    (MIT License)
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
Example 17
Project: F-Scrack   Author: y1ng1996   File: F-Scrack.py    (GNU General Public License v3.0)
def t_join(m_count):
    tmp_count = 0
    i = 0
    if I < m_count:  # NB: 'I' is not defined in this snippet; presumably a module-level value in the original script
        count = len(ip_list) + 1
    else:
        count = m_count
    while True:
        time.sleep(4)
        ac_count = threading.activeCount()
        #print ac_count,count
        if ac_count < count  and ac_count == tmp_count:
            i+=1
        else:
            i=0
        tmp_count = ac_count
        #print ac_count,queue.qsize()
        if (queue.empty() and threading.activeCount() <= 1) or i > 5:
            break 
Example 18
Project: gransk   Author: pcbje   File: unpack_diskimage.py    (Apache License 2.0)
def setup(self, config):
    """
    Determine max size to unpack and which directories to ignore.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.max_size = config.get(helper.MAX_FILE_SIZE, 128) * 1024 * 1024
    self.config = config

    ignore = {}
    path = os.path.join(
        config[helper.CODE_ROOT], 'utils', 'diskimage_ignore.txt')
    with open(path) as inp:
      for line in inp:
        if len(line.strip()) == 0 or line.startswith('#'):
          continue
        ignore[re.escape(line.strip().lower())] = True

    self.ignore = re.compile('|'.join(list(ignore.keys())), re.I) 
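
The same technique in isolation (the entries are invented): each literal is escaped, then joined into a single case-insensitive alternation, so one search covers the whole ignore list:

import re

entries = ['System Volume Information', '$RECYCLE.BIN']
ignore = re.compile('|'.join(re.escape(e.lower()) for e in entries), re.I)
print(bool(ignore.search('/mnt/image/$Recycle.Bin/file.txt')))  # True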
Example 19
Project: gransk   Author: pcbje   File: find_names_brute.py    (Apache License 2.0)
def setup(self, config):
    """
    Load name model (word list) and compile regexes for stop characters.

    :param config: Configuration object.
    :type config: ``dict``
    """
    reference_model = os.path.join(
        config[helper.CODE_ROOT], config[helper.NAME_MODEL])

    self.stopper = regex.compile(('(%s)' % '|'.join([
        'and', 'or', 'og', 'eller', r'\?', '&', '<', '>', '@', ':', ';', '/',
        r'\(', r'\)', 'i', 'of', 'from', 'to', r'\n', '!'])),
        regex.I | regex.MULTILINE)

    self.semistop = regex.compile(
        ('(%s)' % '|'.join([','])), regex.I | regex.MULTILINE)
    self.size_probability = [0.000, 0.000, 0.435, 0.489, 0.472, 0.004, 0.000]
    self.threshold = 0.25
    self.candidates = defaultdict(int)

    with gzip.open(reference_model, 'rb') as inp:
      self.model = json.loads(inp.read().decode('utf-8'))

    self.tokenizer = regex.compile(r'\w{2,20}') 
Example 20
Project: gransk   Author: pcbje   File: find_entities.py    (Apache License 2.0)
def setup(self, config):
    """
    Compile configured regular expressions.

    :param config: Configuration object.
    :type config: ``dict``
    """
    self.matches = {}

    patterns = []

    for entity_type, pattern_conf in config.get(helper.ENTITIES, {}).items():
      patterns.append(
          r'\b(?P<{}>{})\b'.format(entity_type, pattern_conf[helper.PATTERN]))

    self.pattern = regex.compile(
        '|'.join(patterns),
        regex.I | regex.U) 
Example 21
Project: calibre_dangdang   Author: qunxyz   File: __init__.py    (GNU General Public License v3.0)
def clean_downloaded_metadata(self, mi):
        docase = (
            mi.language == 'zhn'
        )
        if mi.title and docase:
            # Remove series information from title
            m = re.search(r'\S+\s+(\(.+?\s+Book\s+\d+\))$', mi.title)
            if m is not None:
                mi.title = mi.title.replace(m.group(1), '').strip()
            mi.title = fixcase(mi.title)
        mi.authors = fixauthors(mi.authors)
        if mi.tags and docase:
            mi.tags = list(map(fixcase, mi.tags))
        mi.isbn = check_isbn(mi.isbn)
        if mi.series and docase:
            mi.series = fixcase(mi.series)
        if mi.title and mi.series:
            for pat in (r':\s*Book\s+\d+\s+of\s+%s$', r'\(%s\)$', r':\s*%s\s+Book\s+\d+$'):
                pat = pat % re.escape(mi.series)
                q = re.sub(pat, '', mi.title, flags=re.I).strip()
                if q and q != mi.title:
                    mi.title = q
                    break 
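
The series-stripping substitution in isolation (title and series invented): re.escape makes the series name literal inside the pattern, and flags=re.I lets it match regardless of capitalisation:

import re

title, series = 'Dune (DUNE)', 'Dune'
pat = r'\(%s\)$' % re.escape(series)
print(re.sub(pat, '', title, flags=re.I).strip())  # 'Dune'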
Example 22
Project: uzdevsbot   Author: Uzbek-Developers   File: bot.py
def _process_message(self, message):
        chat = Chat.from_message(self, message)

        for mt in MESSAGE_TYPES:
            if mt in message:
                return self._handlers[mt](chat, message[mt])

        if "text" not in message:
            return

        for patterns, handler in self._commands:
            m = re.search(patterns, message["text"], re.I)
            if m:
                return handler(chat, m)

        # No match, run default if it's a 1to1 chat
        if not chat.is_group():
            return self._default(chat, message)

        else:
            return self._group_message(chat, message) 
Example 23
Project: pip-update-requirements   Author: alanhamlett   File: locators.py    (BSD 2-Clause "Simplified" License)
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock() 
Example 24
Project: pip-update-requirements   Author: alanhamlett   File: utils.py    (BSD 2-Clause "Simplified" License)
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
Example 25
Project: aws-waf-security-automation   Author: cerbo   File: utils.py    (Apache License 2.0)
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
Example 26
Project: VHDL-Mode   Author: Remillard   File: vhdl_util.py    (MIT License)
def get_vhdl_setting(cmd_obj, key):
    '''
    Borrowing an idea from OdatNurd from ST forum, creating a method
    that will return the value of a key and also check to see if
    it's been overridden in project files.  Defaults are handled by
    the supplied sublime-settings file.

    This will actually work on the regular Preferences as well, I think,
    though it might do bad things if the key doesn't exist.
    '''
    # Load the defaults, or user overridden defaults.
    vhdl_settings = sublime.load_settings('vhdl_mode.sublime-settings')
    default = vhdl_settings.get(key, None)
    # Load the view's settings
    view_settings = cmd_obj.view.settings()
    return view_settings.get(key, default)

#---------------------------------------------------------------------------- 
Example 27
Project: VHDL-Mode   Author: Remillard   File: vhdl_util.py    (MIT License)
def scan_instantiations(cmd_obj):
    '''
    Obtaining a list of all regions that contain instantiation labels
    and then creating a dictionary of instantiated components and their
    associated labels.
    '''
    instances = {}
    selector = 'meta.block.instantiation entity.name.label'
    regions = cmd_obj.view.find_by_selector(selector)
    for region in regions:
        line = cmd_obj.view.substr(cmd_obj.view.full_line(region))
        line = re.sub(r'\n', '', line)
        row, col = cmd_obj.view.rowcol(region.begin())
        pattern = r'^\s*(?P<label>\w+)\s*:\s*(?:entity)?\s*((?P<lib>\w+)\.)?(?P<entity>[\w\.]+)'
        s = re.search(pattern, line, re.I)
        if s:
            if s.group('entity') in instances:
                instances[s.group('entity')].append(s.group('label'))
            else:
                instances[s.group('entity')] = [s.group('label')]
        else:
            print('vhdl-mode: Could not match instantiation on line {}'.format(row+1))
    return instances 
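
The instantiation pattern in isolation (the VHDL line is invented): re.I matters because VHDL is case-insensitive, so 'entity' and 'ENTITY' must both be recognised:

import re

pattern = r'^\s*(?P<label>\w+)\s*:\s*(?:entity)?\s*((?P<lib>\w+)\.)?(?P<entity>[\w\.]+)'
s = re.search(pattern, 'u_ctrl : ENTITY work.controller port map (clk => clk);', re.I)
print(s.group('label'), s.group('lib'), s.group('entity'))  # u_ctrl work controller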
Example 28
Project: VHDL-Mode   Author: Remillard   File: vhdl_lang.py    (MIT License)
def parse_str(self, gen_str):
        """Attempts to extract the information from a generic interface."""
        # Right now I'm going to punt.  There are so many variations
        # on these that it's difficult to write an RE for it.  Also
        # there are a few ways we may have to rewrite it.  We will extract
        # a name, and then a type string (which may include defaults).
        gen_pattern = r'\s?(?P<name>.*?)\s?(?::)\s?(?P<type>.*)'
        gp = re.compile(gen_pattern, re.IGNORECASE)
        s = re.search(gp, gen_str)
        if s:
            self.name = s.group('name')
            # Sometimes the type has a trailing space.  Eliminating it.
            self.type = re.sub(r'\s*$', '', s.group('type'))
            self.success = True
        else:
            print('vhdl-mode: Could not parse generic string.')
            self.success = False 
Example 29
Project: SoCFoundationFlow   Author: mattaw   File: c_config.py    (Apache License 2.0)
def get_xlc_version(conf, cc):
	"""Get the compiler version"""

	cmd = cc + ['-qversion']
	try:
		out, err = conf.cmd_and_log(cmd, output=0)
	except Errors.WafError:
		conf.fatal('Could not find xlc %r' % cmd)

	# the intention is to catch the 8.0 in "IBM XL C/C++ Enterprise Edition V8.0 for AIX..."
	for v in (r"IBM XL C/C\+\+.* V(?P<major>\d*)\.(?P<minor>\d*)",):
		version_re = re.compile(v, re.I).search
		match = version_re(out or err)
		if match:
			k = match.groupdict()
			conf.env['CC_VERSION'] = (k['major'], k['minor'])
			break
	else:
		conf.fatal('Could not determine the XLC version.') 
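
The version pattern run against the sample string quoted in the comment above; note the idiom of storing the bound .search method of the compiled pattern:

import re

v = r"IBM XL C/C\+\+.* V(?P<major>\d*)\.(?P<minor>\d*)"
version_re = re.compile(v, re.I).search
match = version_re('IBM XL C/C++ Enterprise Edition V8.0 for AIX')
print(match.groupdict())  # {'major': '8', 'minor': '0'}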
Example 30
Project: SoCFoundationFlow   Author: mattaw   File: ifort.py    (Apache License 2.0)
def ifort_modifier_win32(self):
	v = self.env
	v.IFORT_WIN32 = True
	v.FCSTLIB_MARKER = ''
	v.FCSHLIB_MARKER = ''

	v.FCLIB_ST = v.FCSTLIB_ST = '%s.lib'
	v.FCLIBPATH_ST = v.STLIBPATH_ST = '/LIBPATH:%s'
	v.FCINCPATH_ST = '/I%s'
	v.FCDEFINES_ST = '/D%s'

	v.fcprogram_PATTERN = v.fcprogram_test_PATTERN = '%s.exe'
	v.fcshlib_PATTERN = '%s.dll'
	v.fcstlib_PATTERN = v.implib_PATTERN = '%s.lib'

	v.FCLNK_TGT_F = '/out:'
	v.FC_TGT_F = ['/c', '/o', '']
	v.FCFLAGS_fcshlib = ''
	v.LINKFLAGS_fcshlib = '/DLL'
	v.AR_TGT_F = '/out:'
	v.IMPLIB_ST = '/IMPLIB:%s'

	v.append_value('LINKFLAGS', '/subsystem:console')
	if v.IFORT_MANIFEST:
		v.append_value('LINKFLAGS', ['/MANIFEST']) 
Example 31
Project: SoCFoundationFlow   Author: mattaw   File: c_nec.py    (Apache License 2.0)
def sxc_common_flags(conf):
        v=conf.env
        v['CC_SRC_F']=[]
        v['CC_TGT_F']=['-c','-o']
        if not v['LINK_CC']:v['LINK_CC']=v['CC']
        v['CCLNK_SRC_F']=[]
        v['CCLNK_TGT_F']=['-o']
        v['CPPPATH_ST']='-I%s'
        v['DEFINES_ST']='-D%s'
        v['LIB_ST']='-l%s'
        v['LIBPATH_ST']='-L%s'
        v['STLIB_ST']='-l%s'
        v['STLIBPATH_ST']='-L%s'
        v['RPATH_ST']=''
        v['SONAME_ST']=[]
        v['SHLIB_MARKER']=[]
        v['STLIB_MARKER']=[]
        v['LINKFLAGS_cprogram']=['']
        v['cprogram_PATTERN']='%s'
        v['CFLAGS_cshlib']=['-fPIC']
        v['LINKFLAGS_cshlib']=['']
        v['cshlib_PATTERN']='lib%s.so'
        v['LINKFLAGS_cstlib']=[]
        v['cstlib_PATTERN']='lib%s.a' 
Example 32
Project: SoCFoundationFlow   Author: mattaw   File: fc_xlf.py    (Apache License 2.0)
def get_xlf_version(conf, fc):
	"""Get the compiler version"""

	cmd = fc + ['-qversion']
	try:
		out, err = conf.cmd_and_log(cmd, output=0)
	except Errors.WafError:
		conf.fatal('Could not find xlf %r' % cmd)

	for v in (r"IBM XL Fortran.* V(?P<major>\d*)\.(?P<minor>\d*)",):
		version_re = re.compile(v, re.I).search
		match = version_re(out or err)
		if match:
			k = match.groupdict()
			conf.env['FC_VERSION'] = (k['major'], k['minor'])
			break
	else:
		conf.fatal('Could not determine the XLF version.') 
Example 33
Project: SoCFoundationFlow   Author: mattaw   File: c_config.py    (Apache License 2.0)
def get_suncc_version(conf, cc):
	"""Get the compiler version"""

	cmd = cc + ['-V']
	try:
		out, err = conf.cmd_and_log(cmd, output=0)
	except Errors.WafError as e:
		# Older versions of the compiler exit with non-zero status when reporting their version
		if not (hasattr(e, 'returncode') and hasattr(e, 'stdout') and hasattr(e, 'stderr')):
			conf.fatal('Could not find suncc %r' % cmd)
		out = e.stdout
		err = e.stderr

	version = (out or err)
	version = version.splitlines()[0]

	version_re = re.compile(r'cc:\s+sun\s+(c\+\+|c)\s+(?P<major>\d*)\.(?P<minor>\d*)', re.I).search
	match = version_re(version)
	if match:
		k = match.groupdict()
		conf.env['CC_VERSION'] = (k['major'], k['minor'])
	else:
		conf.fatal('Could not determine the suncc version.')

# ============ the --as-needed flag should be added during the configuration, not at runtime =========
Example 34
Project: repository-gardener   Author: GoogleCloudPlatform   File: webhooks.py    (Apache License 2.0)
def check_for_auto_merge_trigger(text):
    """Checks the text for the phrases that should trigger an automerge."""
    # The comment must address @dpebot directly, on the same line
    comment = re.search(
        r'@{}\s+\b(.+)'.format(github_helper.github_user()), text, re.I)
    if not comment:
        return False
    else:
        # Just get the meat of the command
        comment = comment.group(1).strip()

    satisfaction = r'\b(pass|passes|green|approv(al|es)|happy|satisfied)'
    ci_tool = r'\b(travis|tests|statuses)\b'
    merge_action = r'\bmerge\b'
    triggers = (
        r'{}.+({}.+)?{}'.format(merge_action, ci_tool, satisfaction),
        r'{}.+{},.+{}'.format(ci_tool, satisfaction, merge_action),
        'lgtm',
    )

    return any(re.search(trigger, comment, re.I) for trigger in triggers) 
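
A sketch of the trigger matching in isolation (the comment text is invented; the sub-patterns are the ones defined above):

import re

satisfaction = r'\b(pass|passes|green|approv(al|es)|happy|satisfied)'
ci_tool = r'\b(travis|tests|statuses)\b'
merge_action = r'\bmerge\b'
trigger = r'{}.+({}.+)?{}'.format(merge_action, ci_tool, satisfaction)

print(bool(re.search(trigger, 'Merge this when Travis is green', re.I)))  # True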
Example 35
Project: dactyl   Author: ripple   File: filter_callouts.py    (MIT License)
def filter_soup(soup, currentpage={}, config={}, **kwargs):
    """
    Find patterns that look like callouts, for example **Note:**, and add
    callout classes to their parent elements (usually <p>)
    """
    # callout classes are defined by page>target>config>default
    callout_classes = currentpage.get(CALLOUT_TYPES_FIELD,
                        config.get(CALLOUT_TYPES_FIELD,
                        DEFAULT_CALLOUT_TYPES))
    callout_intro = re.compile(r"("+"|".join(callout_classes)+"):?$", re.I)
    callout_base_class = currentpage.get(CALLOUT_CLASS_FIELD,
                        config.get(CALLOUT_CLASS_FIELD,
                        DEFAULT_CALLOUT_CLASS))

    callouts = soup.find_all(name=["strong","em"], string=callout_intro)
    for c in callouts:
        if not c.previous_sibling: #This callout starts a block
            callout_type = c.string.replace(":","").lower()
            if callout_type in callout_classes:
                c.parent["class"] = [callout_base_class, callout_type] 
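
The callout pattern in isolation (the class list is invented): the optional colon and re.I let 'Note:', 'note' and 'NOTE:' all be recognised, while the $ anchor rejects longer words:

import re

callout_classes = ['note', 'warning', 'tip']
callout_intro = re.compile(r'(' + '|'.join(callout_classes) + r'):?$', re.I)
print(bool(callout_intro.search('Note:')))   # True
print(bool(callout_intro.search('NOTE')))    # True
print(bool(callout_intro.search('Notes:')))  # False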
Example 36
Project: swjtu-pyscraper   Author: Desgard   File: locators.py    (MIT License)
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock() 
Example 37
Project: swjtu-pyscraper   Author: Desgard   File: utils.py    (MIT License)
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 
Example 38
Project: noc-orchestrator   Author: DirceuSilvaLabs   File: locators.py    (MIT License)
def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O.
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock() 
Example 39
Project: noc-orchestrator   Author: DirceuSilvaLabs   File: utils.py    (MIT License)
def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content)) 