Python re.findall() Examples

The following code examples show how to use re.findall(). They are extracted from open source Python projects.
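
For reference, re.findall() scans left to right and returns every non-overlapping match as a list: the full match when the pattern has no capturing group, the group's text when it has exactly one, and a tuple per match when it has several. A minimal sketch:

import re

print(re.findall(r'\d+', 'a1 b22 c333'))        # no group: ['1', '22', '333']
print(re.findall(r'(\w+)=(\d+)', 'x=1, y=2'))   # two groups: [('x', '1'), ('y', '2')]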

Example 1
Project: Python   Author: Guzi219   File: DY2018.py    (license)
def ParseHtml(self, html):
        soup = BeautifulSoup(html)
        links = soup.findAll('a', attrs={'class': 'ulink'})
        #print len(links)
        if len(links) == 0: #the js return
            # tmp_js = soup.find(name='script', attrs={'language': 'javascript'})
            js_str = soup.script.string #two ways to get the <script></script>
            new_url = js_str[16:-1] #get the new url
            new_url = eval(new_url) #eval the quoted string to get the plain url
            self.ParseHtml(self.LoadPage(new_url))
        else:
            # print type(links)
            for link in links:
                # print type(link)
                # print type(link.string)
                # print unicode(link.string)
                titles = re.findall(r'《(.+?)》', str(link.string)) #unicode(link.string))
                if len(titles) != 0:
                    print titles[0]
                # print 'url is %s, title is %s.' %(link['href'], titles[0]) 
Example 2
Project: ln2sql   Author: FerreroJeremy   File: database.py    (GNU General Public License v3.0)
def create_table(self, table_string):
        lines = table_string.split("\n")
        table = Table()
        for line in lines:
            if 'TABLE' in line:
                table_name = re.search("`(\w+)`", line)
                table.name = table_name.group(1)
                if self.thesaurus_object is not None:
                    table.equivalences = self.thesaurus_object.get_synonyms_of_a_word(table.name)
            elif 'PRIMARY KEY' in line:
                primary_key_columns = re.findall("`(\w+)`", line)
                for primary_key_column in primary_key_columns:
                    table.add_primary_key(primary_key_column)
            else:
                column_name = re.search("`(\w+)`", line)
                if column_name is not None:
                    column_type = self.predict_type(line)
                    if self.thesaurus_object is not None:
                        equivalences = self.thesaurus_object.get_synonyms_of_a_word(column_name.group(1))
                    else:
                        equivalences = []
                    table.add_column(column_name.group(1), column_type, equivalences)
        return table 
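
The backtick pattern used above is worth noting: with a single capturing group, re.findall() returns just the quoted identifiers, one list element per match. A minimal sketch on a made-up DDL line:

import re

line = "PRIMARY KEY (`id`, `created_at`)"   # hypothetical schema line
print(re.findall(r"`(\w+)`", line))         # -> ['id', 'created_at']
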
Example 3
Project: python-   Author: secondtonone1   File: zhaifuliall.py    (license)
def getDetailList(self,content):
		s2 = r'<h2><a target="_blank" href="(.*?)" title="(.*?)"'
		pattern = re.compile(s2, re.S)
		result = re.findall(pattern, content)
		with open('file.txt','w',encoding='gbk') as f:
			f.write(content)

		if not result:
			print('no matching results..............')
		
		
		threadsList=[] 
		for item in result:
			t = threading.Thread(target = workthread, args=(item, self.user_agent, self.path))
			threadsList.append(t)
			t.start()
			
		for threadid in threadsList:
			threadid.join() 
Example 4
Project: Stitch   Author: nathanlopez   File: wifikeys.py    (license)
def get_profiles():
    passwd=''
    netsh_output = run_command("netsh wlan show profiles")
    if "not running" in netsh_output:
        net_wlan = run_command("net start wlansvc")
        if "started successfully" in net_wlan:
            netsh_output = run_command("netsh wlan show profiles")
        else:
            return net_wlan
    if "no wireless interface" in netsh_output:
        return netsh_output
    else:
        profiles=re.findall(': (.*)\r',netsh_output)
        for x in profiles:
            output= run_command('netsh wlan show profiles "{}" key=clear'.format(x))
            #output=re.findall('(Key Content.*)\r',proc)
            if output:
                passwd += "\n{}\n{}\n\n".format(x,output)
        return passwd 
Example 5
Project: ochem_predict_nn   Author: connorcoley   File: generate_reaction_templates.py    (license)
def reassign_atom_mapping(transform):
	'''This function takes an atom-mapped reaction and reassigns 
	the atom-mapping labels (numbers) from left to right, once 
	that transform has been canonicalized.'''

	all_labels = re.findall('\:([0-9]+)\]', transform)

	# Define list of replacements which matches all_labels *IN ORDER*
	replacements = []
	replacement_dict = {}
	counter = 1
	for label in all_labels: # keep in order! this is important
		if label not in replacement_dict:
			replacement_dict[label] = str(counter)
			counter += 1
		replacements.append(replacement_dict[label])

	# Perform replacements in order
	transform_newmaps = re.sub('\:[0-9]+\]', 
		lambda match: (':' + replacements.pop(0) + ']'),
		transform)

	return transform_newmaps 
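
As a self-contained sketch of the same left-to-right relabeling idea (the SMARTS-like input string is made up for illustration), the findall/replacement-list pass above can also be written as a single re.sub() with a callable:

import re

def renumber_maps(transform):
    seen = {}
    def repl(match):
        # the first distinct label becomes 1, the next becomes 2, and so on
        label = match.group(1)
        seen.setdefault(label, str(len(seen) + 1))
        return ':' + seen[label] + ']'
    return re.sub(r':([0-9]+)\]', repl, transform)

print(renumber_maps('[CH3:7][OH:3]'))   # hypothetical input -> [CH3:1][OH:2]
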
Example 6
Project: spyking-circus   Author: spyking-circus   File: mcs_raw_binary.py    (license)
def _read_from_header(self):

        a, b, c                = self._get_header()
        header                 = a
        header['data_offset']  = b
        header['nb_channels']  = c
        #header['dtype_offset'] = int(header['ADC zero'])
        header['gain']         = float(re.findall("\d+\.\d+", header['El'])[0])
        header['data_dtype']   = self.params['data_dtype']

        self.data   = numpy.memmap(self.file_name, offset=header['data_offset'], dtype=header['data_dtype'], mode='r')
        self.size   = len(self.data)
        self._shape = (self.size//header['nb_channels'], header['nb_channels'])
        del self.data

        return header 
Example 7
Project: stalker_pyramid   Author: eoyilmaz   File: archive.py    (GNU Lesser General Public License v3.0)
def _extract_references(self, data):
        """returns the list of references in the given maya file

        :param str data: The content of the maya scene file

        :return:
        """
        path_regex = r'\$REPO[\w\d\/_\-\.@]+'
        # so we have all the data
        # extract references
        ref_paths = re.findall(path_regex, data)

        new_ref_paths = []
        for ref_path in ref_paths:
            if os.path.splitext(ref_path)[1] not in self.exclude_mask:
                new_ref_paths.append(ref_path)
        ref_paths = new_ref_paths

        return ref_paths 
Example 8
Project: sopel-modules   Author: phixion   File: new_url.py    (license)
def title_command(bot, trigger):
    """
    Show the title or URL information for the given URL, or the last URL seen
    in this channel.
    """
    if not trigger.group(2):
        if trigger.sender not in bot.memory['last_seen_url']:
            return
        matched = check_callbacks(bot, trigger,
                                  bot.memory['last_seen_url'][trigger.sender],
                                  True)
        if matched:
            return
        else:
            urls = [bot.memory['last_seen_url'][trigger.sender]]
    else:
        urls = re.findall(url_finder, trigger)

    results = process_urls(bot, trigger, urls)
    for title, domain in results[:4]:
        bot.reply('[ %s ] - %s' % (title, domain)) 
Example 9
Project: pyselenium-js   Author: neetjn   File: test_events.py    (Apache License 2.0)
def test_trigger_single_event(self):
        """Test: Trigger click event on button, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event='click'
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+1,
            'Counter label was not modified as expected; %s clicks' % modified
        ) 
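
A side note on the eval(re.findall(...)[0]) idiom used in this and the following tests: since the pattern only ever captures digits, int() performs the same conversion without executing arbitrary text. A minimal sketch with a hypothetical label:

import re

label_text = 'Clicked 41 times'   # hypothetical counter label
count = int(re.findall(r'([0-9]{1,3})', label_text)[0])   # safer than eval()
print(count + 1)                  # -> 42
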
Example 10
Project: pyselenium-js   Author: neetjn   File: test_events.py    (Apache License 2.0)
def test_trigger_multiple_events(self):
        """Test: Trigger click event on button twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        original = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.page.js.trigger_event(
            element=self.page.add_counter_button,
            event=('click', 'click')
        )
        for i in range(10):
            if (original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            modified, original+2,
            'Counter label was not modified as expected; %s clicks' % modified
        ) 
Example 11
Project: pyselenium-js   Author: neetjn   File: test_events.py    (Apache License 2.0)
def test_trigger_multiple_events_multiple_elements(self):
        """Test: Trigger click event on two buttons twice, validate dispatched"""
        regex = '([0-9]{1,3})'
        num_counter_original = eval(re.findall(regex, self.page.counter_label.text)[0])
        num_users_original = len(self.page.user_cards)
        self.page.js.trigger_event(
            element=(self.page.add_counter_button, self.page.add_user_button),
            event=('click', 'click')
        )
        for i in range(10):
            if (num_counter_original == eval(re.findall(regex, self.page.counter_label.text)[0])):
                time.sleep(1)
            else:
                break
        num_counter_modified = eval(re.findall(regex, self.page.counter_label.text)[0])
        self.assertEqual(
            num_counter_modified, num_counter_original+2,
            'Counter label was not modified as expected; %s clicks' % num_counter_modified
        )
        self.assertEqual(
            len(self.page.user_cards), num_users_original+2,
            'Expected %s user cards found %s' % (
                num_users_original+2, len(self.page.user_cards)
            )
        ) 
Example 12
Project: trf   Author: aistairc   File: analyser.py    (license)
def calc_rs_modality(self) -> Dict[str, float]:

        modality_counter = Counter()
        for i, s in enumerate(self.sentences):
            chunks = []
            for bnst in self.knp.parse(s).bnst_list():
                chunk = Chunk(chunk_id=bnst.bnst_id,
                              link=bnst.parent,
                              description=bnst.fstring)
                chunks.append(chunk)

            s = "".join([chunk.description for chunk in chunks])
            ms = set(re.findall("<モダリティ-(.+?)>", s))
            modality_counter += Counter(ms)

        n = len(self.sentences)

        return dict([(k, float(c) / n)
                     for k, c in modality_counter.items()]) 
Example 13
Project: cellranger   Author: 10XGenomics   File: reference.py    (license)
def get_transcript_gc_content(self, transcript_obj):
        pattern = re.compile('[cCgG]')

        gc, length = 0, 0
        for interval in transcript_obj.intervals:
            if interval.chrom not in self.chroms:
                continue

            seq = self.chroms[interval.chrom][interval.start:interval.end]
            gc += len(re.findall(pattern, seq))
            length += interval.length

        if length > 0:
            return float(gc) / float(length)
        else:
            return 0

# NOTE: these stub classes are necessary to maintain backwards compatibility with old refdata (1.2 or older) 
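
The len(re.findall(...)) call above simply counts matches of the compiled pattern; for a one-character class this amounts to counting the matching characters. A minimal sketch with a made-up sequence:

import re

seq = 'ACgtGGcTA'                    # hypothetical sequence
gc = len(re.findall('[cCgG]', seq))  # 5 of the 9 bases are c/C/g/G
print(float(gc) / len(seq))          # -> 0.555...
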
Example 14
Project: ComicSpider   Author: QuantumLiu   File: comic.py    (GNU General Public License v3.0)
def get_pages(self):
        '''
        Get all pages' URLs in this chapter using selenium and PhantomJS.
        return:
            a list of tuples (page_num, page_url)
        '''
        r_slt=r'onchange="select_page\(\)">([\s\S]*?)</select>'
        r_p=r'<option value="(.*?)".*?>第(\d*?)页<'
        try:
            dcap = dict(DesiredCapabilities.PHANTOMJS)
            # disable image loading to speed up page rendering
            dcap["phantomjs.page.settings.loadImages"] = False
            driver = webdriver.PhantomJS(desired_capabilities=dcap)
            driver.get(self.chapter_url)
            text=driver.page_source
            st=re.findall(r_slt,text)[0]
            self.pages = [(int(p[-1]),p[0]) for p in re.findall(r_p,st)]
        except Exception:
            traceback.print_exc()
            self.pages = []
        except KeyboardInterrupt:
            raise KeyboardInterrupt
        finally:
            driver.quit()
            print('Got {l} pages in chapter {ch}'.format(l=len(self.pages),ch=self.chapter_title))
            return self.pages 
Example 15
Project: ComicSpider   Author: QuantumLiu   File: comic.py    (GNU General Public License v3.0)
def get_info(self):
        '''
        Get the comic's information: title, description, cover URL and chapter URLs.
        return:
            comic title, description, cover url, chapters' urls
        '''
        headers={'user-agent':"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",'Referer':'http://manhua.dmzj.com/tags/s.shtml'}
        root='http://manhua.dmzj.com'
        r_title=r'<span class="anim_title_text"><a href=".*?"><h1>(.*?)</h1></a></span>'
        r_des=r'<meta name=\'description\' content=".*?(介绍.*?)"/>'#description pattern
        r_cover=r'src="(.*?)" id="cover_pic"/></a>'#cover image url pattern
        r_cb=r'<div class="cartoon_online_border" >([\s\S]*?)<div class="clearfix"></div>'#chapter list block
        r_cs=r'<li><a title="(.*?)" href="(.*?)" .*?>.*?</a>'#chapter title and url
        try:
            text=requests.get(self.comic_url,headers=headers).text
        except ConnectionError:
            traceback.print_exc()
            raise ConnectionError
        title=re.findall(r_title,text)[0]
        cb=re.findall(r_cb,text)[0]
        chapter_urls=[(c[0],root+c[1]+'#@page=1') for c in re.findall(r_cs,cb)]
        cover_url=re.findall(r_cover,text)[0]
        des=re.findall(r_des,text)
        return title,des,cover_url,chapter_urls 
Example 16
Project: Instagram   Author: Fastcampus-WPS-5th   File: comment.py    (license)
def make_html_content_and_add_tags(self):
        # pattern that matches hashtags in the content
        p = re.compile(r'(#\w+)')
        # use findall to collect every tag name from the content
        tag_name_list = re.findall(p, self.content)
        # keep the original content (of this Comment)
        ori_content = self.content
        # loop over the extracted tag names
        for tag_name in tag_name_list:
            # get or create the Tag object; the "created" flag is discarded into _
            tag, _ = Tag.objects.get_or_create(name=tag_name.replace('#', ''))
            # build the replacement link for the content
            change_tag = '<a href="{url}" class="hash-tag">{tag_name}</a>'.format(
                # url=reverse('post:hashtag_post_list', args=[tag_name.replace('#', '')]),
                url=reverse('post:hashtag_post_list',
                            kwargs={'tag_name': tag_name.replace('#', '')}),
                tag_name=tag_name
            )
            ori_content = re.sub(r'{}(?![<\w])'.format(tag_name), change_tag, ori_content, count=1)
            # add the Tag to this object's tags if it is not there yet
            if not self.tags.filter(pk=tag.pk).exists():
                self.tags.add(tag)
        # store the converted content in html_content
        self.html_content = ori_content
        super().save(update_fields=['html_content']) 
Example 17
Project: django-powerpages   Author: Open-E-WEB   File: test_sitemap_view.py    (MIT License)
def parse_sitemap(content):
    if not isinstance(content, six.text_type):
        content = content.decode('utf-8')
    urlset_match = re.search(
        r'<urlset[^>]*>(?P<urls>[\s\S]*)</urlset>', content
    )
    if urlset_match:
        results = []
        urlset_content = urlset_match.groupdict()['urls']
        for url_content in re.findall(r'<url>([\s\S]+)</url>', urlset_content):
            results.append(
                dict(
                    re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)
                )
            )
    else:
        results = None
    return results 
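
The dict(re.findall(...)) trick above works because a pattern with two capturing groups yields one (key, value) tuple per matched tag pair. A minimal sketch on a made-up fragment:

import re

url_content = '<loc>http://example.com/</loc><priority>0.5</priority>'   # made-up sitemap fragment
pairs = re.findall(r'<([^>]+)>([^<]*)</[^>]+>', url_content)
print(dict(pairs))   # -> {'loc': 'http://example.com/', 'priority': '0.5'}
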
Example 18
Project: fantasy-dota-heroes   Author: ThePianoDentist   File: pubg_iemoakland_players.py    (Apache License 2.0)
def get_players():
    with open("../../junk/iemoakland.html") as f:
        html = f.read()
    output = []
    teams = re.findall('(?s)<div class="influencer-card">(.*?)<!-- Card End  -->', html)
    counter = 1
    for t in teams:
        team_name = re.search('<h1 class="influencer-name">([^<]+)</h1>', t).group(1)
        player_section = re.search('(?s)<p class="influencer-description">(.*?)</p>', t).group(1)
        players = re.findall('(?:<a[^>]+>)?\s*(.*?)(?:</a>)?<br />', player_section)
        if len(players) < 4:
            print(team_name)
            print(players)
        for player in players:
            if '<a hre' in player:
                player = re.search('<a[^>]+>([^<]+)', player).group(1)
            output.append({"id": counter, "name": player, "team": team_name, "value": 10.0})
            counter += 1

    with open("../../lib/pubg_players.py", "w+") as f:
        f.write("pubg_init = " + repr(output))
    return 
Example 19
Project: course-crawler   Author: Foair   File: icourse.py    (MIT License)
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # course term ID
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution name
    institution = names[1]

    # directory name
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name 
Example 20
Project: course-crawler   Author: Foair   File: study_mooc.py    (MIT License)
def get_summary(url):
    """ ?????????? """

    res = CONNECTION.get(url).text

    # course term ID
    term_id = re.search(r'termId : "(\d+)"', res).group(1)

    names = re.findall(r'name:"(.+)"', res)
    # course name
    course_name = names[0]
    # institution name
    institution = names[1]

    # directory name
    dir_name = REG_FILE.sub('', course_name + ' - ' + institution)
    print(dir_name)

    return term_id, dir_name 
Example 21
Project: course-crawler   Author: Foair   File: study_mooc.py    (MIT License)
def get_announce(term_id):
    """ ??????? """

    # batchId can simply be str(int(time.time() * 1000))
    post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'httpSessionId': 'dba4977be78d42a78a6e2c2dd2b9bb42', 'c0-scriptName': 'CourseBean', 'c0-methodName': 'getAllAnnouncementByTerm', 'c0-id': '0', 'c0-param0': 'number:' + term_id, 'c0-param1': 'number:1', 'batchId': str(int(time.time() * 1000))}
    res = CONNECTION.post('http://mooc.study.163.com/dwr/call/plaincall/CourseBean.getAllAnnouncementByTerm.dwr', data=post_data).text

    announcements = re.findall(r'content="(.*?[^\\])".*title="(.*?[^\\])"', res)

    with open(os.path.join(BASE_DIR, 'Announcements.html'), 'w', encoding='utf-8') as announce_file:
        for announcement in announcements:
            # announcement content
            announce_content = announcement[0].encode('utf-8').decode('unicode_escape')
            # announcement title
            announce_title = announcement[1].encode('utf-8').decode('unicode_escape')
            announce_file.write('<h1>' + announce_title + '</h1>\n' + announce_content + '\n') 
Example 22
Project: segno   Author: heuer   File: test_pdf.py    (BSD 3-Clause "New" or "Revised" License)
def pdf_as_matrix(buff, border):
    """\
    Reads the path in the PDF and returns it as list of 0, 1 lists.

    :param io.BytesIO buff: Buffer to read the matrix from.
    """
    pdf = buff.getvalue()
    h, w = re.search(br'/MediaBox \[0 0 ([0-9]+) ([0-9]+)\]', pdf,
                     flags=re.MULTILINE).groups()
    if h != w:
        raise ValueError('Expected equal height/width, got height="{}" width="{}"'.format(h, w))
    size = int(w) - 2 * border

    graphic = _find_graphic(buff)
    res = [[0] * size for i in range(size)]
    for x1, y1, x2, y2 in re.findall(r'\s*(\-?\d+)\s+(\-?\d+)\s+m\s+'
                                        r'(\-?\d+)\s+(\-?\d+)\s+l', graphic):
        x1, y1, x2, y2 = [int(i) for i in (x1, y1, x2, y2)]
        y = abs(y1)
        res[y][x1:x2] = [1] * (x2 - x1)
    return res 
Example 23
Project: plugin.video.exodus   Author: lastship   File: client.py    (license)
def get(self, netloc, ua, timeout):
        try:
            headers = {'User-Agent': ua, 'Referer': netloc}
            result = _basic_request(netloc, headers=headers, timeout=timeout)

            match = re.findall('xhr\.open\("GET","([^,]+),', result)
            if not match:
                return False

            url_Parts = match[0].split('"')
            url_Parts[1] = '1680'
            url = urlparse.urljoin(netloc, ''.join(url_Parts))

            match = re.findall('rid=([0-9a-zA-Z]+)', url_Parts[0])
            if not match:
                return False

            headers['Cookie'] = 'rcksid=%s' % match[0]
            result = _basic_request(url, headers=headers, timeout=timeout)
            return self.getCookieString(result, headers['Cookie'])
        except:
            return

    # not very robust but laziness...
Example 24
Project: plugin.video.exodus   Author: lastship   File: client.py    (license)
def get(self, result):
        try:
            s = re.compile("S\s*=\s*'([^']+)").findall(result)[0]
            s = base64.b64decode(s)
            s = s.replace(' ', '')
            s = re.sub('String\.fromCharCode\(([^)]+)\)', r'chr(\1)', s)
            s = re.sub('\.slice\((\d+),(\d+)\)', r'[\1:\2]', s)
            s = re.sub('\.charAt\(([^)]+)\)', r'[\1]', s)
            s = re.sub('\.substr\((\d+),(\d+)\)', r'[\1:\1+\2]', s)
            s = re.sub(';location.reload\(\);', '', s)
            s = re.sub(r'\n', '', s)
            s = re.sub(r'document\.cookie', 'cookie', s)

            cookie = '' ; exec(s)
            self.cookie = re.compile('([^=]+)=(.*)').findall(cookie)[0]
            self.cookie = '%s=%s' % (self.cookie[0], self.cookie[1])

            return self.cookie
        except:
            pass 
Example 25
Project: plugin.video.exodus   Author: lastship   File: directstream.py    (license)
def odnoklassniki(url):
    try:
        media_id = re.compile('//.+?/.+?/([\w]+)').findall(url)[0]

        result = client.request('http://ok.ru/dk', post={'cmd': 'videoPlayerMetadata', 'mid': media_id})
        result = re.sub(r'[^\x00-\x7F]+', ' ', result)
        result = json.loads(result).get('videos', [])

        hd = []
        for name, quali in {'ultra': '4K', 'quad': '1440p', 'full': '1080p', 'hd': 'HD'}.items():
            hd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        sd = []
        for name, quali in {'sd': 'SD', 'low': 'SD', 'lowest': 'SD', 'mobile': 'SD'}.items():
            sd += [{'quality': quali, 'url': i.get('url')} for i in result if i.get('name').lower() == name]

        url = hd + sd[:1]
        if not url == []: return url
    except:
        return 
Example 26
Project: plugin.video.exodus   Author: lastship   File: directstream.py    (license)
def cldmailru(url):
    try:
        v = url.split('public')[-1]

        r = client.request(url)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        tok = re.findall('"tokens"\s*:\s*{\s*"download"\s*:\s*"([^"]+)', r)[0]

        url = re.findall('"weblink_get"\s*:\s*\[.+?"url"\s*:\s*"([^"]+)', r)[0]

        url = '%s%s?key=%s' % (url, v, tok)

        return url
    except:
        return 
Example 27
Project: plugin.video.exodus   Author: lastship   File: directstream.py    (license)
def yandex(url):
    try:
        cookie = client.request(url, output='cookie')

        r = client.request(url, cookie=cookie)
        r = re.sub(r'[^\x00-\x7F]+', ' ', r)

        sk = re.findall('"sk"\s*:\s*"([^"]+)', r)[0]

        idstring = re.findall('"id"\s*:\s*"([^"]+)', r)[0]

        idclient = binascii.b2a_hex(os.urandom(16))

        post = {'idClient': idclient, 'version': '3.9.2', 'sk': sk, '_model.0': 'do-get-resource-url', 'id.0': idstring}
        post = urllib.urlencode(post)

        r = client.request('https://yadi.sk/models/?_m=do-get-resource-url', post=post, cookie=cookie)
        r = json.loads(r)

        url = r['models'][0]['data']['file']

        return url
    except:
        return 
Example 28
Project: plugin.video.exodus   Author: lastship   File: proxy.py    (license)
def geturl(url):
    try:
        r = client.request(url, output='geturl')
        if r == None: return r

        host1 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(url.strip().lower()).netloc)[0]
        host2 = re.findall('([\w]+)[.][\w]+$', urlparse.urlparse(r.strip().lower()).netloc)[0]
        if host1 == host2: return r

        proxies = sorted(get(), key=lambda x: random.random())
        proxies = sorted(proxies, key=lambda x: random.random())
        proxies = proxies[:3]

        for p in proxies:
            p += urllib.quote_plus(url)
            r = client.request(p, output='geturl')
            if not r == None: return parse(r)

    except:
        pass 
Example 29
Project: plugin.video.exodus   Author: lastship   File: miradetodo.py    (license)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            t = 'http://www.imdb.com/title/%s' % imdb
            t = client.request(t, headers={'Accept-Language': 'es-AR'})
            t = client.parseDOM(t, 'title')[0]
            t = re.sub('(?:\(|\s)\d{4}.+', '', t).strip().encode('utf-8')

            q = self.search_link % urllib.quote_plus(t)
            q = urlparse.urljoin(self.base_link, q)

            r = client.request(q)

            r = client.parseDOM(r, 'div', attrs = {'class': 'item'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'span', attrs = {'class': 'tt'}), client.parseDOM(i, 'span', attrs = {'class': 'year'})) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
            r = [i[0] for i in r if cleantitle.get(t) == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            pass 
Example 30
Project: plugin.video.exodus   Author: lastship   File: solarmovie.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            return url
        except:
            return 
Example 31
Project: plugin.video.exodus   Author: lastship   File: kingmovies.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'item-detail'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch' % url
            return url
        except:
            return 
Example 32
Project: plugin.video.exodus   Author: lastship   File: moviegross.py    (license)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            t = cleantitle.get(title)

            p = self.post_link % urllib.quote_plus(cleantitle.query(title))
            q = urlparse.urljoin(self.base_link, self.search_link)

            r = proxy.request(q, 'playing top', post=p, XHR=True)

            r = client.parseDOM(r, 'li')
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if i[0] and i[1]]
            r = [(i[0], re.findall('(.+?)\((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
            r = [i for i in r if t == cleantitle.get(i[1]) and str(year) == i[2]]

            url = proxy.parse(r[0][0])

            url = re.findall('(?://.+?|)(/.+)', url)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            pass 
Example 33
Project: plugin.video.exodus   Author: lastship   File: pmovies.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch' % url
            return url
        except:
            return 
Example 34
Project: plugin.video.exodus   Author: lastship   File: moviexk.py    (license)
def searchMovie(self, title, year, aliases):
        try:
            url = '%s/%s-%s/' % (self.base_link, cleantitle.geturl(title), year)
            url = client.request(url, output='geturl')

            if url == None:
                t = cleantitle.get(title)
                q = '%s %s' % (title, year)
                q = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(q))
                r = client.request(q)
                r = client.parseDOM(r, 'div', attrs={'class': 'inner'})
                r = client.parseDOM(r, 'div', attrs={'class': 'info'})
                r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
                r = [(i[0], re.findall('(?:^Watch Movie |^Watch movies |^Watch |)(.+?)\((\d{4})', i[1])) for i in r]
                r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if i[1]]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]

            if url == None: raise Exception()
            return url
        except:
            return 
Example 35
Project: plugin.video.exodus   Author: lastship   File: seriesonline.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]

            url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
            return url
        except:
            return 
Example 36
Project: plugin.video.exodus   Author: lastship   File: ymovies.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            return url
        except:
            return 
Example 37
Project: plugin.video.exodus   Author: lastship   File: tunemovie.py    (license)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            query = urlparse.urljoin(self.base_link, self.search_link)
            query = query % urllib.quote_plus(title)

            t = cleantitle.get(title)

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]

            url = [i[0] for i in r if t in cleantitle.get(i[1]) and year == i[2]][0]
            return url
        except:
            return 
Example 38
Project: plugin.video.exodus   Author: lastship   File: tunemovie.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            query = urlparse.urljoin(self.base_link, self.search_link)
            query = query % urllib.quote_plus(data['tvshowtitle'])

            t = cleantitle.get(data['tvshowtitle'])

            r = client.request(query)

            r = client.parseDOM(r, 'div', attrs = {'class': 'thumb'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title'), re.findall('(\d{4})', i)) for i in r]
            r = [(i[0][0], i[1][0], i[2][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]

            url = [i[0] for i in r if t in cleantitle.get(i[1]) and ('Season %s' % season) in i[1]][0]
            url += '?episode=%01d' % int(episode)

            return url
        except:
            return 
Example 39
Project: plugin.video.exodus   Author: lastship   File: pubfilm.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            year = re.findall('(\d{4})', premiered)[0]
            season = '%01d' % int(season) ; episode = '%01d' % int(episode)
            tvshowtitle = '%s %s: Season %s' % (data['tvshowtitle'], year, season)

            url = cache.get(self.pidtv_tvcache, 120, tvshowtitle)

            if url == None: raise Exception()

            url += '?episode=%01d' % int(episode)
            url = url.encode('utf-8')
            return url
        except:
            return 
Example 40
Project: plugin.video.exodus   Author: lastship   File: bobby.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
            headers = eval(data['headers'])
            aliases = eval(data['aliases'])
            title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
            title = cleantitle.getsearch(title)
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)
            r = client.request(query, headers=headers, timeout='30', mobile=True)
            match = re.compile('alias=(.+?)\'">(.+?)</a>').findall(r)
            r = [(i[0], re.findall('(.+?)\s+-\s+Season\s+(\d+)', i[1])) for i in match]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if self.matchAlias(i[1], aliases) and int(season) == int(i[2])][0]
            url = {'type': 'tvshow', 'id': r, 'episode': episode, 'season': season, 'headers': headers}
            url = urllib.urlencode(url)
            return url
        except:
            return 
Example 41
Project: plugin.video.exodus   Author: lastship   File: dltube.py    (license)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            if debrid.status() == False: raise Exception()

            t = cleantitle.get(title)

            query = self.search_link + urllib.quote_plus(title)
            query = urlparse.urljoin(self.base_link, query)

            r = client.request(query, XHR=True)
            r = json.loads(r)

            r = [i for i in r if 'category' in i and 'movie' in i['category'].lower()]
            r = [(i['url'], i['label']) for i in r if 'label' in i and 'url' in i]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return 
Example 42
Project: plugin.video.exodus   Author: lastship   File: movies14.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            year = re.findall('(\d{4})', premiered)[0]
            if int(year) >= 2016: raise Exception()

            url = re.sub('[^A-Za-z0-9]', '-', data['tvshowtitle']).lower()
            url = self.tvsearch_link % (url, data['year'], '%01d' % int(season), '%01d' % int(episode))

            r = urlparse.urljoin(self.base_link, url)
            r = client.request(r, output='geturl')
            if not data['year'] in r: raise Exception()

            return url
        except:
            return 
Example 43
Project: plugin.video.exodus   Author: lastship   File: animeultima.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if url == None: return

            tv_maze = tvmaze.tvMaze()
            num = tv_maze.episodeAbsoluteNumber(tvdb, int(season), int(episode))
            num = str(num)

            url = urlparse.urljoin(self.base_link, url)

            r = client.request(url)

            r = client.parseDOM(r, 'tr', attrs = {'class': ''})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'td', attrs = {'class': 'epnum'})) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [i[0] for i in r if num == i[1]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return 
Example 44
Project: plugin.video.exodus   Author: lastship   File: afdah.py    (license)
def movie(self, imdb, title, localtitle, aliases, year):
        try:
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)

            c, h = self.__get_cookies(query)

            t = cleantitle.get(title)

            r = client.request(query, headers=h, cookie=c)

            r = client.parseDOM(r, 'div', attrs={'class': 'cell_container'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            return url
        except:
            return 
Example 45
Project: plugin.video.exodus   Author: lastship   File: 1movies.py    (license)
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
        try:
            if not url:
                return

            data = urlparse.parse_qs(url)
            data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

            url = self.__search([data['tvshowtitle']] + source_utils.aliases_to_array(eval(data['aliases'])), data['year'], season)
            if not url: return

            r = client.request(urlparse.urljoin(self.base_link, url))

            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ep_link'})
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r if i]
            r = [(i[0], re.findall("^(?:episode)\s*(\d+)$", i[1], re.I)) for i in r]
            r = [(i[0], i[1][0] if i[1] else '0') for i in r]
            r = [i[0] for i in r if int(i[1]) == int(episode)][0]

            return source_utils.strip_domain(r)
        except:
            return 
Example 46
Project: plugin.video.exodus   Author: lastship   File: watch5s.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
            r = client.request(url, headers=headers, timeout='15')
            r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
            r = zip(client.parseDOM(r, 'a', ret='href'), client.parseDOM(r, 'a', ret='title'))
            results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
            try:
                r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
                url = [i[0] for i in r if self.matchAlias(i[1], aliases) and (year == i[2])][0]
            except:
                url = None
                pass

            if (url == None):
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            url = '%s/watch/' % url
            return url
        except:
            return 
Example 47
Project: plugin.video.exodus   Author: lastship   File: directdl.py    (license)
def resolve(self, url):
        try:
            b = urlparse.urlparse(url).netloc
            b = re.compile('([\w]+[.][\w]+)$').findall(b)[0]

            if not b in base64.b64decode(self.b_link): return url
 
            u, p, h = url.split('|')
            r = urlparse.parse_qs(h)['Referer'][0]
            #u += '&app_id=Exodus'

            c = self.request(r, output='cookie', close=False)
            result = self.request(u, post=p, referer=r, cookie=c)

            url = result.split('url=')
            url = [urllib.unquote_plus(i.strip()) for i in url]
            url = [i for i in url if i.startswith('http')]
            url = url[-1]

            return url
        except:
            return 
Example 48
Project: plugin.video.exodus   Author: lastship   File: xmovies.py    (license)
def searchMovie(self, title, year, aliases, headers):
        try:
            title = cleantitle.normalize(title)
            url = urlparse.urljoin(self.base_link, self.search_link % (cleantitle.geturl(title.replace('\'', '-'))))
            r = client.request(url, timeout='10', headers=headers)
            r = client.parseDOM(r, 'h2', attrs={'class': 'tit'})
            r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'a', ret='title')) for i in r]
            r = [(i[0][0], i[1][0]) for i in r if len(i[0]) > 0 and len(i[1]) > 0]
            r = [(i[0], re.findall('(.+?) \((\d{4})', i[1])) for i in r]
            r = [(i[0], i[1][0][0], i[1][0][1]) for i in r if len(i[1]) > 0]
            try:
                match = [i[0] for i in r if self.matchAlias(i[1], aliases) and year == i[2]][0]
            except:
                match = [i[0] for i in r if self.matchAlias(i[1], aliases)][0]

            url = re.findall('(?://.+?|)(/.+)', match)[0]
            url = client.replaceHTMLCodes(url)
            return url.encode('utf-8')
        except:
            return 
Example 49
Project: plugin.video.exodus   Author: lastship   File: streamay.py    (license)
def __search(self, title, localtitle, year, content_type):
        try:
            t = cleantitle.get(title)
            tq = cleantitle.get(localtitle)
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            query = urlparse.urljoin(self.base_link, self.search_link)

            post = urllib.urlencode({'k': "%s"}) % tq
            r = client.request(query, post=post)
            r = json.loads(r)

            r = [i.get('result') for i in r if i.get('type', '').encode('utf-8') == content_type]
            r = [(i.get('url'), i.get('originalTitle'), i.get('title'), i.get('anneeProduction', 0), i.get('dateStart', 0)) for i in r]
            r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1] if i[1] else ''), re.sub('<.+?>|</.+?>', '', i[2] if i[2] else ''), i[3] if i[3] else re.findall('(\d{4})', i[4])[0]) for i in r if i[3] or i[4]]
            r = sorted(r, key=lambda i: int(i[3]), reverse=True)  # with year > no year
            r = [i[0] for i in r if i[3] in y and (t.lower() == cleantitle.get(i[1].lower()) or tq.lower() == cleantitle.query(i[2].lower()))][0]

            url = re.findall('(?://.+?|)(/.+)', r)[0]
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')

            return url
        except:
            return