Python urlparse.urlparse() Examples

The following are 30 code examples of urlparse.urlparse(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module urlparse, or try the search function.
Example #1
Source File: diagnose.py    From dynamic-training-with-apache-mxnet-on-aws with Apache License 2.0 7 votes vote down vote up
def test_connection(name, url, timeout=10):
    """Time DNS resolution and page load for ``url`` and print the outcome.

    Args:
        name: Label used in the printed messages.
        url: URL to probe.
        timeout: Timeout in seconds handed to ``urlopen``.

    Returns:
        None. Timings and errors are reported with ``print`` only.
    """
    urlinfo = urlparse(url)
    # ``netloc`` can carry "user:pass@" and ":port", neither of which is part
    # of the DNS name; ``hostname`` strips both. Fall back to ``netloc`` when
    # no hostname was parsed so the failure is still reported below.
    host = urlinfo.hostname or urlinfo.netloc
    start = time.time()
    try:
        socket.gethostbyname(host)
    except Exception as e:
        print('Error resolving DNS for {}: {}, {}'.format(name, url, e))
        return
    dns_elapsed = time.time() - start
    start = time.time()
    try:
        # Only the elapsed time matters; close the response immediately so
        # the underlying connection is not leaked.
        urlopen(url, timeout=timeout).close()
    except Exception as e:
        print("Error open {}: {}, {}, DNS finished in {} sec.".format(name, url, e, dns_elapsed))
        return
    load_elapsed = time.time() - start
    print("Timing for {}: {}, DNS: {:.4f} sec, LOAD: {:.4f} sec.".format(name, url, dns_elapsed, load_elapsed))
Example #2
Source File: WebSpider.py    From Pansidong with GNU General Public License v3.0 6 votes vote down vote up
def format_url(url):
        """
        Build a coarse signature of a URL, used to drop duplicate or
        near-duplicate URLs.

        :param url: the URL to process
        :return: ``(features, suffix)`` where ``features`` is
            ``(netloc, length of every '/'-separated path piece,
            sorted query parameter names)`` and ``suffix`` is the text after
            the last ``.`` in the path
        """
        url_structure = urlparse.urlparse(url)
        netloc = url_structure.netloc
        # Normalise an empty path to "/". Doing this on the parsed path
        # (instead of appending "/" to the raw URL) keeps the query intact
        # for URLs such as "http://host?x=1".
        path = url_structure.path or '/'
        query = url_structure.query
        suffix = path.split('.')[-1]

        result = (
            netloc,
            tuple(len(piece) for piece in path.split('/')),
            tuple(sorted(pair.split('=')[0] for pair in query.split('&'))),
        )
        return result, suffix
Example #3
Source File: getmetrics_cadvisor.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """Serialize ``chunk_metric_data`` into the API payload and hand it to ``send_request``.

    For project types containing 'DEPLOYMENT' or 'INCIDENT', every chunk's
    ``'data'`` entry is replaced in place by its JSON string first. A few
    debug statistics are logged, and nothing is sent when
    ``cli_config_vars['testing']`` is set.
    """
    started_at = time.time()

    # build the payload for the metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if any(marker in project_type for marker in ('DEPLOYMENT', 'INCIDENT')):
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # testing mode: stop before anything is sent
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    size_report = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF', size_report,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example #4
Source File: getmessages_elasticsearch2.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure ``project_name`` exists on the server at ``if_config_vars['if_url']``.

    Nothing happens unless ``if_config_vars['token']`` is present and
    non-empty. Otherwise curl queries ``/api/v1/getprojectstatus``; when the
    project name is not found in the response the project is created through
    ``/api/v1/add-custom-project``. The name is then stored in
    ``if_config_vars['project_name']`` and, if ``if_config_vars['system_name']``
    is set, ``/api/v1/projects/update`` is called.

    curl is started from an argument list instead of a shell command line, so
    quotes, spaces or shell metacharacters in the configured values reach
    curl literally rather than being interpreted by a shell.

    :param project_name: proposed project name
    :return: None; curl failures are logged, not raised
    """
    import os  # function-scope import: only needed to build curl's environment

    if 'token' not in if_config_vars or len(if_config_vars['token']) == 0:
        return

    logger.debug(project_name)
    base_url = if_config_vars['if_url']
    user_name = if_config_vars['user_name']
    token = if_config_vars['token']
    post_form_args = ['-H', 'Content-Type: application/x-www-form-urlencoded', '-X', 'POST']
    # Same effect as the former "no_proxy= curl ..." shell prefix: the child
    # process sees an empty no_proxy variable.
    no_proxy_env = dict(os.environ, no_proxy='')
    try:
        # check for existing project
        check_url = urlparse.urljoin(base_url, '/api/v1/getprojectstatus')
        output_check_project = subprocess.check_output([
            'curl',
            check_url + '?userName=' + user_name + '&token=' + token
            + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name
            + '%22%2C%22customerName%22%3A%22' + user_name
            + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000',
        ])
        # create project if no existing project
        # NOTE(review): this substring test assumes check_output returns text
        # (Python 2 str) -- confirm before running on Python 3.
        if project_name not in output_check_project:
            logger.debug('creating project')
            create_url = urlparse.urljoin(base_url, '/api/v1/add-custom-project')
            create_form = (
                'userName=' + user_name + '&token=' + token
                + '&projectName=' + project_name
                + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType='
                + get_data_type_from_project_type()
                + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60)
                + '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval'])
                + '&zone=&email=&access-key=&secrete-key=&insightAgentType='
                + get_insight_agent_type_from_project_type()
            )
            subprocess.check_output(
                ['curl', '-d', create_form] + post_form_args
                + [create_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
        # set project name to proposed name
        if_config_vars['project_name'] = project_name
        # try to add new project to system
        if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
            system_url = urlparse.urljoin(base_url, '/api/v1/projects/update')
            update_form = (
                'userName=' + user_name + '&token=' + token
                + '&operation=updateprojsettings&projectName=' + project_name
                + '&systemName=' + if_config_vars['system_name']
            )
            subprocess.check_output(
                ['curl', '-d', update_form] + post_form_args
                + [system_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing curl binary, which the shell used to turn
        # into a non-zero exit status (CalledProcessError).
        logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example #5
Source File: getlogs_k8s.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """Serialize ``chunk_metric_data`` into the API payload and hand it to ``send_request``.

    For project types containing 'DEPLOYMENT' or 'INCIDENT', every chunk's
    ``'data'`` entry is replaced in place by its JSON string first. A few
    debug statistics are logged, and nothing is sent when
    ``cli_config_vars['testing']`` is set.
    """
    started_at = time.time()

    # build the payload for the metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if any(marker in project_type for marker in ('DEPLOYMENT', 'INCIDENT')):
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # testing mode: stop before anything is sent
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    size_report = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF', size_report,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example #6
Source File: getlogs_k8s.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure ``project_name`` exists on the server at ``if_config_vars['if_url']``.

    Nothing happens unless ``if_config_vars['token']`` is present and
    non-empty. Otherwise curl queries ``/api/v1/getprojectstatus``; when the
    project name is not found in the response the project is created through
    ``/api/v1/add-custom-project``. The name is then stored in
    ``if_config_vars['project_name']`` and, if ``if_config_vars['system_name']``
    is set, ``/api/v1/projects/update`` is called.

    curl is started from an argument list instead of a shell command line, so
    quotes, spaces or shell metacharacters in the configured values reach
    curl literally rather than being interpreted by a shell.

    :param project_name: proposed project name
    :return: None; curl failures are logged, not raised
    """
    import os  # function-scope import: only needed to build curl's environment

    if 'token' not in if_config_vars or len(if_config_vars['token']) == 0:
        return

    logger.debug(project_name)
    base_url = if_config_vars['if_url']
    user_name = if_config_vars['user_name']
    token = if_config_vars['token']
    post_form_args = ['-H', 'Content-Type: application/x-www-form-urlencoded', '-X', 'POST']
    # Same effect as the former "no_proxy= curl ..." shell prefix: the child
    # process sees an empty no_proxy variable.
    no_proxy_env = dict(os.environ, no_proxy='')
    try:
        # check for existing project
        check_url = urlparse.urljoin(base_url, '/api/v1/getprojectstatus')
        output_check_project = subprocess.check_output([
            'curl',
            check_url + '?userName=' + user_name + '&token=' + token
            + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name
            + '%22%2C%22customerName%22%3A%22' + user_name
            + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000',
        ])
        # create project if no existing project
        # NOTE(review): this substring test assumes check_output returns text
        # (Python 2 str) -- confirm before running on Python 3.
        if project_name not in output_check_project:
            logger.debug('creating project')
            create_url = urlparse.urljoin(base_url, '/api/v1/add-custom-project')
            create_form = (
                'userName=' + user_name + '&token=' + token
                + '&projectName=' + project_name
                + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType='
                + get_data_type_from_project_type()
                + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60)
                + '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval'])
                + '&zone=&email=&access-key=&secrete-key=&insightAgentType='
                + get_insight_agent_type_from_project_type()
            )
            subprocess.check_output(
                ['curl', '-d', create_form] + post_form_args
                + [create_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
        # set project name to proposed name
        if_config_vars['project_name'] = project_name
        # try to add new project to system
        if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
            system_url = urlparse.urljoin(base_url, '/api/v1/projects/update')
            update_form = (
                'userName=' + user_name + '&token=' + token
                + '&operation=updateprojsettings&projectName=' + project_name
                + '&systemName=' + if_config_vars['system_name']
            )
            subprocess.check_output(
                ['curl', '-d', update_form] + post_form_args
                + [system_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing curl binary, which the shell used to turn
        # into a non-zero exit status (CalledProcessError).
        logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example #7
Source File: getmetrics_cadvisor.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure ``project_name`` exists on the server at ``if_config_vars['if_url']``.

    Nothing happens unless ``if_config_vars['token']`` is present and
    non-empty. Otherwise curl queries ``/api/v1/getprojectstatus``; when the
    project name is not found in the response the project is created through
    ``/api/v1/add-custom-project``. The name is then stored in
    ``if_config_vars['project_name']`` and, if ``if_config_vars['system_name']``
    is set, ``/api/v1/projects/update`` is called.

    curl is started from an argument list instead of a shell command line, so
    quotes, spaces or shell metacharacters in the configured values reach
    curl literally rather than being interpreted by a shell.

    :param project_name: proposed project name
    :return: None; curl failures are logged, not raised
    """
    import os  # function-scope import: only needed to build curl's environment

    if 'token' not in if_config_vars or len(if_config_vars['token']) == 0:
        return

    logger.debug(project_name)
    base_url = if_config_vars['if_url']
    user_name = if_config_vars['user_name']
    token = if_config_vars['token']
    post_form_args = ['-H', 'Content-Type: application/x-www-form-urlencoded', '-X', 'POST']
    # Same effect as the former "no_proxy= curl ..." shell prefix: the child
    # process sees an empty no_proxy variable.
    no_proxy_env = dict(os.environ, no_proxy='')
    try:
        # check for existing project
        check_url = urlparse.urljoin(base_url, '/api/v1/getprojectstatus')
        output_check_project = subprocess.check_output([
            'curl',
            check_url + '?userName=' + user_name + '&token=' + token
            + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name
            + '%22%2C%22customerName%22%3A%22' + user_name
            + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000',
        ])
        # create project if no existing project
        # NOTE(review): this substring test assumes check_output returns text
        # (Python 2 str) -- confirm before running on Python 3.
        if project_name not in output_check_project:
            logger.debug('creating project')
            create_url = urlparse.urljoin(base_url, '/api/v1/add-custom-project')
            create_form = (
                'userName=' + user_name + '&token=' + token
                + '&projectName=' + project_name
                + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType='
                + get_data_type_from_project_type()
                + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60)
                + '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval'])
                + '&zone=&email=&access-key=&secrete-key=&insightAgentType='
                + get_insight_agent_type_from_project_type()
            )
            subprocess.check_output(
                ['curl', '-d', create_form] + post_form_args
                + [create_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
        # set project name to proposed name
        if_config_vars['project_name'] = project_name
        # try to add new project to system
        if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
            system_url = urlparse.urljoin(base_url, '/api/v1/projects/update')
            update_form = (
                'userName=' + user_name + '&token=' + token
                + '&operation=updateprojsettings&projectName=' + project_name
                + '&systemName=' + if_config_vars['system_name']
            )
            subprocess.check_output(
                ['curl', '-d', update_form] + post_form_args
                + [system_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing curl binary, which the shell used to turn
        # into a non-zero exit status (CalledProcessError).
        logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example #8
Source File: scanner.py    From sqliv with GNU General Public License v3.0 6 votes vote down vote up
def __sqli(url):
    """check SQL injection vulnerability

    Appends each payload to every query parameter of ``url``, fetches the
    resulting page with ``web.gethtml`` and passes the page to
    ``sqlerrors.check``.

    :param url: URL to scan; only URLs with a query string are tested
    :return: ``(True, db)`` as soon as a payload is reported vulnerable with
        a non-None ``db``, otherwise ``(False, None)``
    """

    std.stdout("scanning {}".format(url), end="")

    domain = url.split("?")[0]  # domain with path without queries
    queries = urlparse(url).query.split("&")
    # no queries in url
    if not any(queries):
        # call form of print: same output on Python 2, valid syntax on Python 3
        print("")  # move cursor to new line
        return False, None

    payloads = ("'", "')", "';", '"', '")', '";', '`', '`)', '`;', '\\', "%27", "%%2727", "%25%27", "%60", "%5C")
    for payload in payloads:
        website = domain + "?" + "&".join(param + payload for param in queries)
        source = web.gethtml(website)
        if source:
            vulnerable, db = sqlerrors.check(source)
            if vulnerable and db is not None:
                std.showsign(" vulnerable")
                return True, db

    print("")  # move cursor to new line
    return False, None
Example #9
Source File: db_url.py    From Quiver-alfred with MIT License 6 votes vote down vote up
def connect(url, **connect_params):
    """Instantiate the database class registered in ``schemes`` for ``url``'s scheme.

    Connection keyword arguments come from ``parseresult_to_dict`` applied to
    the parsed URL and are then overridden by ``connect_params``.

    :param url: database URL; its scheme selects the database class
    :param connect_params: extra keyword arguments for the database class
    :return: ``database_class(**connect_kwargs)``
    :raises RuntimeError: if the scheme is registered with a ``None`` class,
        or is not registered at all
    """
    parsed = urlparse(url)
    connect_kwargs = parseresult_to_dict(parsed)
    connect_kwargs.update(connect_params)
    database_class = schemes.get(parsed.scheme)

    if database_class is None:
        # Look up the *scheme* here: testing ``database_class`` (which is
        # None at this point) made the first message unreachable.
        if parsed.scheme in schemes:
            raise RuntimeError('Attempted to use "%s" but a required library '
                               'could not be imported.' % parsed.scheme)
        raise RuntimeError('Unrecognized or unsupported scheme: "%s".' %
                           parsed.scheme)

    return database_class(**connect_kwargs)

# Conditionally register additional databases. 
Example #10
Source File: reverseip.py    From sqliv with GNU General Public License v3.0 6 votes vote down vote up
def reverseip(url):
    """return domains from given the same server

    Reduces ``url`` to its host part and POSTs it as ``remoteAddress`` to the
    yougetsignal lookup endpoint.

    NOTE(review): only the request phase is visible in this snippet;
    ``opener`` and ``result`` are not used by the lines shown here, so they
    are left in place -- confirm against the full source.
    """

    # get only domain name (parse once instead of three times)
    parsed = urlparse(url)
    url = parsed.netloc if parsed.netloc != '' else parsed.path.split("/")[0]

    source = "http://domains.yougetsignal.com/domains.php"
    useragent = useragents.get()
    contenttype = "application/x-www-form-urlencoded; charset=UTF-8"

    # POST method
    opener = urllib2.build_opener(
        urllib2.HTTPHandler(), urllib2.HTTPSHandler())
    data = urllib.urlencode([('remoteAddress', url), ('key', '')])

    request = urllib2.Request(source, data)
    request.add_header("Content-type", contenttype)
    request.add_header("User-Agent", useragent)

    try:
        result = urllib2.urlopen(request).read()

    except urllib2.HTTPError as e:
        # same text as the former ``print >> sys.stderr`` statement
        sys.stderr.write("[{}] HTTP error\n".format(e.code))
Example #11
Source File: dataset.py    From Quiver-alfred with MIT License 6 votes vote down vote up
def __init__(self, url):
        """Connect to the database at ``url`` and build the introspected helpers.

        Stores the URL, the URL path minus its first character, the connected
        database, the introspector with its generated models, a schema
        migrator, a ``BaseModel`` bound to the database, and the export and
        import format tables.
        """
        self._url = url
        # URL path without its first character
        self._database_path = urlparse(url).path[1:]

        # Open the connection before anything inspects the schema.
        self._database = connect(url)
        self._database.connect()

        # Generate models from the existing schema.
        introspector = Introspector.from_database(self._database)
        self._introspector = introspector
        self._models = introspector.generate_models(skip_invalid=True)
        self._migrator = SchemaMigrator.from_database(self._database)

        # Base class whose Meta points at this instance's database.
        class BaseModel(Model):
            class Meta:
                database = self._database

        self._base_model = BaseModel
        self._export_formats = self.get_export_formats()
        self._import_formats = self.get_import_formats()
Example #12
Source File: app.py    From video2commons with GNU General Public License v3.0 6 votes vote down vote up
def loginredirect():
    """Initialize OAuth login.

    Abandons the current session, starts the handshake and stores the request
    token in the session. ``return_to_url`` defaults to the ``main`` endpoint;
    a ``returnto`` query argument replaces it only when, joined onto the host
    URL, it has the same scheme and netloc as ``request.url_root`` and a path
    that starts with the root path.
    """
    app.session_interface.abandon_session(app, session)

    redirecturl, request_token = handshaker.initiate()
    token_key, token_secret = request_token.key, request_token.secret
    session['request_token_key'] = token_key
    session['request_token_secret'] = token_secret
    session['return_to_url'] = url_for('main')

    returnto = request.args.get('returnto')
    if returnto:
        base = urlparse(request.url_root)
        target = urlparse(urljoin(request.host_url, returnto))
        same_origin = (target.scheme == base.scheme and
                       target.netloc == base.netloc)
        # only accept targets located under this application's root
        if same_origin and target.path.startswith(base.path):
            session['return_to_url'] = returnto

    return redirect(redirecturl)
Example #13
Source File: bwa_alignment.py    From toil-scripts with Apache License 2.0 6 votes vote down vote up
def parse_manifest(manifest_path):
    """
    Parse manifest file

    Whitespace-only lines and lines starting with '#' are skipped. Every
    other line must hold 2 or 3 tab separated fields: UUID, URL1 and
    optionally URL2; each URL must have a scheme.

    :param str manifest_path: Path to manifest file
    :return: samples, one ``[uuid, urls]`` entry per manifest line
    :rtype: list[list]
    """
    samples = []
    with open(manifest_path, 'r') as f:
        for line in f:
            if line.isspace() or line.startswith('#'):
                continue
            sample = line.strip().split('\t')
            require(2 <= len(sample) <= 3, 'Bad manifest format! '
                                           'Expected UUID\tURL1\t[URL2] (tab separated), got: {}'.format(sample))
            uuid, urls = sample[0], sample[1:]
            for url in urls:
                # The former extra ``and urlparse(url)`` operand was a
                # non-empty tuple and therefore always truthy; the scheme is
                # the only effective check.
                require(urlparse(url).scheme, 'Invalid URL passed for {}'.format(url))
            samples.append([uuid, urls])
    return samples
Example #14
Source File: getmessages_elasticsearch2.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def send_data_to_if(chunk_metric_data):
    """Serialize ``chunk_metric_data`` into the API payload and hand it to ``send_request``.

    For project types containing 'DEPLOYMENT' or 'INCIDENT', every chunk's
    ``'data'`` entry is replaced in place by its JSON string first. A few
    debug statistics are logged, and nothing is sent when
    ``cli_config_vars['testing']`` is set.
    """
    started_at = time.time()

    # build the payload for the metric streaming agent
    payload = initialize_api_post_data()
    project_type = if_config_vars['project_type']
    if any(marker in project_type for marker in ('DEPLOYMENT', 'INCIDENT')):
        for entry in chunk_metric_data:
            entry['data'] = json.dumps(entry['data'])
    payload[get_data_field_from_project_type()] = json.dumps(chunk_metric_data)

    logger.debug('First:\n' + str(chunk_metric_data[0]))
    logger.debug('Last:\n' + str(chunk_metric_data[-1]))
    logger.debug('Total Data (bytes): ' + str(get_json_size_bytes(payload)))
    logger.debug('Total Lines: ' + str(track['line_count']))

    # testing mode: stop before anything is sent
    if cli_config_vars['testing']:
        return

    # send the data
    post_url = urlparse.urljoin(if_config_vars['if_url'], get_api_from_project_type())
    size_report = str(get_json_size_bytes(payload)) + ' bytes of data are reported.'
    send_request(post_url, 'POST', 'Could not send request to IF', size_report,
                 data=payload, proxies=if_config_vars['if_proxies'])
    elapsed = round(time.time() - started_at, 2)
    logger.debug('--- Send data time: %s seconds ---' % elapsed)
Example #15
Source File: WebSpider.py    From Pansidong with GNU General Public License v3.0 6 votes vote down vote up
def format_url_param(url):
        """
        Return ``url`` with every all-digit query value replaced by ``<int>``.

        Parameters without ``=`` are kept unchanged, and a value containing
        ``=`` is kept whole (only the first ``=`` separates key and value).

        :param url: the URL to normalise
        :return: the rewritten URL, or None when ``url`` has no query string
        """
        url_st = urlparse.urlparse(url)
        queries = url_st.query
        if not queries:
            return
        normalized = []
        for eq in queries.split("&"):
            # partition never raises: a bare "flag" yields sep == value == ""
            key, sep, value = eq.partition("=")
            if value.isdigit():
                value = "<int>"
            normalized.append(key + sep + value)
        new_queries = "&".join(normalized).strip("&")
        url = urlparse.urlunparse((
            url_st.scheme,
            url_st.netloc,
            url_st.path,
            url_st.params,
            new_queries,
            url_st.fragment,
        ))
        return url
Example #16
Source File: getmessages_prometheus.py    From InsightAgent with Apache License 2.0 6 votes vote down vote up
def check_project(project_name):
    """Make sure ``project_name`` exists on the server at ``if_config_vars['if_url']``.

    Nothing happens unless ``if_config_vars['token']`` is present and
    non-empty. Otherwise curl queries ``/api/v1/getprojectstatus``; when the
    project name is not found in the response the project is created through
    ``/api/v1/add-custom-project``. The name is then stored in
    ``if_config_vars['project_name']`` and, if ``if_config_vars['system_name']``
    is set, ``/api/v1/projects/update`` is called.

    curl is started from an argument list instead of a shell command line, so
    quotes, spaces or shell metacharacters in the configured values reach
    curl literally rather than being interpreted by a shell.

    :param project_name: proposed project name
    :return: None; curl failures are logged, not raised
    """
    import os  # function-scope import: only needed to build curl's environment

    if 'token' not in if_config_vars or len(if_config_vars['token']) == 0:
        return

    logger.debug(project_name)
    base_url = if_config_vars['if_url']
    user_name = if_config_vars['user_name']
    token = if_config_vars['token']
    post_form_args = ['-H', 'Content-Type: application/x-www-form-urlencoded', '-X', 'POST']
    # Same effect as the former "no_proxy= curl ..." shell prefix: the child
    # process sees an empty no_proxy variable.
    no_proxy_env = dict(os.environ, no_proxy='')
    try:
        # check for existing project
        check_url = urlparse.urljoin(base_url, '/api/v1/getprojectstatus')
        output_check_project = subprocess.check_output([
            'curl',
            check_url + '?userName=' + user_name + '&token=' + token
            + '&projectList=%5B%7B%22projectName%22%3A%22' + project_name
            + '%22%2C%22customerName%22%3A%22' + user_name
            + '%22%2C%22projectType%22%3A%22CUSTOM%22%7D%5D&tzOffset=-14400000',
        ])
        # create project if no existing project
        # NOTE(review): this substring test assumes check_output returns text
        # (Python 2 str) -- confirm before running on Python 3.
        if project_name not in output_check_project:
            logger.debug('creating project')
            create_url = urlparse.urljoin(base_url, '/api/v1/add-custom-project')
            create_form = (
                'userName=' + user_name + '&token=' + token
                + '&projectName=' + project_name
                + '&instanceType=PrivateCloud&projectCloudType=PrivateCloud&dataType='
                + get_data_type_from_project_type()
                + '&samplingInterval=' + str(if_config_vars['sampling_interval'] / 60)
                + '&samplingIntervalInSeconds=' + str(if_config_vars['sampling_interval'])
                + '&zone=&email=&access-key=&secrete-key=&insightAgentType='
                + get_insight_agent_type_from_project_type()
            )
            subprocess.check_output(
                ['curl', '-d', create_form] + post_form_args
                + [create_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
        # set project name to proposed name
        if_config_vars['project_name'] = project_name
        # try to add new project to system
        if 'system_name' in if_config_vars and len(if_config_vars['system_name']) != 0:
            system_url = urlparse.urljoin(base_url, '/api/v1/projects/update')
            update_form = (
                'userName=' + user_name + '&token=' + token
                + '&operation=updateprojsettings&projectName=' + project_name
                + '&systemName=' + if_config_vars['system_name']
            )
            subprocess.check_output(
                ['curl', '-d', update_form] + post_form_args
                + [system_url + '?tzOffset=-18000000'],
                env=no_proxy_env)
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing curl binary, which the shell used to turn
        # into a non-zero exit status (CalledProcessError).
        logger.error('Unable to create project for ' + project_name + '. Data will be sent to ' + if_config_vars['project_name'])
Example #17
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the result links found in ``resp``.

        Every ``<p class="web-result-url">`` entry is reduced to its network
        location; hosts that are non-empty, differ from ``self.domain`` and
        are not yet known are appended to ``self.subdomains``.

        :param resp: response text to scan
        :return: the raw list of matched links (empty if scanning failed)
        """
        link_regx = re.compile('<p class="web-result-url">(.*?)</p>')
        # Bound before the ``try`` so the final ``return`` cannot hit an
        # unbound name when ``findall`` itself raises (e.g. ``resp`` is None).
        links_list = []
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
                # skip links without a host instead of recording ""
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain.strip())
        except Exception:
            # deliberate best effort: a malformed page must not stop the scan
            pass

        return links_list
Example #18
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the ``<cite>`` links found in ``resp``.

        Each match has any ``<span...>`` remainder removed and is reduced to
        its network location; hosts that are non-empty, differ from
        ``self.domain`` and are not yet known are appended to
        ``self.subdomains``.

        :param resp: response text to scan
        :return: the raw list of matched links (empty if scanning failed)
        """
        # raw string: same pattern text, without the invalid "\/" escape
        link_regx = re.compile(r'<cite.*?>(.*?)<\/cite>')
        # Bound before the ``try`` so the final ``return`` cannot hit an
        # unbound name when ``findall`` itself raises (e.g. ``resp`` is None).
        links_list = []
        try:
            links_list = link_regx.findall(resp)
            for link in links_list:
                link = re.sub('<span.*>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain.strip())
        except Exception:
            # deliberate best effort: a malformed page must not stop the scan
            pass
        return links_list
Example #19
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains of ``self.domain`` from the links in ``resp``.

        Links are gathered with two patterns, stripped of ``<b>``/``</b>``
        tags and reduced to their network location. A host is appended to
        ``self.subdomains`` when it ends with ``self.domain``, is non-empty,
        is not already known and is not the domain itself.

        :param resp: response text to scan
        :return: the raw list of matched links (empty if scanning failed)
        """
        cite_pattern = re.compile('<span class="txt"><span class=" cite fw-xl fz-15px">(.*?)</span>')
        title_pattern = re.compile('<span class=" fz-15px fw-m fc-12th wr-bw.*?">(.*?)</span>')
        links_list = []
        try:
            links_list = cite_pattern.findall(resp) + title_pattern.findall(resp)
            for raw_link in links_list:
                candidate = re.sub(r"<(\/)?b>", "", raw_link)
                if not candidate.startswith('http'):
                    candidate = "http://" + candidate
                host = urlparse.urlparse(candidate).netloc
                # only hosts under the target domain are of interest
                if not host.endswith(self.domain):
                    continue
                if not host or host in self.subdomains or host == self.domain:
                    continue
                if self.verbose:
                    self.print_("%s%s: %s%s" % (R, self.engine_name, W, host))
                self.subdomains.append(host.strip())
        except Exception:
            # best effort: parsing problems are ignored
            pass

        return links_list
Example #20
Source File: sublist3r.py    From subtake with GNU General Public License v2.0 6 votes vote down vote up
def extract_domains(self, resp):
        """Collect new subdomains from the result anchors found in ``resp``.

        ``href`` values of ``b_algo`` and ``b_title`` entries are cleaned of
        leftover markup and reduced to their network location; hosts that are
        non-empty, differ from ``self.domain`` and are not yet known are
        appended to ``self.subdomains``.

        :param resp: response text to scan
        :return: the raw list of matched links (empty if scanning failed)
        """
        link_regx = re.compile('<li class="b_algo"><h2><a href="(.*?)"')
        link_regx2 = re.compile('<div class="b_title"><h2><a href="(.*?)"')
        # Bound before the ``try`` so the final ``return`` cannot hit an
        # unbound name when ``findall`` itself raises (e.g. ``resp`` is None).
        links_list = []
        try:
            links = link_regx.findall(resp)
            links2 = link_regx2.findall(resp)
            links_list = links + links2

            for link in links_list:
                # raw string: same pattern text, without the invalid "\/" escape
                link = re.sub(r'<(\/)?strong>|<span.*?>|<|>', '', link)
                if not link.startswith('http'):
                    link = "http://" + link
                subdomain = urlparse.urlparse(link).netloc
                # skip links without a host instead of recording ""
                if subdomain and subdomain not in self.subdomains and subdomain != self.domain:
                    if self.verbose:
                        self.print_("%s%s: %s%s" % (R, self.engine_name, W, subdomain))
                    self.subdomains.append(subdomain.strip())
        except Exception:
            # deliberate best effort: a malformed page must not stop the scan
            pass

        return links_list
Example #21
Source File: wappalyzer.py    From watchdog with Apache License 2.0 6 votes vote down vote up
def analyze(self):
        """Run the wappalyzer JavaScript driver against ``self.url``.

        Loads ``js/wappalyzer.js`` and ``../php/js/driver.js`` (relative to
        ``self.file_dir``) into a PyV8 context, fetches ``self.url`` with
        ``requests`` and evaluates the driver with the page's host, URL, HTML
        and headers plus ``self.apps`` and ``self.categories``.

        :return: the value of the final ``ctxt.eval`` call
        """
        ctxt = PyV8.JSContext()
        ctxt.enter()

        # Read both scripts inside ``with`` blocks so the file handles are
        # closed even when reading or evaluating raises.
        with open(os.path.join(self.file_dir, 'js/wappalyzer.js')) as f1:
            wappalyzer_js = f1.read()
        with open(os.path.join(self.file_dir, '../php/js/driver.js')) as f2:
            driver_js = f2.read()
        ctxt.eval(wappalyzer_js)
        ctxt.eval(driver_js)

        host = urlparse(self.url).hostname
        response = requests.get(self.url)
        html = response.text
        headers = dict(response.headers)

        data = {'host': host, 'url': self.url, 'html': html, 'headers': headers}
        apps = json.dumps(self.apps)
        categories = json.dumps(self.categories)
        return ctxt.eval("w.apps = %s; w.categories = %s; w.driver.data = %s; w.driver.init();" % (apps, categories, json.dumps(data)))
Example #22
Source File: web_request_socket.py    From httpninja with Apache License 2.0 6 votes vote down vote up
def _setParams(self):
        """Derive path, query, body and target host/port/protocol from ``self.url``.

        Sets ``_path`` (the full URL when ``useAbsolutePath`` is True, else
        the URL path, with '' replaced by '/'), ``qs`` (only in the non-absolute
        case), a url-encoded ``body`` when it was a dict, ``targetName``,
        ``targetPort`` (defaulting to 443 for https, 80 otherwise),
        ``targetProtocol`` and, for https, ``isSSL``.
        """
        parts = urlparse.urlparse(self.url)

        # request path: absolute form keeps the whole URL
        absolute = self.useAbsolutePath == True
        if not absolute:
            self.qs = parts.query
        chosen_path = self.url if absolute else parts.path
        self._path = '/' if chosen_path == '' else chosen_path

        # a dict body is sent url-encoded
        if isinstance(self.body, dict):
            self.body = urllib.urlencode(self.body)

        # connection target
        # NOTE(review): netloc still contains any ":port" / "user@" part of
        # the URL -- confirm that is what targetName is meant to hold.
        self.targetName = parts.netloc
        port = parts.port
        self.targetProtocol = parts.scheme.lower()
        uses_https = self.targetProtocol == 'https'
        if uses_https:
            self.isSSL = True
        if port is None:
            port = 443 if uses_https else 80
        self.targetPort = port
Example #23
Source File: web_request_socket.py    From httpninja with Apache License 2.0 6 votes vote down vote up
def _setParams(self):
        """Derive path, query, body and target host/port/protocol from ``self.url``.

        Sets ``_path`` (the full URL when ``useAbsolutePath`` is True, else
        the URL path, with '' replaced by '/'), ``qs`` (only in the non-absolute
        case), a url-encoded ``body`` when it was a dict, ``targetName`` (the
        URL's hostname), ``targetPort`` (defaulting to 443 for https, 80
        otherwise), ``targetProtocol`` and, for https, ``isSSL``.
        """
        parts = urlparse.urlparse(self.url)

        # request path: absolute form keeps the whole URL
        absolute = self.useAbsolutePath == True
        if not absolute:
            self.qs = parts.query
        chosen_path = self.url if absolute else parts.path
        self._path = '/' if chosen_path == '' else chosen_path

        # a dict body is sent url-encoded
        if isinstance(self.body, dict):
            self.body = urllib.urlencode(self.body)

        # connection target
        self.targetName = parts.hostname
        port = parts.port
        self.targetProtocol = parts.scheme.lower()
        uses_https = self.targetProtocol == 'https'
        if uses_https:
            self.isSSL = True
        if port is None:
            port = 443 if uses_https else 80
        self.targetPort = port
Example #24
Source File: cspcheck_ipsource.py    From securityheaders with Apache License 2.0 6 votes vote down vote up
def checkIP(self, directive, directiveValues, findings):
        """Append an INFO finding for every directive value whose host is an IP address.

        Loopback addresses get a "localhost" message; every other IP address
        gets an "IP-Address as source" message. Values whose host is not an
        IP address are ignored.

        :param directive: the directive being checked (its ``value`` is used in messages)
        :param directiveValues: source values of that directive
        :param findings: list that new ``Finding`` objects are appended to
        """
        csp = self.csp
        for value in directiveValues:
            url = '//' + Util.getSchemeFreeUrl(value)
            # ``hostname`` drops any ":port" (and IPv6 brackets), which
            # ``ip_address`` would otherwise reject; it is None without a host.
            host = urlparse(url).hostname or ''

            try:
                ip = ipaddress.ip_address(u'' + host)
            except ValueError:
                # not an IP address: nothing to report for this value
                continue

            ipString = str(ip)
            # ``is_loopback`` instead of a substring test, which also matched
            # addresses such as 127.0.0.10 or 10.127.0.0.1-like strings.
            if ip.is_loopback:
                findings.append(Finding(csp.headerkey,FindingType.IP_SOURCE, directive.value + ' directive allows localhost as source. Please make sure to remove this in production environments.',FindingSeverity.INFO, directive, value))
            else:
                findings.append(Finding(csp.headerkey,FindingType.IP_SOURCE, directive.value + ' directive has an IP-Address as source: ' + ipString + ' (will be ignored by browsers!). ', FindingSeverity.INFO, directive, value))
Example #25
Source File: dump1090.py    From dump1090-tools with ISC License 6 votes vote down vote up
def handle_config(root):
    """Register collectd read callbacks for every 'Instance' entry under ``root``.

    Each 'Instance' child supplies its name as first value and its URL through
    a 'URL' sub-entry (the last one wins). With a URL, ``handle_read`` and a
    60-second ``handle_read_1min`` callback are registered; without one, or
    for any other entry key, a collectd warning is emitted.
    """
    for entry in root.children:
        if entry.key != 'Instance':
            collectd.warning('Ignored config entry: ' + entry.key)
            continue

        instance_name = entry.values[0]
        url = None
        for option in entry.children:
            if option.key == 'URL':
                url = option.values[0]

        if not url:
            collectd.warning('No URL found in dump1090 Instance ' + instance_name)
            continue

        # both callbacks receive the same (name, host, url) triple
        read_data = (instance_name, urlparse.urlparse(url).hostname, url)
        collectd.register_read(callback=handle_read,
                               data=read_data,
                               name='dump1090.' + instance_name)
        collectd.register_read(callback=handle_read_1min,
                               data=read_data,
                               name='dump1090.' + instance_name + '.1min',
                               interval=60)
Example #26
Source File: evillib.py    From wafw00f with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def urlParser(target):
    """Split a target URL into the pieces needed to open a connection.

    Args:
        target: URL string.  Only the ``http`` and ``https`` schemes (or no
            scheme at all) are accepted.

    Returns:
        A tuple ``(hostname, port, path, query, ssl)`` where ``port`` is the
        port as a *string* (or ``None`` when the URL carries no port),
        ``path`` defaults to ``'/'`` when empty, and ``ssl`` is ``True`` only
        for ``https``.  Returns ``None`` (after logging an error) when the
        scheme is not supported.

    Any ``user:password@`` prefix in the authority is ignored, and a
    bracketed IPv6 literal such as ``[::1]:8080`` is returned without the
    brackets, so neither is mistaken for a ``host:port`` pair.
    """
    log = logging.getLogger('urlparser')

    o = urlparse(target)
    if o[0] not in ['http', 'https', '']:
        log.error('scheme %s not supported', o[0])
        return
    ssl = o[0] == 'https'
    path = o[2] if len(o[2]) > 0 else '/'

    # Drop any userinfo; its ':' must not be confused with the port separator.
    hostport = o[1].rpartition('@')[2]
    if hostport.startswith('['):
        # Bracketed IPv6 literal: the colons inside the brackets belong to
        # the address, so only a ':' following the closing ']' starts a port.
        hostname, _, remainder = hostport[1:].partition(']')
        port = remainder[1:] if remainder.startswith(':') else None
    else:
        tmp = hostport.split(':')
        hostname = tmp[0]
        port = tmp[1] if len(tmp) > 1 else None

    query = o[4]
    return (hostname, port, path, query, ssl)
Example #27
Source File: Legacy.py    From gist-alfred with MIT License 6 votes vote down vote up
def convertIssue(attributes):
    """Translate a legacy issue payload into the current attribute layout.

    ``number``, ``html_url`` and ``user`` are required keys of ``attributes``.
    The API ``url`` is built by prefixing the path of ``html_url`` with
    ``/repos``, and the bare user login is expanded into a
    ``{"login", "url"}`` mapping.  ``labels`` (a list of names) becomes a list
    of ``{"name": ...}`` mappings.  The remaining optional fields are copied
    through unchanged when present.
    """
    converted = {"number": attributes["number"]}
    converted["url"] = "/repos" + urlparse.urlparse(attributes["html_url"]).path
    converted["user"] = {
        "login": attributes["user"],
        "url": "/users/" + attributes["user"],
    }

    if "labels" in attributes:
        converted["labels"] = [{"name": name} for name in attributes["labels"]]

    # Fields that are copied verbatim, in the order they appear in the result.
    passthrough = ("title", "created_at", "comments", "body", "updated_at", "state")
    for field in passthrough:
        if field in attributes:
            converted[field] = attributes[field]

    return converted
Example #28
Source File: api.py    From recipes-py with Apache License 2.0 6 votes vote down vote up
def validate_url(self, v):
    """Check that "v" is an HTTP(S) URL and report whether it uses HTTPS.

    A URL is accepted when its scheme is http or https (case-insensitive)
    and it has a non-empty network location.

    Args:
      * v (str): The URL to validate.

    **Returns (bool)** - True if the scheme is https, False if it is http.

    **Raises:**
      * ValueError: if the scheme is not http/https, or if the network
        location is missing.
    """
    parsed = urlparse.urlparse(v)
    scheme = parsed.scheme.lower()

    if scheme != 'http' and scheme != 'https':
        raise ValueError('URL scheme must be either http:// or https://')

    if not parsed.netloc:
        raise ValueError('URL must specify a network location.')

    return scheme == 'https'
Example #29
Source File: fig20_01.py    From PythonClassBook with GNU General Public License v3.0 6 votes vote down vote up
def getPage( self, event ):
      """Fetch the URL typed into the triggering widget and display it.

      The URL is read from event.widget.  When it has no addressing
      scheme, "http://" is prepended.  The retrieved data is placed in
      self.contents; if retrieval fails with IOError, an error message
      is shown instead.  The contents widget's text_state is set to
      NORMAL while it is updated and back to DISABLED afterwards.
      """

      # parse the URL
      myURL = event.widget.get()
      components = urlparse.urlparse( myURL )
      self.contents.text_state = NORMAL

      # if addressing scheme not specified, use http
      if components[ 0 ] == "":
         myURL = "http://" + myURL

      # connect and retrieve the file
      try:
         tempFile = urllib.urlopen( myURL )

         # close the connection even if reading or displaying fails,
         # so the handle is never leaked
         try:
            self.contents.settext( tempFile.read() ) # show results
         finally:
            tempFile.close()
      except IOError:
         self.contents.settext( "Error finding file" )

      self.contents.text_state = DISABLED
Example #30
Source File: crawl.py    From oxidizr with GNU General Public License v2.0 6 votes vote down vote up
def union(p, q):
    for url in p:
        parsed = urlparse(str(url))
        if parsed.netloc and parsed.netloc != 'www.webhostingtalk.com':
            url = 'http://%s/' % parsed.netloc
        if parsed.netloc and url not in q:
            print url
            if parsed.netloc != 'www.webhostingtalk.com':
                # Insert into Site
                try:
                    Website.objects.create(
                        url=url,
                        name=parsed.netloc,
                        last_crawled_at=datetime.datetime.utcnow().replace(tzinfo=pytz.utc)
                    )
                except IntegrityError:
                    println('%s - already existed in Site' % url)
            else:
                # We want to deep crawl webhosting talk
                q.append(url)