Python validators.url() Examples

The following are 30 code examples of validators.url(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module validators, or try the search function.
Example #1
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 6 votes vote down vote up
def hashchecking( ):
    # Print the VirusTotal report header for the current sample hash,
    # optionally download the sample (when the global `down` flag is set),
    # then terminate the process.
    print("\n")
    print(mycolors.reset)
    print("Main Antivirus Reports")
    print("-" * 25 + "\n")

    vtresult = vtshow(hashtemp, url, param)
    if vtresult == 'Not Found':
        # Pick the error color according to the background mode flag.
        color = mycolors.foreground.lightred if bkg == 1 else mycolors.foreground.red
        print(color + "Malware sample was not found in Virus Total.")

    print(mycolors.reset)

    hashow(hashtemp)
    if down == 1:
        downhash(hashtemp)
    print(mycolors.reset)
    exit(0)
Example #2
Source File: finder.py    From changelogs with MIT License 6 votes vote down vote up
def validate_repo_url(url):
    """
    Validates and formats `url` to be a valid URL pointing to a repo on
    bitbucket.org, github.com, launchpad.net or sourceforge.net.
    :param url: str, URL
    :return: str, valid URL if valid repo, empty string otherwise
    """
    try:
        if "github.com" in url:
            # Fixed regex: the original `w?w?w?.?github.com` left the dots
            # unescaped (matching any character) and accepted stray `w`s,
            # so e.g. "xgithub.com" would slip through.
            return re.findall(r"https?://(?:www\.)?github\.com/[\w\-]+/[\w.-]+", url)[0]
        elif "bitbucket.org" in url:
            return re.findall(r"https?://bitbucket\.org/[\w.-]+/[\w.-]+", url)[0] + "/src/"
        elif "launchpad.net" in url:
            return re.findall(r"https?://launchpad\.net/[\w.-]+", url)[0]
        elif "sourceforge.net" in url:
            mo = re.match(r"https?://sourceforge\.net/projects/"
                          r"([\w.-]+)/", url, re.I)
            template = "https://sourceforge.net/p/{}/code/HEAD/tree/trunk/src/"
            return template.format(mo.groups()[0])
    except (IndexError, AttributeError):
        # No match found (findall empty / re.match returned None).
        pass
    return ""
Example #3
Source File: finder.py    From changelogs with MIT License 6 votes vote down vote up
def find_release_page(session, repo_url):
    # Only GitHub repositories expose a /releases page we can probe.
    if "github.com" not in repo_url:
        return
    logger.debug("Unable to find changelog on {}, try release page".format(repo_url))
    try:
        parts = repo_url.split("/")
        username, reponame = parts[3:5]
        # Probe the HTML releases page; when it exists (HTTP 200), yield the
        # corresponding API URL for further processing by the caller.
        releases_html = "https://github.com/{username}/{reponame}/releases".format(
            username=username, reponame=reponame
        )
        if session.get(releases_html).status_code == 200:
            yield "https://api.github.com/repos/{username}/{reponame}/releases".format(
                username=username, reponame=reponame
            )
    except IndexError:
        logger.debug("Unable to construct releases url for {}".format(repo_url))
Example #4
Source File: scanner.py    From hacker-scripts with MIT License 6 votes vote down vote up
def __init__(self, url, max_worker=10, timeout=3,
                 scan_dict=None, verbose=False, status=None):
        # Scanner state: target URL, worker pool size, per-request timeout
        # and the dictionary file used for path brute-forcing.
        self.site_lang = ''
        self.raw_base_url = url
        self.base_url = url
        self.max_worker = max_worker
        self.timeout = timeout
        self.scan_dict = scan_dict
        self.verbose = verbose
        self.first_item = ''
        self.dict_data = {}
        self.first_queue = []
        self.found_items = {}
        # When no status list (or an empty one) is supplied, fall back to
        # the default set of "interesting" HTTP codes; otherwise coerce each
        # entry to int.
        default_status = [200, 301, 302, 304, 401, 403]
        self.status = [int(t) for t in status] if status else default_status
Example #5
Source File: scanner.py    From hacker-scripts with MIT License 6 votes vote down vote up
def on_response(self, url, item, method, response, queue):
        code = response.code
        if code in self.status:
            # Report each discovered item only once.
            if item not in self.found_items:
                self.found_items[item] = None
                logger.warning('[Y] %s %s %s' % (code, method, url))
                # Automatically scan for editor-generated backup files next
                # to any source file we just found.
                if item.endswith(('.php', '.asp', '.jsp')):
                    backups = [(t, 'HEAD') for t in self.make_bak_file_list(item)]
                    queue.extendleft(backups)
            return
        # 405 Method Not Allowed: retry the same item once with POST.
        if code == 405 and method != 'POST':
            queue.appendleft((item, 'POST'))
        if self.verbose:
            logger.info('[N] %s %s %s' % (code, method, url))
Example #6
Source File: sdk.py    From darwin-sdk with Apache License 2.0 6 votes vote down vote up
def auth_login(self, password, api_key):
        """Log in with an API key and password; on success store and return
        the Bearer token string, otherwise return (False, error text)."""
        self.username = ''
        self.api_key = api_key
        self.password = password
        endpoint = self.server_url + self.routes['auth_login']
        payload = {'api_key': str(api_key), 'pass1': str(password)}
        # get_info() doubles as an SSL certificate check; abort on failure.
        ret = self.get_info()
        if not ret[0]:
            return ret
        r = self.s.post(endpoint, data=payload)
        if not r.ok:
            return False, '{}: {} - {}'.format(r.status_code, r.reason, r.text[0:1024])
        self.auth_string = 'Bearer ' + r.json()['access_token']
        self.token_start_time = time.time()
        return True, self.auth_string
Example #7
Source File: sdk.py    From darwin-sdk with Apache License 2.0 6 votes vote down vote up
def upload_dataset(self, dataset_path, dataset_name=None, has_header=True):
        """Upload a dataset file as multipart form data; the dataset name
        defaults to the file's basename."""
        if dataset_name is None:
            dataset_name = os.path.split(dataset_path)[1]
        endpoint = self.server_url + self.routes['upload_dataset']
        headers = self.get_auth_header()
        if headers is None:
            return False, "Cannot get Auth token. Please log in."
        if not os.path.isfile(dataset_path):
            return False, "File not found"
        with open(dataset_path, 'rb') as f:
            form = encoder.MultipartEncoder({
                "dataset": (str(dataset_path), f, 'text/csv/h5'),
                'dataset_name': str(dataset_name),
                'has_header': str(has_header)
            })
            # "respond-async": the server replies immediately and keeps
            # processing the upload in the background.
            headers.update({"Prefer": "respond-async", "Content-Type": form.content_type})
            r = self.s.post(endpoint, headers=headers, data=form)
        return self.get_return_info(r)
Example #8
Source File: sdk.py    From darwin-sdk with Apache License 2.0 6 votes vote down vote up
def clean_data(self, dataset_name, **kwargs):
        """Clean a dataset, transparently running analyze_data first when
        the server reports that no raw profile exists yet."""
        endpoint = self.server_url + self.routes['clean_data'] + urllib.parse.quote(dataset_name, safe='')
        headers = self.get_auth_header()
        if headers is None:
            return False, "Cannot get Auth token. Please log in."
        r = self.s.post(endpoint, headers=headers, json=kwargs)
        if not r.ok and 'Please run analyze data' in r.text:
            print("Raw profile not found. Running analyze_data")
            char_encoding = kwargs.get('char_encoding', 'utf-8')
            analyze_result = self.analyze_data(dataset_name, char_encoding=char_encoding)
            if not analyze_result[0]:
                # analyze_data failed: propagate its (False, message) tuple.
                return analyze_result
            r = self.s.post(endpoint, headers=headers, json=kwargs)
        return self.get_return_info(r)

    # Create risk information for a dataset 
Example #9
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 5 votes vote down vote up
def vtcheck(filehash, url, param):
    """Query VirusTotal for a file hash and return its detection ratio.

    :param filehash: hash (e.g. sha256) of the sample to look up
    :param url: VirusTotal API endpoint URL
    :param param: unused; kept for call-site compatibility
    :return: "positives/total" string, ' Not Found' when VT does not know
             the hash, or a 5-space placeholder on a JSON decode error
    """
    try:
        params = {'apikey': VTAPI, 'resource': filehash}
        response = requests.get(url, params=params)
        vttext = json.loads(response.text)
        rc = vttext['response_code']
        if rc == 0:
            # Leading space is intentional: callers align this column.
            return ' Not Found'
        # response_code != 1 means the report is still being generated;
        # poll until it is ready. NOTE(review): there is no retry cap, so
        # this can spin forever if VT never finishes -- confirm acceptable.
        while rc != 1:
            time.sleep(20)
            response = requests.get(url, params=params)
            vttext = json.loads(response.text)
            rc = vttext['response_code']

        return str(vttext['positives']) + "/" + str(vttext['total'])

    except ValueError:
        # Non-JSON response body (e.g. rate limiting) -- return a
        # fixed-width filler so table output stays aligned.
        return '     '
Example #10
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 5 votes vote down vote up
def generalstatus(key):
    """Build a summary list for a PE sample.

    :param key: path to the sample file
    :return: list G = [VT ratio (or ''), 'OVERLAY' or '', first-section
             entropy, packed verdict string]
    """
    G = []

    # VirusTotal lookup only when globally enabled (vt == 1).
    vtfinal = ''
    if vt == 1:
        myfilehash = sha256hash(key)
        vtfinal = vtcheck(myfilehash, url, param)
    G.append(vtfinal)

    mype2 = pefile.PE(key)
    # Overlay = data appended past the last section.
    # `is None` instead of the original `== None` (identity check is the
    # correct idiom; the offset is an int or None).
    over = mype2.get_overlay_data_start_offset()
    G.append("" if over is None else "OVERLAY")

    rf = mype2.write()
    entr = mype2.sections[0].entropy_H(rf)
    G.append(entr)

    # packed() is three-state: False -> "no", True -> "PACKED",
    # anything else -> "Likely". Comparisons kept as `==` to preserve
    # behavior if packed() ever returns non-bool values.
    pack = packed(mype2)
    if pack == False:
        result = "no    "
    elif pack == True:
        result = "PACKED"
    else:
        result = "Likely"
    G.append(result)
    return G
Example #11
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 5 votes vote down vote up
def malsharedown(filehash):
    """Download a sample from Malshare by hash and save it under the hash
    as filename.

    Exits with status 1 on broken pipe / IO errors; prints an error
    message on connection problems surfacing as ValueError.
    """
    try:
        resource = filehash
        requestsession3 = requests.Session()
        finalurl3 = ''.join([urlmalshare, MALSHAREAPI, '&action=getfile&hash=', resource])
        malresponse3 = requestsession3.get(url=finalurl3, allow_redirects=True)
        # Context manager guarantees the file handle is closed -- the
        # original `open(...).write(...)` leaked the descriptor.
        with open(resource, 'wb') as sample:
            sample.write(malresponse3.content)

        print("\n")
        print((mycolors.reset + "MALWARE SAMPLE SAVED! "))
        print((mycolors.reset))

    except (BrokenPipeError, IOError):
        print(mycolors.reset, file=sys.stderr)
        exit(1)

    except ValueError as e:
        print(e)
        if bkg == 1:
            print((mycolors.foreground.lightred + "Error while connecting to Malshare.com!\n"))
        else:
            print((mycolors.foreground.red + "Error while connecting to Malshare.com!\n"))
        print(mycolors.reset)
Example #12
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 5 votes vote down vote up
def run(self):
        """Resolve the thread's URL (self.key) to a city via urltoip() and
        print one colored line with URL and location."""
        url = self.key
        # validators.url() returns True on success and a falsy
        # ValidationFailure object otherwise, so a plain truth test is the
        # robust form of the original fragile `== True` comparison.
        if validators.url(url):
            loc = urltoip(url)
        else:
            loc = 'URL not valid.'
        if (bkg == 1):
            print((mycolors.reset + "URL: " + mycolors.foreground.yellow + "%-100s" % url + mycolors.reset + "  City: " + mycolors.foreground.lightred + "%s" % loc + mycolors.reset))
        else:
            print((mycolors.reset + "URL: " + mycolors.foreground.blue + "%-100s" % url + mycolors.reset + "  City: " + mycolors.foreground.red + "%s" % loc + mycolors.reset))
Example #13
Source File: malwoverview.py    From malwoverview with GNU General Public License v3.0 5 votes vote down vote up
def checkandroidvt(key, package):
    # Query VirusTotal for the sample hash and print one aligned, colored
    # row: package name, hash, detection ratio.
    ratio = vtcheck(key, url, param)
    if bkg == 1:
        row = (mycolors.foreground.orange + "%-50s" % package + " " +
               mycolors.foreground.lightcyan + "%-32s" % key + " " +
               mycolors.reset + mycolors.foreground.lightgreen + "%8s" % ratio +
               mycolors.reset)
    else:
        row = (mycolors.foreground.green + "%-08s" % package + " " +
               mycolors.foreground.cyan + "%-32s" % key + " " +
               mycolors.reset + mycolors.foreground.red + "%8s" % ratio +
               mycolors.reset)
    print(row)
Example #14
Source File: jsonld_helpers.py    From cert-schema with MIT License 5 votes vote down vote up
def to_loader_response(data, url):
    """Wrap a fetched document in the standard document-loader dict shape
    (contextUrl / documentUrl / document)."""
    wrapped = {'contextUrl': None}
    wrapped['documentUrl'] = url
    wrapped['document'] = data
    return wrapped
Example #15
Source File: jsonld_helpers.py    From cert-schema with MIT License 5 votes vote down vote up
def load_document(url):
    """Fetch the document at `url` and return the response body as text.

    :param url: URL to fetch; must pass validators.url()
    :return: response body text
    :raises InvalidUrlError: when `url` is not a valid URL
    """
    if not validators.url(url):
        raise InvalidUrlError('Could not validate ' + url)
    response = requests.get(
        url, headers={'Accept': 'application/ld+json, application/json'}
    )
    return response.text
Example #16
Source File: jsonld_helpers.py    From cert-schema with MIT License 5 votes vote down vote up
def jsonld_document_loader(url):
    """
    Retrieve JSON-LD at the given URL and wrap it as a loader response.
    Propagates the error raised by load_document when the url is invalid.
    :param url: the URL to retrieve
    :return: loader-response dict for the JSON-LD at the URL
    """
    return to_loader_response(load_document(url), url)
Example #17
Source File: jsonld_helpers.py    From cert-schema with MIT License 5 votes vote down vote up
def preloaded_context_document_loader(url, override_cache=False):
    # Serve well-known context documents from the preloaded cache and fall
    # back to a network fetch for anything else.
    # NOTE(review): override_cache is currently ignored -- confirm intended.
    if url not in PRELOADED_CONTEXTS:
        return jsonld_document_loader(url)
    cached = PRELOADED_CONTEXTS[url]
    return to_loader_response(cached, url)
Example #18
Source File: action.py    From insightconnect-plugins with MIT License 5 votes vote down vote up
def run(self, params={}):
        """Add or remove an entry in the Apex blacklist.

        When Input.BLACKLIST_STATE is True (the default) the entry is added
        via PUT; when False it is removed via DELETE with type/content
        appended as query parameters.

        :param params: action inputs (see Input.*)
        :return: {Output.SUCCESS: bool}
        :raises PluginException: when the HTTP request fails
        """
        # NOTE(review): mutable default `params={}` is shared across calls;
        # safe only as long as this method never mutates it.
        payload = self.generate_payload(params)
        json_payload = json.dumps(payload)
        blacklist_state = params.get(Input.BLACKLIST_STATE, True)
        if blacklist_state is False:
            # Removal: the API expects type and content as query params.
            method = "DELETE"
            payload_type = payload.get("param", {}).get("type")
            content = payload["param"]["content"]
            self.api_path = f"{self.api_path}?type={payload_type}&content={content}"
        else:
            method = "PUT"

        # The JWT must be computed over the final path, method and body.
        self.connection.create_jwt_token(self.api_path, method, json_payload)
        request_url = self.connection.url + self.api_path

        response = None
        try:
            # NOTE(review): verify=False disables TLS certificate checking;
            # confirm this is a deliberate product requirement.
            response = requests.request(
                method.lower(),
                request_url,
                headers=self.connection.header_dict,
                data=json_payload,
                verify=False
            )
            response.raise_for_status()
            return {Output.SUCCESS: response is not None}
        except RequestException as rex:
            # Surface whatever the server returned before re-raising as a
            # plugin-level error.
            if response:
                self.logger.error(f"Received status code: {response.status_code}")
                self.logger.error(f"Response was: {response.text}")
            raise PluginException(
                assistance="Please verify the connection details and input data.",
                cause=f"Error processing the Apex request: {rex}"
            )
Example #19
Source File: action.py    From insightconnect-plugins with MIT License 5 votes vote down vote up
def get_data_type(indicator):
        """Classify an indicator string as IP, URL, DOMAIN or FILE_SHA1.

        :param indicator: the indicator value to classify
        :return: one of "IP", "URL", "DOMAIN", "FILE_SHA1"
        :raises PluginException: when the indicator matches none of these
        """
        checks = (
            (lambda i: validators.ipv4(i) or validators.ipv6(i), "IP"),
            (validators.url, "URL"),
            (validators.domain, "DOMAIN"),
            (validators.sha1, "FILE_SHA1"),
        )
        for matches, label in checks:
            if matches(indicator):
                return label

        raise PluginException(
            cause="Invalid indicator input provided.",
            assistance="Supported indicators are IP, URL, domain and SHA1 hash."
        )
Example #20
Source File: core.py    From DrHeader with MIT License 5 votes vote down vote up
def _get_headers(url, method, params, request_headers, verify):
        """
        Get headers for specified url.

        :param url: URL of target
        :type url: str
        :param method: (optional) Method to use when doing the request
        :type method: str
        :param params: Request params
        :type params: dict
        :param request_headers: Request headers
        :type request_headers: dict
        :param verify: Verify the server's TLS certificate
        :type verify: bool or str
        :return: headers, status_code (None when url fails validation)
        :rtype: dict, int
        """

        if not validators.url(url):
            # Mirrors the original fall-through: an invalid URL yields None.
            return None
        request_func = getattr(requests, method.lower())
        response = request_func(url, data=params, headers=request_headers, verify=verify)

        headers = response.headers
        cookies = response.raw.headers.getlist('Set-Cookie')
        if len(cookies) > 0:
            # Preserve every Set-Cookie header, not just the merged value.
            headers['set-cookie'] = cookies
        return headers, response.status_code
Example #21
Source File: validations.py    From FDsploit with GNU General Public License v3.0 5 votes vote down vote up
def validateURL(url):
    # Validate a user-supplied URL with validators.url(); return it
    # unchanged on success, otherwise print an error and exit.
    # NOTE: this project is Python 2 (print statement, `except Exception, e`).
    try:
        if validators.url(url):
            return url
        else:
            # FR / S are presumably color escape globals defined elsewhere
            # in the module -- confirm against the imports.
            print '\n{}[x] Invalid url!{}\n'.format(FR,S)
            sys.exit(0)       
    except Exception, e:
        # Any unexpected failure is reported and treated as fatal.
        print e
        sys.exit(0) 
Example #22
Source File: test_format_url.py    From 3gpp-citations with MIT License 5 votes vote down vote up
def test_format_url():
    """
    Verify that `format_url` without the xelatex flag produces a valid URL
    (i.e. one without LaTeX break points).
    """
    produced = standardcitations.format_url("36.331", False)
    assert validators.url(produced)
Example #23
Source File: test_format_url.py    From 3gpp-citations with MIT License 5 votes vote down vote up
def test_format_url_xelatex():
    r"""
    Verify that the xelatex flag makes `format_url` insert LaTeX break
    points (`\-`) into the URL.
    """
    produced = standardcitations.format_url("36.331", True)
    assert r"\-" in produced
Example #24
Source File: test_format_url.py    From 3gpp-citations with MIT License 5 votes vote down vote up
def test_format_url_no_xelatex():
    r"""
    Verify that with the xelatex flag set to False, `format_url` emits no
    LaTeX break points (`\-`).
    """
    produced = standardcitations.format_url("36.331", False)
    assert r"\-" not in produced
Example #25
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def __init__(self, url, verify=True):
        """Fetch and parse a web page.

        :param url: page URL; scheme is optional and "%2F" is decoded to "/"
        :param verify: passed through to requests for TLS verification
        :raises ValueError: when the normalized URL is not a valid URL
        """
        # Define verify behaviour and extract domain from url
        self._verify = verify
        url = url.replace("%2F", "/")
        self._domain = self.findDomain(url)

        # Normalize URL to not contain anything before the domain / subdomain
        try:
            self._url = url[url.index(self._domain):]
        except ValueError as ve:
            # Domain string not found inside the URL -- keep it as given.
            self._url = url
        if not validators.url("http://"+self._url):
            raise ValueError("Not valid URL: "+url+"!")

        # Try getting the header via http request.head
        # NOTE(review): forces plain http:// and a "www." prefix, which may
        # not match the real site -- confirm intended.
        try:
            self._header = requests.head("http://www."+self._url, verify=self._verify).headers
        except requests.exceptions.ConnectionError as ce:
            self._header = "Unknown"

        # Add scrapers headers to identify python scraper on websites
        self._headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'}
        self._html = None
        self.update()

        # Categorize links into intern - extern and domain
        self._links = {"intern":[], "extern":[], "domain":[]}
        self.findLinks()

        # Empty dict in which media will be inserted
        self._media = {}
Example #26
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def getLinks(self, intern=True, extern=True, domain=False):
        """Return the collected links, filtered by category flags, in the
        order intern -> extern -> domain."""
        selected = []
        for enabled, category in ((intern, "intern"),
                                  (extern, "extern"),
                                  (domain, "domain")):
            if enabled:
                selected += self._links[category]
        return selected

    # Extracts the path part of a url according to the first slash occurrence after the domain, which marks the start of the path 
Example #27
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def findDomain(url):
        # Strip scheme and "www." marker, then cut everything from the first
        # "/" onward; the lowercased remainder is the domain.
        # NOTE: replace() removes these substrings anywhere in the string,
        # not only at the start -- preserved from the original behavior.
        for marker in ("https://", "http://", "www."):
            url = url.replace(marker, "")
        slash = url.find("/")
        if slash != -1:
            url = url[:slash]
        return url.lower()

    # Folder is the part of a url without the file, i.e. without the part after the last slash 
Example #28
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def findFolder(url):
        # The URL with its final path component removed: everything up to,
        # but excluding, the last "/". Raises ValueError when no "/" exists.
        cut = url.rindex("/")
        return url[:cut]
Example #29
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def findLinks(self):
        """Populate self._links by scanning all <a href> tags in the page
        HTML, categorizing each as internal or external.

        Pages whose URL ends in a known binary/media extension are skipped.
        """
        # print("Finding links of "+self._url)
        # Defined filetypes that are to ignore
        endings = [".jpg", ".jpeg", ".png", ".tiff", ".gif", ".pdf", ".svc", ".ics", ".docx", ".doc", ".mp4", ".mov",
                   ".webm", ".zip", ".ogg"]
        for end in endings:
            if self._url.lower().endswith(end):
                return

        # Parse request as lxml and extract a-tags
        soup = BeautifulSoup(self._html, "lxml")
        links = soup.findAll("a")
        for link in links:
            # Filter out the href link
            link = str(link.get("href")).replace("../", "")
            # Break when the link is None or consists of some javascript that could not be read out
            # NOTE(review): `break` aborts the whole loop on the first such
            # link, skipping every remaining <a> tag -- confirm that
            # `continue` was not intended here.
            if link == "None" or "JavaScript:" in link:
                break
            # Categorize link according to its form: absolute links that
            # contain our domain are internal, other absolute links are
            # external, and bare paths are retried as internal links.
            if validators.url(link) and "mailto:" not in link:
                if self._domain in link.lower():
                    self.addInternal(self._domain + link[link.lower().index(self._domain)+len(self._domain):])
                else:
                    self.addExternal((Page.normalize(link)))
            else:
                if validators.url("http://www."+self._domain+"/"+link) and "mailto:" not in link:
                    self.addInternal((self._domain + "/" + link))

    # Add a link to the appropriate list after removing everything before the domain 
Example #30
Source File: Page.py    From Scrapeasy with MIT License 5 votes vote down vote up
def findSrc(self, *tags):
        """Collect `src` URLs of nested tag sequences in the page HTML.

        The tags are applied in order: find all <tags[0]>, then within each
        match all <tags[1]>, and so on; the innermost matches' src
        attributes are resolved (relative to the page folder when needed)
        and returned as a list.
        """
        collected = []
        # BeautifulSoup occasionally raises NotImplementedError on odd markup.
        try:
            soup = BeautifulSoup(self._html, "html.parser")
        except NotImplementedError as err:
            print("Not implemented error occurred!")
            print(err.args)
            return []
        # Narrow the candidate set one tag level at a time.
        matches = soup.find_all(tags[0])
        for tag in tags[1:]:
            next_level = []
            for candidate in matches:
                next_level += candidate.find_all(tag)
            matches = next_level
        # Find source in tag and add link according to its structure.
        for node in matches:
            img_url = str(node.get("src")).lower()
            if self._domain in img_url:
                self.add(collected, img_url)
            elif img_url[0] == "/":
                self.add(collected, self.findFolder(self._url) + img_url)
            elif validators.url(img_url):
                self.add(collected, img_url)
            else:
                self.add(collected, self.findFolder(self._url) + "/" + img_url)
        return collected


# Pagemedia is the version of Page that is always including all functionality, multi-inheritence will be used here later on