# -*- coding: utf-8 -*- ''' Created on 2015-02-22 BurpSmartBuster @author: @pathetiq @thanks: Abhineet & @theguly @version: 0.3 @summary: This is a Burp Suite extension which discover content with a smart touch. A bit like “DirBuster” and “Burp Discover Content”, but smarter and being integrated into Burp Suite this plugin looks at words in pages, the domain name, the current directories and filename to help you find hidden files, directories and information you usually don't with a static dictionary file that brute force its way on the web server. @bug: URL with variable, no file, no extension or weird variable separate by ; :, etc. breaks the directories/files listing @todo: technology detection and scanning, community files, add 404 detection in output, threads speeds and adjustments @todo: Add results to an issue. add tested files somewhere, add found file to sitemap. ''' import os os.environ["NLTK_DATA"] = os.path.join(os.getcwd(), "nltk_data") #sys imports import sys #Find the jython path where our prerequisites packages are installed import site for site in site.getsitepackages(): sys.path.append(site) #Examples of paths if needed #sys.path.append("/home/USERNAME/.local/lib/python2.7/site-packages/") #sys.path.append("/usr/local/lib/python2.7/site-packages") ##sys.path.append("/usr/lib/python2.7/dist-packages/") #sys.path.append("/home/USERNAME/Documents/Apps/TextBlob") #sys.path.append("/home/USERNAME/Documents/Apps/nltk") #burp imports from burp import IBurpExtender from burp import IScanIssue from burp import IScannerCheck from burp import IScannerInsertionPoint from burp import IHttpListener from burp import IBurpExtenderCallbacks #UI Import from burp import IContextMenuFactory from java.util import List, ArrayList from burp import ITab from javax.swing import JPanel, JLabel, JMenuItem, JTextField, JList, DefaultListModel, JButton, JFileChooser from javax.swing import JScrollPane, ListSelectionModel, GroupLayout, ButtonGroup, JRadioButton from java.awt import Dimension from java.awt import Toolkit from java.awt.datatransfer import StringSelection #utils imports from array import array from java.io import PrintWriter from java.net import URL import os import ConfigParser import json import logging from tld import get_tld import hashlib import random #spidering from bs4 import BeautifulSoup import Queue #Parse HTML comments from bs4 import Comment import re from urlparse import urlparse #requester import requests import csv from collections import deque import threading #text tokenization & natural language lib locals() #TODO: REVALIDATE the following : file /usr/local/lib/python2.7/dist-packages/nltk/internals.py line 902 has been change to remove os.getgroups() to compile in Burp...Jhython? #http://textminingonline.com/getting-started-with-textblob from textblob import TextBlob '''---------------------------------------------------------------------------------------------------------------------------------------- BurpSmartBuster Logging object and config ----------------------------------------------------------------------------------------------------------------------------------------''' class Logger(): LOG_FILENAME = 'BSB.log' DEFAULT_LEVEL = logging.DEBUG def __init__(self,name=LOG_FILENAME,level=DEFAULT_LEVEL): #define configs self._default_level=level self._name = name print "Log file is: " + name logging.basicConfig(filename=self._name+".log", level=self._default_level, format="%(asctime)s - [%(levelname)s] [%(threadName)s] (%(funcName)s:%(lineno)d) %(message)s", ) self._logger = logging.getLogger(name) return def getLogger(self): return self._logger '''---------------------------------------------------------------------------------------------------------------------------------------- BurpSmartBuster main class (BurpExtender) ----------------------------------------------------------------------------------------------------------------------------------------''' class BurpExtender(IBurpExtender, IScanIssue, IScannerCheck, IScannerInsertionPoint,IHttpListener, IBurpExtenderCallbacks, IContextMenuFactory, ITab): # definitions EXTENSION_NAME = "BurpSmartBuster" AUTHOR = "@pathetiq" def registerExtenderCallbacks(self, callbacks): # keep a reference to our callbacks object self._callbacks = callbacks # obtain an extension helpers object self._helpers = callbacks.getHelpers() # define stdout writer self._stdout = PrintWriter(callbacks.getStdout(), True) print(self.EXTENSION_NAME + ' by ' + self.AUTHOR) print('================================') print('This extension will create new requests for ALL "in scope" HTTP request made through Burp. Make sure to filter scope items') print('For help or any information see the github page or contact the author on twitter.') print('Note: The Spider currently only supports English, see author github page for new language installation instructions') # set our extension name callbacks.setExtensionName(self.EXTENSION_NAME) callbacks.registerScannerCheck(self) callbacks.registerHttpListener(self) callbacks.registerContextMenuFactory(self) #Initialize tab details #fields of options setBounds(x,y,width,heigth) self.verboseLabel = JLabel("Verbose") self.verboseLabel.setBounds(10,10,130,30) self.yesVerboseButton = JRadioButton("Yes") self.yesVerboseButton.setSelected(True) self.yesVerboseButton.setBounds(10,40,50,30) self.noVerboseButton = JRadioButton("No") self.noVerboseButton.setBounds(70,40,50,30) self.buttonGroup = ButtonGroup() self.buttonGroup.add(self.yesVerboseButton) self.buttonGroup.add(self.noVerboseButton) self.spiderPagesLabel = JLabel("Spider: Nbr of pages") self.spiderPagesLabel.setBounds(10,70,200,30) self.spiderPagesTextField = JTextField(300) self.spiderPagesTextField.setText("5") self.spiderPagesTextField.setBounds(10,100,300,30) self.spiderPagesTextField.setPreferredSize( Dimension( 250, 20 ) ) self.spiderRecPagesLabel = JLabel("Recursive: Nbr of pages") self.spiderRecPagesLabel.setBounds(10,130,250,30) self.spiderRecPagesTextField = JTextField(300) self.spiderRecPagesTextField.setText("3") self.spiderRecPagesTextField.setBounds(10,160,300,30) self.spiderRecPagesTextField.setPreferredSize( Dimension( 250, 20 ) ) self.fileTypeLabel = JLabel("Ignore Filetypes") self.fileTypeLabel.setBounds(10,190,130,30) self.fileTypeTextField = JTextField(300) self.fileTypeTextField.setText("gif,jpg,png,css,js,ico,woff") self.fileTypeTextField.setBounds(10,220,300,30) self.fileTypeTextField.setPreferredSize( Dimension( 250, 20 ) ) self.inScopeLabel = JLabel("Scan in-scope URLs only?") self.inScopeLabel.setBounds(10,250,200 ,30) self.yesInScopeButton = JRadioButton("Yes") self.yesInScopeButton.setBounds(10,280,50,30) self.yesInScopeButton.setSelected(True) self.noInScopeButton = JRadioButton("No") self.noInScopeButton.setBounds(70,280,50,30) self.buttonGroup1 = ButtonGroup() self.buttonGroup1.add(self.yesInScopeButton) self.buttonGroup1.add(self.noInScopeButton) self.refreshConfigButton = JButton("Update Configuration", actionPerformed=self.updateConfig) self.refreshConfigButton.setBounds(10,310,200,30) #Jlist to contain the results self.list = JList([]) self.list.setSelectionMode(ListSelectionModel.MULTIPLE_INTERVAL_SELECTION) self.list.setLayoutOrientation(JList.VERTICAL) self.list.setVisibleRowCount(-1) self.listScroller = JScrollPane(self.list,JScrollPane.VERTICAL_SCROLLBAR_AS_NEEDED,JScrollPane.HORIZONTAL_SCROLLBAR_AS_NEEDED) self.listScroller.setBounds(510,40,500,500) #self.listScroller.setPreferredSize(Dimension(400, 500)) self.urlFoundLabel = JLabel("URLs Found") self.urlFoundLabel.setBounds(510,10,130,30) self.listScroller.setPreferredSize(Dimension(500, 100)) self.listScroller.setViewportView(self.list) self.clearListButton = JButton("Clear list", actionPerformed=self.clearList) self.clearListButton.setBounds(350,40,150,30) self.copyListButton = JButton("Copy Selected", actionPerformed=self.copyList) self.copyListButton.setBounds(350,70,150,30) self.deleteListButton = JButton("Delete Selected", actionPerformed=self.deleteSelected) self.deleteListButton.setBounds(350,100,150,30) self.exportListButton = JButton("Export list", actionPerformed=self.exportList) self.exportListButton.setBounds(350,130,150,30) #main panel self.mainpanel = JPanel() self.mainpanel.setLayout(None) self.mainpanel.add(self.verboseLabel) self.mainpanel.add(self.yesVerboseButton) self.mainpanel.add(self.noVerboseButton) self.mainpanel.add(self.spiderPagesLabel) self.mainpanel.add(self.spiderPagesTextField) self.mainpanel.add(self.spiderRecPagesLabel) self.mainpanel.add(self.spiderRecPagesTextField) self.mainpanel.add(self.fileTypeLabel) self.mainpanel.add(self.fileTypeTextField) self.mainpanel.add(self.inScopeLabel) self.mainpanel.add(self.yesInScopeButton) self.mainpanel.add(self.noInScopeButton) self.mainpanel.add(self.refreshConfigButton) self.mainpanel.add(self.urlFoundLabel) self.mainpanel.add(self.listScroller) self.mainpanel.add(self.clearListButton) self.mainpanel.add(self.copyListButton) self.mainpanel.add(self.deleteListButton) self.mainpanel.add(self.exportListButton) callbacks.customizeUiComponent(self.mainpanel) callbacks.addSuiteTab(self) #set default config file name and values #only smart is use, keeping other for future development self._configSmart_Local = False self._configSmart_Smart = True self._configSmart_File = False self._configSmart_Spider = False self._trailingSlash = True #To be fetch from the UI settings self._configSpider_NumberOfPages = 5 self._verbose = False self._ignoreFileType = ["gif","jpg","png","css","js","ico","woff"] #keeping to use it self._configInScope_only = True self._configSpider_NumberOfPages = 5 #Get a logger object for logging into file loggerTemp = Logger(self.EXTENSION_NAME,logging.DEBUG) self._logger= loggerTemp.getLogger() #get the config file, will overwrite default config if the ini file is different #self.getSmartConfiguration() #get config from the UI self.updateConfig("") #words gather on the page from the spidering self._words = {} self._mergedWords = {} #robots.txt list self._robots = {} self._robotsScanned = {} #sitemap.xml list self._sitemap = {} #url in comments self._urlsInComment = {} #domain names to query current url/path/files for hidden items self._smartDomain = {} #sitemap and robots scanned once self._siteRobotScanned = {} #Load our BSB json data self._jsonFile = "data.json" jsonfile = open(self._jsonFile) self._parsed_json = json.load(jsonfile) jsonfile.close() #define the request object to use each time we need to call a URL self._requestor = Requestor(self._logger,self) #Variable to define if unique data has already been grabbed self._smartRequestData = {} self._smartRequestPath = {} self._smartRequestFiles = {} #number of time the spider have run self._spiderRan = {} #Array of domain. If domain exist. Spider did ran! return ''' Graphic Functions ''' def createMenuItems(self, contextMenuInvocation): self._contextMenuData = contextMenuInvocation.getSelectedMessages() menu_list = ArrayList() menu_list.add(JMenuItem("Send to BurpSmartBuster",actionPerformed=self.menuItemClicked)) return menu_list def menuItemClicked(self, event): data = self.getURLdata(self._contextMenuData[0],True) self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString()) self._logger.debug("Executing: smartRequest() from menuItemClicked") thread = threading.Thread( target=self.smartRequest, name="Thread-smartRequest", args=[data],) thread.start() # Implement ITab def getTabCaption(self): return self.EXTENSION_NAME # Return our panel and button we setup. Components of our extension's tab def getUiComponent(self): return self.mainpanel '''------------------------------------------------ Extension Unloaded ------------------------------------------------''' def extensionUnloaded(self): self._logger.info("Extension was unloaded") return '''------------------------------------------------ VERBOSE FUNCTION Display each tested URL ------------------------------------------------''' def verbose(self,text): #Is verbose on or off from config file? if self._verbose == True: print "[VERBOSE]: "+text return '''------------------------------------------------ GRAPHICAL FUNCTIONS for BUTTONS ------------------------------------------------''' def getRecursiveConfig(self): return int(self.spiderRecPagesTextField.getText()) #refresh the config from the UI def updateConfig(self,meh): self._configSpider_NumberOfPages = int(self.spiderPagesTextField.getText()) if self.yesVerboseButton.isSelected(): self._verbose = True else: self._verbose = False if self.yesInScopeButton.isSelected(): self._configInScope_only = True else: self._configInScope_only = False fileType = [] fileTypeStr = self.fileTypeTextField.getText() self._ignoreFileType = self.fileTypeTextField.getText().split(",") self._logger.info("Config changed: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType)) print "Now using config: " + "spiderNbrPages=" + str(self._configSpider_NumberOfPages) + ", Verbose is:" + str(self._verbose) + ", InScope is:" + str(self._configInScope_only) + ", fileTypeIgnored: " + str(self._ignoreFileType) return #add a URL to the list def addURL(self,url): list = self.getListData() list.append(url) self.list.setListData(list) return #return the who list def getListData(self): list = [] for i in range(0, self.list.getModel().getSize()): list.append(self.list.getModel().getElementAt(i)) return list #Clear the list def clearList(self,meh): self.list.setListData([]) return #Copy to clipboard def copyList(self,meh): clipboard = Toolkit.getDefaultToolkit().getSystemClipboard() list = self.getListData() selected = self.list.getSelectedIndices().tolist() copied = "" urls = "" for i in selected: url = str(list[i]).split(',')[0] urls = urls+str(url)+"\n" clipboard.setContents(StringSelection(urls), None) return #Delete selected item from the list def deleteSelected(self,meh): x = self.list.getSelectedIndices().tolist() list = self.getListData() for i in reversed(x): del list[i] self.list.setListData(list) return #TODO: save as the list def exportList(self,meh): fd = JFileChooser() dialog = fd.showDialog(self.mainpanel, "Save List As") dataList = self.getListData() urls = "" if dialog == JFileChooser.APPROVE_OPTION: file = fd.getSelectedFile() path = file.getCanonicalPath() try: with open(path, 'w') as exportFile: for item in dataList: url = str(item).split(',')[0] exportFile.write(url+"\n") except IOError as e: print "Error exporting list: " + str(e) self._logger.debug("Error exporting list to: " + path + ", Error: " + str(e)) return '''------------------------------------------------------------------------------------------------ MAIN FUNCTION / WHERE EVERYTHING STARTS For every request which isn't created from the Extender(this might have to be change) The request is analyse and related to the config options new request are create to test if specific files/paths/directories exists. ------------------------------------------------------------------------------------------------''' def processHttpMessage(self, toolFlag, messageIsRequest, messageInfo): #IHttpRequestResponse message info #TODO: not from repeater and intruder --> set in ini file too! --> and toolFlag != self._callbacks.TOOL_EXTENDER #This is required to not LOOP Forever as our plugin generate requests! if toolFlag == self._callbacks.TOOL_PROXY and toolFlag != self._callbacks.TOOL_EXTENDER and toolFlag != self._callbacks.TOOL_SCANNER: #Get an Urldata object to use later data = self.getURLdata(messageInfo,messageIsRequest) #VERIFICATION: if URL is in scope we do scan if not self._callbacks.isInScope(data.getUrl()): #self._callbacks.includeInScope(url) self._logger.info("URL not in scope: " + data.getUrl().toString()) return if messageIsRequest: self._logger.debug("Entering: processHttpMessage() REQUEST") self._logger.debug("Request from domain: "+data.getDomain()) #REJECT specific extension on request if data.getFileExt() in self._ignoreFileType: self._logger.info("FILETYPE IGNORED: " + data.getUrl().toString()) return ############################################### # Decide which mode to use based on ini config ############################################### #from browsed file only if self._configSmart_Smart: self._logger.info("SMARTREQUEST FOR: "+data.getUrl().toString()) self._logger.debug("Executing: smartRequest()") thread = threading.Thread( target=self.smartRequest, name="Thread-smartRequest", args=[data], ) thread.start() thread.join() #wordlist adjust with the domain name elif self._configSmart_Local: self._logger.debug("Executing: localRequest()") self.localRequest(data) #your own wordlist, no smart here elif self._configSmart_File: self._logger.debug("Executing: fileRequest()") self.fileRequest(data) #spidered items only. Like smart but it browse for you. elif self._configSmart_Spider: self._logger.debug("Executing: spiderRequest()") self.spiderRequest(data) else: #if response self._logger.debug("Entering: processHttpMessage() RESPONSE") ############################################### # Decide which mode to use based on ini config ############################################### #VERIFICATION: if URL is in scope we do scan #if not self._callbacks.isInScope(data.getUrl()): # #self._callbacks.includeInScope(url) # self._logger.info("URL %s not in scope: " % data.getUrl()) # return #from browsed file only #TODO: sniff JS and CSS file for URLS #if self._configSmart_Smart: self._logger.debug("Executing: getUrlInComments()") thread = threading.Thread( target=self.getUrlInComments, name="Thread-getUrlInComments", args=[data], ) thread.start() thread.join() return '''---------------------------------------------------------------------------------------------------------- BurpSmartBuster main class (BurpExtender) Only spidering to gather the more page and test those ----------------------------------------------------------------------------------------------------------''' def spiderRequest(self, data): return '''---------------------------------------------------------------------------------------------------------- Use BSB files on all visited page ----------------------------------------------------------------------------------------------------------''' def localRequest(self, data): return '''---------------------------------------------------------------------------------------------------------- Use user supply file on all visited page ----------------------------------------------------------------------------------------------------------''' def fileRequest(self, data): return '''---------------------------------------------------------------------------------------------------------- Use the logic, based on the BSB files and data from the website This is where all the magic happens. We want to : - Call some file extension for the file we browsed to -TODO: Get a huge list - Extension - User file, windows, linux, osx - Call some path when browsing a new path (even when it is a file) - default path list - Call some files when browsing a new path - user files windows, osx, linux - backup list - autosave list - svn, git list - CMS - Web server, etc. - Get robots.txt and sitemap data - Brute force up to 2 or 3 letters of files names and path on all found path which is not cms/git/etc. - Future version: Parse HTML comments for path - If they exist, we add them to XXX? - If new path exists, let's go recursive (new class?) - If file exists: add to sitemap + verbose + log @param data: UrlData object containing all information about the URL ----------------------------------------------------------------------------------------------------------''' def smartRequest(self,data): #Current request variables domain = data.getDomain() url = data.getUrl() ##################### FETCH DATA ############################### # Gather smart data once before sending requests ################################################################ self._logger.debug("Has the Data been gathered for? : "+ str(url)) if domain not in self._smartRequestData: try: self._smartRequestData[domain] = True self._logger.debug("no") self._logger.info("Fetching data for: "+ domain) print "getting data for:" + str(url) self.getSmartData(data) except Exception as e: print "exception:"+ e self._smartRequestData[domain] = False return False else: self._logger.debug("yes") # Execution of request with the received data: # - spider # - sitemap # - robots # - current directories # - commentsInUrl # json data: # - extension files # - common basic cms files # - common server files # - common user files # - common test files # - common repositories files # - # - ''' For the current directories (path) - Test a path/file for a category of path/files - If a tested path/files exist (200/401/403/500) scan other files + - add to sitemap and LOG + add issues? - If not skip it - go 3 deep max and retest all TODO future version: Pseudo algo: Si le present url est un fichier: - Si c'Est un fichier php... tester phps extension. - si c'Est un fichier asmx, tester les wsdl Si c'Est un path: - si ca inclus un path dans sharepoint, tester les sharepoints - si ca inclus un fichier de wordpress ou drupal, tester quelques fichiers cms - Si on trouve un répertoire de type X, effectuer une recherche sur les fichiers de type X dans le repertoire trouvé ''' #Current request data baseUrl = data.getBaseUrl() path = data.getPath() filename = data.getFilename() extension = data.getFileExt() print "CURRENT FILE: " + baseUrl + "," + filename + "," + extension #data.json sections: extensions, fileprefix, filesuffix, files, directories #test local file #if current url is a file: test extentions + intelligent details #AND we test current file with prefix and suffix #testing directories #if current URL have some directories test them out #Test them with FILES and DIRECTORIES. Including the current directory (last in path) #with the smart data test robots path and files #test N url from sitemap #in current paths test files and path using domainname and domain without the tld #with filename generated + extensions and path/filenamegenerated ''' print "EXTENSIONS" for extension in self._parsed_json["extensions"]: print extension["name"] print "SUFFIX PREFIX" for prefix in self._parsed_json["fileprefix"]: print prefix["name"] for suffix in self._parsed_json["filesuffix"]: print suffix["name"] print "FILES" for files in self._parsed_json["files"]: print files["name"] ''' print "DIRECTORIES" #Directories data information directories = data.getDirectories() directory = "/" slash = "" #force slash or not var #get options foir trailing slash. By default it's ON if self._trailingSlash: slash = "/" ##################### EXECUTE DATA.json REQUESTS ################### # Build Request to be execute based on our data.json # and getSmartData results ################################################################ #TODO: important put tested directories and files in a dictionnary or array #TODO: important put tested directories and files in a dictionnary or array #TODO: important put tested directories and files in a dictionnary or array #TODO: important put tested directories and files in a dictionnary or arrayà ######################## # Technology scanner ######################## ''' - do a request to root dir - get response (check for redirect) - check headers - check file extensions - depending on results scan X files. - Set current domain technologyVar to X ''' ################ #Scan the root directory! ################ print "DIR: "+str(directories) if not directories: directories = ["/"] # response will be dealed in requestor for dir in directories: print "TESTING: " + dir if dir == "/": directory = "/" else: directory = directory+dir+"/" #test all directories: / /a/ /a/b/ /a/b/c/ ... #call our directories inside all request directires for dir2 in self._parsed_json["directories"]: self.verbose("RequestDir for: "+baseUrl+directory+dir2["name"]+slash) self._requestor.addRequest(baseUrl+directory+dir2["name"]+slash,data) # call directories based on domain information: url/a/b/c/smartDomain , url/a/b/smartDomain/, etc. #print "SMARTDOMAIN"+self._smartDomain for dir2 in self._smartDomain[domain]: self.verbose("RequestSmartDomain for: " + baseUrl + directory + dir2) self._requestor.addRequest(baseUrl + directory + dir2,data) #in each directory call smartDomain.extensions for ext in self._parsed_json["extensions"]: self.verbose("RequestSmartDomain.ext for: " + baseUrl + directory + dir2 + ext["name"]) self._requestor.addRequest(baseUrl + directory + dir2 + ext["name"],data) #call our files in all directories #print "parsed json"+self._parsed_json["files"] for files in self._parsed_json["files"]: self.verbose("RequestFile for: "+baseUrl+directory+files["name"]) self._requestor.addRequest(baseUrl+directory+files["name"],data) ################ #If URL is a file, let's try to add some extension to the file ################ if extension: #replace current file extension for our extension tempFilenameUrl = baseUrl+directory+filename tempFilenameUrl1 = baseUrl+directory+filename+"."+extension for ext in self._parsed_json["extensions"]: self.verbose("RequestExt for: "+ tempFilenameUrl+ext["name"]) self.verbose("RequestFileExt for: "+ tempFilenameUrl1+ext["name"]) self._requestor.addRequest(tempFilenameUrl+ext["name"],data) self._requestor.addRequest(tempFilenameUrl1+ext["name"],data) #add a prefix to current file tempFilenameUrl = baseUrl+directory for prefix in self._parsed_json["fileprefix"]: tempFilenameUrl1 = tempFilenameUrl+prefix["name"]+filename+"."+extension self.verbose("RequestPrefix for: "+tempFilenameUrl1) self._requestor.addRequest(tempFilenameUrl1,data) #add suffix to current file tempFilenameUrl = baseUrl+directory for suffix in self._parsed_json["filesuffix"]: tempFilenameUrl1 = tempFilenameUrl+filename+suffix["name"]+"."+extension self.verbose("RequestSuffix for: "+tempFilenameUrl1) self._requestor.addRequest(tempFilenameUrl1,data) #make sure we have some data #print "DATA RECEIVED" #print self._words[domain] #print self._mergedWords ##need to call the emrge function if needed #print self._robots[domain] #print str(len(self._sitemap[domain])) #print str(self._urlsInComment[domain]) ##################### EXECUTE SMART REQUESTS ################### # Build Request to be execute based on our data.json # and getSmartData results ################################################################ #list of smart directories smartDirectories = {} #list of smart files (add our extension to it) smartfiles = {} ################ #Request N pages from sitemap ################ if domain not in self._siteRobotScanned: #Do it once self._siteRobotScanned[domain] = True #done for this domain tmpSiteMap = [] for i in range(0,self._configSpider_NumberOfPages): #get N number of pages from ini config tmpSiteMap.append(self._sitemap[domain][i]) #Requests files and directories from robots.txt tmpRobots = [] for line in self._robots[domain]: #in case robots.txt use ending wildcard we remove it if line.endswith("*"): line = line[:-1] #TODO: Test if directory or file is not 404 ?? tmpRobots.append(baseUrl+line) ################ # requests all value for N sitemap url ################ for link in tmpSiteMap: if link.endswith("/"): #scan directories and files for dir2 in self._parsed_json["directories"]: self.verbose("RequestSiteMap dir/file for: " + link + dir2["name"] + slash) self._requestor.addRequest(link + dir2["name"] + slash,data) for files in self._parsed_json["files"]: self.verbose("RequestSiteMap dir/file for: " + link + files["name"]) self._requestor.addRequest(link + files["name"],data) else: #scan extensions and suffix/prefix # call our files in all directories for ext in self._parsed_json["extensions"]: self.verbose("RequestSitemap file/ext/ext for: " + link + ext["name"]) self._requestor.addRequest(link + ext["name"],data) #Get the file extension of the current sitemap url to replace the extension tmpUrl = urlparse(link) if len(tmpUrl.path.split(".")[-1:]) > 1: newUrl = ".".join(tmpUrl.path.split(".")[:-1])+ext["name"] self.verbose("RequestSiteMap file/ext for: " + newUrl) self._requestor.addRequest(newUrl,data) ################ #requests all values for robots path ################ for link in tmpRobots: tmpUrl = baseUrl + link if link.endswith("/"): # scan directories and files for dir2 in self._parsed_json["directories"]: self.verbose("RequestRobots dir/file for: " + tmpUrl + dir2["name"] + slash) self._requestor.addRequest(tmpUrl + dir2["name"] + slash,data) for files in self._parsed_json["files"]: self.verbose("RequestRobots dir/file for: " + tmpUrl + files["name"]) self._requestor.addRequest(tmpUrl + files["name"],data) else: for ext in self._parsed_json["extensions"]: self.verbose("RequestRobots file/ext/ext for: " + tmpUrl + ext["name"]) self._requestor.addRequest(tmpUrl + ext["name"],data) #Get the file extension of the current sitemap url to replace the extension tmpUrl1 = urlparse(link) if len(tmpUrl1.path.split(".")[-1:]) > 1: newUrl = ".".join(tmpUrl1.path.split(".")[:-1])+ext["name"] self.verbose("RequestRobots file/ext for: " + newUrl) self._requestor.addRequest(newUrl,data) #TODO : path and words/merge words ################ #Request from words ################ #print self._words #TODO: loop over: sitemap (done), robots (done), words/mergedwords(fixed for textblob required), bruteforce(later) Maybe comments data? # - add the data to our stack to request and parse by the Requestor object # - Get current query path and files & Filter out static object from the request (images,etc.) #filter out: gif,jpg,png,css,ico print "Done. Waiting for more URL...!" '''---------------------------------------------------------------------------------------------------------- Get the data for smartRequest(), it will fills our list of words which will be our smart logic data to create multiple new HTTP requests. This data should be gather once. ----------------------------------------------------------------------------------------------------------''' #TODO: split some of this works in different functions def getSmartData(self, data): ################################################################ # Get the url and its data to create the new smart requests ################################################################ urlString = str(data.getUrl()) #cast to cast to stop the TYPEerror on URL() domain = data.getDomain() netloc = data.getNetloc() directories = data.getDirectories() lastDirectory = data.getLastDirectory() params = data.getParams() fileExt = data.getFileExt() completeUrl = data.getCompleteURL() baseUrl = data.getBaseUrl() #Java URL to be used with Burp API url = URL(urlString) self._logger.debug("Current URLString: "+urlString) ######################### SPIDER EXECUTION ##################### # Get some words from the web page: do it once! # Note: This step could be threaded using Queue.Queue but there is # little advantage as we need to wait to get all the value anyway ################################################################ self._logger.debug("Has the Spider ran for? : "+ domain) if domain not in self._spiderRan: #doing it once self._spiderRan[domain] = True self._logger.debug("No") #self._mergedWords[domain] = {} #self._words[domain] = {} #Start URL, number of page to spider through, request class object to use spider = Spider(data, self._configSpider_NumberOfPages, self._requestor,self._logger) spider.runSpidering() #Get words from the spidering self._words[domain] = spider.getWords() #Get merged words #spider.mergeWords() #self._mergedWords[domain] = spider.getMergedWords() self._logger.debug("Length of Words: "+ str(len(self._words[domain]))) #self._logger.debug("Length of MergedWords: "+ str(len(self._mergedWords[domain]))) self._logger.info("SPIDER DONE") else: self._logger.debug("Yes") ################################################################ # Get robots.txt (once) # Retrieve unique path and files from the robots.txt ################################################################ if domain not in self._robots: #do it once print " robot " #get the file queueRobot = Queue.Queue(1) self._logger.info("robot") thread = threading.Thread( target=self._requestor.runRequest, name="Thread-Robots", args=[baseUrl+"/robots.txt", queueRobot], ) thread.start() thread.join() response = queueRobot.get() #Parse the file for disallow lines robotList = [] for item in response.content.split('\n'): if item: i = item.split(':') if i[0].lower() == "disallow" and i[1] not in robotList: robotList.append(i[1]) #add to domain list self._robots[domain] = robotList self._logger.debug("ROBOT LIST for : " + domain + ":") for item in self._robots[domain]: self._logger.debug(item) self._logger.info("ROBOTS DONE") else: print "no robot" self._logger.debug("Robots.txt already checked for: " + baseUrl) ################################################################ # Get sitemap.xml (once) # test those url for all files/extensions if not in local deque yet ################################################################ if domain not in self._sitemap: print " sitemap " queueSitemap = Queue.Queue(1) thread = threading.Thread( target=self._requestor.runRequest, name="Thread-Sitemap", args=[baseUrl+"/sitemap.xml", queueSitemap], ) thread.start() thread.join() response = queueSitemap.get() soup = BeautifulSoup(response.content, "html.parser") #Parse the XML TODO: for N instance related to .ini config sitemapList = [] for url in soup.findAll("loc"): sitemapList.append(url.text) self._sitemap[domain] = sitemapList self._logger.debug("Sitemap.xml nbr of items: "+str(len(self._sitemap[domain]))) self._logger.info("SITEMAP DONE") else: print "no sitemap" ################################################################ # Get domain name relative values # test those names for directory, files with extension ################################################################ print "smartDomain" tmpDomValue = [] if domain == "localhost": tmpDomValue.append(domain) else: tld = get_tld(urlString, as_object=True) tmpDomValue.append(tld.domain) tmpDomValue.append(tld.tld) if tld.subdomain: tmpDomValue.append("".join(tld.subdomain+"." + tld.tld)) self._smartDomain[domain] = tmpDomValue ######################## BRUTE FORCE DATA ###################### # 1, 2 or 3 letters brute force of current directory # Has the current directory been test already? No: do it #brute force function or object? ################################################################ #TODO: Later version #charset = "abcdefghijklmnopqrstuvwxyz0123456789_-" #for a in itertools.product(charset,repeat=2): # sub="".join(a) return True '''---------------------------------------------------------------------------------------------------------- Get the information inside response for smartRequest() It will look for URL and email domain inside HTML comments @todo: Optimize the IFs in the comment for loop! ----------------------------------------------------------------------------------------------------------''' def getUrlInComments(self,data): ################### CURRENT DIRECTORIES/FILES ################## # Get current directory(ies) # validate if tested already # If not deal with: test directories and files at currentPath # New class object? ################################################################ responseData = data.getResponseData() #TODO: Parse HTML files for comments for Path and file #if you have a response if responseData: soup = BeautifulSoup(responseData, "html.parser") comments=soup.find_all(string=lambda text:isinstance(text,Comment)) regUrl = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))" regEmail = r"[\w\.-]+@[\w\.-]+" urlsInComments = [] emailsInComments= [] urlsInComment = [] emailsInComment = [] for comment in comments: #get urls urlsComments = re.findall(regUrl,comment) #parse url, does the domain the same as our current domain? if urlsComments: for url in urlsComments[0]: if url: #Get URLs tempData = urlparse(url) domainInUrlTemp = '{uri.netloc}'.format(uri=tempData).split('.') domainInUrl = ".".join(domainInUrlTemp) #TODO: url will need to be verify if in scope when we call it : keep the URL path/file for scan urlsInComment = re.findall(regUrl,comment) urlsInComments.append(urlsInComment) #get emails emailsInComment = re.findall(regEmail, comment) emailsInComments.append(emailsInComment) self._logger.debug("url in comments and email in comments:") #get list only if urlsInComments and urlsInComments[0]: if type(urlsInComment[0]) is tuple: self._urlsInComment[data.getDomain] = urlsInComment[0] #TODO: use email in another version? if emailsInComments and emailsInComments[0]: if type(emailsInComments[0]) is tuple: emailsInComments = emailsInComments[0] self._logger.debug(urlsInComments) self._logger.debug(emailsInComments) self._logger.info("COMMENTS DONE") #TODO: finish these function to gather the information from the data.json ''' Function which is accessing smart list of Path to look into by the smart request function ''' def getSmartListPath(self): return ''' Function which is accessing smart list of file extension to look into by the smart request function ''' def getSmartListExt(self): return ''' Function which is accessing smart list of directories to look into by the smart request function ''' def getSmartDirectories(self): return ''' Function which is accessing smart list of files to look into by the smart request function ''' def getSmartFiles(self): return ''' This functions split all informations of the URL for further use in the smartRequest function @param messageInfo: last request executed with all its information ''' def getURLdata(self,messageInfo,messageIsRequest): analyzedRequest = self._helpers.analyzeRequest(messageInfo) url = analyzedRequest.getUrl() self._logger.debug(url) parsed = urlparse(url.toString()) '''debug info print 'scheme :', parsed.scheme print 'netloc :', parsed.netloc print 'path :', parsed.path print 'params :', parsed.params print 'query :', parsed.query print 'fragment:', parsed.fragment print 'username:', parsed.username print 'password:', parsed.password print 'hostname:', parsed.hostname, '(netloc in lower case)' print 'port :', parsed.port ''' #Is there any parameters? params = analyzedRequest.getParameters() for p in params: self._logger.debug("Query var: "+p.getName()) self._logger.debug("Query value: "+p.getValue()) #getURL, needs to be a string before parsing it with urlparse completeURL = url.toString() self._logger.debug("Complete URL: "+completeURL) #URL sans port/dir/params baseURL = messageInfo.getHttpService().toString() self._logger.debug("Base URL: "+baseURL) #Get path including directories and file extension path = urlparse(completeURL).path.encode("utf-8") filename = path.split('/')[-1:].pop().split('.')[:1].pop() fileExt = path.split('.')[1:] fileExt = "".join(fileExt) directories = path.split('/')[1:-1] directory = "/".join(directories) if len(fileExt) > 0: self._logger.debug("Directories: "+str(directories)[1:-1]) self._logger.debug("Directory: "+directory) self._logger.debug("File Extension: "+fileExt) self._logger.debug("URL Path: "+path) self._logger.debug("Filename: "+filename) else: self._logger.debug("No file Extension, directory is: "+path) #Get domain and netloc netloc = parsed.netloc.encode("utf-8") domain = netloc.split(':')[0] self._logger.debug("Domain/: "+domain) ''' print "Complete URL: "+completeURL print "Domain: "+domain print "Netloc: "+ netloc print "Query value: "+p.getValue() print "Query var: "+p.getName() print "Directories: "+str(directories)[1:-1] print "Directories2: "+str(directories) print "Directory: "+directory print "File Extension: "+fileExt print "URL Path: "+path print "Filename: "+filename print "Base URL: "+baseURL ''' responseData = "" if not messageIsRequest: #when it's a response, get the response data content = messageInfo.getResponse() response = self._helpers.analyzeResponse(content) responseData = self._helpers.bytesToString(content[response.getBodyOffset():]) #data = UrlData("",headers,"","","","","","",responseData,self._logger) data = UrlData(url,domain,netloc,directories,params,filename,fileExt,baseURL,completeURL,path,responseData,self._logger) return data # This method is called when multiple issues are reported for the same URL # In this case we are checking if the issue detail is different, as the # issues from our scans include affected parameters/values in the detail, # which we will want to report as unique issue instances def consolidateDuplicateIssues(self, existingIssue, newIssue): if (existingIssue.getIssueDetail() == newIssue.getIssueDetail()): return -1 else: return 0 #Have to be implemented def doPassiveScan(self, baseRequestResponse): pass #Have to be implemented def doActiveScan(self, baseRequestResponse, insertionPoint): pass ''' Multithreaded class to execute queries out of the Queue.Queue Also get the response and validate the 404 type ''' class RequestorWorker(threading.Thread): def __init__(self, threadID, name, queue, error404, logger, requestor, UI, recursiveURLs): #Sahred Queue between Thread Workers self._id = threadID self._name = name self._queue = queue #request queue received from the Requestor self._threadLock = threading.Lock() self._alive = True threading.Thread.__init__(self) self.daemon = True #self._responseQueue = responseQueue self._error404 = error404 self._logger = logger self._requestor = requestor self._ui = UI self._recursiveURLs = recursiveURLs self._acceptedCode = (200,400,401,403,500) #TODO: Set a randomizer of user-agent and add the option in .ini file self._headers = { 'User-Agent': 'Mozilla/5.0' } return ''' Return type of 404 for requested domain @param domain: domain to fetch error 404 type ''' def _getError404(self,url): #Get domain and netloc parsed = urlparse(url) netloc = parsed.netloc.encode("utf-8") domain = netloc.split(':')[0] return self._error404[domain] def run(self): while(self._alive): #waiting for queue #print "Waiting for queue: "+self._name url = self._queue.get() #print "TASK RECEIVED: " + url + " From: " + self._name self._logger.debug(self._name+" requesting(URL): " + url) self._logger.info(self._name+" requesting(URL): " + url) #print "[Requesting] " + url #TODO: randomizedUserAgent #TODO: - 302 (redirect) --> parse the redirect URL (in scope ok, in sitemap stop, not in site map add to queue : 200+window.location or JS isn't catch yet response = requests.get(url, headers=self._headers, allow_redirects=False) if response.status_code in self._acceptedCode: #add no false positive to site map code = self._getError404(url) print "[URL EXISTS](Response: " +str(response.status_code)+ ") | 404 type:" + str(code) +" | FOR URL: "+ str(url) #False positive logic. #TODO: can be update or upgraded for sure! :) fp = "" ''' si 404 si response 200 ok si response 401 si response 403 si response 300 si response 500 si 403 si response 200 si response 401 fp si response 403 fp si response 300 si response 500 fp si 500 si response 200 si response 401 si response 403 si response 300 si response 500 fp si intext si response 200 need reverification fp si response 401 si response 403 si response 300 si response 500 si 300 si response 200 si response 401 si response 403 si response 300 fp si response 500 ''' #if the current request is a 403 and the 404 page isn't a 403 page, should be false positive if response.status_code == 403 and code != 403: fp = " ,False Positive" #if current response is a 200 and the 404 page was inside a 200 code page, it can be a false positive elif response.status_code == 200 and code == "404 in page": fp = " ,False Positive" #if 404 page is inside a 200 response code, a 300 redirect page or a 403, many possible false positive elif code == "404 in page" or code == 300 or code == 403: fp = " ,Possible False Positive" #code is 200 or whatnot else: #TODO: define all directory in a list and add to the recursive list+validate latest directory of current url to see if it is in list, if not add it print 200 #if it's a direct directory, let's recurse... if not recurse too much already! #if urlparse(url).path[-1] == '/' and self._recursiveURLs.get(str(url), 0) <= self._ui.getRecursiveConfig(): # self._recursiveURLs[str(url)] = self._recursiveURLs.get(str(url), 0) + 1 #adjust the recursed level for that directory # self._requestor.runRequest(url,Queue.Queue(1)) #add code to the Jlist here print url self._ui.addURL(url + " , ("+str(response.status_code)+")" + fp) #TODO: add page to SiteMap if not there already? #TODO: issue = SmartBusterIssue() #might need to parse the url into data for the issue? #issue=ScanIssue(baseRequestResponse.getHttpService(), self._helpers.analyzeRequest(baseRequestResponse).getUrl(), httpmsgs, ISSUE_NAME, ISSUE_DETAIL, SEVERITY, CONFIDENCE, REMEDIATION_DETAIL, ISSUE_BACKGROUND, REMEDIATION_BACKGROUND) #self._callbacks.addScanIssue(issue) '''---------------------------------------------------------------------------------------------------------------------------------------- Class to hold the Request data - Using Requests API we use a Queue to append HTTP requests to be executed. - If the requests return a 200/401/403/500 we add them to the sitemap and add them to our list of URL/Dir/file found - Can save data found to csv ----------------------------------------------------------------------------------------------------------------------------------------''' class Requestor(): ''' Initialize ''' def __init__(self,logger,UI): #Queue to hold URL to request #Each item will be a URL string str(URL) self._requestQueue = Queue.Queue(0) self._logger = logger #hold type of 404 error by domain self._error404 = {} #hold url that are being recursive self._recursiveURLs = [] #Queue to hold URL and their response code #Each item will be a list (url,code) #self._responseQueue = deque() #TODO: Set a randomizer of user-agent and add the option in .ini file self._headers = { 'User-Agent': 'Mozilla/5.0' } self._logger.debug("Requestor object created") threads = [] #list containing threads #1 thread needed for infofestival. Don't know how to split the pages between workers for i in range(0,40):#TODO: Set a number of thread in UI t = RequestorWorker(i,"RequestorWorker-"+str(i),self._requestQueue,self._error404, logger, self, UI, self._recursiveURLs) threads.append(t) t.start() return ''' Add a request to the queue to be execute by a thread worker (RequestorWorker) @param url: the URL to get a response from ''' def addRequest(self,url,data): #print "ADDING: "+ url #get the 404 details for the current domain self._define404(data) self._requestQueue.put(url) ##see if we can put the type404 inside the queue along with the url return ''' Define 404 type of the current domain ''' def _define404(self,data): domain = data.getDomain() #only do once per domain if domain not in self._error404: code = 404 errorQueue = Queue.Queue(0) #get a 404 page m = hashlib.md5() m.update(str(random.random())) url = data.getBaseUrl()+"/"+m.hexdigest() print url self.runRequest(url,errorQueue) response = errorQueue.get() #if website use standard 404 error, everything is good if response.status_code == 404: code = 404 #if website used a 3xx code if 310 - response.status_code < 11 and 310 - response.status_code > 0: code = 300 if response.status_code == 403: code = 403 #if website use a 5xx code if 510 - response.status_code < 11 and 510 - response.status_code > 0: code = 500 #if website use a 200 if response.status_code == 200: soup = BeautifulSoup(response.content, "html.parser") ################################ #TODO: more use case to add ################################ if soup.findAll(text=re.compile("page not found")): code = "404 in page" elif soup.findAll(text=re.compile("404")): code = "404 in page" elif soup.findAll(text=re.compile("page does not exist")): code = "404 in page" elif soup.findAll(text=re.compile("error 404")): code = "404 in page" #define which code is refer to a 404 self._error404[domain] = code return ''' Run a NON DELAYED (no thread workers) request and save the url:response code to the response deque class variable @param url: the URL to request and get a response @param responseQueue: thread safe queue to send the response back to the spider or other objects ''' def runRequest(self,url,responseQueue): #TODO: After thread is done, in thread read the _requestQeue object self._logger.debug("runRequest(URL): "+url) self._logger.info("EXECUTING REQUEST FOR: "+url) response = requests.get(url, headers=self._headers, allow_redirects=False) responseQueue.put(response) #TODO: Get code #TODO: add page to SiteMap if not there already? self._logger.debug("runRequest done for: "+url) return #TODO randomizedUserAgent def randomizedUserAgent(self): return '''---------------------------------------------------------------------------------------------------------------------------------------- Class to hold the Spidering data - Based on: http://www.netinstructions.com/how-to-make-a-web-crawler-in-under-50-lines-of-python-code/ Uses BeautifulSoup, require to download/install it. ----------------------------------------------------------------------------------------------------------------------------------------''' class Spider(): ''' Initialize @param startUrl: the URL to start the spidering ''' def __init__(self, data, maxPages, requestObj, logger): self._data = data self._words = [] self._mergedWords = [] self._maxPages = int(maxPages) self._requestor = requestObj self._queue = Queue.Queue(self._maxPages) self._domain = data.getDomain() self._logger = logger self._logger.debug("Spider object created") ''' Run the spidering @return: list of all words found @todo: use TextBlob for other language, right now mostly only english based words will be categorized correctly. ''' def runSpidering(self): urlString = str(self._data.getUrl()) url = URL(urlString) print "Spider, URL: " + urlString #Get the words from the URL, starting with the startUrl link_list = [urlString] #Counter pagesVisited = 0 self._logger.debug("Max pages to visit: " + str(self._maxPages)) while int(pagesVisited) < int(self._maxPages): self._logger.debug("Nbr Page Visited: " + str(pagesVisited) + " / " + str(self._maxPages)) self._logger.debug("Visiting: " + link_list[pagesVisited]) visitingUrl = link_list[pagesVisited] pagesVisited = pagesVisited+1 print "Visiting URL: "+visitingUrl try: #??? Fix the url retrieve. #If it starts with / we add the domain to it if self._domain not in visitingUrl: if visitingUrl.startswith("/"): visitingUrl = visitingUrl[1:] #TODO: startswith /# link_list[pagesVisited] = self._data.getCompleteURL() + visitingUrl visitingUrl = link_list[pagesVisited] #send an asynchronus HTTP request and wait for the response thread = threading.Thread( target=self._requestor.runRequest, name="Thread-Spider", args=[visitingUrl, self._queue], ) thread.start() thread.join() response = self._queue.get() self._logger.debug("Response received from: "+visitingUrl) #Get the soup soup = BeautifulSoup(response.content, "html.parser") #Get the visible text [s.extract() for s in soup(['style', 'script', '[document]', 'head', 'title'])] visible_texts = soup.getText()#.encode('utf-8').strip() #Get the text blob blob = TextBlob(visible_texts) #Get the words : TODO: add the 1000 value in the bsb.ini? if len(blob.words) <= 1000: #merging 2 words and up to 1000 (cpu intensivity) for words,tag in blob.tags: #Get only noun and numbers if tag.startswith("NN") or tag == "CD": self._words.append(words) self._logger.debug("Size of WORDS: " + str(len(self._words))) #Get the links for next pages or stop aSoup = soup.findAll("a") if len(aSoup) > 0: for i in aSoup: #Do not use previous page, index or anchors if not i['href'].startswith("#") and not i['href'] == "/" and not i['href'] in i and not i['href'].startswith("/#") and not i['href'].startswith("//"): link_list.append(i['href']) else: self._logger.debug("No words on: "+visitingUrl) break except KeyError: self._logger.error("SpiderError: KeyError") pass except requests.exceptions.RequestException as e: self._logger.error("SpiderError: "+e.reason) pass return self._words ''' Merge the obtained words from the spidering @return: List of all words mixed with each others ''' def mergeWords(self): if len(self._words) > 1: #original list of words that we want to mix listOriginal = self._words #merging all words together for words in listOriginal: for wordsToMerge in listOriginal: self._mergedWords.append(words+wordsToMerge) return True else: return False ''' @return: List of all words mixed with each others Note: The return words needs to be convert to utf-8 ''' def getMergedWords(self): return self._mergedWords ''' @return: List of all words Note: The return words needs to be convert to utf-8 ''' def getWords(self): return self._words '''---------------------------------------------------------------------------------------------------------------------------------------- Class to share community data to annonimized server ----------------------------------------------------------------------------------------------------------------------------------------''' class technologyScanner(): def __init__(self, optIn, logger): self._optIn = optIn self._logger = logger self._logger.debug("CommunityData Object Created") return '''---------------------------------------------------------------------------------------------------------------------------------------- Class to share community data to annonimized server ----------------------------------------------------------------------------------------------------------------------------------------''' class communityData(): def __init__(self, optIn, logger): self._optIn = optIn self._logger = logger self._logger.debug("CommunityData Object Created") return def submitData(self,fileName,isFile): if self._optIn: #prepare the request to submit to the server if isFile: print "Data is a file" #data to sent is a file else: print "data is a directory" #data to sent is a directory #contact the server print "contacting the server with data: " + fileName return '''---------------------------------------------------------------------------------------------------------------------------------------- Class to hold the URL data in separated parts ----------------------------------------------------------------------------------------------------------------------------------------''' class UrlData(): def __init__(self,url,domain,netloc,directories,params,filename, fileExt,baseURL,completeURL,path,responseData,logger): self._url = url self._domain = domain self._netloc = netloc self._directories = directories self._params = params self._fileExt = fileExt self._baseURL = baseURL self._completeURL = completeURL self._responseData = responseData self._logger = logger self._path = path self._filename = filename self._logger.debug("UrlData object created") return def getPath(self): return self._path def getFilename(self): return self._filename def getResponseHeaders(self): if not self._url: return self._domain def getResponseData(self): return self._responseData def getBaseUrl(self): return self._baseURL def getCompleteURL(self): return self._completeURL def getUrl(self): return self._url def getDomain(self): return self._domain def getNetloc(self): return self._netloc def getDirectories(self): return self._directories def getLastDirectory(self): if len(self._directories) > 0: return self._directories[len(self._directories)-1] else: return "" def getParams(self): return self._params def getFileExt(self): return self._fileExt '--------------------------------------------------------------------' '''-------------------------------------------------------------------- Class to hold the Issues found @TODO: see for Sitemap instead of issue or WITh issues --------------------------------------------------------------------''' class SmartBusterIssue(IScanIssue): '''This is our custom IScanIssue class implementation.''' def __init__(self, httpService, url, httpMessages, issueName, issueDetail, severity, confidence, remediationDetail, issueBackground, remediationBackground): self._issueName = issueName self._httpService = httpService self._url = url self._httpMessages = httpMessages self._issueDetail = issueDetail self._severity = severity self._confidence = confidence self._remediationDetail = remediationDetail self._issueBackground = issueBackground self._remediationBackground = remediationBackground def getConfidence(self): return self._confidence def getHttpMessages(self): return self._httpMessages #return None def getHttpService(self): return self._httpService def getIssueBackground(self): return self._issueBackground def getIssueDetail(self): return self._issueDetail def getIssueName(self): return self._issueName def getIssueType(self): return 0 def getRemediationBackground(self): return self._remediationBackground def getRemediationDetail(self): return self._remediationDetail def getSeverity(self): return self._severity def getUrl(self): return self._url def getHost(self): return 'localhost' def getPort(self): return int(80)