#!/usr/bin/python
#
#   Name:     dnsutils.py
#   Version:  0.91
#   By:       Maltelligence Research Group
#   Created:  Jan 5, 2015
#   Modified: Aug 7, 2015
#   Function: all functions used in DNS queries
#
#   Copyright (c), 2015 Maltelligence Group
#
#   This file is part of Maltelligence.
#
#   Maltelligence is free software: you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation, either version 3 of the License, or
#   (at your option) any later version.
#
#   Maltelligence is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with Maltelligence.  If not, see <http://www.gnu.org/licenses/>.
#

import mechanize
import urllib2
import dns.resolver
import pythonwhois

from ipwhois import IPWhois

from utils.config import *
from utils.utility import *
from utils.findASN import *


# Country-keyed public resolvers; retCdnIP() queries each of these to detect
# CDN / GeoDNS setups that answer differently per region.
nameservers = {'TW': '114.34.4.1', 'HK': '61.244.40.66', 'US': '64.60.46.130',
               'CN': '114.66.4.54', 'GB': '213.208.238.54', 'AU': '59.154.226.27',
               'SG': '203.126.107.195', 'JP': '124.146.181.113', 'FR': '195.25.102.114',
               'RU': '89.179.241.19', 'KR': '112.155.225.123', 'TH': '202.183.133.18',
               'DE': '87.138.68.66', 'CN1': '202.196.96.179', 'CN2': '116.246.40.160',
               'CN3': '163.177.98.243'}


def chk_ip(ip):
    """Return True if ip is a syntactically valid dotted-quad IPv4 string."""
    if ip is None:
        return False
    parts = ip.split('.')
    return (len(parts) == 4
            and all(part.isdigit() for part in parts)
            and all(0 <= int(part) <= 255 for part in parts))


def chk_routable(ip):
    """Return True if ip is a valid, publicly routable IPv4 address.

    Non-IPs, loopback/unspecified addresses, and addresses inside the RFC1918 /
    link-local / multicast / reserved ranges below return False.
    """
    if ip == '127.0.0.1' or ip == '0.0.0.0':
        routable = False
    else:
        routable = True
    non_routable = ['10.0.0.0/8', '172.16.0.0/12', '192.168.0.0/16',
                    '169.254.0.0/16', '127.0.0.0/8', '239.255.255.0/24',
                    '0.0.0.0/24', '224.0.0.0/28', '240.0.0.0/27']
    if chk_ip(ip):
        # is_subnet() comes from utils.utility
        for item in non_routable:
            if is_subnet(ip, item):
                routable = False
                break
    else:
        routable = False
    return routable


def chk_domain(domain):
    """Return True if domain looks like a dotted domain name (and is not an IP).

    NOTE(review): the regex requires at least one dot to set a group match, so
    single-label names ("localhost") return False — presumably intended.
    """
    if chk_ip(domain):
        return False
    regex = '[a-zA-Z\d-]{,63}(\.[a-zA-Z\d-]{,63})*'
    p = re.compile(regex)
    matched = p.search(domain)
    if matched.lastindex == 1:
        return True
    else:
        return False


def chk_subnet(subnet):
    """Return True if subnet is in CIDR "x.x.x.x/y" format; log and return False otherwise."""
    # *** consider to use the is_subnet logic contains under utility
    if len(subnet.split("/")) != 2 and len(subnet.split("/")[0].split(".")):
        msg = '[*] "%s" is not a subnet format (x.x.x.x/y)' % (subnet)
        logging.info(msg)
        return False
    else:
        return True


def chk_hostname(domain):
    """Split domain into (hostname, second-level-domain) using the TLD list.

    TLD is supplied by a utils star import.  Scans the labels left to right and
    cuts one label before the first recognized TLD.
    """
    secondLD = hostname = ''
    parts = domain.split('.')
    for i in range(len(parts)):
        hostname = '.'.join(parts[:i - 1])
        secondLD = '.'.join(parts[i - 1:])
        if parts[i].upper() in TLD and i != 0:
            break
    return hostname, secondLD


def chk_whois(domain):
    """Query whois for domain's second-level domain and return a flat dict.

    Returns a dict with keys: email, registrar, registrant, tel, ns,
    createdate, expirationdate, updateddate (empty strings when unknown).
    """
    # *** .ca is not working, check more ***
    email = ns = createdate = expirationdate = updateddate = registrar = registrant = tel = ''
    w = dict(email=email, registrar=registrar, registrant=registrant, tel=tel,
             ns=ns, createdate=createdate, expirationdate=expirationdate,
             updateddate=updateddate)
    if chk_domain(domain):
        hostname, secondLD = chk_hostname(domain)
        try:
            # find whois for second-level domain
            msg = 'parsing whois data of: %s ... ' % (secondLD)
            #logging.info(msg)
            ans = pythonwhois.get_whois(secondLD, True)
        except Exception:
            # whois lookup failed: return the all-empty record (best effort)
            return w
        contacts = ans['contacts']
        admin = contacts.get('admin') or {}
        if admin.get('email') is not None:
            email = admin['email']
        registrant_c = contacts.get('registrant') or {}
        if registrant_c.get('name') is not None:
            registrant = registrant_c['name']
        if registrant_c.get('phone') is not None:
            tel = registrant_c['phone']
        if ans.get('registrar') is not None:
            registrar = ans['registrar']
            # pythonwhois sometimes returns a list of registrars; keep the first
            if type(registrar) is list:
                registrar = registrar[0]
        if ans.get('nameservers') is not None:
            ns = ans['nameservers'][0]
        if ans.get('creation_date') is not None:
            createdate = ans['creation_date']
        if ans.get('expiration_date') is not None:
            expirationdate = ans['expiration_date']
        if ans.get('updated_date') is not None:
            updateddate = ans['updated_date']
        # some registries only publish an update date; use it as creation date
        if createdate == '' and updateddate != '':
            createdate = updateddate
        w = dict(email=email, registrar=registrar.encode('utf-8'),
                 registrant=registrant.encode('utf-8'), tel=tel, ns=ns,
                 createdate=createdate, expirationdate=expirationdate,
                 updateddate=updateddate)
    else:
        msg = '[*] no whois record: %s ...' % (domain)
        logging.info(msg)
    return w


def chk_connected():
    """Return True if the Internet is reachable (quick probe of google.com)."""
    try:
        urllib2.urlopen('http://www.google.com', timeout=1)
        return True
    except urllib2.URLError:
        pass
    return False


def retIP(domain, ns=None):
    """Resolve domain to (ip, canonical_name); ('', '') on failure.

    ns optionally names a specific resolver IP ("x.x.x.x") — added to handle
    the CDN case where different name servers give different answers.
    Consider using public name servers provided http://public-dns.tk or use
    nameservers.keys().
    """
    ip = ''
    c_name = ''
    if domain == '':
        return ip, c_name
    resolver = dns.resolver.Resolver()
    resolver.timeout = 1
    if ns is not None:
        # FIX: ns was accepted but never applied, so retCdnIP() always used
        # the system resolver; route the query through the requested server.
        resolver.nameservers = [ns]
    try:
        answers = resolver.query(domain)
        ip = str(answers[0]).split(": ")[0]
        c_name = answers.canonical_name
    except Exception as e:
        msg = '[*] No IP Addressed: Timeout, NXDOMAIN, NoAnswer or NoNameservers'
        logging.info(msg)
    return ip, c_name


def retCdnIP(domain):
    """Resolve domain against every server in `nameservers`.

    Returns a list of dicts: {country, ip, c_name} — one per resolver.
    """
    bar = pyprind.ProgBar(len(nameservers))
    ip_list = []
    for country in nameservers:
        ip, c_name = retIP(domain, nameservers[country])
        entry = dict(country=country, ip=ip, c_name=c_name)
        ip_list.append(entry)
        bar.update()
    return ip_list


def retDomains(ip):
    """Return domains parked on ip according to Hurricane Electric (bgp.he.net).

    May be duplicate with get_parked(data); best effort — returns what was
    collected so far on error.
    """
    domains = []
    try:
        html = get_url("http://bgp.he.net/ip/" + ip + "#_dns")
        links = re.compile('href="(.*?)"').findall(html)
        for link in links:
            if link.find('/dns/') == 0:
                domains.append(link[5:])
        return domains
    except Exception:
        return domains


def get_subnet(ip):
    """Return the first subnet ("x.x.x.x/y") listed for ip on bgp.he.net.

    NOTE(review): raises IndexError when the page has no subnet links
    (e.g. network failure) — callers should be prepared for that.
    """
    url = "http://bgp.he.net/ip/" + ip
    html = get_url(url)
    subnets = []
    link_finder = re.compile(r'<a href="/net/([0-9.]+)/([0-9]+)">')
    for t in link_finder.findall(html):
        subnets.append('/'.join(t))
    return subnets[0]


def get_parked(data):
    """Return [{data: ip, domain: name}, ...] parked on data (domain or IP).

    Same bgp.he.net source as retDomains (cancel either one).
    """
    if chk_domain(data):
        ip, c_name = retIP(data)
    if chk_ip(data):
        ip = data
    parked = []
    try:
        msg = '... Checking bgp.he.net for %s' % (ip)
        logging.info(msg)
        url = "http://bgp.he.net/ip/" + ip + "#_dns"
        html = get_url(url)
        links = re.compile('href="(.*?)"').findall(html)
        for link in links:
            if link.find('/dns/') == 0:
                parked.append(dict(data=ip, domain=link[5:]))
    except Exception:
        pass
    return parked


def get_parkedSubnet(data):
    """Return parked domains for the whole subnet containing data (domain or IP).

    Walks the bgp.he.net /net/ page: each /ip/ link sets the current IP, the
    /dns/ links that follow belong to that IP.
    """
    if chk_domain(data):
        ip, c_name = retIP(data)
    if chk_ip(data):
        ip = data
    net = get_subnet(ip)
    num, subnet = get_asn2(ip)
    parknets = []
    url = "http://bgp.he.net/net/" + net + "#_dns"
    html = get_url(url)
    # check error if html is empty
    link_finder = re.compile('href="(.*?)"')
    links = link_finder.findall(html)
    msg = '... Processing parked domains of: %s ...' % (len(links))
    logging.info(msg)
    for i in range(0, len(links)):
        if links[i].find('/ip/') == 0:
            ip = links[i][4:]
        elif links[i].find('/dns/') == 0:
            parknets.append(dict(data=ip, domain=links[i][5:]))
    return parknets


def get_asn1(data):
    """Look up (as_number, subnet) for data (domain or IP) in the local MySQL DB.

    *** This function need to be further update, now NOT used ***
    """
    if chk_domain(data):
        ip, c_name = retIP(data)
    if chk_ip(data):
        ip = data
    parts = ip.split('.')
    likes = '"%' + parts[0] + '.' + parts[1] + '%"'
    as_number = 0
    subnet = ''
    db = MySQLdb.connect(DB_HOST, DB_ID, DB_PW, DB)
    cursor = db.cursor()
    # try to find the largest subnet
    # NOTE(review): SQL is built by string interpolation; `likes` is derived
    # from a validated IP here, but parameterized queries would be safer.
    sql = "select a.asn, s.subnet, as_registrar.name, country.country, s.monitoring_code as M, s.country as residing from autonomous_system as a inner join as_registrar inner join country inner join subnet as s on (as_registrar.id=a.registrar_id and country.id=a.country_id and s.asn_id=a.id) where s.subnet like %s order by s.subnet desc" % (likes)
    cursor.execute(sql)
    found = cursor.fetchall()
    for i in range(0, len(found)):
        as_number = int(found[i][0])
        subnet = found[i][1]
        # use utils.utility is_subnet function
        if is_subnet(ip, subnet):
            break
    cursor.close()
    return as_number, subnet


def get_asn2(data):
    """Return (as_number, subnet) for data (domain or IP) via the ipwhois module."""
    if chk_domain(data):
        ip, c_name = retIP(data)
    if chk_ip(data):
        ip = data
    obj = IPWhois(ip)
    results = obj.lookup()
    as_number = 0
    subnet = ''
    try:
        if 'asn' in results:
            as_number = int(results['asn'])
    except Exception:
        # e.g. 'NA' or multi-origin strings that int() rejects; keep 0
        pass
    if 'asn_cidr' in results:
        subnet = results['asn_cidr']
    return as_number, subnet


def get_asn3(data):
    """Return (as_number, subnet) for data (domain or IP) from bgp.he.net."""
    if chk_domain(data):
        ip, c_name = retIP(data)
    if chk_ip(data):
        ip = data
    as_number = 0
    subnet = ''
    url = "http://bgp.he.net/ip/" + ip + "#_ipinfo"
    userAgent = [('User-agent', 'Mozilla/5.0 (X11; U; ' +
                  'Linux 2.4.2-2 i586; en-US; m18) Gecko/20010131 Netscape6/6.01')]
    browser = mechanize.Browser()
    browser.addheaders = userAgent
    try:
        page = browser.open(url)
        if page.code == 200:
            html = page.read()
            link_finder = re.compile('href="(.*?)"')
            links = link_finder.findall(html)
            # FIX: as_number was re-initialized to '' here, so a page with no
            # /AS link returned a str instead of the 0 the other get_asn*
            # functions return; keep the int default from above.
            for i in range(0, len(links)):
                if links[i].find('/AS') == 0:
                    as_number = int(links[i][3:])
                if links[i].find('/net') == 0:
                    subnet = links[i][5:]
    except Exception:
        msg = "[*] Exception: in accessing Hurrican (bgp.het.net)"
        logging.error(msg)
    return as_number, subnet


def get_url(url):
    """Fetch url with mechanize (browser-like User-Agent); '' on any failure."""
    userAgent = [('User-agent', 'Mozilla/5.0 (X11; U; ' +
                  'Linux 2.4.2-2 i586; en-US; m18) Gecko/20010131 Netscape6/6.01')]
    try:
        browser = mechanize.Browser()
        browser.addheaders = userAgent
        page = browser.open(url)
        html = page.read()
    except Exception:
        html = ''
    return html


def readBatch(path):
    """Parse a whitespace-separated batch file into [{'ip'/'domain'/'md5': token}, ...].

    Assume path found.  Tokens of length 32/40/64 are all filed under the
    'md5' key (so SHA-1/SHA-256 digests share that key); unrecognized tokens
    are logged and skipped.
    """
    rows = []
    lines = []
    with open(path, 'rb') as fd:
        for row in fd:
            rows.append(row.split())
    for i in range(0, len(rows)):
        line = {}
        for j in range(0, len(rows[i])):
            data = rows[i][j]
            if chk_ip(data):
                line.update({'ip': data})
            elif chk_domain(data):
                line.update({'domain': data})
            elif len(data) == 32 or len(data) == 40 or len(data) == 64:
                line.update({'md5': data})
            else:
                msg = '[*] %s cannot be identified, skip processing' % (data)
                logging.info(msg)
        lines.append(line)
    return lines