# python source code of get

import github3
import datetime
import os
import errno
import getpass
import time
import csv
import math
import json
import requests
import calendar


class GitHub_Traffic:
    """
    Collects traffic statistics (referrers, views, clones) and releases for
    the public repositories of a GitHub organization, then writes them out as
    aggregated CSV files and per-repository JSON files.
    """

    def __init__(self):
        # Aggregates across all repositories, used for the CSV output.
        self.referrers = {}  # referrer name -> (count, uniques)
        self.referrers_lower = {}  # lower-cased referrer name -> referrer name
        self.views = {}  # epoch seconds of a day -> (count, uniques)
        self.clones = {}  # epoch seconds of a day -> (count, uniques)

        # Raw API payloads keyed by repository name, used for the JSON output.
        self.referrers_json = {}
        self.views_json = {}
        self.clones_json = {}
        self.releases_json = {}

    def get_stats(self, username="", password="", organization="llnl", force=True):
        """
        Retrieves the traffic for the repositories of the given organization
        and writes it to the CSV and JSON output files.
        Requires organization admin credentials token to access the data.

        When force is False the work is skipped if all three CSV output files
        already exist.
        """
        referrers_file_path = "../github_stats_output/referrers.csv"
        views_file_path = "../github_stats_output/views.csv"
        clones_file_path = "../github_stats_output/clones.csv"
        output_paths = (referrers_file_path, views_file_path, clones_file_path)
        if not force and all(os.path.isfile(path) for path in output_paths):
            return

        self.login(username, password)
        calls_beginning = self.logged_in_gh.ratelimit_remaining + 1
        print("Rate Limit: " + str(calls_beginning))
        self.get_org(organization)
        self.get_traffic()
        # Drop days that are already on disk before appending; the returned
        # row counts seed the "id" column of the newly written rows.
        views_row_count = self.check_data_redundancy(
            file_path=views_file_path, dict_to_check=self.views
        )
        clones_row_count = self.check_data_redundancy(
            file_path=clones_file_path, dict_to_check=self.clones
        )
        self.write_to_file(
            referrers_file_path=referrers_file_path,
            views_file_path=views_file_path,
            clones_file_path=clones_file_path,
            views_row_count=views_row_count,
            clones_row_count=clones_row_count,
        )
        json_outputs = (
            (self.referrers_json, "traffic_popular_referrers"),
            (self.views_json, "traffic_views"),
            (self.clones_json, "traffic_clones"),
            (self.releases_json, "releases"),
        )
        for payloads, path_ending_type in json_outputs:
            self.write_json(dict_to_write=payloads, path_ending_type=path_ending_type)
        calls_remaining = self.logged_in_gh.ratelimit_remaining
        calls_used = calls_beginning - calls_remaining
        print(
            "Rate Limit Remaining: "
            + str(calls_remaining)
            + "\nUsed "
            + str(calls_used)
            + " API calls."
        )

    @staticmethod
    def _prompt(message):
        """
        Reads a line from the user; works on both Python 2 and Python 3.
        """
        try:
            reader = raw_input  # Python 2 (its input() would eval the text)
        except NameError:
            reader = input  # Python 3
        return reader(message)

    @staticmethod
    def _open_csv(path, mode):
        """
        Opens a file the way the csv module expects on the running Python:
        binary mode on Python 2, text mode with newline="" on Python 3.
        """
        import sys

        if sys.version_info[0] >= 3:
            return open(path, mode, newline="")
        return open(path, mode + "b")

    @staticmethod
    def _format_day(epoch_seconds):
        """
        Formats epoch seconds as a UTC 'YYYY-MM-DD' string.
        """
        return time.strftime("%Y-%m-%d", time.gmtime(epoch_seconds))

    @staticmethod
    def _to_epoch_seconds(timestamp):
        """
        Converts an API timestamp to whole epoch seconds. Accepts epoch
        milliseconds (number or numeric string) as well as ISO-8601 UTC
        strings such as '2016-10-10T00:00:00Z'.
        """
        try:
            return int(timestamp) // 1000
        except (TypeError, ValueError):
            return calendar.timegm(time.strptime(timestamp, "%Y-%m-%dT%H:%M:%SZ"))

    def login(self, username="", password=""):
        """
        Performs a login and sets the Github object via given credentials. If
        credentials are empty or incorrect then prompts user for credentials.
        Stores the authentication token in a CREDENTIALS_FILE used for future
        logins. Handles Two Factor Authentication.
        """
        # NOTE(review): github3.authorize relies on GitHub's password-based
        # authorizations API - confirm it is still available to this account.
        credentials_file = "CREDENTIALS_FILE_ADMIN"
        note = "GitHub Organization Stats App"
        note_url = "http://software.llnl.gov/"
        scopes = ["user", "repo"]
        use_stored_token = os.path.isfile(credentials_file)
        # Loop instead of recursing: a stored token that turns out to be bad
        # would otherwise be re-read on every retry and never terminate.
        while True:
            try:
                self.token = ""
                if use_stored_token:
                    with open(credentials_file, "r") as fd:
                        self.token = fd.readline().strip()
                else:
                    if username == "" or password == "":
                        username = self._prompt("Username: ")
                        password = getpass.getpass("Password: ")
                    auth = github3.authorize(
                        username,
                        password,
                        scopes,
                        note,
                        note_url,
                        two_factor_callback=self.prompt_2fa,
                    )
                    self.token = auth.token
                    # File layout: token on the first line, authorization id
                    # on the second.
                    with open(credentials_file, "w+") as fd:
                        fd.write(self.token + "\n")
                        fd.write(str(auth.id))
                print("Logging in.")
                self.logged_in_gh = github3.login(
                    token=self.token, two_factor_callback=self.prompt_2fa
                )
                # Touch the API once so bad credentials surface here.
                self.logged_in_gh.user().to_json()
                return
            except (ValueError, AttributeError, github3.models.GitHubError):
                print("Bad credentials. Try again.")
                # Ask for fresh credentials on the next attempt rather than
                # reusing whatever just failed.
                username = ""
                password = ""
                use_stored_token = False

    def prompt_2fa(self):
        """
        Taken from
        http://github3py.readthedocs.io/en/master/examples/two_factor_auth.html
        Prompts a user for their 2FA code and returns it.
        """
        code = ""
        while not code:
            code = self._prompt("Enter 2FA code: ")
        return code

    def get_org(self, organization_name=""):
        """
        Retrieves an organization via given org name. If given
        empty string, prompts user for an org name.
        """
        self.organization_name = organization_name
        if organization_name == "":
            self.organization_name = self._prompt("Organization: ")
        print("Getting organization.")
        # Look up the resolved name so a prompted value is actually used.
        self.org_retrieved = self.logged_in_gh.organization(self.organization_name)

    def get_traffic(self):
        """
        Retrieves the traffic for the repositories of the given organization.
        Requires login() and get_org() to have been called first.
        """
        print("Getting traffic.")
        # Uses the developer API. Note this could change.
        headers = {
            "Accept": "application/vnd.github.spiderman-preview",
            "Authorization": "token " + self.token,
        }
        headers_release = {"Authorization": "token " + self.token}
        for repo in self.org_retrieved.iter_repos(type="public"):
            url = (
                "https://api.github.com/repos/"
                + self.organization_name
                + "/"
                + repo.name
            )
            self.get_referrers(url=url, headers=headers, repo_name=repo.name)
            self.get_paths(url=url, headers=headers)
            for traffic_type, store in (("views", self.views), ("clones", self.clones)):
                self.get_data(
                    url=url,
                    headers=headers,
                    dict_to_store=store,
                    type=traffic_type,
                    repo_name=repo.name,
                )
            self.get_releases(url=url, headers=headers_release, repo_name=repo.name)

    def get_releases(self, url="", headers=None, repo_name=""):
        """
        Retrieves the releases for the given repo in JSON and stores them in
        self.releases_json under repo_name.
        """
        response = requests.get(url + "/releases", headers=headers or {})
        self.releases_json[repo_name] = response.json()

    def get_referrers(self, url="", headers=None, repo_name=""):
        """
        Retrieves the popular referrers of the given repo in json, stores the
        raw payload under repo_name and adds the counts to the totals kept in
        self.referrers.
        """
        # JSON
        response = requests.get(
            url + "/traffic/popular/referrers", headers=headers or {}
        )
        referrers_json = response.json()
        self.referrers_json[repo_name] = referrers_json
        # CSV
        for referrer in referrers_json:
            ref_name = referrer["referrer"]
            if ref_name in self.referrers:
                count, uniques = self.referrers[ref_name]
                self.referrers[ref_name] = (
                    count + referrer["count"],
                    uniques + referrer["uniques"],
                )
            else:
                self.referrers[ref_name] = (referrer["count"], referrer["uniques"])
                # Remember the original spelling for case-insensitive sorting.
                self.referrers_lower[ref_name.lower()] = ref_name

    def get_paths(self, url="", headers=None):
        """
        Placeholder for retrieving the popular paths information
        (url + '/traffic/popular/paths'). Currently performs no request and
        stores nothing.
        """
        # TODO: fetch and store the popular paths once they are needed.
        return None

    def _record_daily_counts(self, daily_rows, dict_to_store, date):
        """
        Adds the (count, uniques) of every row in daily_rows to dict_to_store,
        keyed by the row's day in epoch seconds. Rows whose day equals date
        are skipped.
        """
        for day in daily_rows:
            timestamp_seconds = self._to_epoch_seconds(day["timestamp"])
            # do not add todays date, some views might not be recorded yet
            if self._format_day(timestamp_seconds) == date:
                continue
            count, uniques = dict_to_store.get(timestamp_seconds, (0, 0))
            dict_to_store[timestamp_seconds] = (
                count + day["count"],
                uniques + day["uniques"],
            )

    def get_data(
        self,
        url="",
        headers=None,
        date=None,
        dict_to_store=None,
        type="",
        repo_name="",
    ):
        """
        Retrieves data from json and stores it in the supplied dict. Accepts
        'clones' or 'views' as type.

        date is the 'YYYY-MM-DD' day to leave out of the aggregation and
        defaults to today (resolved when the method is called).
        """
        if date is None:
            date = str(datetime.date.today())
        if dict_to_store is None:
            dict_to_store = {}
        # JSON
        response = requests.get(url + "/traffic/" + type, headers=headers or {})
        payload = response.json()
        if type == "views":
            self.views_json[repo_name] = payload
        elif type == "clones":
            self.clones_json[repo_name] = payload
        # CSV
        self._record_daily_counts(payload[type], dict_to_store, date)

    def write_json(
        self,
        date=None,
        organization="llnl",
        dict_to_write=None,
        path_ending_type="",
        output_dir="../github-data",
    ):
        """
        Writes all traffic data to file in JSON form, one file per repository
        at <output_dir>/<organization>/<repo>/<path_ending_type>/<date>.json.
        Repositories with an empty payload are skipped. date defaults to
        today (resolved when the method is called).
        """
        if date is None:
            date = datetime.date.today()
        for repo, payload in (dict_to_write or {}).items():
            if not payload:  # don't need to write out empty lists
                continue
            path = os.path.join(
                output_dir, organization, repo, path_ending_type, str(date) + ".json"
            )
            self.checkDir(path)
            with open(path, "w") as out:
                json.dump(
                    payload, out, sort_keys=True, indent=4, separators=(",", ": ")
                )

    def write_to_file(
        self,
        referrers_file_path="",
        views_file_path="",
        clones_file_path="",
        date=None,
        organization="llnl",
        views_row_count=0,
        clones_row_count=0,
    ):
        """
        Writes all traffic data to file. The row counts are the ids given to
        the first newly written views/clones rows. date and organization are
        accepted for interface compatibility and are not used here.
        """
        self.write_referrers_to_file(file_path=referrers_file_path)
        self.write_data_to_file(
            file_path=views_file_path,
            dict_to_write=self.views,
            name="views",
            row_count=views_row_count,
        )
        self.write_data_to_file(
            file_path=clones_file_path,
            dict_to_write=self.clones,
            name="clones",
            row_count=clones_row_count,
        )

    def check_data_redundancy(self, file_path="", dict_to_check=None):
        """
        Checks the given csv file against the json data scraped for the given
        dict. It will remove all data retrieved that has already been recorded
        so we don't write redundant data to file. Returns count of data rows
        (header excluded) found in the file, or 0 when the file is missing.
        """
        if dict_to_check is None:
            dict_to_check = {}
        if not os.path.isfile(file_path):
            return 0
        count = 0
        with self._open_csv(file_path, "r") as csv_file:
            reader = csv.reader(csv_file)
            next(reader, None)  # skip header line
            for row in reader:
                if not row:  # tolerate blank lines
                    continue
                timestamp = calendar.timegm(time.strptime(row[0], "%Y-%m-%d"))
                # our date is already recorded
                dict_to_check.pop(timestamp, None)
                # calc current id max
                count += 1
        return count

    def write_data_to_file(
        self,
        file_path="",
        date=None,
        organization="llnl",
        dict_to_write=None,
        name="",
        row_count=0,
    ):
        """
        Appends given dict (epoch seconds -> (count, uniques)) to the csv
        file in chronological order, writing the header first when the file
        is new. row_count is the id of the first appended row. date is
        accepted for interface compatibility and is not used.
        """
        if dict_to_write is None:
            dict_to_write = {}
        self.checkDir(file_path)
        exists = os.path.isfile(file_path)
        with open(file_path, "a") as out:
            if not exists:
                out.write("date,organization," + name + ",unique_" + name + ",id\n")
            for row_id, day in enumerate(sorted(dict_to_write), row_count):
                count, uniques = dict_to_write[day][0], dict_to_write[day][1]
                fields = [
                    self._format_day(day),
                    organization,
                    str(count),
                    str(uniques),
                    str(row_id),
                ]
                out.write(",".join(fields) + "\n")

    def write_referrers_to_file(self, file_path="", date=None, organization="llnl"):
        """
        Writes the referrers data to file. Rows already recorded for date are
        replaced. date defaults to today (resolved when the method is called).
        """
        if date is None:
            date = str(datetime.date.today())
        self.remove_date(file_path=file_path, date=date)
        self.checkDir(file_path)
        referrers_exists = os.path.isfile(file_path)
        with open(file_path, "a") as out:
            if not referrers_exists:
                out.write(
                    "date,organization,referrer,count,count_log,uniques,"
                    + "uniques_logged\n"
                )
            for lowered in sorted(self.referrers_lower):  # sort based on lowercase
                ref_name = self.referrers_lower[lowered]  # grab real name from
                count = self.referrers[ref_name][0]
                uniques = self.referrers[ref_name][1]
                # so we don't display 0 for count of 1 (log(1) == 0); the
                # adjusted value is also what lands in the count columns.
                if count == 1:
                    count = 1.5
                if uniques == 1:
                    uniques = 1.5
                fields = [
                    str(date),
                    organization,
                    ref_name,
                    str(count),
                    str(math.log(count)),
                    str(uniques),
                    str(math.log(uniques)),
                ]
                out.write(",".join(fields) + "\n")

    def remove_date(self, file_path="", date=None):
        """
        Removes all rows of the associated date from the given csv file.
        Defaults to today. Does nothing when the file does not exist.
        """
        if date is None:
            date = str(datetime.date.today())
        if not os.path.isfile(file_path):
            return
        # Build the filtered copy next to the target so the final rename
        # stays on one filesystem.
        temp_path = file_path + ".tmp"
        with self._open_csv(file_path, "r") as inp, self._open_csv(
            temp_path, "w"
        ) as out:
            writer = csv.writer(out)
            for row in csv.reader(inp):
                if row and row[0] != date:
                    writer.writerow(row)
        os.remove(file_path)
        os.rename(temp_path, file_path)

    def checkDir(self, file_path=""):
        """
        Checks if the directory of the specified file_path exists. If not, it
        creates it. A file_path without a directory part needs no directory.
        """
        directory = os.path.dirname(file_path)
        if not directory or os.path.exists(directory):
            return
        try:
            os.makedirs(directory)
        except OSError as e:
            # Someone else may have created it in the meantime.
            if e.errno != errno.EEXIST:
                raise


# Script entry point: build a collector and gather the stats using the
# defaults of get_stats.
if __name__ == "__main__":
    my_github = GitHub_Traffic()
    my_github.get_stats()