#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# by Erik Osheim
#
# Reads README.md, writes the rewritten text to README.md.new, and then
# moves README.md.new over README.md. If the format of README.md changes,
# this script may need modifications.
#
# Currently it rewrites each section, doing the following:
#   1. alphabetizing
#   2. querying GitHub for stars and days since active
#   3. formatting the link title to show this info
#   4. bolding projects with lots of stars
#
# repo_regex accepts link titles both with and without the
# "★ stars ⧗ days" suffix that this script writes.
#
# In order to use GH authentication, create a file in this directory
# called .access-token, whose contents are: "$user:$token" where $user
# is your github username, and $token is a Personal Access Token.

from __future__ import print_function

import base64
import datetime
import json
import os.path
import random
import re
import shutil
import sys
import time

try:
    import urllib2
except ImportError:
    # Python 3 has no urllib2; urllib.request exposes the same Request,
    # urlopen and HTTPError names that this script uses.
    import urllib.request as urllib2

# we use these regexes when "parsing" README.md
empty_regex = re.compile(r"^ *\n$")
section_regex = re.compile(r"^### (.+)\n$")
# groups: name, stars (optional), days (optional), link, description (optional)
repo_regex = re.compile(
    r"^\* (?:\*\*)?\[?([^*★]+[^ ★])(?: ★ ([^ ]+))?(?: ⧗ ([^ *]+))?\]\((.+?)\)(?:\*\*)?(?: (?:-|—|–) (.+))?\n$"
)
end_regex = re.compile(r"^# .+\n$")
# groups: owner, repository name
github_regex = re.compile(r"^https://github.com/(.+?)/(.+?)(?:/?)$")

# some paths
readme_path = "README.md"
temp_path = "README.md.new"

# these will be updated if .access-token exists.
user = None
token = None

# use fake to avoid hitting github API
# (main() overwrites this from the command line; the default only matters
# when the module is imported and run() is called directly)
fake = True

# whether to query all projects, or just those lacking scores/days.
full_update = False

# right now, as a naive UTC datetime. query() parses GitHub's "updated_at"
# value with a literal trailing "Z" (i.e. UTC) into a naive datetime, so the
# reference point must be UTC as well rather than local time.
now = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=time.time())


def query(owner, name):
    """Return ``(stars, days)`` for the GitHub project ``owner/name``.

    ``stars`` is the stargazer count and ``days`` the number of whole days
    between the project's ``updated_at`` timestamp and ``now`` (never
    negative).  In fake mode no request is made and random values are
    returned instead.  When GitHub answers with an HTTP error the result is
    ``(None, None)``.  Progress is reported on stdout in every case.
    """
    if fake:
        print(" {0}/{1}: ok".format(owner, name))
        return (random.randint(1, 1000), random.randint(1, 300))

    req = urllib2.Request("https://api.github.com/repos/{0}/{1}".format(owner, name))
    if user is not None and token is not None:
        # b64encode never inserts newlines (encodestring did, and it was
        # removed in Python 3.9); it needs bytes on Python 3.
        creds = "{0}:{1}".format(user, token).encode("utf-8")
        b64 = base64.b64encode(creds).decode("ascii")
        req.add_header("Authorization", "Basic {0}".format(b64))

    try:
        u = urllib2.urlopen(req)
        try:
            j = json.load(u)
        finally:
            u.close()
    except urllib2.HTTPError:
        print(" {0}/{1}: FAILED".format(owner, name))
        return (None, None)

    t = datetime.datetime.strptime(j["updated_at"], "%Y-%m-%dT%H:%M:%SZ")
    days = max(int((now - t).days), 0)
    print(" {0}/{1}: ok".format(owner, name))
    return (int(j["stargazers_count"]), days)


def output_repo(outf, name, stars, days, link, rdesc):
    """Write one bullet-list entry for a project to ``outf``.

    The link title is ``name`` followed by `` ★ stars`` and `` ⧗ days`` for
    whichever of the two values is known.  Projects with at least 500 stars
    are written in bold.  `` - rdesc`` is appended only when the entry has a
    description (``rdesc`` is ``None`` when the README line had none).
    """
    popular = stars is not None and int(stars) >= 500

    # Only mention the statistics we actually have; never emit "None".
    title = name
    if stars is not None:
        title += " ★ {0}".format(stars)
    if days is not None:
        title += " ⧗ {0}".format(days)

    entry = "[{0}]({1})".format(title, link)
    if popular:
        entry = "**{0}**".format(entry)
    if rdesc:
        entry = "{0} - {1}".format(entry, rdesc)
    outf.write("* {0}\n".format(entry))


def flush_section(outf, section, sdesc, repos):
    """Write a complete section (heading, description, entries) to ``outf``.

    ``section`` is the raw heading line and ``sdesc`` the raw description
    line or ``None``.  ``repos`` is a list of
    ``(name, stars, days, link, rdesc)`` tuples; it is sorted in place,
    case-insensitively by name.  An entry is looked up via ``query`` when
    ``full_update`` is set or when it lacks stars or days, provided its link
    is a GitHub repository URL; otherwise its existing values are written
    unchanged.
    """
    print(" " + section.strip())
    outf.write(section)
    outf.write("\n")
    if sdesc:
        outf.write(sdesc)
        outf.write("\n")

    repos.sort(key=lambda t: t[0].lower())
    for name, stars, days, link, rdesc in repos:
        have_stats = stars is not None and days is not None
        if full_update or not have_stats:
            m = github_regex.match(link)
            if m:
                stars, days = query(m.group(1), m.group(2))
            else:
                print(" {0}: not a repo".format(link))
        output_repo(outf, name, stars, days, link, rdesc)
    outf.write("\n")


def _open_text(path, mode):
    """Open ``path`` in text mode, as UTF-8 on Python 3.

    The README contains non-ASCII characters (★, ⧗), so Python 3 must not
    fall back to the locale's encoding.  Python 2 keeps plain byte strings.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, encoding="utf-8")
    return open(path, mode)


def run():
    """Rewrite ``readme_path`` with sorted, annotated project entries.

    Lines before the first ``### `` heading and lines from the first ``# ``
    heading after it onwards are copied verbatim.  In between, every section
    is collected and re-emitted through ``flush_section``; blank lines are
    dropped and regenerated.  The result is written to ``temp_path`` and then
    moved over ``readme_path``.

    If ``.access-token`` exists, the module-level ``user`` and ``token`` are
    loaded from it.

    Raises:
        Exception: a line inside a section is neither a project entry nor
            the section's single description line.
        ValueError: ``.access-token`` does not contain a ``:``.
    """
    global user, token

    if full_update:
        print("querying for all entries")
    else:
        print("querying for new entries only")

    if fake:
        print("running in fake mode -- no GH queries will be made")

    if os.path.exists(".access-token"):
        with open(".access-token") as tokf:
            # split once only, so any further ":" stays part of the token
            user, token = tokf.read().strip().split(":", 1)
        # never echo the token itself: it is a credential
        print("using Personal Access Token for user {0}".format(user))
    else:
        print("no Personal Access Token found in .access-token")

    with _open_text(readme_path, "r") as inf:
        lines = list(inf)
    print("read {0}".format(readme_path))

    started = False
    finished = False
    section = None
    sdesc = None
    repos = []
    total_repos = 0

    print("writing {0}".format(temp_path))
    with _open_text(temp_path, "w") as outf:
        for line in lines:
            if finished:
                outf.write(line)
            elif not started:
                if section_regex.match(line):
                    section = line
                    started = True
                else:
                    outf.write(line)
            elif end_regex.match(line):
                total_repos += len(repos)
                flush_section(outf, section, sdesc, repos)
                outf.write(line)
                finished = True
            elif empty_regex.match(line):
                continue
            elif section_regex.match(line):
                total_repos += len(repos)
                flush_section(outf, section, sdesc, repos)
                section = line
                sdesc = None
                repos = []
            else:
                m = repo_regex.match(line)
                if m:
                    repos.append(m.groups())
                elif sdesc is None:
                    sdesc = line
                else:
                    raise Exception("cannot parse {0}".format(line))

        # The file may end while still inside a section (no closing "# "
        # heading); that last section must be written too or it is lost.
        if started and not finished:
            total_repos += len(repos)
            flush_section(outf, section, sdesc, repos)

    print("wrote {0} repos to {1}".format(total_repos, temp_path))

    print("moving {0} to {1}".format(temp_path, readme_path))
    shutil.move(temp_path, readme_path)


def main():
    """Parse the command line, set ``fake`` / ``full_update``, and run."""
    global fake, full_update

    from optparse import OptionParser

    parser = OptionParser()
    parser.add_option(
        "-f",
        "--fake",
        action="store_true",
        dest="fake",
        default=False,
        help="don't query github, use fake data",
    )
    parser.add_option(
        "-u",
        "--update",
        action="store_true",
        dest="update",
        default=False,
        help="update all entries to newest data",
    )

    opts, _ = parser.parse_args()
    fake = opts.fake
    full_update = opts.update
    run()


if __name__ == "__main__":
    main()