#!/usr/bin/python
# forked from https://github.com/We-Neusoft/Scripts
"""Mirror the file tree published under ``base_url`` into ``out_dir``.

The script walks the HTML directory listings served below ``base_url``,
follows every link that ends in ``/`` as a sub-directory and downloads every
other link as a file.  A file is downloaded again only when its local size or
modification time differs from the ``Content-Length`` / ``Last-Modified``
headers sent by the server.

The code runs unchanged on Python 2 and Python 3.
"""

import re
from calendar import timegm
from contextlib import closing
from os import makedirs, path, stat, utime
from time import mktime, strptime  # mktime is no longer used; name kept importable
from xml.etree import ElementTree  # unused here; name kept importable

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2
    from urllib import urlopen, urlretrieve

try:  # Python 2
    from string import rfind
except ImportError:  # Python 3 dropped string.rfind; the str method is equivalent
    rfind = str.rfind

base_url = 'http://downloads.openwrt.org/snapshots'
out_dir = '/data/mirrors/openwrt/snapshots'

# Extracts the target of every <a href="..."> in a directory listing.
pattern = r'<a href="(.*?)">'

# Layout of the HTTP ``Last-Modified`` header (always expressed in GMT).
HTTP_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S %Z'


def _parse_http_date(value):
    """Return the POSIX timestamp for an HTTP date string such as
    ``'Sun, 09 Sep 2001 01:46:40 GMT'``.

    ``timegm`` is used instead of ``mktime`` because the header is in GMT;
    ``mktime`` would interpret the broken-down time as local time and shift
    the result by the host's UTC offset.

    NOTE: on a host whose timezone is not UTC, files mirrored by the old
    ``mktime`` based code carry shifted mtimes and will be fetched once more.
    """
    return timegm(strptime(value, HTTP_DATE_FORMAT))


def _is_child_link(item):
    """Return True when *item* (an href from a listing) names an entry
    directly below the directory being listed.

    Rejected: empty hrefs, anything with a ``.`` or ``..`` path segment
    (the listing is remote input and must not be able to escape ``out_dir``),
    absolute paths, full URLs, and query-string / fragment links.
    """
    if not item:
        return False
    if item.startswith(('/', '?', '#')) or '://' in item:
        return False
    return not any(segment in ('.', '..') for segment in item.split('/'))


def download(filename, last_modified):
    """Download ``base_url + filename`` to ``out_dir + filename``.

    filename      -- path relative to ``base_url``, starting with ``/``.
    last_modified -- POSIX timestamp stored as the file's atime and mtime.

    The target directory must already exist.
    """
    target = out_dir + filename
    print('Downloading ' + filename)
    urlretrieve(base_url + filename, target)
    utime(target, (last_modified, last_modified))
    # Re-check against the server's current headers; if they no longer match
    # what was just written, process() downloads the file again.
    # NOTE: there is no retry limit on this re-check.
    process(filename)


def retrieve(filepath):
    """Return the body of ``base_url + filepath`` as text.

    Returns None, without touching the network, when *filepath* contains
    ``../``.
    """
    if '../' in filepath:
        return None
    with closing(urlopen(base_url + filepath)) as handle:
        body = handle.read()
    if not isinstance(body, str):
        # Python 3 yields bytes; the regular expression in fetch() needs text.
        body = body.decode('utf-8', 'replace')
    return body


def process(filename, size=-1):
    """Bring the local copy of *filename* up to date.

    filename -- path relative to ``base_url``, starting with ``/``.
    size     -- if a local file of exactly this many bytes exists, the file
                is skipped without contacting the server.  The default of -1
                never matches, so the server is always consulted.

    Raises TypeError when the server omits ``Content-Length`` or
    ``Last-Modified``.
    """
    target = out_dir + filename
    if path.isfile(target) and stat(target).st_size == size:
        print('Skipping: ' + filename)
        return

    print('Processing: ' + filename)
    # Only the headers are needed; close the connection right away instead of
    # leaving the response body unread.
    with closing(urlopen(base_url + filename)) as handle:
        headers = handle.info()
    content_length = int(headers.get('Content-Length'))
    last_modified = _parse_http_date(headers.get('Last-Modified'))

    slash = rfind(filename, '/')
    # slash == 0 means the file sits directly in the mirror root.
    target_dir = out_dir + filename[:slash] if slash > 0 else out_dir
    if not path.isdir(target_dir):
        print('Creating ' + target_dir)
        makedirs(target_dir)

    if path.isfile(target):
        info = stat(target)
        if info.st_mtime == last_modified and info.st_size == content_length:
            print('Skipping: ' + filename)
            return
    download(filename, last_modified)


def fetch(dir):
    """Recursively mirror the remote directory *dir*.

    dir -- path relative to ``base_url``, ending with ``/``.

    Links ending in ``/`` are descended into; all other accepted links are
    handed to process().  Does nothing when retrieve() refuses the path.
    """
    listing = retrieve(dir)
    if listing is None:
        return
    for item in re.findall(pattern, listing):
        if not _is_child_link(item):
            continue
        if item.endswith('/'):
            fetch(dir + item)
        else:
            process(dir + item)


if __name__ == '__main__':
    fetch('/')