"""pyres jobs for ggtracker: fetch StarCraft II replays (.SC2Replay) and
battle.net game summaries (.s2gs), parse them into ESDB, stash artifacts in
S3, and hand off to the Ruby PostParse jobs via resque."""

from StringIO import StringIO
from datetime import datetime
import hashlib
import json
import logging
import os
import re
import sys
import traceback
import urllib
import urllib2

from ggtracker.utils import django_setup
from django.conf import settings
from sc2parse import ggfactory

# S3
import boto
from boto.s3.key import Key

from pyres import ResQ

from sc2parse.sc2reader_to_esdb import SC2ReaderToEsdb

# Set up failure backends for pyres
from pyres import failure
from pyres.failure.multiple import MultipleBackend
from pyres.failure.redis import RedisBackend
failure.backend = MultipleBackend
failure.backend.classes = [RedisBackend]


class ParseReplay():

    @staticmethod
    def perform(args):
        performStart = datetime.now()
        md5 = None
        replayDB = None
        try:
            sc2reader_to_esdb = SC2ReaderToEsdb()

            # At this point the 'hash' may actually be an S3 key like
            # '/uploads/1234-5667-1234234/filename.sc2replay', or simply
            # '{md5}'. Not to worry; a few lines down we rename the S3 key
            # to md5.SC2Replay.
            filename = args['hash']
            # Note the escaped dot and anchor: a bare '.sc2replay' pattern
            # would match any character before 'sc2replay'.
            if re.search(r'\.sc2replay$', filename, re.IGNORECASE) is None:
                filename = filename + ".SC2Replay"
            bucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\
                         .get_bucket(settings.REPLAY_BUCKET_NAME)
            # logging.getLogger("jobs").info("trying to get key {}".format(filename))
            k = bucket.get_key(filename)
            replaystring = k.get_contents_as_string()
            md5 = hashlib.md5(replaystring).hexdigest()

            # Rename the S3 key to simply md5.SC2Replay, so it's easier for
            # us to find when we need it.
            # http://stackoverflow.com/questions/2481685/amazon-s3-boto-how-to-rename-a-file-in-a-bucket
            k.copy(settings.REPLAY_BUCKET_NAME, md5 + ".SC2Replay", metadata=None, preserve_acl=False)

            replayDB, blob = sc2reader_to_esdb.processReplay(StringIO(replaystring), args['channel'])

            if len(blob) > 0:
                blobbucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\
                                 .get_bucket(settings.BLOB_BUCKET_NAME)
                k = Key(blobbucket)
                k.key = "%i" % (replayDB.match.id)
                blobdump = json.dumps(blob)
                k.set_contents_from_string(blobdump)

        except Exception as e:
            tb = traceback.format_exc()
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logging.getLogger("jobs").info("parsing failed for replay {}. oh well. exception={}. {} {} {} {}".format(args['hash'].encode('ascii'), e, exc_type, fname, exc_tb.tb_lineno, tb))

        finally:
            alldone = datetime.now()

            # Enqueue the ruby PostParse job, always.
            ResQ(server=settings.REDIS_SERVER).enqueue_from_string('ESDB::Jobs::Sc2::Replay::PostParse', 'replays-high', {
                'uuid': args['uuid'],
                'hash': md5,
                'provider_id': str(args['provider_id']),
                'ggtracker_received_at': args['ggtracker_received_at'],
                'esdb_received_at': args['esdb_received_at'],
                'preparse_received_at': args['preparse_received_at'],
                'jobspy_received_at': performStart.strftime('%s.%f'),
                'jobspy_done_at': alldone.strftime('%s.%f'),
            })
            # Regarding converting times to floating-point seconds since the
            # epoch: using %s above is dangerous because it's not Python, it
            # calls the underlying OS. I tried the solution at
            # http://stackoverflow.com/questions/6999726/python-getting-millis-since-epoch-from-datetime/11111177#11111177
            # but ran into timezone issues and did the lazy thing instead.

            matchId = 0
            if replayDB and hasattr(replayDB, "match") and replayDB.match.id:
                matchId = replayDB.match.id
            logging.getLogger("jobs").info("all done with match {}. total time in ParseReplay.perform() = {}".format(matchId, alldone - performStart))
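
# Illustrative only: in production these jobs are enqueued by the Ruby ESDB
# app over resque, so the Python-side enqueue below is a sketch. The module
# path 'jobs.ParseReplay', the queue name, and the literal argument values
# are assumptions (anything perform() reads from args must be supplied); the
# call mirrors the enqueue_from_string() handoff used for PostParse above.
#
#   ResQ(server=settings.REDIS_SERVER).enqueue_from_string(
#       'jobs.ParseReplay', 'replays-high', {
#           'hash': '/uploads/1234-5667-1234234/filename.sc2replay',
#           'channel': 'web',
#           'uuid': '00000000-0000-0000-0000-000000000000',
#           'provider_id': 1,
#           'ggtracker_received_at': '0.0',
#           'esdb_received_at': '0.0',
#           'preparse_received_at': '0.0',
#       })
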
class ParseSummary():

    @staticmethod
    def perform(args):
        try:
            sc2reader_to_esdb = SC2ReaderToEsdb()
            filename = args['hash'] + '.s2gs'
            gateway = args['gateway']
            if gateway == 'sea':
                gateway = 'sg'

            # Retrieve it from the battle.net depot.
            depoturl = 'http://{0}.depot.battle.net:1119/{1}'.format(gateway, filename)
            try:
                s2gsfile = urllib2.urlopen(depoturl).read()
            except Exception:
                logging.getLogger("jobs").info("couldn't retrieve {} s2gs hash {}. maybe it's bad.".format(gateway, args['hash']))
                return None

            # Save it in S3, because we are pack rats.
            bucket = boto.connect_s3(settings.AWS_ACCESS_KEY_ID, settings.AWS_SECRET_ACCESS_KEY)\
                         .get_bucket(settings.S2GS_BUCKET_NAME)
            k = Key(bucket)
            k.key = filename
            k.set_contents_from_string(s2gsfile)

            # Parse it and write the results to the DB.
            summaryDB = sc2reader_to_esdb.processSummary(StringIO(s2gsfile), args['hash'])

        except Exception as e:
            tb = traceback.format_exc()
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logging.getLogger("jobs").info("parsing failed for s2gs {}. oh well. exception={}. {} {} {} {}".format(args['hash'], e, exc_type, fname, exc_tb.tb_lineno, tb))

        finally:
            # Enqueue the ruby PostParse job, always!
            ResQ(server=settings.REDIS_SERVER).enqueue_from_string('ESDB::Jobs::Sc2::Summary::PostParse', 'summaries-high', {
                'hash': args['hash']
            })


class ComputeStats():

    @staticmethod
    def perform(args):
        try:
            # Default RACK_ENV to 'development' and remember the value.
            env = os.environ['RACK_ENV'] = os.environ.get('RACK_ENV', 'development')
            match_id = args['match_id']
            sc2reader_to_esdb = SC2ReaderToEsdb()

            ggthost = 'ggtracker.com'
            blobenv = 'prod'
            if env == 'development':
                ggthost = 'localhost:3000'
                blobenv = 'dev'

            replay = ggfactory.load_replay("http://{}/matches/{}/replay".format(ggthost, match_id))

            url = urllib.urlopen("http://gg2-matchblobs-{}.s3.amazonaws.com/{}".format(blobenv, match_id))
            mb_string = url.read()
            blob = json.loads(mb_string)
            # JSON object keys are always strings; mirror each stat series
            # under int keys so downstream code can index by integer ident id.
            unicode_ident_ids = blob['MineralsCollectionRate'].keys()
            for blobkey in ['MineralsCollectionRate', 'VespeneCollectionRate', 'WorkersActiveCount']:
                for ident_id in unicode_ident_ids:
                    blob[blobkey][int(ident_id)] = blob[blobkey][ident_id]

            sc2reader_to_esdb.reprocessEntityStatsForAllPlayers(replay, blob)

        except Exception as e:
            tb = traceback.format_exc()
            exc_type, exc_obj, exc_tb = sys.exc_info()
            fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
            logging.getLogger("jobs").info("stats computation failed for match {}. oh well. exception={}. {} {} {} {}".format(args['match_id'], e, exc_type, fname, exc_tb.tb_lineno, tb))
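

# Minimal local-worker sketch, assuming pyres's stock Worker entry point and
# that this module is importable by the worker process. The queue names are
# assumptions taken from the enqueue calls above; real deployments start
# workers from a process supervisor rather than from this module.
if __name__ == '__main__':
    from pyres.worker import Worker
    # Blocks forever, polling the listed queues on the configured redis.
    Worker.run(['replays-high', 'summaries-high'], server=settings.REDIS_SERVER)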