############################################################################
#                                                                          #
# Copyright (c) 2017 eBay Inc.                                             #
# Modifications copyright (c) 2019-2020 Anders Berkeman                    #
# Modifications copyright (c) 2018-2020 Carl Drougge                       #
#                                                                          #
# Licensed under the Apache License, Version 2.0 (the "License");          #
# you may not use this file except in compliance with the License.         #
# You may obtain a copy of the License at                                  #
#                                                                          #
#  http://www.apache.org/licenses/LICENSE-2.0                              #
#                                                                          #
# Unless required by applicable law or agreed to in writing, software      #
# distributed under the License is distributed on an "AS IS" BASIS,        #
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
# See the License for the specific language governing permissions and      #
# limitations under the License.                                           #
#                                                                          #
############################################################################

from __future__ import print_function
from __future__ import division

import sys
import argparse
import socket
import traceback
import signal
import os
import resource
import time
from stat import S_ISSOCK
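# Lock is deliberately imported twice under different names: TLock is used
# for the result-collection lock, JLock for the per-cookie job locks.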
from threading import Thread, Lock as TLock, Lock as JLock
from string import ascii_letters
import random
import atexit

from accelerator.compat import unicode

from accelerator.web import ThreadedHTTPServer, ThreadedUnixHTTPServer, BaseWebHandler

from accelerator import autoflush
from accelerator import control
from accelerator.extras import json_encode, json_decode, DotDict
from accelerator.build import JobError
from accelerator.status import statmsg_sink, children, print_status_stacks, status_stacks_export
from accelerator import iowrapper



DEBUG_WRITE_JSON = False


def gen_cookie(size=16):
	return ''.join(random.choice(ascii_letters) for _ in range(size))

# Maps subjob_cookie -> DotDict(lock, last_error, last_time, workdir)
# for every tracked job; the main job has cookie None.
job_tracking = {None: DotDict(lock=JLock(), last_error=None, last_time=0, workdir=None)}
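# An in-flight subjob entry might look like this (illustrative values):
#   job_tracking['kXenrqWpBTAcIjKa'] = DotDict(lock=JLock(), last_error=None, last_time=0, workdir='dev')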


# This needs .ctrl (the control.Main instance) to work. It is set from main().
class XtdHandler(BaseWebHandler):
	server_version = "scx/0.1"
	DEBUG = False

	def log_message(self, format, *args):
		return

	def encode_body(self, body):
		if isinstance(body, bytes):
			return body
		if isinstance(body, unicode):
			return body.encode('utf-8')
		return json_encode(body)

	def handle_req(self, path, args):
		if self.DEBUG:  print("@server.py:  handle_req, path = \"%s\", args = %s" % (path, args), file=sys.stderr)
		try:
			self._handle_req(path, args)
		except Exception:
			traceback.print_exc()
			self.do_response(500, "text/plain", "ERROR")

	def _handle_req(self, path, args):
		if path[0] == 'status':
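			# Clients long-poll this while a build runs, e.g. (illustrative):
			#   GET /status?subjob_cookie=...&timeout=10
			# idle=True means nothing is building; 'status/full' additionally
			# returns the status stacks of whatever is running.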
			data = job_tracking.get(args.get('subjob_cookie') or None)
			if not data:
				self.do_response(400, 'text/plain', 'bad subjob_cookie!\n')
				return
			timeout = min(float(args.get('timeout', 0)), 128)
			status = DotDict(idle=data.lock.acquire(False))
			deadline = time.time() + timeout
			while not status.idle and time.time() < deadline:
				time.sleep(0.1)
				status.idle = data.lock.acquire(False)
			if status.idle:
				if data.last_error:
					status.last_error = data.last_error
					data.last_error = None
				else:
					status.last_time = data.last_time
				data.lock.release()
			elif path == ['status', 'full']:
				status.status_stacks, status.current = status_stacks_export()
			self.do_response(200, "text/json", status)
			return

		elif path == ['list_workdirs']:
			ws = {k: v.path for k, v in self.ctrl.list_workdirs().items()}
			self.do_response(200, "text/json", ws)

		elif path == ['config']:
			self.do_response(200, "text/json", self.ctrl.config)

		elif path == ['update_methods']:
			self.do_response(200, "text/json", self.ctrl.update_methods())

		elif path == ['methods']:
			# Return json with everything the Method object knows about the methods.
			self.do_response(200, "text/json", self.ctrl.get_methods())

		elif path[0] == 'method_info':
			method = path[1]
			self.do_response(200, "text/json", self.ctrl.method_info(method))

		elif path[0] == 'workspace_info':
			self.do_response(200, 'text/json', self.ctrl.get_workspace_details())

		elif path[0] == 'abort':
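			# 'children' holds the process group ids of every running job;
			# SIGKILL each group to take the whole build down.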
			tokill = list(children)
			print('Force abort', tokill)
			for child in tokill:
				os.killpg(child, signal.SIGKILL)
			self.do_response(200, 'text/json', {'killed': len(tokill)})

		elif path == ['submit']:
			if self.ctrl.broken:
				# Method modules are named a_<method>.py; strip the prefix for display.
				self.do_response(500, "text/json", {
					'broken': self.ctrl.broken,
					'error': 'Broken methods: ' + ', '.join(sorted(m.split('.')[-1][2:] for m in self.ctrl.broken)),
				})
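			# The posted json (normally produced by accelerator.build) carries the
			# build request; the fields used here are 'subjob_cookie', 'workdir'
			# and 'parent_pid', besides the jobs themselves.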
			elif 'json' in args:
				if DEBUG_WRITE_JSON:
					with open('DEBUG_WRITE.json', 'wb') as fh:
						fh.write(args['json'])
				setup = json_decode(args['json'])
				data = job_tracking.get(setup.get('subjob_cookie') or None)
				if not data:
					self.do_response(403, 'text/plain', 'bad subjob_cookie!\n')
					return
				if len(job_tracking) - 1 > 5: # max five levels
					print('Too deep subjob nesting!')
					self.do_response(403, 'text/plain', 'Too deep subjob nesting')
					return
				if data.lock.acquire(False):
					still_locked = True
					respond_after = True
					try:
						if self.DEBUG:  print('@server.py:  Got the lock!', file=sys.stderr)
						workdir = setup.get('workdir', data.workdir)
						jobidv, job_res = self.ctrl.initialise_jobs(setup, workdir)
						job_res['done'] = False
						if jobidv:
							error = []
							tlock = TLock()
							link2job = {j['link']: j for j in job_res['jobs'].values()}
							def run(jobidv, tlock):
								for jobid in jobidv:
									passed_cookie = None
									# This is not a race - all higher locks are locked too.
									while passed_cookie in job_tracking:
										passed_cookie = gen_cookie()
									job_tracking[passed_cookie] = DotDict(
										lock=JLock(),
										last_error=None,
										last_time=0,
										workdir=workdir,
									)
									try:
										self.ctrl.run_job(jobid, subjob_cookie=passed_cookie, parent_pid=setup.get('parent_pid', 0))
										# update database since a new jobid was just created
										job = self.ctrl.add_single_jobid(jobid)
										with tlock:
											link2job[jobid]['make'] = 'DONE'
											link2job[jobid]['total_time'] = job.total
									except JobError as e:
										error.append([e.jobid, e.method, e.status])
										with tlock:
											link2job[jobid]['make'] = 'FAIL'
										return
									finally:
										del job_tracking[passed_cookie]
								# everything was built ok, update symlink
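								# Atomic-replace idiom: symlink under a temporary name,
								# then rename() over "<workdir>-LATEST" so the link is
								# never missing or dangling, even if we crash here.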
								try:
									dn = self.ctrl.workspaces[workdir].path
									ln = os.path.join(dn, workdir + "-LATEST_")
									try:
										os.unlink(ln)
									except OSError:
										pass
									os.symlink(jobid, ln)
									os.rename(ln, os.path.join(dn, workdir + "-LATEST"))
								except OSError:
									traceback.print_exc()
							t = Thread(target=run, name="job runner", args=(jobidv, tlock,))
							t.daemon = True
							t.start()
							t.join(2) # give job two seconds to complete
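							# Builds that finish within the two seconds get a complete
							# response below; slower ones get a partial response and the
							# client polls /status until everything is done.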
							with tlock:
								for j in link2job.values():
									if j['make'] in (True, 'FAIL',):
										respond_after = False
										job_res_json = json_encode(job_res)
										break
							if not respond_after: # not all jobs are done yet, give partial response
								self.do_response(200, "text/json", job_res_json)
							t.join() # wait until actually complete
							del tlock
							del t
							# verify that all jobs got built.
							total_time = 0
							for j in link2job.values():
								jobid = j['link']
								if j['make'] == True:
									# Well, crap.
									error.append([jobid, "unknown", {"INTERNAL": "Not built"}])
									print("INTERNAL ERROR IN JOB BUILDING!", file=sys.stderr)
								total_time += j.get('total_time', 0)
							data.last_error = error
							data.last_time = total_time
					except Exception as e:
						if respond_after:
							data.lock.release()
							still_locked = False
							self.do_response(500, "text/json", {'error': str(e)})
						raise
					finally:
						if still_locked:
							data.lock.release()
					if respond_after:
						job_res['done'] = True
						self.do_response(200, "text/json", job_res)
					if self.DEBUG:  print("@server.py:  Process releases lock!", file=sys.stderr) # note: has already done http response
				else:
					self.do_response(503, 'text/plain', 'Busy doing work for you...\n')
			else:
				self.do_response(400, 'text/plain', 'Missing json input!\n')
		else:
			self.do_response(404, 'text/plain', 'Unknown path\n')
			return


def exitfunction(*a):
	if a != (DeadlyThread,): # if not called from a DeadlyThread
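		# Ignore further signals so the kills below can not re-enter us.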
		signal.signal(signal.SIGTERM, signal.SIG_IGN)
		signal.signal(signal.SIGINT, signal.SIG_IGN)
	print()
	print('The deathening! %d %s' % (os.getpid(), children,))
	print()
	for child in children:
		os.killpg(child, signal.SIGKILL)
	time.sleep(0.16) # give iowrapper a chance to output our last words
	os.killpg(os.getpgid(0), signal.SIGKILL)
	os._exit(1) # we really should be dead already

# A Thread subclass that takes down the whole server if the thread dies.
class DeadlyThread(Thread):
	def run(self):
		try:
			Thread.run(self)
		except Exception:
			traceback.print_exc()
		finally:
			print("Thread %r died. That's bad." % (self.name,))
			exitfunction(DeadlyThread)


def check_socket(fn):
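	# Refuse to start if something is already accepting connections on fn;
	# otherwise remove a stale socket file so we can bind it ourselves.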
	dn = os.path.dirname(fn)
	try:
		os.mkdir(dn, 0o750)
	except OSError:
		pass
	try:
		s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
		try:
			s.connect(fn)
		finally:
			s.close()
	except socket.error:
		try:
			assert S_ISSOCK(os.lstat(fn).st_mode), fn + " exists as non-socket"
			os.unlink(fn)
		except OSError:
			pass
		return
	raise Exception("Socket %s already listening" % (fn,))

def siginfo(sig, frame):
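	# Triggered by SIGUSR1 (and SIGINFO where available): print what every
	# job is currently doing, without interrupting anything.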
	print_status_stacks()

def main(argv, config):
	parser = argparse.ArgumentParser(prog=argv.pop(0))
	parser.add_argument('--debug', action='store_true')
	options = parser.parse_args(argv)

	# all forks belong to the same happy family
	try:
		os.setpgrp()
	except OSError:
		print("Failed to create process group - there is probably already one (daemontools).", file=sys.stderr)

	# Set a low (but not too low) open file limit to make
	# dispatch.update_valid_fds faster.
	# The runners will set the highest limit they can
	# before actually running any methods.
	r1, r2 = resource.getrlimit(resource.RLIMIT_NOFILE)
	r1 = min(r1, r2, 1024)
	resource.setrlimit(resource.RLIMIT_NOFILE, (r1, r2))

	iowrapper.main()

	# Set up the statmsg sink and publish its fd to children via the environment.
	statmsg_rd, statmsg_wr = socket.socketpair(socket.AF_UNIX, socket.SOCK_DGRAM)
	os.environ['BD_STATUS_FD'] = str(statmsg_wr.fileno())
	def buf_up(fh, opt):
		sock = socket.fromfd(fh.fileno(), socket.AF_UNIX, socket.SOCK_DGRAM)
		sock.setsockopt(socket.SOL_SOCKET, opt, 256 * 1024)
		# does not close fh, because fromfd dups the fd (but not the underlying socket)
		sock.close()
	buf_up(statmsg_wr, socket.SO_SNDBUF)
	buf_up(statmsg_rd, socket.SO_RCVBUF)
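	# (bigger buffers so bursts of status messages from many children fit)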

	t = DeadlyThread(target=statmsg_sink, args=(statmsg_rd,), name="statmsg sink")
	t.daemon = True
	t.start()

	# Do the main work: wrap output, install signal handlers and run the server.
	sys.stdout = autoflush.AutoFlush(sys.stdout)
	sys.stderr = autoflush.AutoFlush(sys.stderr)
	atexit.register(exitfunction)
	signal.signal(signal.SIGTERM, exitfunction)
	signal.signal(signal.SIGINT, exitfunction)

	signal.signal(signal.SIGUSR1, siginfo)
	signal.siginterrupt(signal.SIGUSR1, False)
	if hasattr(signal, 'SIGINFO'):
		signal.signal(signal.SIGINFO, siginfo)
		signal.siginterrupt(signal.SIGINFO, False)

	if isinstance(config.listen, tuple):
		server = ThreadedHTTPServer(config.listen, XtdHandler)
	else:
		check_socket(config.listen)
		# We want the socket to be world writable; the directory permissions protect it instead.
		u = os.umask(0)
		server = ThreadedUnixHTTPServer(config.listen, XtdHandler)
		os.umask(u)

	if config.get('urd_listen') == 'local':
		from accelerator import urd
		t = DeadlyThread(target=urd.main, args=(['urd', '--quiet', '--allow-passwordless'], config), name='urd')
		t.daemon = True
		t.start()

	ctrl = control.Main(config, options, config.url)
	print()
	ctrl.print_workdirs()
	print()

	XtdHandler.ctrl = ctrl
	job_tracking[None].workdir = ctrl.target_workdir

	for n in ("project_directory", "result_directory", "input_directory", "urd_listen"):
		if n == "urd_listen":
			dispn = "urd"
		else:
			dispn = n.replace("_", " ")
		print("%17s: %s" % (dispn, config.get(n),))
	print()

	print("Serving on %s\n" % (config.listen,), file=sys.stderr)
	server.serve_forever()
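
# main() is called from the accelerator entry point (the "ax server" command
# in current releases) with argv and the already-parsed configuration.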