import collections
import csv
import datetime
import itertools
import json
import logging
import math
import os
import socket
import sys
import time
from ConfigParser import ConfigParser, NoOptionError, NoSectionError
from optparse import OptionParser

import requests

home_path = ''
server_url = ''
data_dir = ''
logger = None
previous_results_filename = "previous_results.json"


def get_input_from_user():
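    """Parse command-line options and return the homepath, server_url, log_level, and datadir parameters."""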
    usage = "Usage: %prog [options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-d", "--directory",
                      action="store", dest="homepath", help="Directory to run from")
    parser.add_option("-w", "--server_url",
                      action="store", dest="server_url", help="Server Url")
    parser.add_option("-l", "--log_level",
                      action="store", dest="log_level", help="Change log verbosity(WARNING: 0, INFO: 1, DEBUG: 2)")
    (options, args) = parser.parse_args()

    params = {}

    params['homepath'] = os.getcwd() if not options.homepath else options.homepath
    params['server_url'] = 'http://127.0.0.1:8080' if not options.server_url else options.server_url
    # For calling reportCustomMetrics from '../common' directory.
    sys.path.insert(0, os.path.join(params['homepath'], 'common'))

    params['log_level'] = logging.INFO
    if options.log_level == '0':
        params['log_level'] = logging.WARNING
    elif options.log_level == '1':
        params['log_level'] = logging.INFO
    elif options.log_level >= '2':
        params['log_level'] = logging.DEBUG

    params['datadir'] = "data/"
    return params


def set_logger_config(level):
    """Set up logging according to the defined log level"""
    # Get a module-level logger
    logger_obj = logging.getLogger(__name__)
    # The logger level must be set explicitly; it defaults to logging.WARNING
    logger_obj.setLevel(level)
    # send INFO and DEBUG output to stdout instead of the default stderr
    logging_handler_out = logging.StreamHandler(sys.stdout)
    logging_handler_out.setLevel(logging.DEBUG)
    # create a logging format
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(process)d - %(threadName)s - %(levelname)s - %(message)s')
    logging_handler_out.setFormatter(formatter)
    logger_obj.addHandler(logging_handler_out)

    # duplicate WARNING and above to stderr with the same format
    logging_handler_err = logging.StreamHandler(sys.stderr)
    logging_handler_err.setLevel(logging.WARNING)
    logging_handler_err.setFormatter(formatter)
    logger_obj.addHandler(logging_handler_err)
    return logger_obj


def set_info_from_config_ini_file():
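    """Read license_key, project_name, and user_name from collectd/config.ini.

    Exports the values as environment variables and exits if any are missing.
    """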
    config_vars = {}
    if os.path.exists(os.path.join(home_path, "collectd", "config.ini")):
        try:
            parser = ConfigParser()
            parser.read(os.path.join(home_path, "collectd", "config.ini"))
            config_vars['license_key'] = parser.get('insightfinder', 'license_key')
            config_vars['project_name'] = parser.get('insightfinder', 'project_name')
            config_vars['user_name'] = parser.get('insightfinder', 'user_name')

            if len(config_vars['license_key']) == 0 or len(config_vars['project_name']) == 0 or len(
                    config_vars['user_name']) == 0:
                logger.error("Agent not correctly configured. Check config file.")
                sys.exit(1)

            # export the configured values as environment variables
            os.environ["INSIGHTFINDER_LICENSE_KEY"] = config_vars['license_key']
            os.environ["INSIGHTFINDER_PROJECT_NAME"] = config_vars['project_name']
            os.environ["INSIGHTFINDER_USER_NAME"] = config_vars['user_name']

        except (IOError, NoOptionError, NoSectionError):
            logger.error("Agent not correctly configured. Check config file.")
            sys.exit(1)
    else:
        logger.error("Agent not correctly configured. Check config file.")
        sys.exit(1)

    return config_vars


def set_from_reporting_config_json():
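    """Load reporting_config.json and derive the reporting interval (in minutes),
    the full and short hostnames, the previous end time, the collectd csv path,
    and the current date."""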
    # global hostname, hostnameShort
    report_file_name = "reporting_config.json"

    # read reporting settings from reporting_config.json
    with open(os.path.join(home_path, report_file_name), 'r') as f:
        config = json.load(f)

    reporting_interval_string = config['reporting_interval']
    # is_second_reporting = False
    if reporting_interval_string[-1:] == 's':
        # is_second_reporting = True
        reporting_interval_l = float(config['reporting_interval'][:-1])
        reporting_interval_l = float(reporting_interval_l / 60)
    else:
        reporting_interval_l = int(config['reporting_interval'])

    # keep_file_days = int(config['keep_file_days'])
    prev_endtime_l = config['prev_endtime']
    # deltaFields_l = config['delta_fields']

    hostname_l = socket.getfqdn()
    hostname_short_l = socket.gethostname().partition(".")[0]
    csvpath_l = "/var/lib/collectd/csv/" + hostname_short_l

    if not os.path.exists(csvpath_l):
        csvpath_l = "/var/lib/collectd/csv/" + hostname_l
    if not os.path.exists(csvpath_l):
        directory_list = os.listdir("/var/lib/collectd/csv")
        if len(directory_list) > 0:
            csvpath_l = "/var/lib/collectd/csv/" + directory_list[0]

    date_l = time.strftime("%Y-%m-%d")
    return reporting_interval_l, hostname_l, hostname_short_l, prev_endtime_l, csvpath_l, date_l


# delete old files from a directory, optionally filtered by file type
def remove_old_files(directory, filetype):
    now = datetime.datetime.now()
    now_time = now.time()
    # deletion only runs between these times each day
    if datetime.time(6, 30) <= now_time <= datetime.time(20, 35):
        # data directory path
        data_file_path = directory
        # data_file_path = os.path.join(homepath,datadir)
        now = time.time()
        for f in os.listdir(data_file_path):
            data_file = os.path.join(data_file_path, f)
            # check for files older than 2 days
            if os.stat(data_file).st_mtime < now - 2 * 86400:
                # only delete csv files
                if filetype is None:
                    if os.path.isfile(data_file):
                        os.remove(data_file)
                else:
                    if str(filetype) in str(os.path.splitext(data_file)[1]):
                        # print data_file
                        if os.path.isfile(data_file):
                            os.remove(data_file)


def getindex(col_name):
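    """Return the metric group id that is appended to each reported metric name."""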
    if col_name == "CPU":
        return 7001
    elif col_name == "DiskRead" or col_name == "DiskWrite":
        return 7002
    elif col_name == "DiskUsed":
        return 7003
    elif col_name == "NetworkIn" or col_name == "NetworkOut":
        return 7004
    elif col_name == "MemUsed":
        return 7005
    elif "DiskUsed" in col_name:
        return 7006
    elif "LoadAvg" in col_name:
        return 7007
    elif "Process" in col_name:
        return 7008


def update_results(lists):
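    """Persist the latest reported values to the previous-results JSON file."""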
    with open(os.path.join(home_path, data_dir + previous_results_filename), 'w') as f:
        json.dump(lists, f)


def get_previous_results():
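    """Load the previously reported values from the previous-results JSON file."""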
    with open(os.path.join(home_path, data_dir + previous_results_filename), 'r') as f:
        return json.load(f)


def set_epoch_time(reporting_interval_l, prev_endtime_l):
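    """Compute the epoch start time (in seconds) for this collection run.

    Starts one second after the recorded previous end time, or one reporting
    interval before now when no previous end time exists.
    """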
    if prev_endtime_l != "0":
        start_time = prev_endtime_l
        # pad a second after prev_end_time
        start_time_epoch_l = 1000 + long(1000 * time.mktime(time.strptime(start_time, "%Y%m%d%H%M%S")))
        # end_time_epoch = start_time_epoch_l + 1000 * 60 * reporting_interval_l
        start_time_epoch_l = start_time_epoch_l / 1000
    else:  # prev_endtime == 0
        end_time_epoch = int(time.time()) * 1000
        start_time_epoch_l = end_time_epoch - 1000 * 60 * reporting_interval_l
        start_time_epoch_l = start_time_epoch_l / 1000
    return reporting_interval_l, start_time_epoch_l, prev_endtime_l


# update prev_endtime in config file
def update_timestamp(prev_endtime_l):
    with open(os.path.join(home_path, "reporting_config.json"), 'r') as f:
        config = json.load(f)
    config['prev_endtime'] = prev_endtime_l
    with open(os.path.join(home_path, "reporting_config.json"), "w") as f:
        json.dump(config, f)


# send data to insightfinder
def send_data(metric_data_l, reporting_interval_l, hostname_l):
    if len(metric_data_l) == 0:
        return
    collectd = 'collectd'

    # assemble the request payload
    all_data = {"metricData": json.dumps(metric_data_l), "licenseKey": agent_config_vars['license_key'],
                "projectName": agent_config_vars['project_name'],
                "userName": agent_config_vars['user_name'], "instanceName": hostname_l, "insightAgentType": collectd,
                "samplingInterval": str(int(reporting_interval_l * 60))}

    json_data = json.dumps(all_data)
    # log the payload
    logger.info("JSON data to send: " + json_data + "\n" + "Number of bytes to report: " + str(
        len(bytearray(json_data))))

    custom_project_url = "/customprojectrawdata"
    url = server_url + custom_project_url
    response = requests.post(url, data=all_data)

    if response.status_code != 200:
        logger.error("POST request to " + url + " failed with status code " + str(response.status_code))
    else:
        logger.info("POST request to " + url + " succeeded.")

    return


def aggregate_results_into_raw_data(start_time_epoch_l, new_prev_endtime_epoch_l, date_l):
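    """Walk the collectd csv files and build an OrderedDict of metric values keyed by
    timestamp, returning the newest sample time and the collected raw data."""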
    raw_data_l = collections.OrderedDict()
    filenames = {'cpu/percent-active-': ['CPU'], 'memory/memory-used-': ['MemUsed'],
                 'load/load-': ['LoadAvg1', 'LoadAvg5', 'LoadAvg15'], 'df-root/percent_bytes-used-': ['DiskUsed'],
                 'processes/ps_state-blocked-': ['BlockedProcess'], 'processes/ps_state-paging-': ['PagingProcess'],
                 'processes/ps_state-running-': ['RunningProcess'],
                 'processes/ps_state-sleeping-': ['SleepingProcess'], 'processes/ps_state-stopped-': ['StoppedProcess'],
                 'processes/ps_state-zombies-': ['ZombieProcess']}

    all_latest_timestamps = []
    # Prune old files and determine whether aggregated CPU csv files should be used
    aggregate_cpu = remove_old_files_and_update_filenames(filenames)
    # Collect info from /var/lib/collectd/
    set_raw_data_from_collectd_dir(aggregate_cpu, all_latest_timestamps, date_l, filenames, new_prev_endtime_epoch_l,
                                   raw_data_l, start_time_epoch_l)
    # update the end time from the most recent sample that was loaded
    if all_latest_timestamps:
        new_prev_endtime_epoch_l = max(all_latest_timestamps)
    return new_prev_endtime_epoch_l, raw_data_l


def remove_old_files_and_update_filenames(filenames):
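    """Prune old csv files and extend the filenames map in place with per-device
    disk and interface entries.

    Returns True if aggregated CPU csv files should be used instead of cpu/percent-active.
    """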
    all_directories = os.listdir(csvpath)
    # aggregate cpu for collectd version < 5.5
    aggregate_cpu = False
    # remove old csv files in datadir
    remove_old_files(os.path.join(home_path, data_dir), 'csv')

    for each_dir in all_directories:
        # remove old collectd log files
        remove_old_files(os.path.join(csvpath, each_dir), None)

        if "disk" in each_dir:
            filenames[each_dir + "/disk_octets-"] = [each_dir +
                                                     '_DiskWrite', each_dir + '_DiskRead']
        if "interface" in each_dir:
            filenames[each_dir + "/if_octets-"] = [each_dir +
                                                   '_NetworkIn', each_dir + '_NetworkOut']

    for fEntry in os.walk(csvpath):
        if "cpu-" in fEntry[0]:
            aggregate_cpu = True
            filenames['aggregation-cpu-average/cpu-system-'] = ['CPU']

    return aggregate_cpu


def set_raw_data_from_collectd_dir(aggregate_cpu, all_latest_timestamps, date_l, filenames, new_prev_endtime_epoch_l,
                                   raw_data_l, start_time_epoch_l):
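    """Read each collectd csv file listed in filenames and merge its samples into raw_data_l."""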
    for each_file in filenames:
        if "cpu/percent-active" in each_file and aggregate_cpu:
            continue
        if "aggregation-cpu-average/cpu-system" in each_file and aggregate_cpu:
            new_prev_endtime_epoch_l = calculate_avg_cpu_values(all_latest_timestamps, each_file, filenames,
                                                                new_prev_endtime_epoch_l, raw_data_l,
                                                                start_time_epoch_l, date_l)
            aggregate_cpu = False
        else:
            new_prev_endtime_epoch_l = calculate_disk_load_values(all_latest_timestamps, each_file, filenames,
                                                                  new_prev_endtime_epoch_l, raw_data_l,
                                                                  start_time_epoch_l, date_l)


def calculate_avg_cpu_values(all_latest_timestamps, each_file, filenames, new_prev_endtime_epoch_l, raw_data_l,
                             start_time_epoch_l, date_l):
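    """Compute the CPU busy percentage from the aggregation-cpu-average system,
    user, and idle csv files and merge it into raw_data_l."""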
    try:
        csv_file_1 = open(os.path.join(csvpath, each_file + date_l))
        csv_file_2 = open(os.path.join(
            csvpath, 'aggregation-cpu-average/cpu-user-' + date_l))
        csv_file_3 = open(os.path.join(
            csvpath, 'aggregation-cpu-average/cpu-idle-' + date_l))
        reader1 = csv.reader(csv_file_1)
        reader2 = csv.reader(csv_file_2)
        reader3 = csv.reader(csv_file_3)

        for row, row1, row2 in itertools.izip(reader1, reader2, reader3):
            if reader1.line_num > 1:
                if long(int(float(row[0]))) < long(start_time_epoch_l):
                    continue
                timestamp_str = str(int(float(row[0])))
                new_prev_endtime_epoch_l = long(timestamp_str) * 1000.0
                value_list = raw_data_l.get(timestamp_str, {})
                total = float(row[1]) + float(row1[1]) + float(row2[1])
                idle = float(row2[1])
                value_list[filenames[each_file][0]] = str(
                    round((1 - float(idle / total)) * 100, 4))
                raw_data_l[timestamp_str] = value_list
        all_latest_timestamps.append(new_prev_endtime_epoch_l)

    except IOError:
        # the aggregated CPU csv files may not exist for this date
        pass
    return new_prev_endtime_epoch_l


def calculate_disk_load_values(all_latest_timestamps, each_file, filenames, new_prev_endtime_epoch_l, raw_data_l,
                               start_time_epoch_l, date_l):
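    """Read a single collectd csv file and copy its value columns into raw_data_l keyed by timestamp."""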
    try:
        csvfile = open(os.path.join(csvpath, each_file + date_l))
        reader = csv.reader(csvfile)

        for row in reader:
            if reader.line_num > 1:
                if long(int(float(row[0]))) < long(start_time_epoch_l):
                    continue
                timestamp_str = str(int(float(row[0])))
                new_prev_endtime_epoch_l = long(timestamp_str) * 1000.0
                value_list = raw_data_l.get(timestamp_str, {})
                value_list[filenames[each_file][0]] = row[1]
                if ("disk" in each_file) or ("interface" in each_file):
                    value_list[filenames[each_file][1]] = row[2]
                elif "load" in each_file:
                    value_list[filenames[each_file][1]] = row[2]
                    value_list[filenames[each_file][2]] = row[3]
                raw_data_l[timestamp_str] = value_list
        all_latest_timestamps.append(new_prev_endtime_epoch_l)
    except IOError:
        pass
    return new_prev_endtime_epoch_l


def is_str_in_keys(my_dict, my_str):
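    """Return True if my_str is a substring of any key of my_dict."""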
    for key in my_dict.keys():
        if my_str in key:
            return True
    return False


def fill_metric_data_to_send(raw_data_l, hostname_short_l):
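    """Convert the raw per-timestamp values into the list of data points sent to the
    server, returning that list and the values to persist for the next run's deltas."""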
    metric_data_l = []
    metric_list = ["CPU", "MemUsed", "DiskWrite", "DiskRead", "DiskUsed", "NetworkIn", "NetworkOut", "LoadAvg1",
                   "LoadAvg5", "LoadAvg15",
                   "BlockedProcess", "PagingProcess", "RunningProcess", "SleepingProcess", "StoppedProcess",
                   "ZombieProcess"]
    delta_fields = ["DiskRead", "DiskWrite", "NetworkIn", "NetworkOut"]

    if not os.path.isfile(os.path.join(home_path, data_dir + previous_results_filename)):
        previous_result_l = {}
    else:
        previous_result_l = get_previous_results()

    if not raw_data_l:
        logger.error("No data is reported. Exiting.")
        sys.exit()

    for each_timestamp in raw_data_l:
        data = raw_data_l[each_timestamp]
        # print "Data: " + str(data)
        this_data = {'timestamp': str(int(each_timestamp) * 1000)}
        # this_data['timestamp'] = str(int(each_timestamp) * 1000)
        disk_read = disk_write = network_in = network_out = 0  # diskused

        new_result, this_data = get_new_object_from_disk_and_network_details(data, delta_fields, disk_read, disk_write,
                                                                             hostname_short_l, metric_list, network_in,
                                                                             network_out,
                                                                             previous_result_l, this_data)
        previous_result_l = new_result
        metric_data_l.append(this_data)
    return metric_data_l, previous_result_l


def get_new_object_from_disk_and_network_details(data, delta_fields, disk_read, disk_write, hostname_short_l,
                                                 metric_list, network_in, network_out,
                                                 previous_result_l, this_data):
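    """Build one timestamped data point, scaling disk, network, and memory values by
    1/(1024*1024) and reporting delta fields as differences from the previous run."""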
    new_result = {}
    for each_metric in metric_list:
        if each_metric == "DiskWrite" or each_metric == "DiskRead" \
                or each_metric == "NetworkIn" or each_metric == "NetworkOut":
            for each_data in data:
                if "DiskWrite" in each_data:
                    disk_write += float(data[each_data])
                if "DiskRead" in each_data:
                    disk_read += float(data[each_data])
                if "NetworkIn" in each_data:
                    network_in = float(data[each_data])
                if "NetworkOut" in each_data:
                    network_out = float(data[each_data])
        if (not is_str_in_keys(data, each_metric)) and each_metric != "DiskRead" and each_metric != "DiskWrite" \
                and each_metric != "NetworkIn" and each_metric != "NetworkOut":
            final_metric_name = str(
                each_metric) + "[" + str(hostname_short_l) + "]:" + str(getindex(each_metric))
            this_data[final_metric_name] = "NaN"
            continue
        else:
            final_metric_name = str(
                each_metric) + "[" + str(hostname_short_l) + "]:" + str(getindex(each_metric))
            if each_metric == "DiskWrite":
                this_data[final_metric_name] = str(
                    float(float(disk_write) / (1024 * 1024)))
            elif each_metric == "DiskRead":
                this_data[final_metric_name] = str(
                    float(float(disk_read) / (1024 * 1024)))
            elif each_metric == "NetworkIn":
                this_data[final_metric_name] = str(
                    float(float(network_in) / (1024 * 1024)))
            elif each_metric == "NetworkOut":
                this_data[final_metric_name] = str(
                    float(float(network_out) / (1024 * 1024)))
            elif each_metric == "MemUsed":
                this_data[final_metric_name] = str(
                    float(float(data[each_metric]) / (1024 * 1024)))
            else:
                this_data[final_metric_name] = str(data[each_metric])
            new_result[final_metric_name] = this_data[final_metric_name]
            if each_metric in delta_fields:
                if final_metric_name in previous_result_l:
                    this_data[final_metric_name] = str(
                        abs(float(this_data[final_metric_name]) - float(previous_result_l[final_metric_name])))
                else:
                    this_data[final_metric_name] = "NaN"
    return new_result, this_data


# update endtime in config
def update_endtime_in_config(metric_data_l, reporting_interval_l, new_prev_endtime_epoch_l, hostname_l):
    if new_prev_endtime_epoch_l == 0:
        print "No data is reported"
    else:
        new_prev_endtimeinsec = math.ceil(long(new_prev_endtime_epoch_l) / 1000.0)
        new_prev_endtime = time.strftime(
            "%Y%m%d%H%M%S", time.localtime(long(new_prev_endtimeinsec)))
        update_timestamp(new_prev_endtime)
        send_data(metric_data_l, reporting_interval_l, hostname_l)
    return


if __name__ == "__main__":
    parameters = get_input_from_user()

    server_url = parameters['server_url']
    home_path = parameters['homepath']
    data_dir = parameters['datadir']
    log_level = parameters['log_level']
    # setting log level
    logger = set_logger_config(log_level)
    agent_config_vars = set_info_from_config_ini_file()

    new_prev_endtime_epoch = 0

    reporting_interval, hostname, hostname_short, prev_endtime, csvpath, date = set_from_reporting_config_json()

    reporting_interval, start_time_epoch, prev_endtime = set_epoch_time(reporting_interval, prev_endtime)

    new_prev_endtime_epoch, raw_data = aggregate_results_into_raw_data(start_time_epoch, new_prev_endtime_epoch, date)

    metric_data, previous_result = fill_metric_data_to_send(raw_data, hostname_short)

    update_results(previous_result)

    update_endtime_in_config(metric_data, reporting_interval, new_prev_endtime_epoch, hostname)

    sys.exit(0)