######################################################################################################################
#  Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.                                           #
#                                                                                                                    #
#  Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance    #
#  with the License. A copy of the License is located at                                                             #
#                                                                                                                    #
#      http://www.apache.org/licenses/LICENSE-2.0                                                                    #
#                                                                                                                    #
#  or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES #
#  OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions    #
#  and limitations under the License.                                                                                #
######################################################################################################################

import boto3
import csv
import gzip
import json
import logging
import datetime
import time
from os import environ, remove
from ipaddress import ip_address
from botocore.config import Config
from urllib.parse import unquote_plus, urlparse
from urllib.request import Request, urlopen
from backoff import on_exception, expo
from build_athena_queries import build_athena_query_for_app_access_logs, \
    build_athena_query_for_waf_logs

logging.getLogger().debug('Loading function')

#======================================================================================================================
# Constants/Globals
#======================================================================================================================
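# API_CALL_NUM_RETRIES: boto3 retry budget for AWS WAF API calls.
# MAX_DESCRIPTORS_PER_IP_SET_UPDATE: IPSetDescriptor changes sent per
#   update_ip_set call; longer update lists are split into slices of this size.
# DELAY_BETWEEN_UPDATES: seconds to pause between slices to reduce throttling.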
API_CALL_NUM_RETRIES = 5
MAX_DESCRIPTORS_PER_IP_SET_UPDATE = 500
DELAY_BETWEEN_UPDATES = 2

# CloudFront Access Logs
# http://docs.aws.amazon.com/AmazonCloudFront/latest/DeveloperGuide/AccessLogs.html#BasicDistributionFileFormat
LINE_FORMAT_CLOUD_FRONT = {
    'delimiter': '\t',
    'date': 0,
    'time': 1,
    'source_ip': 4,
    'uri': 7,
    'code': 8
}
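# Illustrative (not real) tab-delimited line, 0-indexed fields:
#   [0] date, [1] time, [4] c-ip, [7] cs-uri-stem, [8] sc-status
#   e.g. "2020-01-01\t00:00:05\tSEA19-C1\t3421\t192.0.2.1\tGET\td111111abcdef8.cloudfront.net\t/index.html\t200\t..."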
# ALB Access Logs
# http://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html
LINE_FORMAT_ALB = {
    'delimiter': ' ',
    'timestamp': 1,
    'source_ip': 3,
    'code': 9,  # GitHub issue #44. Changed from elb_status_code to target_status_code.
    'uri': 13
}
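# Illustrative (not real) space-delimited line, 0-indexed fields:
#   [1] timestamp, [3] client:port, [9] target_status_code,
#   [13] request URL (second token of the quoted "METHOD URL PROTOCOL" field)
#   e.g. 'https 2020-01-01T00:00:05.186641Z app/my-alb/50dc6c495c0c9188 192.0.2.1:46532 ... 200 200 ... "GET https://www.example.com:443/index.html HTTP/1.1" ...'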

waf = None
if environ['LOG_TYPE'] == 'alb':
    session = boto3.session.Session(region_name=environ['REGION'])
    waf = session.client('waf-regional', config=Config(retries={'max_attempts': API_CALL_NUM_RETRIES}))
else:
    waf = boto3.client('waf', config=Config(retries={'max_attempts': API_CALL_NUM_RETRIES}))
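# Note: AWS WAF Classic uses the regional 'waf-regional' endpoint for ALB
# deployments and the global 'waf' endpoint for CloudFront deployments.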
config = {}

#======================================================================================================================
# Auxiliary Functions
#======================================================================================================================
@on_exception(expo, waf.exceptions.WAFStaleDataException, max_time=10)
def waf_get_ip_set(ip_set_id):
    logging.getLogger().debug('[waf_get_ip_set] Start')
    response = waf.get_ip_set(IPSetId=ip_set_id)
    logging.getLogger().debug('[waf_get_ip_set] End')
    return response

@on_exception(expo, waf.exceptions.WAFStaleDataException, max_time=10)
def waf_update_ip_set(ip_set_id, updates):
    logging.getLogger().debug('[waf_update_ip_set] Start')
    response = waf.update_ip_set(IPSetId=ip_set_id,
        ChangeToken=waf.get_change_token()['ChangeToken'],
        Updates=updates)
    logging.getLogger().debug('[waf_update_ip_set] End')
    return response

def waf_commit_updates(ip_set_id, updates_list):
    logging.getLogger().debug('[waf_commit_updates] Start')
    response = None

    if len(updates_list) > 0:
        index = 0
        while index < len(updates_list):
            logging.getLogger().debug('[waf_commit_updates] Processing from index %d.'%index)
            response = waf_update_ip_set(ip_set_id, updates_list[index: index + MAX_DESCRIPTORS_PER_IP_SET_UPDATE])
            index += MAX_DESCRIPTORS_PER_IP_SET_UPDATE
            if index < len(updates_list):
                logging.getLogger().debug('[waf_commit_updates] Sleep %d sec before next slice to avoid AWS WAF API throttling ...'%DELAY_BETWEEN_UPDATES)
                time.sleep(DELAY_BETWEEN_UPDATES)

    logging.getLogger().debug('[waf_commit_updates] End')
    return response

def update_waf_ip_set(ip_set_id, outstanding_requesters):
    logging.getLogger().debug('[update_waf_ip_set] Start')
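    # Expected outstanding_requesters shape (illustrative values):
    # {
    #     'general': {'192.0.2.1': {'max_counter_per_min': 120, 'updated_at': '...'}},
    #     'uriList': {'/login': {'192.0.2.2': {'max_counter_per_min': 80, 'updated_at': '...'}}}
    # }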

    counter = 0
    try:
        if ip_set_id is None:
            logging.getLogger().info("[update_waf_ip_set] Skipping update because ip_set_id is None")
            return

        updates_list = []

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[update_waf_ip_set] \tMerge general and uriList into a single list")
        #--------------------------------------------------------------------------------------------------------------
        unified_outstanding_requesters = outstanding_requesters['general']
        for uri in outstanding_requesters['uriList'].keys():
            for k in outstanding_requesters['uriList'][uri].keys():
                if (k not in unified_outstanding_requesters.keys() or
                    outstanding_requesters['uriList'][uri][k]['max_counter_per_min'] > unified_outstanding_requesters[k]['max_counter_per_min']):
                    unified_outstanding_requesters[k] = outstanding_requesters['uriList'][uri][k]

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[update_waf_ip_set] \tTruncate [if necessary] list to respect WAF limit")
        #--------------------------------------------------------------------------------------------------------------
        if len(unified_outstanding_requesters) > int(environ['LIMIT_IP_ADDRESS_RANGES_PER_IP_MATCH_CONDITION']):
            ordered_unified_outstanding_requesters = sorted(unified_outstanding_requesters.items(), key=lambda kv: kv[1]['max_counter_per_min'], reverse=True)
            unified_outstanding_requesters = {}
            for key, value in ordered_unified_outstanding_requesters:
                if counter < int(environ['LIMIT_IP_ADDRESS_RANGES_PER_IP_MATCH_CONDITION']):
                    unified_outstanding_requesters[key] = value
                    counter += 1
                else:
                    break

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[update_waf_ip_set] \tRemove IPs that are not in current outstanding requesters list")
        #--------------------------------------------------------------------------------------------------------------
        response = waf_get_ip_set(ip_set_id)
        if response is not None:
            for k in response['IPSet']['IPSetDescriptors']:
                ip_value = k['Value'].split('/')[0]
                if ip_value not in unified_outstanding_requesters.keys():
                    ip_type = "IPV%s"%ip_address(ip_value).version
                    updates_list.append({
                        'Action': 'DELETE',
                        'IPSetDescriptor': {
                            'Type': ip_type,
                            'Value': k['Value']
                        }
                    })
                else:
                    # Don't re-insert an IP that is already blocked
                    unified_outstanding_requesters.pop(ip_value, None)

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[update_waf_ip_set] \tBlock remaining outstanding requesters")
        #--------------------------------------------------------------------------------------------------------------
        for k in unified_outstanding_requesters.keys():
            ip_type = "IPV%s"%ip_address(k).version
            prefix_len = "32" if ip_type == "IPV4" else "128"
            updates_list.append({
                'Action': 'INSERT',
                'IPSetDescriptor': {
                    'Type': ip_type,
                    'Value': "%s/%s"%(k, prefix_len)
                }
            })

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[update_waf_ip_set] \tCommit changes in WAF IP set")
        #--------------------------------------------------------------------------------------------------------------
        response = waf_commit_updates(ip_set_id, updates_list)

    except Exception as error:
        logging.getLogger().error(str(error))
        logging.getLogger().error("[update_waf_ip_set] Error to update waf ip set")

    logging.getLogger().debug('[update_waf_ip_set] End')
    return counter

def send_anonymous_usage_data():
    try:
        if 'SEND_ANONYMOUS_USAGE_DATA' not in environ or environ['SEND_ANONYMOUS_USAGE_DATA'].lower() != 'yes':
            return

        logging.getLogger().debug("[send_anonymous_usage_data] Start")

        cw = boto3.client('cloudwatch')
        usage_data = {
            "Solution": "SO0006",
            "UUID": environ['UUID'],
            "TimeStamp": str(datetime.datetime.utcnow().isoformat()),
            "Data":
            {
                "data_type" : "lop_parser",
                "scanners_probes_set_size": 0,
                "http_flood_set_size": 0,
                "allowed_requests" : 0,
                "blocked_requests_all" : 0,
                "blocked_requests_scanners_probes": 0,
                "blocked_requests_http_flood": 0,
                "waf_type" : environ['LOG_TYPE']
            }
        }

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().debug("[send_anonymous_usage_data] Get num allowed requests")
        #--------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='AllowedRequests',
                Namespace='WAF',
                Statistics=['Sum'],
                Period=12*3600,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=12*3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[
                    {
                        "Name": "Rule",
                        "Value": "ALL"
                    },
                    {
                        "Name": "WebACL",
                        "Value": environ['METRIC_NAME_PREFIX'] + 'MaliciousRequesters'
                    }
                ]
            )
            usage_data['Data']['allowed_requests'] = response['Datapoints'][0]['Sum']

        except Exception as error:
            logging.getLogger().debug("[send_anonymous_usage_data] Failed to get Num Allowed Requests")
            logging.getLogger().debug(str(error))

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[send_anonymous_usage_data] Get num blocked requests - all rules")
        #--------------------------------------------------------------------------------------------------------------
        try:
            response = cw.get_metric_statistics(
                MetricName='BlockedRequests',
                Namespace='WAF',
                Statistics=['Sum'],
                Period=12*3600,
                StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=12*3600),
                EndTime=datetime.datetime.utcnow(),
                Dimensions=[
                    {
                        "Name": "Rule",
                        "Value": "ALL"
                    },
                    {
                        "Name": "WebACL",
                        "Value": environ['METRIC_NAME_PREFIX'] + 'MaliciousRequesters'
                    }
                ]
            )
            usage_data['Data']['blocked_requests_all'] = response['Datapoints'][0]['Sum']

        except Exception as error:
            logging.getLogger().debug("[send_anonymous_usage_data] Failed to get num blocked requests - all rules")
            logging.getLogger().debug(str(error))

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().debug("[send_anonymous_usage_data] Get scanners probes data")
        #--------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_SCANNERS_PROBES' in environ:
            try:
                response = waf_get_ip_set(environ['IP_SET_ID_SCANNERS_PROBES'])
                if response != None:
                    usage_data['Data']['scanners_probes_set_size'] = len(response['IPSet']['IPSetDescriptors'])

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='WAF',
                    Statistics=['Sum'],
                    Period=12*3600,
                    StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=12*3600),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[
                        {
                            "Name": "Rule",
                            "Value": environ['METRIC_NAME_PREFIX'] + 'ScannersProbesRule'
                        },
                        {
                            "Name": "WebACL",
                            "Value": environ['METRIC_NAME_PREFIX'] + 'MaliciousRequesters'
                        }
                    ]
                )
                usage_data['Data']['blocked_requests_scanners_probes'] = response['Datapoints'][0]['Sum']

            except Exception as error:
                logging.getLogger().debug("[send_anonymous_usage_data] Failed to get scanners probes data")
                logging.getLogger().debug(str(error))

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().debug("[send_anonymous_usage_data] Get HTTP flood data")
        #--------------------------------------------------------------------------------------------------------------
        if 'IP_SET_ID_HTTP_FLOOD' in environ:
            try:
                response = waf_get_ip_set(environ['IP_SET_ID_HTTP_FLOOD'])
                if response != None:
                    usage_data['Data']['http_flood_set_size'] = len(response['IPSet']['IPSetDescriptors'])

                response = cw.get_metric_statistics(
                    MetricName='BlockedRequests',
                    Namespace='WAF',
                    Statistics=['Sum'],
                    Period=12*3600,
                    StartTime=datetime.datetime.utcnow() - datetime.timedelta(seconds=12*3600),
                    EndTime=datetime.datetime.utcnow(),
                    Dimensions=[
                        {
                            "Name": "Rule",
                            "Value": environ['METRIC_NAME_PREFIX'] + 'HttpFloodRule'
                        },
                        {
                            "Name": "WebACL",
                            "Value": environ['METRIC_NAME_PREFIX'] + 'MaliciousRequesters'
                        }
                    ]
                )
                usage_data['Data']['blocked_requests_http_flood'] = response['Datapoints'][0]['Sum']

            except Exception as error:
                logging.getLogger().debug("[send_anonymous_usage_data] Failed to get HTTP flood data")
                logging.getLogger().debug(str(error))

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[send_anonymous_usage_data] Send Data")
        #--------------------------------------------------------------------------------------------------------------
        url = 'https://metrics.awssolutionsbuilder.com/generic'
        req = Request(url, method='POST', data=bytes(json.dumps(usage_data), encoding='utf8'), headers={'Content-Type': 'application/json'})
        rsp = urlopen(req)
        rspcode = rsp.getcode()
        logging.getLogger().debug('[send_anonymous_usage_data] Response Code: {}'.format(rspcode))
        logging.getLogger().debug("[send_anonymous_usage_data] End")

    except Exception as error:
        logging.getLogger().debug("[send_anonymous_usage_data] Failed to send data")
        logging.getLogger().debug(str(error))

#======================================================================================================================
# Athena Log Parser
#======================================================================================================================
def process_athena_scheduler_event(event):
    logging.getLogger().debug('[process_athena_scheduler_event] Start')

    log_type = environ['LOG_TYPE'].upper()

    # Execute athena query for CloudFront or ALB logs
    if event['resourceType'] == 'LambdaAthenaAppLogParser' \
       and log_type in ('CLOUDFRONT', 'ALB'):
        execute_athena_query(log_type, event)

    # Execute athena query for WAF logs
    if event['resourceType'] == 'LambdaAthenaWAFLogParser':
        execute_athena_query('WAF', event)
    
    logging.getLogger().debug('[process_athena_scheduler_event] End')
    
    
def execute_athena_query(log_type, event):            
    logging.getLogger().debug('[execute_athena_query] Start')
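    # Expected event keys (illustrative values only): 'accessLogBucket',
    # 'glueAccessLogsDatabase', 'glueAppAccessLogsTable' or
    # 'glueWafAccessLogsTable', and 'athenaWorkGroup',
    # e.g. {'accessLogBucket': 'my-log-bucket', 'athenaWorkGroup': 'primary', ...}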
    
    athena_client = boto3.client('athena')
    s3_output = "s3://%s/athena_results/"%event['accessLogBucket']
    database_name = event['glueAccessLogsDatabase']
    
    # Dynamically build query string using partition
    # for CloudFront or ALB logs
    if log_type == 'CLOUDFRONT' or log_type == 'ALB':
        query_string = build_athena_query_for_app_access_logs(
            logging.getLogger(),
            log_type,
            event['glueAccessLogsDatabase'],
            event['glueAppAccessLogsTable'],
            datetime.datetime.utcnow(),
            int(environ['WAF_BLOCK_PERIOD']),
            int(environ['ERROR_THRESHOLD'])
        )
    else: # Dynamically build query string using partition for WAF logs 
        query_string = build_athena_query_for_waf_logs(
            logging.getLogger(),
            event['glueAccessLogsDatabase'],
            event['glueWafAccessLogsTable'],
            datetime.datetime.utcnow(),
            int(environ['WAF_BLOCK_PERIOD']),
            int(environ['REQUEST_THRESHOLD'])
        )

    response = athena_client.start_query_execution(
            QueryString=query_string,
            QueryExecutionContext={'Database': database_name},
            ResultConfiguration={
                'OutputLocation': s3_output,
                'EncryptionConfiguration': {
                    'EncryptionOption': 'SSE_S3'
                }
            },
            WorkGroup=event['athenaWorkGroup']
    )
    
    logging.getLogger().info("[execute_athena_query] Query Execution Response: {}".format(response))
    logging.getLogger().info('[execute_athena_query] End')


def process_athena_result(bucket_name, key_name, ip_set_id):
    logging.getLogger().debug('[process_athena_result] Start')

    try:
        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_athena_result] \tDownload file from S3")
        #--------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = boto3.client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_athena_result] \tRead file content")
        #--------------------------------------------------------------------------------------------------------------
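        # The Athena result file is expected to be a CSV with at least the
        # columns used below (illustrative row, not real data):
        #   client_ip,max_counter_per_min
        #   192.0.2.1,321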
        outstanding_requesters = {
            'general': {},
            'uriList': {}
        }
        utc_now_timestamp_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        with open(local_file_path,'r') as csvfile:
            reader = csv.DictReader(csvfile)
            for row in reader:
                # max_counter_per_min is read from the Athena result and cast
                # to int so it compares numerically; the structure mirrors the
                # lambda log parser output so update_waf_ip_set can be reused.
                outstanding_requesters['general'][row['client_ip']] = {
                    "max_counter_per_min": int(row['max_counter_per_min']),
                    "updated_at": utc_now_timestamp_str
                }
        remove(local_file_path)

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_athena_result] \tUpdate WAF IP Set")
        #--------------------------------------------------------------------------------------------------------------
        update_waf_ip_set(ip_set_id, outstanding_requesters)

    except Exception as e:
        logging.getLogger().error("[process_athena_result] \tFailed to read input file")
        logging.getLogger().error(e)

    logging.getLogger().debug('[process_athena_result] End')

#======================================================================================================================
# Lambda Log Parser
#======================================================================================================================
def load_configurations(bucket_name, key_name):
    logging.getLogger().debug('[load_configurations] Start')

    try:
        s3 = boto3.resource('s3')
        file_obj = s3.Object(bucket_name, key_name)
        file_content = file_obj.get()['Body'].read()

        global config
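        # Illustrative config shape (example values; only keys referenced in
        # this file are shown):
        # {
        #     "general": {
        #         "errorThreshold": 50,
        #         "requestThreshold": 100,
        #         "blockPeriod": 240,
        #         "errorCodes": ["400", "403", "404", "405"],
        #         "ignoredSufixes": []
        #     },
        #     "uriList": {"/login": {"errorThreshold": 50, "requestThreshold": 100}}
        # }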
        config = json.loads(file_content)

    except Exception as e:
        logging.getLogger().error("[load_configurations] \tError to read config file")
        raise e

    logging.getLogger().debug('[load_configurations] End')

def get_outstanding_requesters(bucket_name, key_name, log_type):
    logging.getLogger().debug('[get_outstanding_requesters] Start')

    counter = {
        'general': {},
        'uriList': {}
    }
    outstanding_requesters = {
        'general': {},
        'uriList': {}
    }

    try:
        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[get_outstanding_requesters] \tDownload file from S3")
        #--------------------------------------------------------------------------------------------------------------
        local_file_path = '/tmp/' + key_name.split('/')[-1]
        s3 = boto3.client('s3')
        s3.download_file(bucket_name, key_name, local_file_path)

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[get_outstanding_requesters] \tRead file content")
        #--------------------------------------------------------------------------------------------------------------
        with gzip.open(local_file_path,'r') as content:
            for line in content:
                try:
                    request_key = ""
                    uri = ""
                    return_code_index = None
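                    # request_key buckets requests per minute and source IP,
                    # e.g. "2020-01-01 00:00 192.0.2.1" (illustrative); AWS WAF
                    # log timestamps are epoch milliseconds, hence /1000.0 below.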

                    if log_type == 'waf':
                        line = line.decode()  # decode bytes to str
                        line_data = json.loads(line)

                        request_key = datetime.datetime.fromtimestamp(int(line_data['timestamp'])/1000.0).isoformat(sep='T', timespec='minutes')
                        request_key += ' ' + line_data['httpRequest']['clientIp']
                        uri = urlparse(line_data['httpRequest']['uri']).path

                    elif log_type == 'alb':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue

                        line_data = line.split(LINE_FORMAT_ALB['delimiter'])
                        request_key = line_data[LINE_FORMAT_ALB['timestamp']].rsplit(':', 1)[0]
                        request_key += ' ' + line_data[LINE_FORMAT_ALB['source_ip']].rsplit(':', 1)[0]
                        return_code_index = LINE_FORMAT_ALB['code']
                        uri = urlparse(line_data[LINE_FORMAT_ALB['uri']]).path

                    elif log_type == 'cloudfront':
                        line = line.decode('utf8')
                        if line.startswith('#'):
                            continue

                        line_data = line.split(LINE_FORMAT_CLOUD_FRONT['delimiter'])
                        request_key = line_data[LINE_FORMAT_CLOUD_FRONT['date']]
                        request_key += ' ' + line_data[LINE_FORMAT_CLOUD_FRONT['time']][:-3]
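                        # [:-3] drops ":SS" from HH:MM:SS so requests bucket per minute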
                        request_key += ' ' + line_data[LINE_FORMAT_CLOUD_FRONT['source_ip']]
                        return_code_index = LINE_FORMAT_CLOUD_FRONT['code']
                        uri = urlparse(line_data[LINE_FORMAT_CLOUD_FRONT['uri']]).path

                    else:
                        return outstanding_requesters

                    if 'ignoredSufixes' in config['general'] and uri.endswith(tuple(config['general']['ignoredSufixes'])):
                        logging.getLogger().debug("[get_outstanding_requesters] \t\tSkipping line %s. Included in ignoredSufixes."%line)
                        continue

                    if return_code_index is None or line_data[return_code_index] in config['general']['errorCodes']:
                        if request_key in counter['general'].keys():
                            counter['general'][request_key] += 1
                        else:
                            counter['general'][request_key] = 1

                        if 'uriList' in config and uri in config['uriList'].keys():
                            if uri not in counter['uriList'].keys():
                                counter['uriList'][uri] = {}

                            if request_key in counter['uriList'][uri].keys():
                                counter['uriList'][uri][request_key] += 1
                            else:
                                counter['uriList'][uri][request_key] = 1

                except Exception:
                    logging.getLogger().error("[get_outstanding_requesters] \t\tFailed to process line: %s"%line)
        remove(local_file_path)

        #--------------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[get_outstanding_requesters] \tKeep only outstanding requesters")
        #--------------------------------------------------------------------------------------------------------------
        threshold = 'requestThreshold' if log_type == 'waf' else 'errorThreshold'
        utc_now_timestamp_str = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z%z")
        for k, num_reqs in counter['general'].items():
            try:
                k = k.split(' ')[-1]
                if num_reqs >= config['general'][threshold]:
                    if k not in outstanding_requesters['general'].keys() or num_reqs > outstanding_requesters['general'][k]['max_counter_per_min']:
                        outstanding_requesters['general'][k] = {
                            'max_counter_per_min': num_reqs,
                            'updated_at': utc_now_timestamp_str
                        }
            except Exception:
                logging.getLogger().error("[get_outstanding_requesters] \t\tFailed to process outstanding requester: %s"%k)

        for uri in counter['uriList'].keys():
            for k, num_reqs in counter['uriList'][uri].items():
                try:
                    k = k.split(' ')[-1]
                    if num_reqs >= config['uriList'][uri][threshold]:
                        if uri not in outstanding_requesters['uriList'].keys():
                            outstanding_requesters['uriList'][uri] = {}

                        if k not in outstanding_requesters['uriList'][uri].keys() or num_reqs > outstanding_requesters['uriList'][uri][k]['max_counter_per_min']:
                            outstanding_requesters['uriList'][uri][k] = {
                                'max_counter_per_min': num_reqs,
                                'updated_at': utc_now_timestamp_str
                            }
                except Exception:
                    logging.getLogger().error("[get_outstanding_requesters] \t\tFailed to process outstanding requester: (%s) %s"%(uri, k))

    except Exception as e:
        logging.getLogger().error("[get_outstanding_requesters] \tError to read input file")
        logging.getLogger().error(e)

    logging.getLogger().debug('[get_outstanding_requesters] End')
    return outstanding_requesters

def merge_outstanding_requesters(bucket_name, key_name, log_type, output_key_name, outstanding_requesters):
    logging.getLogger().debug('[merge_outstanding_requesters] Start')

    force_update = False
    need_update = False
    s3 = boto3.client('s3')

    #--------------------------------------------------------------------------------------------------------------
    logging.getLogger().info("[merge_outstanding_requesters] \tCalculate Last Update Age")
    #--------------------------------------------------------------------------------------------------------------
    response = None
    try:
        response = s3.head_object(Bucket=bucket_name, Key=output_key_name)
    except Exception:
        logging.getLogger().info('[merge_outstanding_requesters] No file to be merged.')
        need_update = True
        return outstanding_requesters, need_update

    utc_last_modified = response['LastModified'].astimezone(datetime.timezone.utc)
    utc_now_timestamp = datetime.datetime.now(datetime.timezone.utc)

    utc_now_timestamp_str = utc_now_timestamp.strftime("%Y-%m-%d %H:%M:%S %Z%z")
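    # Age of the previous output file in minutes; compared against
    # MAX_AGE_TO_UPDATE below to decide whether to force a refresh.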
    last_update_age = int(((utc_now_timestamp - utc_last_modified).total_seconds())/60)

    #--------------------------------------------------------------------------------------------------------------
    logging.getLogger().info("[merge_outstanding_requesters] \tDownload current blocked IPs")
    #--------------------------------------------------------------------------------------------------------------
    local_file_path = '/tmp/' + key_name.split('/')[-1] + '_REMOTE.json'
    s3.download_file(bucket_name, output_key_name, local_file_path)

    #----------------------------------------------------------------------------------------------------------
    logging.getLogger().info("[merge_outstanding_requesters] \tProcess outstanding requesters files")
    #----------------------------------------------------------------------------------------------------------
    remote_outstanding_requesters = {
        'general': {},
        'uriList': {}
    }
    with open(local_file_path, 'r') as file_content:
        remote_outstanding_requesters = json.loads(file_content.read())
    remove(local_file_path)

    threshold = 'requestThreshold' if log_type == 'waf' else 'errorThreshold'
    try:
        if 'general' in remote_outstanding_requesters:
            for k, v in remote_outstanding_requesters['general'].items():
                try:
                    if k in outstanding_requesters['general'].keys():
                        logging.getLogger().info("[merge_outstanding_requesters] \t\tUpdating general data of BLOCK %s rule"%k)
                        outstanding_requesters['general'][k]['updated_at'] = utc_now_timestamp_str
                        if v['max_counter_per_min'] > outstanding_requesters['general'][k]['max_counter_per_min']:
                            outstanding_requesters['general'][k]['max_counter_per_min'] = v['max_counter_per_min']

                    else:
                        utc_prev_updated_at = datetime.datetime.strptime(v['updated_at'], "%Y-%m-%d %H:%M:%S %Z%z").astimezone(datetime.timezone.utc)
                        total_diff_min = ((utc_now_timestamp - utc_prev_updated_at).total_seconds())/60

                        if v['max_counter_per_min'] < config['general'][threshold]:
                            force_update = True
                            logging.getLogger().info("[merge_outstanding_requesters] \t\t%s is bellow the current general threshold"%k)

                        elif total_diff_min < config['general']['blockPeriod']:
                            logging.getLogger().debug("[merge_outstanding_requesters] \t\tKeeping %s in general"%k)
                            outstanding_requesters['general'][k] = v

                        else:
                            force_update = True
                            logging.getLogger().info("[merge_outstanding_requesters] \t\t%s expired in general"%k)

                except Exception:
                    logging.getLogger().error("[merge_outstanding_requesters] \tError merging general %s rule"%k)
    except Exception:
        logging.getLogger().error('[merge_outstanding_requesters] Failed to process general group.')

    try:
        if 'uriList' in remote_outstanding_requesters:
            if 'uriList' not in config or len(config['uriList']) == 0:
                force_update = True
                logging.getLogger().info("[merge_outstanding_requesters] \t\tCurrent config file does not contain uriList anymore")
            else:
                for uri in remote_outstanding_requesters['uriList'].keys():
                    if 'ignoredSufixes' in config['general'] and uri.endswith(tuple(config['general']['ignoredSufixes'])):
                        force_update = True
                        logging.getLogger().info("[merge_outstanding_requesters] \t\t%s is in current ignored sufixes list."%uri)
                        continue

                    for k, v in remote_outstanding_requesters['uriList'][uri].items():
                        try:
                            if uri in outstanding_requesters['uriList'].keys() and k in outstanding_requesters['uriList'][uri].keys():
                                logging.getLogger().info("[merge_outstanding_requesters] \t\tUpdating uriList (%s) data of BLOCK %s rule"%(uri, k))
                                outstanding_requesters['uriList'][uri][k]['updated_at'] = utc_now_timestamp_str
                                if v['max_counter_per_min'] > outstanding_requesters['uriList'][uri][k]['max_counter_per_min']:
                                    outstanding_requesters['uriList'][uri][k]['max_counter_per_min'] = v['max_counter_per_min']

                            else:
                                utc_prev_updated_at = datetime.datetime.strptime(v['updated_at'], "%Y-%m-%d %H:%M:%S %Z%z").astimezone(datetime.timezone.utc)
                                total_diff_min = ((utc_now_timestamp - utc_prev_updated_at).total_seconds())/60

                                if v['max_counter_per_min'] < config['uriList'][uri][threshold]:
                                    force_update = True
                                    logging.getLogger().info("[merge_outstanding_requesters] \t\t%s is bellow the current uriList (%s) threshold"%(k, uri))

                                elif total_diff_min < config['general']['blockPeriod']:
                                    logging.getLogger().debug("[merge_outstanding_requesters] \t\tKeeping %s in uriList (%s)"%(k, uri))

                                    if uri not in outstanding_requesters['uriList'].keys():
                                        outstanding_requesters['uriList'][uri] = {}

                                    outstanding_requesters['uriList'][uri][k] = v
                                else:
                                    force_update = True
                                    logging.getLogger().info("[merge_outstanding_requesters] \t\t%s expired in uriList (%s)"%(k, uri))

                        except Exception:
                            logging.getLogger().error("[merge_outstanding_requesters] \tError merging uriList (%s) %s rule"%(uri, k))
    except Exception:
        logging.getLogger().error('[merge_outstanding_requesters] Failed to process uriList group.')

    need_update = (force_update or
        last_update_age > int(environ['MAX_AGE_TO_UPDATE']) or
        len(outstanding_requesters['general']) > 0 or
        len(outstanding_requesters['uriList']) > 0)

    logging.getLogger().debug('[merge_outstanding_requesters] End')
    return outstanding_requesters, need_update

def write_output(bucket_name, key_name, output_key_name, outstanding_requesters):
    logging.getLogger().debug('[write_output] Start')

    try:
        current_data = '/tmp/' + key_name.split('/')[-1] + '_LOCAL.json'
        with open(current_data, 'w') as outfile:
            json.dump(outstanding_requesters, outfile)

        s3 = boto3.client('s3')
        s3.upload_file(current_data, bucket_name, output_key_name, ExtraArgs={'ContentType': "application/json"})
        remove(current_data)

    except Exception as e:
        logging.getLogger().error("[write_output] \tError to write output file")
        logging.getLogger().error(e)

    logging.getLogger().debug('[write_output] End')

def process_log_file(bucket_name, key_name, conf_filename, output_filename, log_type, ip_set_id):
    logging.getLogger().debug('[process_log_file] Start')

    #--------------------------------------------------------------------------------------------------------------
    logging.getLogger().info("[process_log_file] \tReading input data and get outstanding requesters")
    #--------------------------------------------------------------------------------------------------------------
    load_configurations(bucket_name, conf_filename)
    outstanding_requesters = get_outstanding_requesters(bucket_name, key_name, log_type)
    outstanding_requesters, need_update = merge_outstanding_requesters(bucket_name, key_name, log_type, output_filename, outstanding_requesters)

    if need_update:
        #----------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_log_file] \tUpdate new blocked requesters list to S3")
        #----------------------------------------------------------------------------------------------------------
        write_output(bucket_name, key_name, output_filename, outstanding_requesters)

        #----------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_log_file] \tUpdate WAF IP Set")
        #----------------------------------------------------------------------------------------------------------
        update_waf_ip_set(ip_set_id, outstanding_requesters)

    else:
        #----------------------------------------------------------------------------------------------------------
        logging.getLogger().info("[process_log_file] \tNo changes identified")
        #----------------------------------------------------------------------------------------------------------

    logging.getLogger().debug('[process_log_file] End')

#======================================================================================================================
# Lambda Entry Point
#======================================================================================================================
def lambda_handler(event, context):
    logging.getLogger().debug('[lambda_handler] Start')

    result = {}
    try:
        #------------------------------------------------------------------
        # Set Log Level
        #------------------------------------------------------------------
        log_level = str(environ['LOG_LEVEL'].upper())
        if log_level not in ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']:
            log_level = 'ERROR'
        logging.getLogger().setLevel(log_level)

        #----------------------------------------------------------
        # Process event
        #----------------------------------------------------------
        logging.getLogger().info(event)

        if "resourceType" in event:
            process_athena_scheduler_event(event)
            result['message'] = "[lambda_handler] Athena scheduler event processed."
            logging.getLogger().debug(result['message'])

        elif 'Records' in event:
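            # Illustrative S3 notification record (only the fields read below):
            # {'Records': [{'s3': {'bucket': {'name': 'my-log-bucket'},
            #                      'object': {'key': 'AWSLogs/.../file.gz'}}}]}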
            for r in event['Records']:
                bucket_name = r['s3']['bucket']['name']
                key_name = unquote_plus(r['s3']['object']['key'])

                if 'APP_ACCESS_LOG_BUCKET' in environ and bucket_name == environ['APP_ACCESS_LOG_BUCKET']:
                    if key_name.startswith('athena_results/'):
                        process_athena_result(bucket_name, key_name, environ['IP_SET_ID_SCANNERS_PROBES'])
                        result['message'] = "[lambda_handler] Athena app log query result processed."
                        logging.getLogger().debug(result['message'])

                    else:
                        conf_filename = environ['STACK_NAME'] + '-app_log_conf.json'
                        output_filename = environ['STACK_NAME'] + '-app_log_out.json'
                        log_type = environ['LOG_TYPE']
                        ip_set_id = environ['IP_SET_ID_SCANNERS_PROBES']
                        process_log_file(bucket_name, key_name, conf_filename, output_filename, log_type, ip_set_id)
                        result['message'] = "[lambda_handler] App access log file processed."
                        logging.getLogger().debug(result['message'])

                elif 'WAF_ACCESS_LOG_BUCKET' in environ and bucket_name == environ['WAF_ACCESS_LOG_BUCKET']:
                    if key_name.startswith('athena_results/'):
                        process_athena_result(bucket_name, key_name, environ['IP_SET_ID_HTTP_FLOOD'])
                        result['message'] = "[lambda_handler] Athena AWS WAF log query result processed."
                        logging.getLogger().debug(result['message'])

                    else:
                        conf_filename = environ['STACK_NAME'] + '-waf_log_conf.json'
                        output_filename = environ['STACK_NAME'] + '-waf_log_out.json'
                        log_type = 'waf'
                        ip_set_id = environ['IP_SET_ID_HTTP_FLOOD']
                        process_log_file(bucket_name, key_name, conf_filename, output_filename, log_type, ip_set_id)
                        result['message'] = "[lambda_handler] AWS WAF access log file processed."
                        logging.getLogger().debug(result['message'])

                else:
                    result['message'] = "[lambda_handler] undefined handler for bucket %s"%bucket_name
                    logging.getLogger().info(result['message'])

                send_anonymous_usage_data()

        else:
            result['message'] = "[lambda_handler] undefined handler for this type of event"
            logging.getLogger().info(result['message'])

    except Exception as error:
        logging.getLogger().error(str(error))

    logging.getLogger().debug('[lambda_handler] End')
    return result