#!/usr/bin/env python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
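"""
Proxy collector for the ingest pipeline: watches a collector directory for
new proxy log files and streams their contents to a Kafka topic, using a
pool of worker processes to ingest files in parallel.
"""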

import json
import logging
import os
import time

from multiprocessing import Pool

from common.file_collector import FileWatcher
from common.kafka_client import KafkaProducer
from common.utils import Util

class Collector(object):

    def __init__(self, hdfs_app_path, kafka_topic, conf_type):

        self._initialize_members(hdfs_app_path, kafka_topic, conf_type)
        
    def _initialize_members(self, hdfs_app_path, kafka_topic, conf_type):

        # get parameters.
        self._logger = logging.getLogger('SPOT.INGEST.PROXY')
        self._hdfs_app_path = hdfs_app_path
        self._kafka_topic = kafka_topic

        # get script path.
        self._script_path = os.path.dirname(os.path.abspath(__file__))

        # read proxy configuration.
        conf_file = "{0}/ingest_conf.json".format(os.path.dirname(os.path.dirname(self._script_path)))
        conf = json.loads(open(conf_file).read())
        self._message_size = conf["kafka"]["message_size"]
        self._conf = conf["pipelines"][conf_type]

        # get collector path.
        self._collector_path = self._conf['collector_path']

        # get supported files.
        self._supported_files = self._conf['supported_files']

        # create collector watcher.
        self._watcher = FileWatcher(self._collector_path, self._supported_files)

        # multiprocessing.
        self._processes = conf["collector_processes"]
        self._ingestion_interval = conf["ingestion_interval"]
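        # Illustrative ingest_conf.json fragment covering the keys consumed
        # above (the values shown are examples only, not taken from the project):
        #
        # {
        #     "kafka": { "message_size": 900000 },
        #     "collector_processes": 5,
        #     "ingestion_interval": 1,
        #     "pipelines": {
        #         "proxy": {
        #             "collector_path": "/collector_path/proxy",
        #             "supported_files": ["*.log"]
        #         }
        #     }
        # }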
        self._pool = Pool(processes=self._processes)

    def start(self):

        self._logger.info("Starting PROXY collector")
        self._watcher.start()

        try:
            while True:
                self._ingest_files_pool()
                time.sleep(self._ingestion_interval)
        except KeyboardInterrupt:
            self._logger.info("Stopping Proxy collector...")
            Util.remove_kafka_topic(self._kafka_topic.Zookeeper, self._kafka_topic.Topic, self._logger)
            self._watcher.stop()
            # Terminate the workers and wait for them to exit. Pool.close()
            # must not be called after terminate(), so it is omitted here.
            self._pool.terminate()
            self._pool.join()


    def _ingest_files_pool(self):

        if self._watcher.HasFiles:
            # hand at most one file to each worker in the pool.
            for _ in range(self._processes):
                file_name = self._watcher.GetNextFile()
                result = self._pool.apply_async(ingest_file, args=(file_name, self._message_size, self._kafka_topic.Topic, self._kafka_topic.BootstrapServers))
                # result.get() # when debugging, call get() to re-raise worker exceptions.
                if not self._watcher.HasFiles:
                    break
        return True


def ingest_file(file_name, message_size, topic, kafka_servers):

    logger = logging.getLogger('SPOT.INGEST.PROXY.{0}'.format(os.getpid()))
    try:
        message = ""
        logger.info("Ingesting file: {0} process:{1}".format(file_name, os.getpid()))
        with open(file_name, "rb") as f:
            # accumulate lines until the batch exceeds message_size,
            # then flush the batch to Kafka.
            for line in f:
                message += line
                if len(message) > message_size:
                    KafkaProducer.SendMessage(message, kafka_servers, topic, 0)
                    message = ""
            # send the last, possibly partial, batch.
            KafkaProducer.SendMessage(message, kafka_servers, topic, 0)
        # remove the file once its contents have been queued.
        rm_file = "rm {0}".format(file_name)
        Util.execute_cmd(rm_file, logger)
        logger.info("File {0} has been successfully sent to Kafka Topic: {1}".format(file_name, topic))

    except Exception as err:
        logger.error("There was a problem ingesting the file, please check the following error message: {0}".format(err))