import os
import subprocess
import sys
import time

import PIL.Image as Im
import pyocr
import pyocr.builders
from PIL import Image as Im
from wand.image import Image

# File extensions (lower-case, leading dot included) that the tool accepts.
VALIDITY = [
    ".jpg",
    ".gif",
    ".png",
    ".tga",
    ".tif",
    ".bmp",
    ".pdf",
]

# Write-mode handle on the platform's null device (os.devnull, e.g. '/dev/null').
FNULL = open(os.devnull, mode='w')

# Module-level placeholder; the functions in this file work on their own
# ``path`` parameters/locals rather than on this global.
path = ""

class ArgumentMissingException(Exception):
    """Reports a wrong number of command-line arguments.

    Constructing the exception (which a bare ``raise`` of the class does
    implicitly) prints a usage line built from ``sys.argv[0]`` and then
    terminates the interpreter with exit status 1.
    """

    def __init__(self):
        program = sys.argv[0]
        usage = "usage: {} <dirname>".format(program)
        print(usage)
        # Same effect as sys.exit(1): leave the program with status 1.
        raise SystemExit(1)

class saram(object):
    """Batch OCR driver for a directory of images and PDFs.

    PDF pages are rendered to temporary ``saram_*.png`` images, every image
    has its orientation checked with the ``tesseract`` command-line tool and
    is rotated in place when needed, and the recognised text is written by
    ``tesseract`` into an ``OCR-text`` sub-directory of the input directory.
    """

    def __init__(self, path):
        """Select the OCR tool and language reported by pyocr.

        Args:
            path: Directory the instance is meant to work on; stored as
                ``self.path``.

        Exits the interpreter with status 1 when pyocr reports no tool.
        """
        ocr_language = 'eng'

        self.path = path

        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("OCR tool: %s" % self.tool)

        # Fall back to the preferred language so ``self.lang`` always exists,
        # even when the tool cannot list its languages or lists none.
        self.lang = ocr_language
        try:
            langs = self.tool.get_available_languages()
            if langs and ocr_language not in langs:
                self.lang = langs[0]
            print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
        except Exception as e:
            # Best effort: language detection failing must not abort the run.
            print("{}".format(e))

    @staticmethod
    def _list_files(path):
        """Return the names of the regular files in *path*, sorted."""
        return sorted(
            name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        )

    @staticmethod
    def _safe_name(stem):
        """Keep only the alphanumeric characters and '-' of *stem*."""
        return ''.join(ch for ch in stem if ch.isalnum() or ch == '-')

    def create_directory(self, path):
        """Create *path* (and missing parents); do nothing if it exists."""
        os.makedirs(path, exist_ok=True)

    def pdf_run(self, image_file_name, filename, path):
        """Render every page of a PDF to a grayscale PNG inside *path*.

        Args:
            image_file_name: Full path of the PDF to read.
            filename: Sanitised base name used in the generated file names.
            path: Directory receiving ``saram_<filename><page>.png``.
        """
        process_start = time.time()

        # Context managers release the Wand/ImageMagick resources even when
        # a page fails to convert.
        with Image(filename=image_file_name, resolution=300) as image_pdf:
            with image_pdf.convert("png") as image_page:
                for page, img in enumerate(image_page.sequence, start=1):
                    # Start the clock before the work so the per-page figure
                    # reflects the conversion time.
                    page_start = time.time()

                    with Image(image=img) as img_per_page:
                        img_per_page.type = 'grayscale'
                        img_per_page.depth = 8
                        img_per_page.density = 300

                        try:
                            img_per_page.level(black=0.3, white=1.0, gamma=1.5, channel=None)
                        except AttributeError as e:
                            print("Update Wand library: %s" % e)

                        img_buf = os.path.join(path, "saram_" + filename + str(page) + ".png")

                        # NOTE(review): this makes the directory world-writable;
                        # confirm that it is really required.
                        os.chmod(path, 0o777)
                        img_per_page.save(filename=img_buf)
                        size = img_per_page.size

                    page_elaboration = time.time() - page_start
                    print("page %s - size %s - process %2d sec." % (page, size, page_elaboration))

                    img.destroy()

        process_end = time.time() - process_start
        print("Total elaboration time: %s" % process_end)

    def get_rotation_info(self, filename):
        """Ask Tesseract for the orientation of an image.

        Every line of Tesseract's output is echoed to stdout.

        Args:
            filename: Path of the image to analyse.

        Returns:
            The negated value of the last 'Orientation in degrees:' line as a
            float, or None when no such line is found or ``tesseract`` cannot
            be started.
        """
        info = 'Orientation in degrees: '
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters from being split or interpreted.
        # NOTE(review): newer Tesseract releases spell this flag ``--psm``;
        # confirm against the installed version.
        command = ['tesseract', filename, '-', '-psm', '0']
        try:
            completed = subprocess.run(
                command,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,  # the report is scanned in both streams
                universal_newlines=True,
            )
        except OSError as e:
            print("tesseract could not be run: %s" % e)
            return None

        degrees = None
        for line in completed.stdout.splitlines():
            print(line)
            if info in line:
                try:
                    degrees = -float(line.split(info, 1)[1].strip())
                except ValueError:
                    # Malformed value: keep whatever was parsed before.
                    continue
        return degrees

    def fix_dpi_and_rotation(self, filename, degrees, ext):
        """Rotate the image stored at *filename* and overwrite the file.

        Despite the name only the rotation is changed. ``ext`` is accepted
        for interface compatibility and is not used.

        Args:
            filename: Path of the image to rotate in place.
            degrees: Counter-clockwise angle handed to ``Image.rotate``.
            ext: Extension of the file (unused).
        """
        print('Fixing rotation %.2f in %s...' % (degrees, filename))
        with Im.open(filename) as im1:
            # expand=True grows the canvas so that quarter turns of
            # non-square images are not cropped.
            rotated = im1.rotate(degrees, expand=True)
        # The source handle is closed before the same path is overwritten.
        rotated.save(filename)

    def main(self, path):
        """Run the complete OCR pipeline on the directory *path*.

        Steps: render PDF pages to temporary PNGs, fix the orientation of
        every image, run ``tesseract`` on every image writing the text into
        ``<path>/OCR-text/``, then delete the temporary ``saram_*`` files and
        print a summary. Only regular files are considered.
        """
        if not os.path.isdir(path):
            print("No directory : " + format(path))
            return

        directory_path = os.path.join(path, 'OCR-text')
        count = 0
        other_files = 0

        try:
            # Pass 1: turn every PDF into per-page PNG images.
            for f in self._list_files(path):
                stem, ext = os.path.splitext(f)
                if ext.lower() == ".pdf":
                    self.pdf_run(os.path.join(path, f), self._safe_name(stem), path)

            # Pass 2: orientation fix for every image (PDFs are skipped).
            for f in self._list_files(path):
                ext = os.path.splitext(f)[1]
                if ext.lower() == ".pdf" or ext.lower() not in VALIDITY:
                    continue
                image_file_name = os.path.join(path, f)
                degrees = self.get_rotation_info(image_file_name)
                print(degrees)
                if degrees:
                    self.fix_dpi_and_rotation(image_file_name, degrees, ext)

            # Pass 3: re-check the orientation, then OCR every image.
            for f in self._list_files(path):
                stem, ext = os.path.splitext(f)

                if ext.lower() not in VALIDITY:
                    other_files += 1
                    continue

                if count == 0:
                    self.create_directory(directory_path)
                count += 1

                if ext.lower() == ".pdf":
                    # PDFs are counted here; their text comes from the page
                    # images rendered in pass 1.
                    continue

                image_file_name = os.path.join(path, f)
                text_file_path = os.path.join(directory_path, self._safe_name(stem))

                degrees = self.get_rotation_info(image_file_name)
                if degrees:
                    self.fix_dpi_and_rotation(image_file_name, degrees, ext)

                # Tesseract's stdout is discarded.
                subprocess.call(["tesseract", image_file_name, text_file_path], stdout=subprocess.DEVNULL)

                print(str(count) + (" file" if count == 1 else " files") + " processed")
        finally:
            # Remove the temporary page images even if a step above failed.
            for f in self._list_files(path):
                if f.startswith("saram_"):
                    os.remove(os.path.join(path, f))

        if count + other_files == 0:
            print("No files found")
        else:
            print(str(count) + " / " + str(count + other_files) + " files converted")

def start():
    """Command-line entry point: ``<script> <dirname>``.

    Expects exactly one argument after the script name. The directory is
    converted to an absolute path and handed both to the ``saram``
    constructor and to its ``main`` method. With any other argument count
    ``ArgumentMissingException`` is raised.
    """
    arguments = sys.argv
    if len(arguments) != 2:
        raise ArgumentMissingException

    # Absolute, normalised form of the directory given on the command line.
    target_dir = os.path.abspath(arguments[1])

    runner = saram(target_dir)
    runner.main(target_dir)