python source code of __init_

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
    Lector utils

    Copyright (C) 2011-2013 Davide Setti, Zdenko Podobný
    Website: http://code.google.com/p/lector

    This program is released under the GNU GPLv2

"""

import sys
import os

from glob import glob
from subprocess import Popen, PIPE
from PyQt5.QtGui import QImage

# Workaround to run textwidget outside of Lector: put the directory above
# this file at the front of the import search path (only once).
CMD_FOLDER = os.path.dirname(os.path.abspath(__file__)) + "/../"
if sys.path.count(CMD_FOLDER) == 0:
    sys.path.insert(0, CMD_FOLDER)

from utils import settings


def pilImage2Qt(im):
    """
    Convert a PIL image to a QImage.

    The image is converted to RGB mode if necessary, encoded as JPEG in
    memory and loaded into a new QImage.

    im: PIL image (must provide ``mode``, ``convert()`` and ``save()``).
    Returns the QImage loaded from the encoded data.
    """
    # Local import keeps this function self-contained.
    from io import BytesIO

    if im.mode != 'RGB':
        im = im.convert('RGB')

    # Image.tostring() is no longer available in Pillow, and tobytes() is
    # documented as unsuitable for compressed formats; saving into an
    # in-memory buffer is the supported way to get JPEG data.
    buf = BytesIO()
    im.save(buf, format='JPEG')

    qtimage = QImage()
    qtimage.loadFromData(buf.getvalue())
    return qtimage


def extract_tesseract_languages_path(error_message):
    """
    Extract the data directory and data file extension from a tesseract
    error message.

    The last whitespace-separated token of the message is taken as the path
    of the file tesseract could not load, so a path containing spaces is
    not handled correctly.

    error_message: message text, either str or bytes (bytes are decoded as
        UTF-8 with undecodable bytes replaced).
    Returns a (directory, extension) tuple of str; ("", "") when the
    message is empty or contains only whitespace.

    >>> extract_tesseract_languages_path("Unable to load unicharset file /usr/share/tesseract-ocr/tessdata/invalid.unicharset")
    ('/usr/share/tesseract-ocr/tessdata', '.unicharset')
    >>> extract_tesseract_languages_path(b"")
    ('', '')
    """
    if not error_message:
        return "", ""
    # Output captured from a subprocess pipe is bytes; always work on str so
    # that callers can combine the result with str paths.
    if isinstance(error_message, bytes):
        error_message = error_message.decode('utf-8', 'replace')
    tokens = error_message.split()
    if not tokens:
        return "", ""
    path, invalid_fn = os.path.split(tokens[-1])
    _, extension = os.path.splitext(invalid_fn)
    return path, extension

def _rows_from_list_header(output):
    """Return the stripped rows of *output* from the 'List of' row onwards."""
    if isinstance(output, bytes):
        # 'replace' so that unexpected bytes cannot abort the detection
        output = output.decode('utf-8', 'replace')
    rows = (output or '').split('\n')
    for index, row in enumerate(rows):
        if row.startswith('List of'):
            return [item.strip() for item in rows[index:]]
    return []


def get_tesseract_languages():
    """
    Get the list of languages reported by ``tesseract --list-langs``.

    The executable is read from the 'tesseract-ocr:executable' setting and
    defaults to 'tesseract'.

    Returns the stripped output rows starting at the row that begins with
    'List of' (that header row and any trailing blank rows are part of the
    result, exactly as before). If no such row is found, the result of
    get_tesseract_languages_old() is returned. Returns None when the
    executable cannot be started.
    """
    tess_exec = settings.get('tesseract-ocr:executable')
    if not tess_exec:
        tess_exec = 'tesseract'

    try:
        poTess = Popen([tess_exec, '--list-langs'],
                       shell=False, stdout=PIPE, stderr=PIPE)
        stdout_message, stderr_message = poTess.communicate()
    except OSError as ex:
        print("ex", ex)
        return None

    # Everything before the 'List of' row is dropped, which removes
    # unrelated information such as OpenCL performance messages.
    # The list may arrive on stderr or on stdout depending on the tesseract
    # version: stderr is checked first (the original behaviour), then stdout.
    for raw_output in (stderr_message, stdout_message):
        langlist = _rows_from_list_header(raw_output)
        if langlist:
            return langlist
    return get_tesseract_languages_old()

def get_tesseract_languages_old():
    """
    Make a list of installed language data files (legacy detection).

    Tesseract is run with an invalid language; its error message is parsed
    to learn the data file extension and the directory tesseract looked in.
    The data directory is "<prefix>/tessdata" when a prefix is configured
    (setting 'tesseract-ocr:TESSDATA_PREFIX:' or the TESSDATA_PREFIX
    environment variable); otherwise the directory from the error message
    is used.

    Returns the list of data file names without extension, or None when
    tesseract cannot be started or the data directory does not exist.
    """

    tess_exec = settings.get('tesseract-ocr:executable')
    if not tess_exec:
        tess_exec = 'tesseract'

    try:
        poTess = Popen([tess_exec, 'a', 'a', '-l', 'invalid'],
                       shell=False, stdout=PIPE, stderr=PIPE)
        stdout_message, lTess = poTess.communicate()
    except OSError as ex:
        print("ex", ex)
        return None

    # The pipe delivers bytes; decode so the extracted extension can be
    # combined with str paths below. Stripping makes a whitespace-only
    # message behave like an empty one.
    if isinstance(lTess, bytes):
        lTess = lTess.decode('utf-8', 'replace')
    detected_path, langdata_ext = \
                                extract_tesseract_languages_path(lTess.strip())

    # env. setting can help to handle path with spaces
    # NOTE(review): the settings key ends with ':' - confirm it matches the
    # name under which the value is stored.
    tess_data_prefix = settings.get('tesseract-ocr:TESSDATA_PREFIX:')
    if not tess_data_prefix:
        tess_data_prefix = os.getenv('TESSDATA_PREFIX')

    if tess_data_prefix:
        tessdata_path = os.path.join(tess_data_prefix, "tessdata")
    else:
        # No prefix configured: fall back to the directory tesseract itself
        # reported instead of failing on a missing prefix.
        tessdata_path = detected_path

    if not tessdata_path or not os.path.exists(tessdata_path):
        print("Tesseract data path ('%s') do not exist!" % tessdata_path)
        return None
    # Search the same directory whose existence was just checked.
    langdata = glob(os.path.join(tessdata_path, '*' + langdata_ext))
    return [os.path.splitext(os.path.split(uc)[1])[0] for uc in langdata]

def get_spellchecker_languages(directory=None):
    """
    Check if spellchecker is installed and provide list of languages.

    directory: optional path; when given it is set as enchant's
        "enchant.myspell.dictionary.path" parameter before listing.
    Returns the sorted result of enchant.list_languages(), or None when
    enchant cannot be imported or used (a traceback is printed).
    """
    try:
        import enchant

        if directory:
            enchant.set_param("enchant.myspell.dictionary.path", directory)
        return sorted(enchant.list_languages())

    # Still best-effort, but no longer a bare except: KeyboardInterrupt and
    # SystemExit must not be swallowed here.
    except Exception:
        print("can not start spellchecker!!!")
        import traceback
        traceback.print_exc()
        return None


# for testing module functionality
def main():
    """ Manual test entry point: print the detected tesseract languages
    """
    # get_spellchecker_languages() can be tried here in the same way
    found = get_tesseract_languages()
    if not found:
        print('No lang found!')
        return
    print('Found languages:', found)

# Run the manual test only when this file is executed as a script.
if __name__ == '__main__':
    main()