Python pyocr.get_available_tools() Examples

The following are 6 code examples of pyocr.get_available_tools(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pyocr, or try the search function.

Example #1

Source File: LocalOCR.py From PDFtoTXT with MIT License

6 votes

def __init__(self, ocr_language):
        """Pick the first available OCR tool and choose the OCR language.

        Terminates the process with exit status 1 when pyocr reports no
        tool. Otherwise the first reported tool is stored in ``self.tool``
        and ``self.lang`` is set to ``ocr_language`` when the tool lists it,
        or to the first language the tool lists.

        ``self.lang`` is ``None`` when the tool lists no language or when
        querying the languages fails; the failure is printed, not raised.

        Args:
            ocr_language: Preferred language, compared against the values
                returned by the tool's ``get_available_languages()``.
        """
        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("OCR tool: %s" % self.tool)

        # Define the attribute up front: the best-effort handler below would
        # otherwise leave the instance without ``lang`` on failure.
        self.lang = None
        try:
            langs = self.tool.get_available_languages()
            if langs:
                self.lang = ocr_language if ocr_language in langs else langs[0]
                print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
            else:
                # An empty list used to surface as a cryptic IndexError text.
                print("No OCR language found")
        except Exception as e:
            # Deliberately best-effort: report the problem and keep going.
            print("{}".format(e))

Example #2

Source File: saram.py From saram with MIT License

6 votes

def __init__(self, path):
        """Pick the first available OCR tool and select the OCR language.

        Terminates the process with exit status 1 when pyocr reports no
        tool. Otherwise the first reported tool is stored in ``self.tool``
        and ``self.lang`` is set to ``'eng'`` when the tool lists it, or to
        the first language the tool lists.

        ``self.lang`` is ``None`` when the tool lists no language or when
        querying the languages fails; the failure is printed, not raised.

        Args:
            path: Accepted for interface compatibility; this method does not
                read or store it (the original only re-assigned it to
                itself).
        """
        ocr_language = 'eng'

        tools = pyocr.get_available_tools()
        if not tools:
            print("No OCR tool found")
            sys.exit(1)
        self.tool = tools[0]
        print("OCR tool: %s" % self.tool)

        # Define the attribute up front: the best-effort handler below would
        # otherwise leave the instance without ``lang`` on failure.
        self.lang = None
        try:
            langs = self.tool.get_available_languages()
            if langs:
                self.lang = ocr_language if ocr_language in langs else langs[0]
                print("OCR selected language: %s (available: %s)" % (self.lang.upper(), ", ".join(langs)))
            else:
                # An empty list used to surface as a cryptic IndexError text.
                print("No OCR language found")
        except Exception as e:
            # Deliberately best-effort: report the problem and keep going.
            print("{}".format(e))

Example #3

Source File: ocr.py From ocr-process-service with MIT License

5 votes

def check_required_software():
        """Verify that pyocr can find at least one OCR tool and log the result.

        Logs an informational message when exactly one tool is found (naming
        it via ``get_name()``) and a different one when several are found.

        Raises:
            PyOCRIntegrationNoOCRFound: When pyocr reports no OCR tool.
        """
        logger = logging.getLogger(__name__)
        tools = pyocr.get_available_tools()
        if not tools:
            # The message used to stop mid-sentence after "it's on".
            raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                             'this system. Make sure it\'s on '
                                             'PATH variable of your system')
        elif len(tools) == 1:
            # Pass the name as a logger argument so formatting is deferred
            # until the record is actually emitted.
            logger.info("I've found only one ocr tool [%s]. This is not exactly "
                        "an error but you should get better results if you have "
                        "both Tesseract and Cuneiform installed",
                        tools[0].get_name())
        else:
            logger.info("I've found all required software. We're good to go =)")

Example #4

Source File: tesseract_ocr.py From PAN_OCR with MIT License

5 votes

def initialize(self):
		"""Select the first OCR tool pyocr reports and cache its languages.

		Prints a hint and exits the process with status 1 when no tool is
		reported. Otherwise stores the tool in ``self.tool`` and the result
		of its ``get_available_languages()`` call in ``self.langs``.
		"""
		available = pyocr.get_available_tools()
		if not available:
			print("No tools found, do you have Tesseract installed?")
			sys.exit(1)

		# The first reported tool is the one used from here on.
		self.tool = available[0]
		self.langs = self.tool.get_available_languages()

Example #5

Source File: config.py From paperwork-backend with GNU General Public License v3.0

5 votes

def get_default_ocr_lang():
    """Return the OCR language to use by default.

    Takes the object returned by ``find_language()`` and returns its
    ``iso639_3_code`` or, failing that, its ``terminology`` attribute when
    that value is among the languages of the first available OCR tool.
    Returns ``DEFAULT_OCR_LANG`` when no OCR tool is available or neither
    attribute matches.
    """
    available_tools = pyocr.get_available_tools()
    if not available_tools:
        return DEFAULT_OCR_LANG

    supported = available_tools[0].get_available_languages()
    detected = find_language()

    # Check the candidate attributes in order of preference; the first one
    # that exists and is supported by the OCR tool wins.
    for attr_name in ('iso639_3_code', 'terminology'):
        if hasattr(detected, attr_name) and getattr(detected, attr_name) in supported:
            return getattr(detected, attr_name)

    return DEFAULT_OCR_LANG

Example #6

Source File: ocr.py From ocr-process-service with MIT License

4 votes

def image_to_string(self, filename):
        """Run every available OCR tool on ``filename`` and collect the results.

        Two derived images are written next to the input: a grayscale copy
        (``<name>_gray<ext>``) and an adaptive-threshold version of that copy
        (``<name>_adt<ext>``). The tool named "Tesseract" is run on the
        original file only; every other tool is run on the original, the
        grayscale and the thresholded file. Each run happens in its own
        ``_OCRProcessingThread``. Both derived files are passed to
        ``self._cleanup`` once all started threads have finished.

        Args:
            filename: Path of the image to process.

        Returns:
            A list with the ``return_value`` of every processing thread, in
            the order the threads were started.

        Raises:
            PyOCRIntegrationNoOCRFound: When pyocr reports no OCR tool.
        """
        tools = pyocr.get_available_tools()
        if not tools:
            # Trailing space added: the adjacent literals used to join into
            # "it's onPATH variable".
            raise PyOCRIntegrationNoOCRFound('No OCR tool has been found on '
                                             'this system. Make sure it\'s on '
                                             'PATH variable of your system')

        filename_split, fileextension_split = os.path.splitext(filename)

        grayscale_filename = filename_split + '_gray' + fileextension_split
        with WandImage(filename=filename) as img:
            img.type = 'grayscale'
            img.save(filename=grayscale_filename)

        adaptive_thresh_filename = filename_split + '_adt' + fileextension_split
        ScikitImageIntegration.adaptive_threshold(grayscale_filename, adaptive_thresh_filename)

        processes = []
        try:
            for tool in tools:
                if tool.get_name() == "Tesseract":
                    inputs = (filename,)
                else:
                    inputs = (filename, grayscale_filename,
                              adaptive_thresh_filename)

                for input_filename in inputs:
                    worker = self._OCRProcessingThread(tool, self.lang,
                                                       input_filename)
                    worker.start()
                    processes.append(worker)
        finally:
            # Block until every started thread is done instead of spinning on
            # is_alive(). NOTE(review): assumes _OCRProcessingThread follows
            # the threading.Thread API (it already exposes start() and
            # is_alive()) -- confirm it provides join().
            for worker in processes:
                worker.join()

            # Remove the generated files even when starting a thread failed,
            # but only after no running thread can still be reading them.
            self._cleanup(grayscale_filename)
            self._cleanup(adaptive_thresh_filename)

        return [worker.return_value for worker in processes]