# Python source code of the model.

import os
import cv2

from OCR.tf_tesseract.my_vgsl_model import MyVGSLImageModel, ctc_decode
from OCR.tf_tesseract.read_params import read_tesseract_params
from OCR.ocr_utils import *
from OCR.l2_attack import init

from utils import to_alpha, get_image_paths

from tensorflow import app
from tensorflow.python.platform import flags

from timeit import default_timer as timer

# Command-line flags for this script; values are read through FLAGS.
flags.DEFINE_string('glob_path', "", 'images to load')
flags.DEFINE_integer('target_height', 0, 'Resize image to this height')
flags.DEFINE_string('target', "adchoices", 'text target')
flags.DEFINE_integer('use_gpu', -1, 'GPU id (>=0) or cpu (-1)')

FLAGS = flags.FLAGS

# Expose only the requested GPU id to CUDA, or no device at all when the
# flag is negative.
# NOTE(review): FLAGS.use_gpu is read here at module import time, before
# app.run() at the bottom of the file is reached -- confirm the installed
# flags implementation parses argv on first attribute access.
os.environ['CUDA_VISIBLE_DEVICES'] = (
    str(FLAGS.use_gpu) if FLAGS.use_gpu >= 0 else "")


class OCRModel(object):
    """Match the text recognised in an image against a target string.

    The constructor builds a TensorFlow graph that preprocesses an image,
    resizes it, converts it to grayscale, runs it through
    ``MyVGSLImageModel`` and CTC-decodes the resulting logits. ``match``
    runs that graph on a single image and compares the decoded text with
    the target.

    The instance owns a ``tf.Session``. Call ``close()`` (or use the
    instance in a ``with`` statement) to release it when finished.
    """

    def __init__(self, target, target_height=0, target_threshold=4):
        """Build the recognition graph and initialise a session.

        Args:
            target: text to look for; comparisons are case-insensitive.
            target_height: when non-zero, images whose height is below this
                value are enlarged by the factor ``get_size_mul`` returns.
            target_threshold: largest edit distance (inclusive) that still
                counts as a match.
        """
        self.target_txt = target
        self.target_height = target_height
        self.sim_threshold = target_threshold

        # Device choice comes from the module-level FLAGS, not an argument.
        use_gpu = FLAGS.use_gpu >= 0
        self.char_map = read_all_chars()
        params = read_tesseract_params(use_gpu=use_gpu)
        model = MyVGSLImageModel(use_gpu=use_gpu)

        # Graph inputs: a 4-channel float image of arbitrary height/width,
        # plus its original and resized dimensions as one-element vectors.
        self.img_var = tf.placeholder(dtype=tf.float32, shape=(None, None, 4))
        self.h_orig_var = tf.placeholder(dtype=tf.int64, shape=[1])
        self.w_orig_var = tf.placeholder(dtype=tf.int64, shape=[1])

        self.h_resized_var = tf.placeholder(dtype=tf.int64, shape=[1])
        self.w_resized_var = tf.placeholder(dtype=tf.int64, shape=[1])
        # resize_images is given the target size as an int32 [height, width].
        self.resized_dims_var = tf.cast(
            tf.concat([self.h_resized_var, self.w_resized_var], axis=0),
            tf.int32)

        # NOTE(review): remove_alpha / preprocess_tf are project helpers;
        # presumably they yield a 3-channel image, since rgb_to_grayscale
        # is applied afterwards -- confirm against OCR.ocr_utils.
        img_preproc = self.img_var
        img_preproc = remove_alpha(img_preproc)
        img_preproc = preprocess_tf(img_preproc,
                                    self.h_orig_var[0],
                                    self.w_orig_var[0])

        img_large = tf.image.resize_images(img_preproc, self.resized_dims_var,
                                           method=tf.image.ResizeMethod.BILINEAR)
        img_large = tf.image.rgb_to_grayscale(img_large)

        logits, _ = model(img_large,  self.h_resized_var, self.w_resized_var)
        self.text_output = ctc_decode(logits, model.ctc_width)

        # The ops returned by init() are run once, right after the session
        # is created.
        init_ops = init(params, use_gpu=use_gpu, skip=0)
        self.sess = tf.Session()
        self.sess.run(init_ops)

    def close(self):
        """Close the TensorFlow session owned by this instance."""
        self.sess.close()

    def __enter__(self):
        """Return the instance itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session on leaving the ``with`` block.

        Returns False so that exceptions raised inside the block propagate.
        """
        self.close()
        return False

    def match(self, img, verbose=False, img_name=None):
        """Run OCR on ``img`` and compare the result with the target text.

        Args:
            img: image array; it is passed through ``to_alpha`` and must
                then have a three-dimensional ``shape``.
            verbose: when true, print the recognised text and the outcome.
            img_name: label used in the verbose messages only.

        Returns:
            A ``(matched, distance)`` tuple. ``distance`` is 0 when the
            lower-cased target occurs inside the lower-cased recognised
            text, otherwise the value of ``levenshtein`` on the two
            lower-cased strings. ``matched`` is True when ``distance`` does
            not exceed the threshold given to the constructor. Images with
            a height or width below 5 are not run through the graph and
            yield ``(False, len(target))``.
        """
        img = to_alpha(img)
        h, w, _ = img.shape
        if h < 5 or w < 5:
            if verbose:
                print("Skipping {}, too small".format(img_name))
            return False, len(self.target_txt)

        # Enlarge only images shorter than the configured height; a
        # target_height of 0 disables resizing (factor stays 1).
        mul = 1
        if self.target_height and h < self.target_height:
            mul = get_size_mul(h, w, self.target_height)

        feed_dict = {
            self.img_var: img,
            self.h_orig_var: [h],
            self.w_orig_var: [w],
            self.h_resized_var: [mul * h],
            self.w_resized_var: [mul * w],
        }
        decoded = self.sess.run(self.text_output, feed_dict=feed_dict)
        text = decode(decoded, self.char_map)[0]
        if verbose:
            print(text)

        text_lower = text.lower()
        target_lower = self.target_txt.lower()
        # A substring hit counts as a perfect match, so the edit distance
        # only needs computing when the target is not contained verbatim.
        if target_lower in text_lower:
            dist = 0
        else:
            dist = levenshtein(text_lower, target_lower)

        if dist <= self.sim_threshold:
            if verbose:
                print("{} matched with distance {}".format(img_name, dist))
            return True, dist
        else:
            if verbose:
                print("no match for {}! Distance {}".format(img_name, dist))
            return False, dist


def main(argv):
    """Match every image selected by ``--glob_path`` against ``--target``.

    Builds an ``OCRModel`` from the command-line flags, runs ``match`` on
    each image path returned by ``get_image_paths`` and prints: the number
    of files found, the elapsed time, the count and paths of images whose
    distance is within the threshold, and the ten lowest-distance
    ``(path, distance)`` pairs.

    Args:
        argv: ignored; all configuration is read from ``FLAGS``.

    Raises:
        IOError: if ``cv2.imread`` returns None for one of the paths.
    """
    del argv
    threshold = 4
    ocr_model = OCRModel(FLAGS.target, FLAGS.target_height,
                         target_threshold=threshold)

    ad_logo_paths = get_image_paths(FLAGS.glob_path)
    print("found {} files to match".format(len(ad_logo_paths)))

    scores = []

    t1 = timer()

    for ad_logo_path in ad_logo_paths:
        # Flag -1 asks OpenCV to load the file unchanged.
        ad_logo = cv2.imread(ad_logo_path, -1)
        # Explicit check instead of ``assert``: asserts are stripped under
        # ``python -O``, which would let a None image reach match().
        if ad_logo is None:
            raise IOError("could not read image: {}".format(ad_logo_path))

        ad_logo_name = os.path.basename(ad_logo_path)

        _, score = ocr_model.match(ad_logo, verbose=True,
                                   img_name=ad_logo_name)
        scores.append(score)

    t2 = timer()

    print("evaluated {} images in {} seconds".format(len(ad_logo_paths), t2-t1))

    # Arrays allow boolean-mask and index-array selection below.
    scores = np.array(scores)
    ad_logo_paths = np.array(ad_logo_paths)

    print(np.sum(scores <= threshold))
    print(ad_logo_paths[scores <= threshold])
    # Indices of the (up to) ten smallest distances.
    topk = scores.argsort()[:10]
    print(list(zip(ad_logo_paths[topk], scores[topk])))


if __name__ == "__main__":
    # Script entry point: hand main() to TensorFlow's app runner explicitly.
    app.run(main=main)