python source code of page

# coding=utf-8

import os
import traceback
import sys

import cv2
import numpy as np
from PIL import Image
from scipy.ndimage.filters import rank_filter
import pytesseract


class Page(object):
    def __init__(self, im, page_num, lang=None):
        self.healthy = True
        self.err = False
        self.page_num = page_num
        self.orig_im = im
        self.orig_shape = self.orig_im.shape
        self.lang = lang

    def crop(self):
        try:
            self.image, self.num_tries = process_image(self.orig_im)
            self.crop_shape = self.image.shape
            return self.image
        except Exception as e:
            for frame in traceback.extract_tb(sys.exc_info()[2]):
                fname, lineno, fn, text = frame
                print("Error in %s on line %d" % (fname, lineno))
                print(e)
            self.err = e
            self.healthy = False

    def deskew(self):
        try:
            self.image, self.theta_est = process_skewed_crop(self.image)
            return self.image
        except Exception as e:
            self.err = e
            self.healthy = False

    def extract_text(self):
        temp_path = 'text_temp.png'
        cv2.imwrite(temp_path, self.image)
        self.text = pytesseract.image_to_string(Image.open(temp_path), lang=self.lang)
        os.remove(temp_path)
        return self.text

    def save(self, out_path):
        if not self.healthy:
            print("There was an error when cropping")
            raise Exception(self.err)
        else:
            self.imwrite(out_path, self.image)


def auto_canny(image, sigma=0.33):
    v = np.median(image)
    lower = int(max(0, (1.0 - sigma) * v))
    upper = int(min(255, (1.0 + sigma) * v))
    edged = cv2.Canny(image, lower, upper, True)
    return edged


def dilate(image, kernel, iterations):
    dilated_image = cv2.dilate(image, kernel, iterations=iterations)
    return dilated_image


def downscale_image(im, max_dim=2048):
    """Shrink im until its longest dimension is <= max_dim.
    Returns new_image, scale (where scale <= 1)."""
    a, b = im.shape[:2]
    if max(a, b) <= max_dim:
        return 1.0, im

    scale = 1.0 * max_dim / max(a, b)
    new_im = cv2.resize(im, (int(b * scale), int(a * scale)), cv2.INTER_AREA)
    return scale, new_im


def find_components(im, max_components=16):
    """Dilate the image until there are just a few connected components.
    Returns contours for these components."""
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (10, 10))
    dilation = dilate(im, kernel, 6)

    count = 21
    n = 0
    sigma = 0.000

    while count > max_components:
        n += 1
        sigma += 0.005
        result = cv2.findContours(dilation, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        if len(result) == 3:
            _, contours, hierarchy = result
        elif len(result) == 2:
            contours, hierarchy = result
        possible = find_likely_rectangles(contours, sigma)
        count = len(possible)

    return (dilation, possible, n)


def find_likely_rectangles(contours, sigma):
    contours = sorted(contours, key=cv2.contourArea, reverse=True)[:10]
    possible = []
    for c in contours:

        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, sigma * peri, True)
        box = make_box(approx)
        possible.append(box)

    return possible


def make_box(poly):
    x = []
    y = []
    for p in poly:
        for point in p:
            x.append(point[0])
            y.append(point[1])
    xmax = max(x)
    ymax = max(y)
    xmin = min(x)
    ymin = min(y)
    return (xmin, ymin, xmax, ymax)


def rect_union(crop1, crop2):
    """Union two (x1, y1, x2, y2) rects."""
    x11, y11, x21, y21 = crop1
    x12, y12, x22, y22 = crop2
    return min(x11, x12), min(y11, y12), max(x21, x22), max(y21, y22)


def rect_area(crop):
    x1, y1, x2, y2 = crop
    return max(0, x2 - x1) * max(0, y2 - y1)


def crop_image(im, rect, scale):
    xmin, ymin, xmax, ymax = rect
    crop = [xmin, ymin, xmax, ymax]
    xmin, ymin, xmax, ymax = [int(x / scale) for x in crop]
    cropped = im[ymin:ymax, xmin:xmax]
    return cropped


def reduce_noise_raw(im):
    bilat = cv2.bilateralFilter(im, 9, 75, 75)
    blur = cv2.medianBlur(bilat, 5)
    return blur


def reduce_noise_edges(im):
    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
    opening = cv2.morphologyEx(im, cv2.MORPH_OPEN, structuring_element)
    maxed_rows = rank_filter(opening, -4, size=(1, 20))
    maxed_cols = rank_filter(opening, -4, size=(20, 1))
    debordered = np.minimum(np.minimum(opening, maxed_rows), maxed_cols)
    return debordered


def rects_are_vertical(rect1, rect2):
    xmin1, ymin1, xmax1, ymax1 = rect1
    xmin2, ymin2, xmax2, ymax2 = rect2

    midpoint1 = (xmin1 + xmax1) / 2
    midpoint2 = (xmin2 + xmax2) / 2
    dist = abs(midpoint1 - midpoint2)

    rectarea1 = rect_area(rect1)
    rectarea2 = rect_area(rect2)
    if rectarea1 > rectarea2:
        thres = (xmax1 - xmin1) * 0.1
    else:
        thres = (xmax2 - xmin2) * 0.1
    if thres > dist:
        align = True
    else:
        align = False
    return align


def find_final_crop(im, rects):
    current = None
    for rect in rects:
        if current is None:
            current = rect
            continue

        aligned = rects_are_vertical(current, rect)

        if not aligned:
            continue

        current = rect_union(current, rect)
    return current


def process_image(orig_im):

    # Load and scale down image.
    scale, im = downscale_image(orig_im)

    # Reduce noise.
    blur = reduce_noise_raw(im.copy())

    # Edged.
    edges = auto_canny(blur.copy())

    # Reduce noise and remove thin borders.
    debordered = reduce_noise_edges(edges.copy())

    # Dilate until there are a few components.
    dilation, rects, num_tries = find_components(debordered, 16)

    # Find the final crop.
    final_rect = find_final_crop(dilation, rects)

    # Crop the image and smooth.
    cropped = crop_image(orig_im, final_rect, scale)
    kernel = np.ones((5, 5), np.float32) / 25
    smooth2d = cv2.filter2D(cropped, -1, kernel=kernel)

    return (smooth2d, num_tries)


def rad_to_deg(theta):
    return theta * 180 / np.pi


def rotate(image, theta):
    (h, w) = image.shape[:2]
    center = (w / 2, h / 2)
    M = cv2.getRotationMatrix2D(center, theta, 1)
    rotated = cv2.warpAffine(image, M, (int(w), int(h)), cv2.INTER_LINEAR,
                             borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))
    return rotated


def estimate_skew(image):
    edges = auto_canny(image)
    lines = cv2.HoughLines(edges, 1, np.pi / 90, 200)
    new = edges.copy()

    thetas = []

    for line in lines:
        for rho, theta in line:
            a = np.cos(theta)
            b = np.sin(theta)
            x0 = a * rho
            y0 = b * rho
            x1 = int(x0 + 1000 * (-b))
            y1 = int(y0 + 1000 * (a))
            x2 = int(x0 - 1000 * (-b))
            y2 = int(y0 - 1000 * (a))
            if theta > np.pi / 3 and theta < np.pi * 2 / 3:
                thetas.append(theta)
                new = cv2.line(new, (x1, y1), (x2, y2), (255, 255, 255), 1)

    theta_mean = np.mean(thetas)
    theta = rad_to_deg(theta_mean) if len(thetas) > 0 else 0

    return theta


def compute_skew(theta):
    # We assume a perfectly aligned page has lines at theta = 90 deg
    diff = 90 - theta

    # We want to reverse the difference.
    return -diff


def process_skewed_crop(image):
    theta = compute_skew(estimate_skew(image))
    ret, thresh = cv2.threshold(image.copy(), 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    rotated = rotate(thresh, theta)
    return (rotated, theta)