# Python source code of extract

# -*- coding: utf-8 -*-

import cv2
import operator
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
from scipy.signal import argrelextrema
from PIL import Image
import pytesseract
import PIL.ImageOps

#print(sys.executable)
# --- Key-frame selection modes (each enabled mode runs independently) ---
# Threshold mode: save every frame whose relative change against the previous
# stored frame is at least THRESH.
USE_THRESH = False
# Minimum relative change (compared against rel_change()) used by threshold mode.
THRESH = 0.6
# Top-order mode: save the NUM_TOP_FRAMES frames with the largest difference value.
USE_TOP_ORDER = False
# Local-maxima mode: pick frames at the local maxima of the smoothed difference
# signal. This is the only mode that runs OCR and prints subtitles.
USE_LOCAL_MAXIMA = True
# Number of highest-scoring frames written in top-order mode.
NUM_TOP_FRAMES = 20


'''
Using crop mode(crop out subtitles area) can greatly improve recognition accuracy, 
but you need to manually adjust the crop area by modifying the value of cropper 
parameters(x, y, w, h).
To debug the appropriate value, set ADJUST_MODE = True to show cropped result.
'''

# Run OCR on the cropped region (x, y, w, h) instead of the whole frame.
USE_CROP = False
# Crop-area adjustment mode: when USE_CROP is also set, the first cropped image
# is displayed once so the crop box can be checked; ocr_im() then turns this off.
ADJUST_MODE = True
# Cropper parameters; the crop box is (x, y, x + w, y + h).
# left_padding
x=150
# top_padding
y=430
# window_width
w=600
# window_height
h=40

# Video path of the source file (first command-line argument).
videopath = sys.argv[1]
# Directory to store the processed frames. It is concatenated directly with the
# file name in places, so it must end with a path separator.
dir = "frames/"
# Smoothing window size (second command-line argument), passed to smooth().
len_window = int(sys.argv[2])

# Language argument passed to pytesseract.image_to_string().
LANG='chi_sim'






def smooth(x, window_len=13, window='hanning'):
    """Smooth a 1-D signal by convolving it with a normalized window.

    The signal is extended at both ends with point-reflected copies of itself
    (``2 * x[0] - ...`` and ``2 * x[-1] - ...``) so the convolution does not
    taper towards zero at the borders; the padding is trimmed again before
    the result is returned.

    Args:
        x: 1-D sequence of samples. Any array-like is accepted; the values
            are converted to float64 before any arithmetic is done.
        window_len: Size of the smoothing window. Values below 3 disable
            smoothing.
        window: ``'flat'`` for a moving average, or the name of a NumPy
            window function: ``'hanning'``, ``'hamming'``, ``'bartlett'``,
            ``'blackman'``.

    Returns:
        The smoothed signal as a float64 array. When ``window_len < 3`` the
        input array is returned as is. The result has the same length as
        ``x`` as long as ``x`` is strictly longer than ``window_len``.

    Raises:
        ValueError: If ``x`` is not one-dimensional, is shorter than
            ``window_len``, or ``window`` is not a supported name.
    """
    # Diagnostic output: number of samples and window size.
    print(len(x), window_len)
    x = np.asarray(x)

    if x.ndim != 1:
        raise ValueError("smooth only accepts 1 dimension arrays.")

    if x.size < window_len:
        raise ValueError("Input vector needs to be bigger than window size.")

    if window_len < 3:
        return x

    if window not in ('flat', 'hanning', 'hamming', 'bartlett', 'blackman'):
        raise ValueError("Window is on of 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")

    # The reflection below subtracts samples from 2 * endpoint. On an unsigned
    # integer input (e.g. sums of uint8 pixel differences) that subtraction can
    # wrap around instead of going negative, so do the arithmetic in float.
    data = x.astype(np.float64)

    padded = np.r_[2 * data[0] - data[window_len:1:-1],
                   data,
                   2 * data[-1] - data[-1:-window_len:-1]]

    if window == 'flat':  # moving average
        weights = np.ones(window_len, 'd')
    else:
        weights = getattr(np, window)(window_len)

    smoothed = np.convolve(weights / weights.sum(), padded, mode='same')
    # Drop the window_len - 1 padding samples added on each side.
    return smoothed[window_len - 1:-window_len + 1]

#Class to hold information about each frame


class Frame:
    """A video frame together with its index and its difference score.

    Instances are compared, ordered and hashed by ``id`` only; ``frame`` and
    ``value`` take no part in comparisons.

    Attributes:
        id: Identifier of the frame (this file passes a running frame counter).
        frame: The frame image data.
        value: Score attached to the frame (this file passes the summed
            absolute difference to the previous frame).
    """

    def __init__(self, id, frame, value):
        # ``id`` shadows the builtin, but it is part of the constructor's
        # keyword interface and is therefore kept.
        self.id = id
        self.frame = frame
        self.value = value

    def __repr__(self):
        # The image payload is left out to keep the representation short.
        return "Frame(id={!r}, value={!r})".format(self.id, self.value)

    def __lt__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id < other.id

    def __le__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id <= other.id

    def __gt__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id > other.id

    def __ge__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id >= other.id

    def __eq__(self, other):
        if not isinstance(other, Frame):
            return NotImplemented
        return self.id == other.id

    def __ne__(self, other):
        equal = self.__eq__(other)
        if equal is NotImplemented:
            return NotImplemented
        return not equal

    def __hash__(self):
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same key that equality uses.
        return hash(self.id)


def rel_change(a, b):
    """Return the change from ``a`` to ``b`` relative to the larger of the two.

    Computes ``(b - a) / max(a, b)`` and prints the result before returning it.

    Args:
        a: Previous value.
        b: Current value.

    Returns:
        The relative change as a float; ``0.0`` when both values are equal.
    """
    if a == b:
        # Equal values mean no change. Handling this case up front also
        # avoids the 0 / 0 division when both values are zero.
        x = 0.0
    else:
        x = (b - a) / max(a, b)
    print(x)
    return x


def ocr_im(name):
    """Run OCR on a stored frame image and return the recognized text.

    The image ``dir + name`` is opened and its colors are inverted. When
    ``USE_CROP`` is set, only the region ``(x, y, x + w, y + h)`` is passed to
    the OCR engine; otherwise the whole inverted image is used. In crop mode
    with ``ADJUST_MODE`` enabled, the first cropped image is displayed once
    and ``ADJUST_MODE`` is then switched off.

    Args:
        name: File name of the image inside the frame directory.

    Returns:
        The text returned by ``pytesseract.image_to_string`` for language
        ``LANG``.
    """
    global ADJUST_MODE
    # Close the file as soon as the pixels have been read: the inverted image
    # is an independent in-memory copy, and an open handle would block the
    # caller from deleting the file on platforms that lock open files.
    with Image.open(dir + name) as im:
        inverted_im = PIL.ImageOps.invert(im)
    #inverted_im.show()

    if not USE_CROP:
        return pytesseract.image_to_string(inverted_im, LANG)

    croped_im = inverted_im.crop((x, y, x + w, y + h))
    if ADJUST_MODE:
        # Show the crop result once so the crop box can be checked.
        croped_im.show()
        ADJUST_MODE = False
    return pytesseract.image_to_string(croped_im, LANG)
    

#Print infos
print("[Video Path] " + videopath)
print("[Frame Directory] " + dir)
print("[Subtitles Language] " + LANG)
print("Extracting key frames, waiting...")


cap = cv2.VideoCapture(str(videopath))
if not cap.isOpened():
    # Without this check an unreadable path just yields zero frames and the
    # failure only shows up later as an unrelated error.
    cap.release()
    sys.exit("Cannot open video: " + str(videopath))


curr_frame = None
prev_frame = None

# frame_diffs[k] and frames[k] describe the same frame: the summed absolute
# LUV difference to its predecessor, and the frame itself.
frame_diffs = []
frames = []
i = 1

try:
    ret, frame = cap.read()
    while ret:
        curr_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2LUV)
        # The first frame has no predecessor to compare against.
        if prev_frame is not None:
            count = np.sum(cv2.absdiff(curr_frame, prev_frame))
            frame_diffs.append(count)
            frames.append(Frame(i, frame, count))
        prev_frame = curr_frame
        i = i + 1
        ret, frame = cap.read()
finally:
    # Release the capture even if decoding fails part-way through.
    cap.release()
#cv2.destroyAllWindows()
last_subtitle = ""

# The frame images are written into `dir`; create it up front so that writing
# does not fail just because the directory is missing.
if dir and not os.path.isdir(dir):
    os.makedirs(dir)

if USE_TOP_ORDER:
    # Sort the list in descending order of difference value. NOTE: this sorts
    # `frames` in place, so modes that run afterwards see this order.
    frames.sort(key=operator.attrgetter("value"), reverse=True)
    for keyframe in frames[:NUM_TOP_FRAMES]:
        name = "frame_" + str(keyframe.id) + ".jpg"
        cv2.imwrite(dir + "/" + name, keyframe.frame)

if USE_THRESH:
    for i in range(1, len(frames)):
        # `np.float` was only an alias of the builtin float and no longer
        # exists in current NumPy releases; use the builtin directly.
        prev_value = float(frames[i - 1].value)
        curr_value = float(frames[i].value)
        if rel_change(prev_value, curr_value) >= THRESH:
            #print("prev_frame:"+str(frames[i-1].value)+"  curr_frame:"+str(frames[i].value))
            name = "frame_" + str(frames[i].id) + ".jpg"
            cv2.imwrite(dir + "/" + name, frames[i].frame)


if USE_LOCAL_MAXIMA:
    diff_array = np.array(frame_diffs)
    sm_diff_array = smooth(diff_array, len_window)
    # Indexes of the local maxima of the smoothed difference signal.
    frame_indexes = np.asarray(argrelextrema(sm_diff_array, np.greater))[0]
    for i in frame_indexes:
        keyframe = frames[i - 1]
        name = "frame_" + str(keyframe.id) + ".jpg"
        # ocr_im() reads the image back from dir + name, so write it there.
        image_path = dir + name
        cv2.imwrite(image_path, keyframe.frame)

        try:
            text = ocr_im(name)
        finally:
            # Delete the temporary frame image even when OCR fails.
            if os.path.exists(image_path):
                os.remove(image_path)

        # Check for repeated subtitles
        if text != last_subtitle:
            last_subtitle = text
            print(text)



#Draw plot
# plt.figure(figsize=(40, 20))
# plt.locator_params(numticks=100)
# plt.stem(sm_diff_array)
# plt.savefig(dir + 'plot.png')