# Python source code of the clip filter.

# ------------------------------------------------------------------------------
# Copyright (c) ETRI. All rights reserved.
# Licensed under the BSD 3-Clause License.
# This file is part of Youtube-Gesture-Dataset, a sub-project of AIR(AI for Robots) project.
# You can refer to details of AIR project at https://aiforrobots.github.io
# Written by Youngwoo Yoon (youngwoo@etri.re.kr)
# ------------------------------------------------------------------------------

import numpy as np
import cv2
import math

from data_utils import get_skeleton_from_frame
from config import my_config


class ClipFilter:
    """Run a fixed sequence of rejection checks on one video clip (scene).

    Each skeleton frame is passed through ``get_skeleton_from_frame`` and the
    result is indexed as a flat sequence with a stride of three values per
    joint, where offset 0 is the x coordinate and offset 1 is the y coordinate.
    NOTE(review): the third value per joint is presumably a detection
    confidence (OpenPose-style output) -- confirm against ``data_utils``.

    Degenerate inputs are rejected rather than raising: a scene with a
    non-positive length is counted as fully incorrect by every ratio check,
    and a clip from which fewer than two frames can be sampled is reported as
    a still picture.
    """

    # Scenes shorter than this many frames are rejected (3 seconds at an assumed 25 fps).
    MIN_SCENE_LENGTH = 25 * 3
    # Mean L1 difference between sampled frames below which a clip counts as a still picture.
    PICTURE_DIFF_THRESHOLD = 3000000
    # Clips averaging more than this many detected skeletons per frame are rejected.
    MAX_MEAN_PEOPLE = 5
    # The scene is divided into this many segments to pick the frames compared by is_picture().
    N_PICTURE_SEGMENTS = 5

    def __init__(self, video, start_frame_no, end_frame_no, raw_skeleton, main_speaker_skeletons):
        """Store the clip description and reset the filtering state.

        Args:
            video: Capture object used by ``is_picture``; it must provide
                ``set(prop, value)`` and ``read()`` like ``cv2.VideoCapture``.
            start_frame_no: Frame number at which the scene starts.
            end_frame_no: Frame number at which the scene ends.
            raw_skeleton: Per-frame collections of detected skeletons; only
                their lengths are used (number of people per frame).
            main_speaker_skeletons: Per-frame skeleton entries of the main
                speaker, each accepted by ``get_skeleton_from_frame``.
        """
        self.skeleton_data = raw_skeleton
        self.main_speaker_skeletons = main_speaker_skeletons
        self.start_frame_no = start_frame_no
        self.end_frame_no = end_frame_no
        self.scene_length = end_frame_no - start_frame_no
        self.video = video
        self.filter_option = my_config.FILTER_OPTION

        # filtering criteria variable
        # One flag per check, set to 1 once the check has been passed:
        # too short, many_people, looking_back, joint_missing, looking_sideways, small, picture
        self.filtering_results = [0, 0, 0, 0, 0, 0, 0]
        self.message = ''
        # Measured value of each check: looking back, joint missing, looking sideways, small, picture
        self.debugging_info = ['None', 'None', 'None', 'None', 'None']

    def _ratio_of_scene(self, n_frames):
        """Return ``n_frames`` as a fraction of the scene length.

        A scene with a non-positive length has no usable frame, so it is
        reported as fully incorrect (1.0) instead of dividing by zero.
        """
        if self.scene_length <= 0:
            return 1.0
        return n_frames / self.scene_length

    def _incorrect_frame_ratio(self, is_incorrect):
        """Return the fraction of the scene whose main-speaker frame is incorrect.

        A frame is incorrect when ``get_skeleton_from_frame`` yields a falsy
        body or when ``is_incorrect(body)`` is true.
        """
        n_incorrect_frame = 0
        for skeleton in self.main_speaker_skeletons:  # frames
            body = get_skeleton_from_frame(skeleton)
            if not body or is_incorrect(body):
                n_incorrect_frame += 1
        return self._ratio_of_scene(n_incorrect_frame)

    def is_skeleton_back(self, ratio):
        """Return True if the share of back-facing frames exceeds ``ratio``.

        A frame counts as back-facing when the x coordinate of joint 2 is
        greater than the x coordinate of joint 5 (the two shoulder joints).
        The measured share is stored in ``debugging_info[0]``.
        """
        incorrect_ratio = self._incorrect_frame_ratio(
            lambda body: body[2 * 3] > body[5 * 3])
        self.debugging_info[0] = round(incorrect_ratio, 3)
        return incorrect_ratio > ratio

    def is_skeleton_sideways(self, ratio):
        """Return True if the share of sideways frames exceeds ``ratio``.

        A frame counts as sideways when the x coordinate of joint 0 lies
        outside the horizontal span between joints 2 and 5 (the shoulders).
        The measured share is stored in ``debugging_info[2]``.
        """
        def head_outside_shoulders(body):
            head_x, shoulder_a_x, shoulder_b_x = body[0], body[2 * 3], body[5 * 3]
            return (head_x < min(shoulder_a_x, shoulder_b_x)
                    or head_x > max(shoulder_a_x, shoulder_b_x))

        incorrect_ratio = self._incorrect_frame_ratio(head_outside_shoulders)
        self.debugging_info[2] = round(incorrect_ratio, 3)
        return incorrect_ratio > ratio

    def is_skeleton_missing(self, ratio):
        """Return True if the share of frames with missing joints exceeds ``ratio``.

        A joint is treated as missing when its x coordinate equals 0. With no
        main-speaker skeletons at all, the whole scene counts as missing.
        The measured share is stored in ``debugging_info[1]``.
        """
        point_idx = (0, 1, 2, 3, 4, 5, 6, 7)  # head and arms

        # len() also recognises empty tuples/arrays, which `== []` does not.
        if len(self.main_speaker_skeletons) == 0:
            incorrect_ratio = self._ratio_of_scene(self.scene_length)
        else:
            incorrect_ratio = self._incorrect_frame_ratio(
                lambda body: any(body[idx * 3] == 0 for idx in point_idx))

        self.debugging_info[1] = round(incorrect_ratio, 3)
        return incorrect_ratio > ratio

    def is_skeleton_small(self, ratio):
        """Return True if the share of too-small skeletons exceeds ``ratio``.

        A skeleton is too small when the distance between joints 2 and 5
        (shoulder length) is below ``filter_option['threshold']``.
        The measured share is stored in ``debugging_info[3]``.
        """
        def distance(x1, y1, x2, y2):
            return np.sqrt((x1 - x2) ** 2 + (y1 - y2) ** 2)

        def shoulders_too_narrow(body):
            # Looked up per frame so a clip without skeletons never touches the option.
            threshold = self.filter_option['threshold']  # for TED videos in 720p
            shoulder_length = distance(body[2 * 3], body[2 * 3 + 1],
                                       body[5 * 3], body[5 * 3 + 1])
            return shoulder_length < threshold

        incorrect_ratio = self._incorrect_frame_ratio(shoulders_too_narrow)
        self.debugging_info[3] = round(incorrect_ratio, 3)
        return incorrect_ratio > ratio

    def is_too_short(self):
        """Return True if the scene has fewer than ``MIN_SCENE_LENGTH`` frames."""
        return self.scene_length < self.MIN_SCENE_LENGTH

    def is_picture(self):
        """Return True if the clip looks like a still picture.

        Frames are sampled at a fifth of the scene length apart, and the mean
        L1 difference between consecutive samples is compared against
        ``PICTURE_DIFF_THRESHOLD``. The mean is stored in ``debugging_info[4]``.

        When fewer than two frames can be sampled and read (very short scene
        or failed reads), no motion can be demonstrated: the clip is reported
        as a still picture and ``debugging_info[4]`` is left unchanged.
        """
        sampling_interval = int(self.scene_length // self.N_PICTURE_SEGMENTS)
        if sampling_interval <= 0:
            # range() rejects a zero step; the scene is too short to sample.
            return True

        frames = []
        for frame_no in range(self.start_frame_no + sampling_interval,
                              self.end_frame_no - sampling_interval + 1,
                              sampling_interval):
            self.video.set(cv2.CAP_PROP_POS_FRAMES, frame_no)
            ret, frame = self.video.read()
            # Skip failed reads instead of handing None to cv2.norm.
            if ret and frame is not None:
                frames.append(frame)

        if len(frames) < 2:
            return True

        diffs = [cv2.norm(frame, next_frame, cv2.NORM_L1)  # abs diff
                 for frame, next_frame in zip(frames, frames[1:])]
        diff = sum(diffs) / len(diffs)
        self.debugging_info[4] = round(diff, 0)

        return diff < self.PICTURE_DIFF_THRESHOLD

    def is_many_people(self):
        """Return True if the mean number of skeletons per frame exceeds ``MAX_MEAN_PEOPLE``."""
        n_people = [len(skeleton) for skeleton in self.skeleton_data]
        if not n_people:
            return False
        return bool(np.mean(n_people) > self.MAX_MEAN_PEOPLE)

    def is_correct_clip(self):
        """Run every check in order and return True only if all of them pass.

        The checks stop at the first failure: ``message`` names the failed
        check and later entries of ``filtering_results`` stay 0. Each passed
        check sets its entry of ``filtering_results`` to 1; when all pass,
        ``message`` is "PASS".
        """
        # (check, arguments, message on rejection); the position in this tuple
        # is the index of the check in ``filtering_results``.
        checks = (
            (self.is_too_short, (), "too Short"),
            (self.is_many_people, (), "too many people"),
            # the ratio of back-facing skeletons must not exceed the reference ratio
            (self.is_skeleton_back, (0.3,), "looking behind"),
            # the ratio of skeletons with missing joints must not exceed the reference ratio
            (self.is_skeleton_missing, (0.5,), "too many missing joints"),
            # the ratio of sideways skeletons must not exceed the reference ratio
            (self.is_skeleton_sideways, (0.5,), "looking sideways"),
            # the ratio of too small skeletons must not exceed the reference ratio
            (self.is_skeleton_small, (0.5,), "too small."),
            (self.is_picture, (), "still picture"),
        )

        for result_idx, (check, args, rejection_message) in enumerate(checks):
            if check(*args):
                self.message = rejection_message
                return False
            self.filtering_results[result_idx] = 1

        self.message = "PASS"
        return True

    def get_filter_variable(self):
        """Return ``(filtering_results, message, debugging_info)`` of the last run."""
        return self.filtering_results, self.message, self.debugging_info