import time import numpy as np import cv2 # SmashScan libraries import util import timeline LABELS_LIST = ["battlefield", "dreamland", "finaldest", "fountain", "pokemon", "yoshis"] # An object that takes a capture and a number of input parameters and performs # a number of object detection operations. Parameters include a cv2 capture, # darkflow object, save_flag for saving results, and show_flag for display. class StageDetector: def __init__(self, capture, tfnet, show_flag=False, save_flag=False): self.capture = capture self.tfnet = tfnet self.save_flag = save_flag self.show_flag = show_flag # Predetermined parameters that have been tested to work best. self.end_fnum = int(self.capture.get(cv2.CAP_PROP_FRAME_COUNT)) self.max_num_match_frames = 30 self.min_match_length_s = 30 self.num_match_frames = 5 self.step_size = 60 self.timeline_empty_thresh = 4 #### STAGE DETECTOR TESTS ################################################## # Run the standard stage detector test over the entire video. def standard_test(self): # Create a timeline of the label history where the labels are stored as # integers while no result is (-1). Also create a bounding box list. dirty_timeline, bbox_hist = list(), list() # Iterate through video and use tfnet to perform object detection. start_time = time.time() for fnum in range(0, self.end_fnum, self.step_size): self.capture.set(cv2.CAP_PROP_POS_FRAMES, fnum) _, frame = self.capture.read() # Get the tfnet result with the largest confidence and extract info. bbox, label, confidence = self.get_tfnet_result(frame) # Store label if result found, or (-1) if no result was found. if label: dirty_timeline.append(LABELS_LIST.index(label)) bbox_hist.append(bbox) else: dirty_timeline.append(-1) bbox_hist.append(-1) # Display the frame if show_flag is enabled. Exit if q pressed. if self.show_flag: if confidence: text = '{}: {:.0f}%'.format(label, confidence * 100) util.show_frame(frame, bbox_list=[bbox], text=text, save_flag=self.save_flag, save_name="output/{:07d}.png".format(fnum)) else: util.show_frame(frame, save_flag=self.save_flag, save_name="output/{:07d}.png".format(fnum)) if cv2.waitKey(1) & 0xFF == ord('q'): break # End the TfNet session and display time taken to complete. util.display_fps(start_time, len(dirty_timeline), "Initial Sweep") # Fill holes in the history timeline list, and filter out timeline # sections that are smaller than a particular size. clean_timeline = timeline.fill_filter(dirty_timeline, self.timeline_empty_thresh) clean_timeline = timeline.size_filter(clean_timeline, self.step_size, self.min_match_length_s) timeline.show_plots(dirty_timeline, clean_timeline, LABELS_LIST) # Get a list of the matches and avg bboxes according to clean_timeline. match_ranges = timeline.get_ranges(clean_timeline) match_bboxes = self.get_match_bboxes(match_ranges, bbox_hist) # Show the beginning and end of each match according to the filters. display_frames, display_bboxes = list(), list() for i, match_range in enumerate(match_ranges): display_frames += [match_range[0]*self.step_size, match_range[1]*self.step_size] display_bboxes += [match_bboxes[i], match_bboxes[i]] util.show_frames(self.capture, display_frames, display_bboxes) #### STAGE DETECTOR INTERNAL METHODS ####################################### # Return the tfnet prediction with the highest confidence. def get_tfnet_result(self, frame): results = self.tfnet.return_predict(frame) result = dict() bbox, label, confidence = None, None, None max_confidence = 0 for result_iter in results: if result_iter["confidence"] > max_confidence: result = result_iter max_confidence = result_iter["confidence"] if result: tl = (result['topleft']['x'], result['topleft']['y']) br = (result['bottomright']['x'], result['bottomright']['y']) bbox = (tl, br) label = result['label'] confidence = result['confidence'] return bbox, label, confidence # Given match ranges and bounding box history, return a list of the average # bounding box (top left and bottom right coordinate pair) of each match. def get_match_bboxes(self, match_ranges, bbox_hist): match_bboxes = list() for mr in match_ranges: avg_bbox = util.get_avg_bbox(bbox_hist[mr[0]: mr[1]]) match_bboxes.append(avg_bbox) return match_bboxes # Given a list of predicted labels, determine the label that accurately # represents the match. If there is too much variance or no label was found # assume that a highlight reel or poor quality stream was given. def get_match_label(self, label_list, match_range): # If no stages were found, return and declare a failure. if not label_list: print("\tUnidentifiable Match Range: {}".format(match_range)) return None # If there is too much variance (multiple labels found), return. if len(set(label_list)) > 1: print("\tRemoved Match Range: {}".format(match_range)) return "multiple_stages_found" # Find the label that occured the most during the tested frames. match_label = max(set(label_list), key=label_list.count) return match_label #### STAGE DETECTOR EXTERNAL METHODS ####################################### # Given a list of match ranges, randomly select frames from each range and # return the average bounding box and expected label for each match. Also # return an updated list of the match ranges, with matches removed where # multiple stages were found, aka highlight reels. def get_match_info(self, match_ranges): # For each match range, generated random frame numbers to search. new_match_ranges, match_bboxes, match_labels = list(), list(), list() for match_range in match_ranges: random_fnum_list = np.random.randint(low=match_range[0], high=match_range[1], size=self.num_match_frames) bbox_list, label_list = list(), list() # Find the labels for the random frame numbers selected. for random_fnum in random_fnum_list: self.capture.set(cv2.CAP_PROP_POS_FRAMES, random_fnum) _, frame = self.capture.read() bbox, label, _ = self.get_tfnet_result(frame) if label: bbox_list.append(bbox) label_list.append(label) # Attempt to find a stage if none was found in the initial search. if not label_list: for _ in range(self.max_num_match_frames): fnum = np.random.randint(match_range[0], match_range[1]) self.capture.set(cv2.CAP_PROP_POS_FRAMES, fnum) _, frame = self.capture.read() bbox, label, _ = self.get_tfnet_result(frame) if label: bbox_list, label_list = [bbox], [label] break # Find the label that occured the most during the tested frames. If # zero or multiple stages were found, declare failure. match_label = self.get_match_label(label_list, match_range) if match_label is None: return None, None if match_label == "multiple_stages_found": continue new_match_ranges.append(match_range) match_bboxes.append(util.get_avg_bbox(bbox_list)) match_labels.append(match_label) return new_match_ranges, match_bboxes, match_labels