# Python source code of imgproc

# Copyright 2018 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Utility functions for reading and processing data."""

import cv2
import numpy as np
from scipy import interpolate
from scipy.misc import imresize


def imread_rgb(dset, path):
  """Read an image from disk and return it in RGB channel order.

  :param dset: dataset name; one of 'ucf-101', 'ntu-rgbd', 'pku-mmd',
    'cad-60'
  :param path: path of the image file to read
  :return: the image with its channels converted from BGR to RGB; for
    'ucf-101' the last column is dropped
  :raises ValueError: if dset is not one of the supported dataset names
  :raises IOError: if no image could be read from path
  """
  if dset not in ('ucf-101', 'ntu-rgbd', 'pku-mmd', 'cad-60'):
    # Explicit raise instead of `assert False`: asserts are removed under
    # `python -O`, which would make this function silently return None.
    raise ValueError('unsupported dataset: {!r}'.format(dset))

  bgr = cv2.imread(path)
  if bgr is None:  # cv2.imread reports failure by returning None
    raise IOError('could not read image: {!r}'.format(path))

  rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
  if dset == 'ucf-101':
    return rgb[:, :-1]  # oflow is 1px smaller than rgb in ucf-101
  return rgb


def imread_oflow(dset, *paths):
  """Read an optical-flow frame from disk as an h x w x 2 array.

  :param dset: dataset name; one of 'ucf-101', 'ntu-rgbd', 'pku-mmd',
    'cad-60'
  :param paths: for 'ucf-101' exactly two paths (u image, v image), each
    read as grayscale; for the other datasets only the first path is used
    and the flow is taken from two channels of that single image
  :return: array whose last axis holds the two flow channels
  :raises ValueError: if dset is not one of the supported dataset names
  :raises IOError: if an image could not be read from its path
  """
  if dset == 'ucf-101':
    # The u and v components live in two separate grayscale images.
    path_u, path_v = paths
    channels = []
    for channel_path in (path_u, path_v):
      channel = cv2.imread(channel_path, cv2.IMREAD_GRAYSCALE)
      if channel is None:  # cv2.imread reports failure by returning None
        raise IOError('could not read image: {!r}'.format(channel_path))
      channels.append(channel)
    return np.stack(channels, axis=2)

  if dset in ('ntu-rgbd', 'pku-mmd', 'cad-60'):
    path = paths[0]
    bgr = cv2.imread(path)
    if bgr is None:  # cv2.imread reports failure by returning None
      raise IOError('could not read image: {!r}'.format(path))
    # Reverse the channel axis (BGR -> RGB), then keep the first two.
    return bgr[..., ::-1][..., :2]

  # Explicit raise instead of `assert False`: asserts are removed under
  # `python -O`, which would make this function silently return None.
  raise ValueError('unsupported dataset: {!r}'.format(dset))


def imread_depth(dset, path):
  """Read a depth image and return it as an 8-bit array with a channel axis.

  :param dset: dataset name; not used by this function (the original note
    mentions 'ntu-rgbd' and 'pku-mmd')
  :param path: path of the depth image, read with cv2.IMREAD_UNCHANGED
  :return: uint8 array with a trailing axis of length 1 appended to the
    image as read
  """
  raw = cv2.imread(path, cv2.IMREAD_UNCHANGED)
  with_channel = raw[:, :, np.newaxis]
  # Divide by 256 and clamp to the uint8 range before casting
  # (presumably a 16-bit -> 8-bit rescale; confirm against the data).
  scaled = np.clip(with_channel/256, 0, 255)
  return scaled.astype(np.uint8)


def inpaint(img, threshold=1):
  """Fill in missing pixels of an image.

  A 3-dimensional input is handled as an RGB image: pixels whose channels
  are all zero are filled by cv2.inpaint (INPAINT_TELEA, radius 3).
  Any other input is handled as a depth map: each pixel takes the value of
  the nearest pixel whose value is greater than `threshold`.

  :param img: image array; 3-D for RGB, otherwise treated as depth
  :param threshold: depth values above this are considered valid
  :return: the filled image
  """
  if len(img.shape) == 3:  # RGB
    holes = np.all(img == 0, axis=2).astype(np.uint8)
    return cv2.inpaint(img, holes, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

  # depth
  num_rows, num_cols = img.shape[:2]
  valid = img > threshold
  grid_y, grid_x = np.indices((num_rows, num_cols))

  # (x, y) coordinates and values of the valid pixels.
  known_xy = np.column_stack((grid_x[valid], grid_y[valid]))
  known_values = img[valid]

  # Evaluate the nearest-neighbour interpolant at every pixel position.
  nearest = interpolate.NearestNDInterpolator(known_xy, known_values)
  filled = nearest(grid_x.ravel(), grid_y.ravel())
  return filled.reshape(grid_x.shape)


def resize(video, size, interpolation):
  """Resize every frame of a video with `imresize`.

  All leading axes are flattened into one frame axis, each frame is
  resized, and the leading axes are restored afterwards. Frames with 3
  channels are resized in one call; frames with 1 or 2 channels are resized
  one channel at a time.

  :param video: ... x h x w x num_channels
  :param size: (h, w) of the output frames
  :param interpolation: 'bilinear', 'nearest'
  :return: array of shape ... x size[0] x size[1] x num_channels, allocated
    with np.zeros (default float dtype)
  :raises NotImplementedError: when a frame is processed and num_channels
    is not 1, 2 or 3
  """
  leading_shape = video.shape[:-3]
  num_channels = video.shape[-1]

  frames = video.reshape((-1, *video.shape[-3:]))
  num_frames = frames.shape[0]
  out = np.zeros((num_frames, *size, num_channels))

  for idx, frame in enumerate(frames):
    if num_channels == 3:
      out[idx] = imresize(frame, size, interpolation)
    elif num_channels in (1, 2):
      for channel in range(num_channels):
        out[idx, ..., channel] = imresize(
            frame[..., channel], size, interpolation)
    else:
      raise NotImplementedError

  return out.reshape((*leading_shape, *size, num_channels))


def proc_oflow(images):
  """Render optical-flow fields as colour images.

  For each flow field along the first axis, the two channels are converted
  to polar form. The angle drives channel 0 (hue) and the min-max
  normalised magnitude drives channel 2 (value) of an HSV image whose
  channel 1 (saturation) is fixed at 255; that image is then converted
  with cv2.COLOR_HSV2BGR.

  :param images: array iterated over its first axis; each item is indexed
    as h x w x 2 (channel 0 and channel 1 are passed to cv2.cartToPolar)
  :return: the rendered uint8 images stacked along a new first axis
  """
  h, w = images.shape[-3:-1]

  def render(flow):
    # Start fully saturated; channels 0 and 2 are overwritten below.
    hsv = np.full((h, w, 3), 255, dtype=np.uint8)
    mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
    hsv[..., 0] = ang*180/np.pi/2  # radians -> half-degrees
    hsv[..., 2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX)
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

  return np.stack([render(flow) for flow in images])