""" Takes in original H3.6M, renames and writes frames to file, create annotations in a single pickle file that the later h36_to_tfrecords_video.py can read in. Mosh data is not available. Requires spacepy & CDF: - Get CDF (NASA): https://cdf.gsfc.nasa.gov/html/sw_and_docs.html - Follow the instructions in README.install - need to run . ~/Downloads/cdf36_4-dist/bin/definitions.B for spacepy to work. Then run: python -m src.datasets.h36.read_human36m Originally developed by Federica Bogo, edited by Angjoo Kanazawa """ from glob import glob import os from os import makedirs, system from os.path import join, getsize, exists import pickle from spacepy import pycdf import sys import cv2 import numpy as np from absl import flags flags.DEFINE_string( 'source_dir', '/scratch1/storage/human36m_full_raw', 'Root dir of the original Human3.6M dataset unpacked with metadata.xml' ) flags.DEFINE_string('out_dir', '/scratch1/storage/human36m_25fps', 'Output directory') flags.DEFINE_integer('frame_skip', 2, 'subsample factor, 5 corresponds to 10fps, 2=25fps') FLAGS = flags.FLAGS colors = np.random.randint(0, 255, size=(17, 3)) joint_ids = [0, 1, 2, 3, 6, 7, 8, 12, 13, 14, 15, 17, 18, 19, 25, 26, 27] # Mapping from H36M joints to LSP joints (0:13). In this roder: _COMMON_JOINT_IDS = np.array([ 3, # R ankle 2, # R knee 1, # R hip 4, # L hip 5, # L knee 6, # L ankle 16, # R Wrist 15, # R Elbow 14, # R shoulder 11, # L shoulder 12, # L Elbow 13, # L Wrist 8, # Neck top 10, # Head top ]) def plot_points(points_, img, points0_=None, radius=10): global colors tmp_img = img.copy() num_colors = len(colors) points = points_.T if points_.shape[0] == 2 else points_ points0 = None if points0_ is None else (points0_.T if points0_.shape[0] == 2 else points0_) for i, coord in enumerate(points.astype(int)): if coord[0] < img.shape[1] and coord[1] < img.shape[0] and coord[0] > 0 and coord[1] > 0: cv2.circle(tmp_img, tuple(coord), radius, colors[i % num_colors].tolist(), -1) if points0 is not None: for i, coord in enumerate(points0.astype(int)): if coord[0] < img.shape[1] and coord[1] < img.shape[0] and coord[0] > 0 and coord[1] > 0: cv2.circle(tmp_img, tuple(coord), radius + 2, colors[i % num_colors].tolist(), 2) return tmp_img def rotation_matrix(args): (x, y, z) = args X = np.vstack([[1, 0, 0], [0, np.cos(x), -np.sin(x)], [0, np.sin(x), np.cos(x)]]) Y = np.vstack([[np.cos(y), 0, np.sin(y)], [0, 1, 0], [-np.sin(y), 0, np.cos(y)]]) Z = np.vstack([[np.cos(z), -np.sin(z), 0], [np.sin(z), np.cos(z), 0], [0, 0, 1]]) return (X.dot(Y)).dot(Z) def project_point_radial(P, R, t, f, c, all_k): k = np.array(list(all_k[:2]) + list(all_k[-1:])) p = all_k[2:4] N = P.shape[0] X = R.dot(P.T - np.tile(t.reshape((-1, 1)), (1, len(P)))) XX = X[:2, :] / np.tile(X[2, :], (2, 1)) r2 = XX[0, :]**2 + XX[1, :]**2 radial = 1 + np.sum( np.tile(k.reshape((-1, 1)), (1, N)) * np.vstack((r2, r2**2, r2**3)), axis=0) tan = p[0] * XX[1, :] + p[1] * XX[0, :] XXX = XX * np.tile(radial + tan, (2, 1)) + p[::-1].reshape( (-1, 1)).dot(r2.reshape((1, -1))) proj = (np.tile(f, (N, 1)) * XXX.T) + np.tile(c, (N, 1)) return proj def read_cam_parameters(xml_path, sbj_id, cam_id): import xml.etree.ElementTree # use the notation from 0 -- more practical to access array sbj_id = sbj_id - 1 cam_id = cam_id - 1 n_sbjs = 11 n_cams = 4 root = xml.etree.ElementTree.parse(xml_path).getroot() for child in root: if child.tag == 'w0': all_cameras = child.text tokens = all_cameras.split(' ') tokens[0] = tokens[0].replace('[', '') tokens[-1] = tokens[-1].replace(']', '') start = 
(cam_id * n_sbjs) * 6 + sbj_id * 6 extrs = tokens[start:start + 6] start = (n_cams * n_sbjs * 6) + cam_id * 9 intrs = tokens[start:start + 9] rot = rotation_matrix(np.array(extrs[:3], dtype=float)) rt = rot t = np.array(extrs[3:], dtype=float) f = np.array(intrs[:2], dtype=float) c = np.array(intrs[2:4], dtype=float) distortion = np.array(intrs[4:], dtype=float) k = np.hstack((distortion[:2], distortion[3:5], distortion[2:3])) return (rt, t, f, c, k) def read_action_name(xml_path, sbj_id, actionno, trialno): import xml.etree.ElementTree root = xml.etree.ElementTree.parse(xml_path).getroot() myactionno = actionno + 1 # otherwise we take ALL into account for child in root: if child.tag == 'mapping': for tr in child.getchildren(): if tr.getchildren()[0].text == str(myactionno): if tr.getchildren()[1].text == str(trialno): return tr.getchildren()[2 + sbj_id - 1].text def read_fua_results(path, sbj_id, trial_id, cam_id): from scipy.io import loadmat # sbj_id already follows the standard convention choose_id = sbj_id * (2 * 4) + (trial_id - 1) * 4 + (cam_id - 1) print(choose_id) res = loadmat(path) joints = res['Pred'].squeeze() return [j.reshape((-1, 3)) for j in joints[choose_id]] def get_num_frames(path): vid = cv2.VideoCapture(path) return int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) def read_frames(path, n_frames=None): vid = cv2.VideoCapture(path) imgs = [] if n_frames is None: n_frames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT)) for _ in range(n_frames): success, img = vid.read() imgs.append(img) return imgs def read_silhouettes(path, n_frames=None): import h5py f = h5py.File(path, 'r') refs = f['Masks'] masks = [] if n_frames is None: n_frames = len(refs) for i in range(n_frames): mask = np.array(f[refs[i, 0]], dtype=bool) mask = np.fliplr(np.rot90(mask, 3)) masks.append(mask) return masks def read_poses(path, n_frames=None, is_3d=False, joint_ids=range(32)): data = pycdf.CDF(path) # <CDF: # Pose: CDF_FLOAT [1, N, 64] # > poses = data['Pose'][...][0] if n_frames is None: n_frames = poses.shape[0] dim = 2 if not is_3d else 3 packed_poses = [ poses[i].reshape((-1, dim))[joint_ids] for i in range(n_frames) ] return packed_poses def compute_fua_joints(joints, new_root_joint, in_meter=False): new_joints = np.zeros_like(joints) new_joints[0, :] = new_root_joint for i, offset in enumerate(joints[1:]): new_joints[i + 1] = new_root_joint + offset if in_meter: new_joints = np.vstack([j / 1000. 
for j in new_joints]) return new_joints def crop_image(silhs): res = np.asarray(silhs).any(axis=0) cnts, hier = cv2.findContours( np.uint8(res) * 255, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) """ checks = [] for cnt in cnts: kk = np.zeros((1000, 1000, 3), dtype=np.uint8) hull = cv2.convexHull(cnt) cv2.drawContours(kk, [cnt], 0, (255,255,255), -1) checks.append(kk) """ max_id = 0 max_length = len(cnts[0]) for i in range(1, len(cnts)): if len(cnts[i]) > max_length: max_id = i max_length = len(cnts[i]) (x, y, w, h) = cv2.boundingRect(cnts[max_id]) return (x, y, w, h) def crop_and_clean_mask_to_int(mask, x, y, w, h): mask = np.uint8(mask[y:y + h, x:x + w]) * 255 # TODO: put this into a function (it's used above as well) cnts, hier = cv2.findContours(mask.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE) max_id = 0 max_length = len(cnts[0]) for i in range(1, len(cnts)): if len(cnts[i]) > max_length: max_id = i max_length = len(cnts[i]) tmp_mask = np.dstack((mask, mask, mask)) for i, cnt in enumerate(cnts): if i != max_id: cv2.drawContours(tmp_mask, [cnt], 0, (0, 0, 0), -1) return cv2.split(tmp_mask)[0] def main(raw_data_root, output_root, frame_skip): xml_path = join(raw_data_root, 'metadata.xml') # <actionno> for each different action class: # 1) Directions, 2) Discussion, 3) Eating, 4) Greeting, # 5) Phone Talk, 6) Posing, 7) Buying, 8) Sitting, # 9) Sitting Down, 10) Smoking, 11) Taking Photo, 12) Waiting, # 13) Walking, 14) Walking Dog, 15) Walking Pair action_names = [ 'Directions', 'Discussion', 'Eating', 'Greeting', 'Phoning', 'Posing', 'Purchases', 'Sitting', 'SittingDown', 'Smoking', 'TakingPhoto', 'Waiting', 'Walking', 'WakingDog', 'WalkTogether' ] n_frames = None sub_ids = [1, 6, 7, 8, 5, 9, 11] # Action, camera, suject id starts from 1 Matlab convention cam_ids = range(1, 5) trial_ids = [1, 2] action_ids = range(1, 16) import itertools all_pairs = [ p for p in list( itertools.product(*[sub_ids, action_ids, trial_ids, cam_ids])) ] def has_num(str): return any(i.isdigit() for i in str) for (sbj_id, action_id, trial_id, cam_id) in all_pairs: seq_name = read_action_name(xml_path, sbj_id, action_id, trial_id) save_seq_name = '%s_%d' % (action_names[action_id - 1], trial_id - 1) output_base = join(output_root, 'S%s' % str(sbj_id), save_seq_name) output_dir = join(output_base, 'cam_%01d' % (cam_id - 1)) print('Sub: {}, action {}, trial {}, cam {}'.format( sbj_id, action_id, trial_id, cam_id)) print('Orig seq_name %s, new_seq_name %s' % (seq_name, save_seq_name)) print('Saving to {}'.format(output_dir)) if sbj_id == 11 and 'Phoning 2' in seq_name: print('Skipping.. 
{}'.format(output_dir)) continue if not exists(output_dir): makedirs(output_dir) # Save orig name name_path = join(output_base, 'orig_seq_name.txt') if not exists(name_path): with open(name_path, "w") as f: f.write(seq_name) video_paths = sorted( glob( join(raw_data_root, 'S%s' % str(sbj_id), 'Videos', '%s.*mp4' % seq_name))) pose2d_paths = sorted( glob( join(raw_data_root, 'S%s' % str(sbj_id), 'MyPoseFeatures/D2_Positions', '%s.*cdf' % seq_name))) pose3d_paths = sorted( glob( join(raw_data_root, 'S%s' % str(sbj_id), 'MyPoseFeatures/D3_Positions_mono', '%s.*cdf' % seq_name))) (rot, t, flen, c, k) = read_cam_parameters(xml_path, sbj_id, cam_id) cam_path = join(output_dir, 'camera_wext.pkl') print('Writing %s' % cam_path) if not exists(cam_path): with open(cam_path, 'wb') as fw: pickle.dump({'f': flen, 'c': c, 'k': k, 'rt': rot, 't': t}, fw) # AJ: frames poses2d = read_poses(pose2d_paths[cam_id - 1], joint_ids=joint_ids) poses3d = read_poses( pose3d_paths[cam_id - 1], is_3d=True, joint_ids=joint_ids) # Check if we're done here. want_length = len(poses2d[::frame_skip]) written_images = glob(join(output_dir, '*.png')) num_imgs_written = len(written_images) if want_length == num_imgs_written: is_done = True for fname in written_images: if getsize(fname) == 0: is_done = False break if is_done: print('Done!') continue # Write images.. print('reading images...') imgs = read_frames(video_paths[cam_id - 1], n_frames=n_frames) # For some reason, len(poses2d) < len(imgs) by few frames sometimes # len(poses2d) == len(poses3d) always. # clip the images according to them.. imgs = imgs[:len(poses2d)] # Subsample imgs = imgs[::frame_skip] poses2d = poses2d[::frame_skip] poses3d = poses3d[::frame_skip] gt_path = join(output_dir, 'gt_poses.pkl') if not exists(gt_path): with open(gt_path, 'wb') as fgt: pickle.dump({'2d': poses2d, '3d': poses3d}, fgt) # Link the mp4 with the new name to the old name. # video_paths[cam_id - 1] action_name = action_names[action_id - 1] out_video_name = 'S{}_{}_{}_cam_{}.mp4'.format( sbj_id, action_name, trial_id - 1, cam_id - 1) out_video_path = join(output_dir, out_video_name) if not exists(out_video_path): orig_vid_path = video_paths[cam_id - 1].replace(" ", "\ ") cmd = 'ln -s {} {}'.format(orig_vid_path, out_video_path) ret = system(cmd) if ret > 0: print('something went wrong!') import ipdb ipdb.set_trace() for i, (img) in enumerate(imgs): if exists(join(output_dir, 'frame%04d.png' % i)): if getsize(join(output_dir, 'frame%04d.png' % i)) > 0: continue if i % 50 == 0: import matplotlib.pyplot as plt plt.ion() plt.imshow(plot_points(poses2d[i], img)[:, :, ::-1]) plt.draw() plt.pause(1e-3) cv2.imwrite(join(output_dir, 'frame%04d.png' % i), img) if i % 50 == 0: print(join(output_dir, 'frame%04d.png' % i)) if __name__ == '__main__': FLAGS(sys.argv) frame_skip = FLAGS.frame_skip raw_data_root = FLAGS.source_dir output_root = FLAGS.out_dir main(raw_data_root, output_root, frame_skip)
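

# --- Illustrative sketch, not part of the original pipeline ---
# A minimal example of how a downstream consumer (e.g. h36_to_tfrecords_video.py)
# might read the per-camera outputs written above. The function name and the
# `seq_cam_dir` argument (a path to one of the cam_* directories produced by
# this script) are hypothetical; only the pickle layouts and _COMMON_JOINT_IDS
# come from this file.
def _example_load_sequence(seq_cam_dir):
    with open(join(seq_cam_dir, 'gt_poses.pkl'), 'rb') as f:
        gt = pickle.load(f)  # {'2d': list of 17x2, '3d': list of 17x3}
    with open(join(seq_cam_dir, 'camera_wext.pkl'), 'rb') as f:
        cam = pickle.load(f)  # {'f', 'c', 'k', 'rt', 't'}
    # _COMMON_JOINT_IDS reorders the 17 extracted joints into the 14 LSP joints.
    lsp_2d = [p[_COMMON_JOINT_IDS] for p in gt['2d']]
    return gt, cam, lsp_2d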