python source code of utils

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

########################################################################
# GNU General Public License v3.0
# GNU GPLv3
# Copyright (c) 2019, Noureldien Hussein
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
########################################################################

"""
Helper functions for loading/dumping data, listing files and folders, normalization
and array utilities. Also includes a few small helper classes.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import time
import h5py
import yaml
import numpy as np
import pickle as pkl
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import label_binarize
from sklearn import preprocessing, manifold
import scipy.io as sio

import os
import json
import natsort
import random
from multiprocessing.dummy import Pool

from core import const

logger = logging.getLogger(__name__)

# region Load and Dump

def pkl_load(path):
    """
    Load a pickled object from a file.
    :param path: path of the pickle file.
    :return: the un-pickled object.
    """
    # Pickle streams are bytes, so the file has to be opened in binary mode;
    # text mode fails on python 3.
    # NOTE: un-pickling can execute arbitrary code, only load trusted files.
    with open(path, 'rb') as f:
        data = pkl.load(f)
    return data

def txt_load(path):
    """
    Read a text file and split it into lines.
    :param path: path of the text file.
    :return: numpy array with one item per line (line breaks removed).
    """
    with open(path, 'r') as text_file:
        content = text_file.read()
    return np.array(content.splitlines())

def byte_load(path):
    """
    Read the whole content of a file in binary mode.
    :param path: path of the file.
    :return: the raw content of the file.
    """
    with open(path, 'rb') as binary_file:
        return binary_file.read()

def json_load(path):
    """
    Parse a JSON file.
    :param path: path of the JSON file.
    :return: the decoded python object.
    """
    with open(path, 'r') as json_file:
        return json.load(json_file)

def yaml_load(file_path):
    """
    Load a YAML file and convert the parsed content with convert_dict_to_attrdict().
    :param file_path: path of the YAML file.
    :return: the value returned by convert_dict_to_attrdict() for the parsed content.
    """
    with open(file_path, 'r') as f:
        # safe_load() only builds plain python objects. Calling yaml.load() without
        # an explicit Loader can construct arbitrary objects and is rejected by
        # recent PyYAML releases.
        data = yaml.safe_load(f)

    # the conversion also wraps the top-level dict, so no extra AttrDict() call is needed
    data = convert_dict_to_attrdict(data)
    return data

def h5_load(path, dataset_name='data'):
    """
    Read one dataset from an HDF5 file.
    :param path: path of the HDF5 file.
    :param dataset_name: name of the dataset inside the file.
    :return: the full content of the dataset.
    """
    # the context manager closes the file even if reading the dataset fails
    with h5py.File(path, 'r') as h5_file:
        # dataset[()] reads the whole dataset; the `.value` property was removed in h5py 3.0
        data = h5_file[dataset_name][()]
    return data

def h5_load_multi(path, dataset_names):
    """
    Read several datasets from one HDF5 file.
    :param path: path of the HDF5 file.
    :param dataset_names: names of the datasets to read.
    :return: list with the full content of each dataset, in the order of dataset_names.
    """
    # the context manager closes the file even if reading one of the datasets fails
    with h5py.File(path, 'r') as h5_file:
        # dataset[()] reads the whole dataset; the `.value` property was removed in h5py 3.0
        data = [h5_file[name][()] for name in dataset_names]
    return data

def txt_dump(data, path):
    """
    Write the items of data to a text file, one item per line.
    The lines are separated by a line break and there is no line break after the last one.
    :param data: iterable of items, each one is formatted with '%s'.
    :param path: path of the file to write.
    """
    # format everything before opening the file, so any iterable is accepted
    # (not only sized sequences) and a formatting error does not truncate the file
    lines = ['%s' % k for k in data]
    with open(path, 'w') as f:
        f.write('\n'.join(lines))

def byte_dump(data, path):
    """
    Write data to a file opened in binary mode.
    :param data: the content to write.
    :param path: path of the file to write.
    """
    with open(path, 'wb') as binary_file:
        binary_file.write(data)

def pkl_dump(data, path, is_highest=True):
    """
    Pickle an object to a file.
    :param data: the object to pickle.
    :param path: path of the file to write.
    :param is_highest: if True use pickle.HIGHEST_PROTOCOL, otherwise the default protocol.
    """
    # pickle writes bytes, so the file has to be opened in binary mode;
    # text mode fails on python 3
    with open(path, 'wb') as f:
        if is_highest:
            pkl.dump(data, f, pkl.HIGHEST_PROTOCOL)
        else:
            pkl.dump(data, f)

def json_dump(data, path):
    """
    Serialize data as JSON and write it to a file.
    :param data: the object to serialize.
    :param path: path of the file to write.
    """
    with open(path, 'w') as json_file:
        json.dump(data, json_file)

def h5_dump(data, path, dataset_name='data'):
    """
    Write one array to a new HDF5 file (an existing file is overwritten, mode 'w').
    :param data: the array to store, its `dtype` attribute is used as the dataset dtype.
    :param path: path of the HDF5 file to write.
    :param dataset_name: name of the dataset inside the file.
    """
    # the context manager closes the file even if creating the dataset fails
    with h5py.File(path, 'w') as h5_file:
        h5_file.create_dataset(dataset_name, data=data, dtype=data.dtype)

def h5_dump_multi(data, dataset_names, path):
    """
    Write several arrays to one new HDF5 file (an existing file is overwritten, mode 'w').
    :param data: sequence of arrays, the `dtype` attribute of each one is used as the dataset dtype.
    :param dataset_names: sequence with the dataset name of each array, indexed like data.
    :param path: path of the HDF5 file to write.
    """
    # the context manager closes the file even if creating one of the datasets fails
    with h5py.File(path, 'w') as h5_file:
        for i, item_data in enumerate(data):
            # index the names explicitly, so a missing name raises instead of being skipped
            item_name = dataset_names[i]
            h5_file.create_dataset(item_name, data=item_data, dtype=item_data.dtype)

def csv_load(path, sep=',', header='infer'):
    """
    Read a CSV file with pandas and return its values.
    :param path: path of the CSV file.
    :param sep: separator, passed to pandas.read_csv.
    :param header: header option, passed to pandas.read_csv.
    :return: the `values` of the loaded data frame.
    """
    return pd.read_csv(path, sep=sep, header=header).values

def mat_load(path, m_dict=None):
    """
    Load mat files using scipy.io.loadmat.
    :param path: path of the mat file.
    :param m_dict: optional second positional argument forwarded to loadmat; not forwarded when None.
    :return: the value returned by loadmat.
    """
    if m_dict is not None:
        return sio.loadmat(path, m_dict)

    return sio.loadmat(path)

# endregion

# region File/Folder Names/Pathes

def file_names(path, is_nat_sort=False):
    """
    List the names of the files directly inside a folder (sub-folders are not visited).
    :param path: path of the folder.
    :param is_nat_sort: if True, the names are sorted with natsort.natsorted.
    :return: list of file names.
    :raises Exception: if path is not an existing folder.
    """
    # isdir() also rejects a path that points to a file, for which os.walk() yields nothing
    if not os.path.isdir(path):
        exp_msg = 'Sorry, folder path does not exist: %s' % (path)
        raise Exception(exp_msg)

    # the built-in next() works on python 2 and 3, the generator method .next() is python 2 only
    names = next(os.walk(path))[2]

    if is_nat_sort:
        names = natsort.natsorted(names)

    return names

def file_pathes(path, is_nat_sort=False):
    """
    List the pathes of the files directly inside a folder (sub-folders are not visited).
    :param path: path of the folder.
    :param is_nat_sort: if True, the file names are sorted with natsort.natsorted.
    :return: list of pathes, each one formatted as '<path>/<file name>'.
    :raises Exception: if path is not an existing folder.
    """
    # isdir() also rejects a path that points to a file, for which os.walk() yields nothing
    if not os.path.isdir(path):
        exp_msg = 'Sorry, folder path does not exist: %s' % (path)
        raise Exception(exp_msg)

    # the built-in next() works on python 2 and 3, the generator method .next() is python 2 only
    names = next(os.walk(path))[2]

    if is_nat_sort:
        names = natsort.natsorted(names)

    pathes = ['%s/%s' % (path, n) for n in names]
    return pathes

def folder_names(path, is_nat_sort=False):
    """
    List the names of the folders directly inside a folder (no recursion).
    :param path: path of the parent folder.
    :param is_nat_sort: if True, the names are sorted with natsort.natsorted.
    :return: list of folder names.
    :raises Exception: if path is not an existing folder.
    """
    # isdir() also rejects a path that points to a file, for which os.walk() yields nothing
    if not os.path.isdir(path):
        exp_msg = 'Sorry, folder path does not exist: %s' % (path)
        raise Exception(exp_msg)

    # the built-in next() works on python 2 and 3, the generator method .next() is python 2 only
    names = next(os.walk(path))[1]

    if is_nat_sort:
        names = natsort.natsorted(names)

    return names

def folder_pathes(path, is_nat_sort=False):
    """
    List the pathes of the folders directly inside a folder (no recursion).
    :param path: path of the parent folder.
    :param is_nat_sort: if True, the folder names are sorted with natsort.natsorted.
    :return: list of pathes, each one formatted as '<path>/<folder name>'.
    :raises Exception: if path is not an existing folder.
    """
    # isdir() also rejects a path that points to a file, for which os.walk() yields nothing
    if not os.path.isdir(path):
        exp_msg = 'Sorry, folder path does not exist: %s' % (path)
        raise Exception(exp_msg)

    # the built-in next() works on python 2 and 3, the generator method .next() is python 2 only
    names = next(os.walk(path))[1]

    if is_nat_sort:
        names = natsort.natsorted(names)

    pathes = ['%s/%s' % (path, n) for n in names]
    return pathes

# endregion

# region Normalization

def normalize_mean_std(x):
    """
    Subtract the mean and divide by the standard deviation, both computed along axis 0.
    The augmented assignments are used, so a numpy array passed in is modified.
    There is no guard against a zero standard deviation.
    :param x: the data to normalize.
    :return: the normalized data.
    """
    # both statistics are computed before x is changed
    axis_mean, axis_std = np.mean(x, axis=0), np.std(x, axis=0)
    x -= axis_mean
    x /= axis_std
    return x

def normalize_mean(x):
    """
    Divide the data by its mean computed along axis 0.
    The augmented assignment is used, so a numpy array passed in is modified.
    :param x: the data to normalize.
    :return: the normalized data.
    """
    x /= np.mean(x, axis=0)
    return x

def normalize_sum(x):
    """
    Divide each item of x by its own sum, where the sums are computed along axis 1.
    :param x: the data to normalize.
    :return: new numpy array with the normalized items.
    """
    row_totals = np.sum(x, axis=1)
    normalized_rows = [row / total for row, total in zip(x, row_totals)]
    return np.array(normalized_rows)

def normalize_l2(x):
    """
    Normalize x with sklearn.preprocessing.normalize using the l2 norm.
    :param x: the data to normalize.
    :return: the value returned by preprocessing.normalize.
    """
    # 'l2' is the default norm of preprocessing.normalize, it is only spelled out here
    return preprocessing.normalize(x, norm='l2')

def normalize_l1(x):
    """
    Normalize x with sklearn.preprocessing.normalize using the l1 norm.
    :param x: the data to normalize.
    :return: the value returned by preprocessing.normalize.
    """
    normalized = preprocessing.normalize(x, norm='l1')
    return normalized

def normalize_range_0_to_1(x):
    """
    Shift x by its minimum, then divide by the maximum of the shifted values.
    There is no guard for the case where all values are equal (the divisor is then zero).
    :param x: numpy array (its min() and max() methods are used).
    :return: new array with the rescaled values.
    """
    shifted = np.add(x, -x.min())
    return np.divide(shifted, shifted.max())

# endregion

# region Array Helpers

def array_to_text(a, separator=', '):
    """
    Join the string form of every item into one text.
    :param a: iterable of items.
    :param separator: text placed between the items.
    :return: the joined text.
    """
    return separator.join(map(str, a))

def get_size_in_kb(size):
    """
    Divide size by 1024 (bytes to kilobytes, presumably size is given in bytes).
    :param size: the size to convert.
    :return: the converted size.
    """
    units_per_kb = 1024.0
    size /= units_per_kb
    return size

def get_size_in_mb(size):
    """
    Divide size by 1024 * 1024 (bytes to megabytes, presumably size is given in bytes).
    :param size: the size to convert.
    :return: the converted size.
    """
    units_per_mb = 1024.0 * 1024
    size /= units_per_mb
    return size

def get_size_in_gb(size):
    """
    Divide size by 1024 * 1024 * 1024 (bytes to gigabytes, presumably size is given in bytes).
    :param size: the size to convert.
    :return: the converted size.
    """
    units_per_gb = 1024.0 * 1024 * 1024
    size /= units_per_gb
    return size

def get_array_memory_size(a):
    """
    Get the number of bytes taken by the items of a numpy array.
    :param a: numpy array of any dtype (sub-classes of ndarray are accepted).
    :return: number of items multiplied by the number of bytes per item.
    :raises Exception: if a is not a numpy array.
    """
    if not isinstance(a, np.ndarray):
        raise Exception('Sorry, input is not numpy array!')

    # the dtype knows its own item size, so every dtype is supported
    # instead of only float16 and float32
    n_bytes = a.dtype.itemsize
    size = a.size * n_bytes
    return size

def get_expected_memory_size(array_shape, array_dtype):
    """
    Get the number of bytes an array of the given shape and dtype would take.
    :param array_shape: iterable with the size of each dimension.
    :param array_dtype: anything accepted by np.dtype().
    :return: product of the dimension sizes multiplied by the number of bytes per item.
    :raises TypeError: if array_dtype is not understood by np.dtype().
    """
    # the dtype knows its own item size, so every dtype is supported
    # instead of only float16 and float32
    n_bytes = np.dtype(array_dtype).itemsize

    # python integers are used for the product, so it can not overflow
    n_items = 1
    for dim_size in array_shape:
        n_items *= dim_size

    size = n_items * n_bytes
    return size

def print_array(a):
    """
    Print every item of a on its own line.
    :param a: iterable of items.
    """
    for element in a:
        print(element)

def print_array_joined(a):
    """
    Print all the items of a on one line, separated by ', '.
    :param a: iterable of items.
    """
    print(', '.join(map(str, a)))

# endregion

# region Misc

def learn_manifold(manifold_type, feats, n_components=2):
    """
    Embed the features with one of the sklearn.manifold estimators.
    :param manifold_type: one of 'tsne', 'isomap', 'mds' or 'spectral'.
    :param feats: the features, passed to fit_transform of the estimator.
    :param n_components: number of components of the estimator.
    :return: the value returned by fit_transform.
    :raises Exception: if manifold_type is not one of the supported types.
    """
    # pick the estimator first, it is fitted in one place below
    if manifold_type == 'tsne':
        estimator = manifold.TSNE(n_components=n_components, random_state=0)
    elif manifold_type == 'isomap':
        estimator = manifold.Isomap(n_components=n_components)
    elif manifold_type == 'mds':
        estimator = manifold.MDS(n_components=n_components)
    elif manifold_type == 'spectral':
        estimator = manifold.SpectralEmbedding(n_components=n_components)
    else:
        raise Exception('wrong maniford type!')

    # manifold.LocallyLinearEmbedding (methods 'standard', 'ltsa', 'hessian', 'modified') is not wired in

    return estimator.fit_transform(feats)

def debinarize_label(labels):
    """
    For every item of labels, get the first index where the item equals 1.
    An item without any entry equal to 1 raises IndexError.
    :param labels: iterable of label vectors (presumably one-hot numpy rows).
    :return: numpy array with one index per item.
    """
    class_indices = []
    for label_vector in labels:
        hot_positions = np.where(label_vector == 1)[0]
        class_indices.append(hot_positions[0])
    return np.array(class_indices)

def timestamp():
    """
    Format the current local time as 'yy.mm.dd-hh:mm:ss'.
    :return: the formatted time stamp.
    """
    # NOTE(review): %I is the 12-hour clock and there is no AM/PM marker,
    # so morning and afternoon stamps can look the same. Confirm this is intended.
    return "{0:%y.%m.%d-%I:%M:%S}".format(datetime.now())

def remove_extension(name):
    """
    Remove the extension from a file name.
    :param name: file name or path.
    :return: the name without its last extension, or the unchanged name if it has no extension.
    """
    # splitext() handles extensions of any length; slicing off the last 4 characters
    # was only correct for 3-letter extensions and mangled every other name
    name = os.path.splitext(name)[0]
    return name

def get_file_extension(name):
    """
    Get the text after the last dot of a name.
    The whole name is returned if it does not contain a dot.
    :param name: file name.
    :return: the extension, without the dot.
    """
    # only the last dot matters, so one split from the right is enough
    return name.rsplit('.', 1)[-1]

def print_counter(num, total, freq=None):
    """
    Log a progress message formatted as '... num/total'.
    :param num: the current count.
    :param total: the total count.
    :param freq: if None, always log; otherwise log only when num is a multiple of freq.
    """
    # the modulo is only evaluated when freq is given
    is_due = freq is None or num % freq == 0
    if is_due:
        logger.info('... %d/%d' % (num, total))

def calc_num_batches(n_samples, batch_size):
    """
    Get the number of batches needed to cover all the samples.
    The last batch is counted even when it is not full.
    :param n_samples: number of samples.
    :param batch_size: number of samples per batch.
    :return: number of batches.
    """
    n_full_batches = int(n_samples / float(batch_size))
    has_partial_batch = n_samples % batch_size != 0
    if has_partial_batch:
        return n_full_batches + 1
    return n_full_batches

def convert_dict_to_attrdict(d):
    """
    Recursively convert a dict, and the dicts nested in its values, to AttrDict.
    The nested dicts are replaced inside d itself, then d is wrapped in a new AttrDict.
    Dicts nested inside lists or other containers are not converted.
    :param d: the dict to convert. Anything that is not a dict is returned unchanged.
    :return: AttrDict with the content of d, or d itself if it is not a dict.
    """
    # check the type first, so a non-dict input (f.e. None) does not crash
    if not isinstance(d, dict):
        return d

    # items() exists on python 2 and 3 (iteritems() is python 2 only);
    # the list() snapshot keeps the loop independent of the assignments below
    for k, v in list(d.items()):
        if isinstance(v, dict):
            d[k] = convert_dict_to_attrdict(v)

    return AttrDict(d)

def get_model_feat_maps_info(model_type, feature_type):
    """
    Get feature map details according to model type and feature type.
    :param model_type: name of the model.
    :param feature_type: name of the feature layer of the model.
    :return: tuple of three integers describing the feature map (taken from the table below).
    :raises Exception: if the model type, or the feature type for that model, is not supported.
    """

    # each entry: (model types sharing the same layers, {feature type: feature map details})
    feat_maps_table = (
        (('vgg', 'vgg_charades_rgb'),
         {'pool5': (512, 7, 7), 'conv5_3': (512, 14, 14)}),
        (('resnet152', 'resnet152_charades_rgb'),
         {'res4b35': (1024, 14, 14), 'res5c': (2048, 7, 7), 'pool5': (2048, 1, 1)}),
        (('i3d_rgb', 'i3d_pytorch_charades_rgb', 'i3d_kinetics_keras', 'i3d_keras_kinetics_rgb'),
         {'mixed_5c': (1024, 7, 7), 'mixed_4f': (832, 7, 7)}),
        (('i3d_resnet_50_kinetics_rgb', 'i3d_resnet_101_kinetics_rgb'),
         {'pool5': (2048, 7, 7)}),
        (('i3d_resnet101_charades_rgb',),
         {'res5_2': (2048, 7, 7)}),
    )

    for model_types, feat_maps in feat_maps_table:
        if model_type not in model_types:
            continue
        if feature_type in feat_maps:
            return feat_maps[feature_type]
        raise Exception('Sorry, unsupported feature type: %s' % (feature_type))

    raise Exception('Sorry, unsupported model type: %s' % (model_type))

# endregion

# region Classes

class Path(str):
    """
    Build a path by joining one of the root pathes from const with a relative path.

    NOTE(review): __new__ returns the joined path itself (a plain string, not an instance
    of this class) and stores it as an attribute of the class, so __str__ and __repr__ below
    are presumably never reached through a constructed object. Confirm before relying on them.
    """

    def __new__(self, relative_path, args=None, root_type=const.ROOT_PATH_TYPES[0]):
        # `self` is the class here, as this is __new__
        assert root_type in const.ROOT_PATH_TYPES

        # the position of the root type selects the root path
        idx_root_type = list(const.ROOT_PATH_TYPES).index(root_type)
        root_path = [const.DATA_ROOT_PATH, const.PROJECT_ROOT_PATH][idx_root_type]

        # fill the placeholders of the relative path, if arguments are given
        if args is not None:
            relative_path = relative_path % args

        self.__path = os.path.join(root_path, relative_path)
        return self.__path

    def __str__(self):
        return self.__path

    def __repr__(self):
        return self.__path

class DurationTimer(object):
    """
    Measure the time passed since the timer object was created.
    """

    def __init__(self):
        # the clock starts as soon as the timer is created
        self.start_time = time.time()

    def duration(self, is_string=True):
        """
        Get the time passed since the timer was created.
        :param is_string: if True, return the text made by format_duration(),
        otherwise return the number of seconds.
        :return: the duration.
        """
        elapsed = time.time() - self.start_time
        if not is_string:
            return elapsed
        return self.format_duration(elapsed)

    def format_duration(self, duration):
        """
        Format a duration in seconds, minutes or hours, depending on how long it is.
        :param duration: the duration in seconds.
        :return: text with the value followed by ' sec', ' min' or ' hr'.
        """
        if duration < 60:
            return str(duration) + " sec"

        if duration < (60 * 60):
            in_minutes = duration / 60
            return str(in_minutes) + " min"

        in_hours = duration / (60 * 60)
        return str(in_hours) + " hr"

class AttrDict(dict):
    """
    Subclass dict and define getter-setter. This behaves as both dict and obj.
    """

    def __getattr__(self, key):
        """
        Get an item of the dict using the attribute syntax.
        Only called when the normal attribute lookup fails.
        :param key: name of the attribute, used as the key of the dict.
        :return: the item stored for the key.
        :raises AttributeError: if the key is not in the dict.
        """
        try:
            return self[key]
        except KeyError:
            # a missing attribute has to be reported with AttributeError, otherwise
            # hasattr(), getattr() with a default and copy.deepcopy() break
            raise AttributeError(key)

    def __setattr__(self, key, value):
        """
        Set an item of the dict using the attribute syntax.
        :param key: name of the attribute.
        :param value: the value to store.
        """
        # names already in the instance __dict__ stay real attributes,
        # everything else is stored as an item of the dict
        if key in self.__dict__:
            self.__dict__[key] = value
        else:
            self[key] = value

# endregion