""" Copyright (c) Microsoft Corporation. Licensed under the MIT license. Relation-aware Graph Attention Network for Visual Question Answering Linjie Li, Zhe Gan, Yu Cheng, Jingjing Liu https://arxiv.org/abs/1903.12314 This code is written by Linjie Li. """ from __future__ import print_function import os import json import pickle import numpy as np import utils import h5py import torch from torch.utils.data import Dataset import tools.compute_softscore from dataset import is_howmany COUNTING_ONLY = False def _create_entry(img, question, answer): if answer is not None: answer.pop('image_id') answer.pop('question_id') entry = { 'question_id': question['question_id'], 'image_id': question['image_id'], 'image': img, 'coco_split': question["coco_split"], 'question': question['question'], 'answer': answer} return entry def _load_dataset(dataroot, name, coco_train_img_id2val, coco_val_img_id2val, label2ans): """Load entries coco_train_img_id2val/coco_val_img_id2val: dict {img_id -> val} val can be used to retrieve image or features dataroot: root path of dataset name: 'train', 'val' """ question_path = os.path.join( dataroot, 'cp_v2_questions/vqacp_v2_%s_questions.json' % name) questions = sorted(json.load(open(question_path)), key=lambda x: x['question_id']) answer_path = os.path.join(dataroot, 'cache', 'cp_v2_%s_target.pkl' % name) answers = pickle.load(open(answer_path, 'rb')) answers = sorted(answers, key=lambda x: x['question_id']) utils.assert_eq(len(questions), len(answers)) entries = [] for question, answer in zip(questions, answers): utils.assert_eq(question['question_id'], answer['question_id']) utils.assert_eq(question['image_id'], answer['image_id']) img_id = question['image_id'] coco_split = question["coco_split"] index = coco_train_img_id2val[img_id]\ if coco_split == "train2014" else coco_val_img_id2val[img_id] if not COUNTING_ONLY \ or is_howmany(question['question'], answer, label2ans): entries.append(_create_entry(index, question, answer)) return entries 
class Image_Feature_Loader():
    """Holds pre-extracted bottom-up-attention image features for one COCO
    split ('train' or 'val'), loaded from an HDF5 file, plus optional
    semantic/spatial adjacency matrices selected by `relation_type`.
    """

    def __init__(self, coco_split, relation_type, dataroot='data',
                 adaptive=True):
        """
        Args:
            coco_split: 'train' or 'val' (COCO 2014 split of the features).
            relation_type: 'semantic' or 'spatial' — which adjacency matrix
                to keep in memory; the other is set to None.
            dataroot: root path of the dataset.
            adaptive: True for a variable number of boxes per image
                (flat feature matrix indexed via pos_boxes), False for a
                fixed 36 boxes per image.
        """
        super(Image_Feature_Loader, self).__init__()
        assert coco_split in ['train', 'val']
        self.adaptive = adaptive
        self.relation_type = relation_type
        prefix = '36'  # fixed-feature files are suffixed with the box count

        imgid2idx_path = os.path.join(
            dataroot, 'imgids/%s%s_imgid2idx.pkl'
            % (coco_split, '' if self.adaptive else prefix))
        # Fix: context manager so the pickle file handle is closed
        # (pickle.load(open(...)) leaked it).
        with open(imgid2idx_path, 'rb') as f:
            self.img_id2idx = pickle.load(f)

        h5_dataroot = dataroot + "/Bottom-up-features-adaptive" \
            if self.adaptive else dataroot + "/Bottom-up-features-fixed"
        h5_path = os.path.join(h5_dataroot,
                               '%s%s.hdf5' % (coco_split,
                                              '' if self.adaptive else prefix))

        print('loading features from h5 file %s' % h5_path)
        with h5py.File(h5_path, 'r') as hf:
            self.features = np.array(hf.get('image_features'))
            self.spatials = np.array(hf.get('spatial_features'))
            self.bb = np.array(hf.get('image_bb'))
            # Only keep the adjacency matrix matching the requested relation
            # type; both can be large.
            if "semantic_adj_matrix" in hf.keys() \
                    and self.relation_type == "semantic":
                self.semantic_adj_matrix = np.array(
                    hf.get('semantic_adj_matrix'))
                print("Loaded semantic adj matrix from file...",
                      self.semantic_adj_matrix.shape)
            else:
                self.semantic_adj_matrix = None
                print("Setting semantic adj matrix to None...")
            if "image_adj_matrix" in hf.keys() \
                    and self.relation_type == "spatial":
                self.spatial_adj_matrix = np.array(hf.get('image_adj_matrix'))
                print("Loaded spatial adj matrix from file...",
                      self.spatial_adj_matrix.shape)
            else:
                self.spatial_adj_matrix = None
                print("Setting spatial adj matrix to None...")
            self.pos_boxes = None
            if self.adaptive:
                # pos_boxes[i] gives the row range of image i's boxes in the
                # flat feature matrix (adaptive mode only).
                self.pos_boxes = np.array(hf.get('pos_boxes'))
        self.tensorize()

    def tensorize(self):
        """Convert the loaded numpy arrays to torch tensors in place."""
        self.features = torch.from_numpy(self.features)
        self.spatials = torch.from_numpy(self.spatials)
        self.bb = torch.from_numpy(self.bb)
        if self.semantic_adj_matrix is not None:
            self.semantic_adj_matrix = torch.from_numpy(
                self.semantic_adj_matrix).double()
        if self.spatial_adj_matrix is not None:
            self.spatial_adj_matrix = torch.from_numpy(
                self.spatial_adj_matrix).double()
        if self.pos_boxes is not None:
            self.pos_boxes = torch.from_numpy(self.pos_boxes)


class VQA_cp_Dataset(Dataset):
    """VQA-CP v2 dataset: pairs tokenized questions and soft answer targets
    with image features served by two Image_Feature_Loader instances
    (COCO train2014 and val2014), since VQA-CP splits mix both.
    """

    def __init__(self, name, dictionary, coco_train_features,
                 coco_val_features, dataroot='data', adaptive=False,
                 pos_emb_dim=64):
        """
        Args:
            name: 'train' or 'test' (VQA-CP split).
            dictionary: tokenizer with tokenize() and padding_idx.
            coco_train_features: Image_Feature_Loader for COCO train2014.
            coco_val_features: Image_Feature_Loader for COCO val2014.
            dataroot: root path of the dataset.
            adaptive: True if the loaders hold adaptive (variable-box)
                features.
            pos_emb_dim: stored as self.emb_dim (position embedding size).
        """
        super(VQA_cp_Dataset, self).__init__()
        assert name in ['train', 'test']
        ans2label_path = os.path.join(dataroot, 'cache',
                                      'trainval_ans2label.pkl')
        label2ans_path = os.path.join(dataroot, 'cache',
                                      'trainval_label2ans.pkl')
        # Fix: context managers so the pickle file handles are closed.
        with open(ans2label_path, 'rb') as f:
            self.ans2label = pickle.load(f)
        with open(label2ans_path, 'rb') as f:
            self.label2ans = pickle.load(f)
        self.num_ans_candidates = len(self.ans2label)

        self.dictionary = dictionary
        self.adaptive = adaptive
        self.relation_type = coco_train_features.relation_type
        self.coco_train_features = coco_train_features
        self.coco_val_features = coco_val_features
        self.entries = _load_dataset(dataroot, name,
                                     self.coco_train_features.img_id2idx,
                                     self.coco_val_features.img_id2idx,
                                     self.label2ans)
        self.tokenize()
        self.tensorize()
        self.emb_dim = pos_emb_dim
        # Adaptive features are 2-D (num_boxes_total, dim); fixed features
        # are 3-D (num_images, num_boxes, dim).
        self.v_dim = self.coco_train_features.features.size(
            1 if self.adaptive else 2)
        self.s_dim = self.coco_train_features.spatials.size(
            1 if self.adaptive else 2)

    def tokenize(self, max_length=14):
        """Tokenizes the questions.

        This will add q_token in each entry of the dataset.
        -1 represent nil, and should be treated as padding_idx in embedding.
        """
        for entry in self.entries:
            tokens = self.dictionary.tokenize(entry['question'], False)
            tokens = tokens[:max_length]
            if len(tokens) < max_length:
                # Note here we pad to the back of the sentence
                padding = [self.dictionary.padding_idx] * \
                    (max_length - len(tokens))
                tokens = tokens + padding
            utils.assert_eq(len(tokens), max_length)
            entry['q_token'] = tokens

    def tensorize(self):
        """Convert question tokens and answer labels/scores to tensors.

        Entries with an empty label set get answer labels/scores of None.
        """
        for entry in self.entries:
            entry['q_token'] = torch.from_numpy(np.array(entry['q_token']))
            answer = entry['answer']
            if answer is not None:
                labels = np.array(answer['labels'])
                scores = np.array(answer['scores'], dtype=np.float32)
                if len(labels):
                    entry['answer']['labels'] = torch.from_numpy(labels)
                    entry['answer']['scores'] = torch.from_numpy(scores)
                else:
                    entry['answer']['labels'] = None
                    entry['answer']['scores'] = None

    def __getitem__(self, index):
        """Return one sample:

        (features, spatials, question, target-or-question_id, question_id,
         image_id, bb, spatial_adj_matrix, semantic_adj_matrix)
        """
        entry = self.entries[index]
        image_id = entry["image_id"]
        coco_split = entry["coco_split"]
        question = entry['q_token']
        question_id = entry['question_id']

        # Pick the loader matching the image's original COCO split.
        if "train" in coco_split:
            coco_features = self.coco_train_features
        elif "val" in coco_split:
            coco_features = self.coco_val_features
        else:
            # Fix: previously this branch only printed and then crashed with
            # a NameError on the undefined `coco_features`; fail loudly.
            raise ValueError("Unknown coco split: %s" % coco_split)

        # Missing adjacency matrices are signalled by a dummy zeros(1) tensor
        # so the collate function always sees a tensor.
        if coco_features.spatial_adj_matrix is not None:
            spatial_adj_matrix = coco_features.spatial_adj_matrix[
                entry["image"]]
        else:
            spatial_adj_matrix = torch.zeros(1).double()
        if coco_features.semantic_adj_matrix is not None:
            semantic_adj_matrix = coco_features.semantic_adj_matrix[
                entry["image"]]
        else:
            semantic_adj_matrix = torch.zeros(1).double()

        if not self.adaptive:
            # fixed number of bounding boxes: direct row lookup
            features = coco_features.features[entry['image']]
            spatials = coco_features.spatials[entry['image']]
            bb = coco_features.bb[entry["image"]]
        else:
            # adaptive: pos_boxes[i] holds this image's [start, end) row
            # range in the flat feature matrix (hoisted out of the three
            # slicing expressions).
            box_range = coco_features.pos_boxes[entry['image']]
            start, end = box_range[0], box_range[1]
            features = coco_features.features[start:end, :]
            spatials = coco_features.spatials[start:end, :]
            bb = coco_features.bb[start:end, :]

        answer = entry['answer']
        if answer is not None:
            labels = answer['labels']
            scores = answer['scores']
            # Soft target distribution over all answer candidates.
            target = torch.zeros(self.num_ans_candidates)
            if labels is not None:
                target.scatter_(0, labels, scores)
            return features, spatials, question, target, question_id, \
                image_id, bb, spatial_adj_matrix, semantic_adj_matrix
        else:
            # NOTE(review): question_id appears twice here, presumably as a
            # placeholder keeping the tuple the same arity as the labelled
            # branch (the target slot) — confirm against the eval code.
            return features, spatials, question, question_id, question_id, \
                image_id, bb, spatial_adj_matrix, semantic_adj_matrix

    def __len__(self):
        return len(self.entries)