"""Locate the LaMem image files and cache the resulting path list.

``read_dataset`` is the entry point: it downloads and extracts the archive
named by ``DATA_URL`` when no cached pickle exists, lists the JPEG files via
``create_image_lists`` and stores that list in ``lamem.pickle``.
"""
__author__ = 'charlie'
import numpy as np
import os
import random
from six.moves import cPickle as pickle
from tensorflow.python.platform import gfile
import glob

import TensorflowUtils as utils

# Archive that read_dataset downloads and extracts when no pickle cache is
# present. The basename up to its first '.' ("lamem") is also used as the
# name of the extracted folder.
DATA_URL = 'http://memorability.csail.mit.edu/lamem.tar.gz'


def read_dataset(data_dir):
    """Return the list of LaMem image paths, building a pickle cache on first use.

    If ``<data_dir>/lamem.pickle`` exists, the path list stored under its
    ``'images'`` key is returned. Otherwise the archive at ``DATA_URL`` is
    downloaded and extracted through ``utils.maybe_download_and_extract``,
    the images are listed with ``create_image_lists`` and the listing is
    pickled for later calls.

    An empty or missing listing (``[]`` or ``None``) is returned as-is but is
    NOT written to the cache, so a failed download/extraction is retried on
    the next call instead of being remembered forever.

    :param data_dir: directory holding (or receiving) the dataset and cache.
    :return: shuffled list of image file paths; ``None`` when the extracted
        dataset directory could not be found.
    """
    pickle_filename = "lamem.pickle"
    pickle_filepath = os.path.join(data_dir, pickle_filename)

    if os.path.exists(pickle_filepath):
        print ("Found pickle file!")
        # NOTE: pickle.load executes arbitrary code from the file; the cache
        # must only ever be a file written by this function.
        with open(pickle_filepath, 'rb') as f:
            return pickle.load(f)['images']

    utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
    # "lamem.tar.gz" -> "lamem": the archive basename up to the first '.'
    # is used as the extracted folder name.
    lamem_folder = (DATA_URL.split("/")[-1]).split(os.path.extsep)[0]
    training_records = create_image_lists(os.path.join(data_dir, lamem_folder))

    if not training_records:
        # Nothing usable was found; skip caching so the next call tries again
        # rather than loading a poisoned (None/empty) pickle.
        return training_records

    print ("Pickling ...")
    with open(pickle_filepath, 'wb') as f:
        pickle.dump({'images': training_records}, f, pickle.HIGHEST_PROTOCOL)

    return training_records


def create_image_lists(image_dir):
    """List the ``*.jpg`` files in ``<image_dir>/images`` in random order.

    :param image_dir: root folder of the extracted dataset.
    :return: ``None`` (after printing a message) when ``image_dir`` does not
        exist; otherwise a shuffled list of matching paths, which is empty
        when no file matches.
    """
    directory_present = gfile.Exists(image_dir)
    if not directory_present:
        print("".join(["Image directory '", image_dir, "' not found."]))
        return None

    jpg_pattern = os.path.join(image_dir, "images", "*.jpg")
    found_paths = glob.glob(jpg_pattern)
    if len(found_paths) == 0:
        print('No files found')

    # Shuffle in place so the returned order is random.
    random.shuffle(found_paths)
    print ('No. of Image files: %d' % len(found_paths))

    return found_paths