"""This script creates a subset of a dataset according to an ID list.
"""
import argparse
import os.path
import shutil
from utils import make_sure_path_exists, msd_id_to_dirs
from config import CONFIG
if CONFIG['multicore'] > 1:
    import joblib

def parse_args():
    """Parse the command line and return ``(src, dst, id_list_path)``.

    Three positional arguments are expected, in this order: the root of the
    source dataset, the root of the destination dataset, and the path to the
    ID list file. They are returned as a tuple in that same order.
    """
    positionals = (
        ('src', "root path to the source dataset"),
        ('dst', "root path to the destination dataset"),
        ('id_list_path', "path to the ID list file"),
    )
    parser = argparse.ArgumentParser()
    for arg_name, description in positionals:
        parser.add_argument(arg_name, help=description)
    namespace = parser.parse_args()
    return (namespace.src, namespace.dst, namespace.id_list_path)

def collector(midi_md5, msd_id, src, dst):
    """Copy one ``.npz`` file from the source tree into the destination tree.

    The file is read from ``<src>/<first character of midi_md5>/<midi_md5>.npz``
    and written to ``<dst>/<msd_id_to_dirs(msd_id)>/<midi_md5>.npz``. The
    directory that will contain the copy is passed to
    ``make_sure_path_exists`` before copying.
    """
    filename = midi_md5 + '.npz'
    source_file = os.path.join(src, midi_md5[0], filename)
    target_file = os.path.join(dst, msd_id_to_dirs(msd_id), filename)
    target_dir = os.path.split(target_file)[0]
    make_sure_path_exists(target_dir)
    shutil.copyfile(source_file, target_file)

def main():
    """Copy every file named in the ID list from the source to the destination.

    The source root, destination root and ID list path come from
    ``parse_args``. Each non-blank line of the ID list is split on whitespace
    and must yield exactly two fields, a MIDI MD5 followed by an MSD ID, which
    are handed to ``collector`` together with the two roots. Blank lines are
    skipped.

    When ``CONFIG['multicore']`` is greater than 1 the copies are dispatched
    through joblib with that many jobs; otherwise they run one after another.

    Raises:
        ValueError: If a non-blank line does not split into exactly two
            fields.
    """
    src, dst, id_list_path = parse_args()
    make_sure_path_exists(dst)

    # A blank (or whitespace-only) line splits into an empty list, which would
    # break the two-name unpacking below and abort the whole run, so such
    # lines are dropped here. Malformed non-blank lines still fail loudly.
    with open(id_list_path) as f:
        id_list = [fields for fields in (line.split() for line in f) if fields]

    # joblib is imported at module level only under this same condition, so
    # it must not be referenced on the single-core path.
    if CONFIG['multicore'] > 1:
        joblib.Parallel(n_jobs=CONFIG['multicore'], verbose=5)(
            joblib.delayed(collector)(midi_md5, msd_id, src, dst)
            for midi_md5, msd_id in id_list)
    else:
        for midi_md5, msd_id in id_list:
            collector(midi_md5, msd_id, src, dst)

    print("Subset successfully collected for: {}".format(id_list_path))

# Run the collection only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()