import os

from config import DATA_DIR


def parse_csv(data_file):
    """
    Read a tab-separated file and extract the tweets and their labels.

    Every non-blank line is stripped of surrounding whitespace and split on
    tabs. The first field is taken as the tweet and the third field is
    converted to an integer label. Blank lines are skipped.

    Args:
        data_file: path of the tab-separated file to read (decoded as UTF-8)

    Returns:
        X: a list of tweets
        y: a list of labels corresponding to the tweets

    Raises:
        IndexError: if a non-blank line has fewer than three fields
        ValueError: if the third field cannot be converted to an int

    """
    X = []
    y = []
    # Explicit UTF-8 keeps decoding independent of the platform locale and
    # matches how load_task2 opens its files.
    with open(data_file, 'r', encoding="utf-8") as fd:
        for line in fd:
            line = line.strip()
            if not line:
                # A blank (e.g. trailing) line carries no record.
                continue
            fields = line.split('\t')
            tweet, label = fields[0], int(fields[2])
            X.append(tweet)
            y.append(label)
    return X, y


def fix_text(text):
    """
    Decode literal backslash escape sequences (e.g. ``\\u2019``) in a string.

    The conversion is best-effort: if the text cannot be decoded (malformed
    or truncated escape sequence) or is not a string at all, the input is
    returned unchanged.

    Args:
        text: the string that may contain literal escape sequences

    Returns:
        The text with its escape sequences replaced by the characters they
        denote, or the original value when decoding is not possible.

    """
    try:
        # The unicode-escape codec reads its input bytes as Latin-1, so the
        # text must be encoded as Latin-1 too; characters outside that range
        # are turned into \uXXXX / \UXXXXXXXX escapes which the decoder then
        # restores. Encoding as UTF-8 here would corrupt every non-ASCII
        # character already present in the text.
        raw = text.encode('latin-1', 'backslashreplace')
        return raw.decode('unicode-escape')
    except (UnicodeError, AttributeError):
        # UnicodeError: malformed escape sequence in the text.
        # AttributeError: the value has no encode() (not a str).
        return text


def load_task2(dataset):
    """
    Load the tweets and labels of one task2 dataset split.

    The tweets are read from ``task2/us_<dataset>.text`` and the labels from
    ``task2/us_<dataset>.labels``, both located under DATA_DIR and decoded
    as UTF-8. Lines of the two files are paired by position; pairing stops
    at the end of the shorter file.

    Args:
        dataset: the name inserted into the two file names

    Returns:
        X: a list of tweets with trailing whitespace removed
        y: a list of integer labels corresponding to the tweets

    """
    text_path = os.path.join(DATA_DIR, "task2/us_{}.text".format(dataset))
    labels_path = os.path.join(DATA_DIR, "task2/us_{}.labels".format(dataset))

    with open(text_path, 'r', encoding="utf-8") as text_fd:
        with open(labels_path, 'r', encoding="utf-8") as labels_fd:
            # Each tweet line is matched with the label line at the same
            # position in the other file.
            pairs = [
                (tweet_line.rstrip(), int(label_line.rstrip()))
                for tweet_line, label_line in zip(text_fd, labels_fd)
            ]

    X = [tweet for tweet, _ in pairs]
    y = [label for _, label in pairs]
    return X, y