import numpy as np
import torch
from torch.autograd import Variable

CLASSIFIER_TYPE_BERT = "BERT"
CLASSIFIER_TYPE_BERT_RNN = "BERT_RNN"
CLASSIFIER_TYPE_RNN = "RNN"


def generate_data(batch, use_cuda):
    """Create a formatted and ordered data batch to use in the
    three player model.

    :param batch: A pandas dataframe containing the tokens, masks, counts, and
        labels associated with a batch of data
    :type batch: DataFrame
    :param use_cuda: whether to use CUDA
    :type use_cuda: bool
    :return: formatted and ordered tokens (x), masks (m), and
        labels (y) associated with a batch of data
    :rtype: dict
    """
    # sort for rnn happiness
    batch.sort_values("counts", inplace=True, ascending=False)

    x_mask = np.stack(batch["mask"], axis=0)
    # drop all zero columns
    zero_col_idxs = np.argwhere(np.all(x_mask[..., :] == 0, axis=0))
    x_mask = np.delete(x_mask, zero_col_idxs, axis=1)

    x_mat = np.stack(batch["tokens"], axis=0)
    # drop all zero columns
    x_mat = np.delete(x_mat, zero_col_idxs, axis=1)

    y_vec = np.stack(batch["labels"], axis=0)

    batch_x_ = Variable(torch.from_numpy(x_mat)).to(torch.int64)
    batch_m_ = Variable(torch.from_numpy(x_mask)).type(torch.FloatTensor)
    batch_y_ = Variable(torch.from_numpy(y_vec)).to(torch.int64)

    if use_cuda:
        batch_x_ = batch_x_.cuda()
        batch_m_ = batch_m_.cuda()
        batch_y_ = batch_y_.cuda()

    return {"x": batch_x_, "m": batch_m_, "y": batch_y_}