python source code of task

#!/usr/bin/env python3

import numpy as np
import operator
from functools import reduce

import torch
import torch.nn as nn
from torch.autograd import Variable, Function
import torch.optim as optim
import torch.cuda

import importlib

import qpth
from qpth.qp import QPFunction

from block import block

import batch


class SolveNewsvendor(nn.Module):
    """ Solve newsvendor scheduling problem """
    def __init__(self, params, eps=1e-2):
        super(SolveNewsvendor, self).__init__()
        k = len(params['d'])
        self.Q = Variable(torch.diag(torch.Tensor(
            [params['c_quad']] + [params['b_quad']]*k + [params['h_quad']]*k)) \
                .cuda())
        self.p = Variable(torch.Tensor(
            [params['c_lin']] + [params['b_lin']]*k + [params['h_lin']]*k) \
                .cuda())
        self.G = Variable(torch.cat([
            torch.cat([-torch.ones(k,1), -torch.eye(k), torch.zeros(k,k)], 1),
            torch.cat([torch.ones(k,1), torch.zeros(k,k), -torch.eye(k)], 1),
            -torch.eye(1 + 2*k)], 0).cuda())
        self.h = Variable(torch.Tensor(
            np.concatenate([-params['d'], params['d'], np.zeros(1+ 2*k)])).cuda())
        self.one = Variable(torch.Tensor([1])).cuda()
        self.eps_eye = eps * Variable(torch.eye(1 + 2*k).cuda()).unsqueeze(0)

    def forward(self, y):
        nBatch, k = y.size()

        Q_scale = torch.cat([torch.diag(torch.cat(
            [self.one, y[i], y[i]])).unsqueeze(0) for i in range(nBatch)], 0)
        Q = self.Q.unsqueeze(0).expand_as(Q_scale).mul(Q_scale)
        p_scale = torch.cat([Variable(torch.ones(nBatch,1).cuda()), y, y], 1)
        p = self.p.unsqueeze(0).expand_as(p_scale).mul(p_scale)
        G = self.G.unsqueeze(0).expand(nBatch, self.G.size(0), self.G.size(1))
        h = self.h.unsqueeze(0).expand(nBatch, self.h.size(0))
        e = Variable(torch.Tensor().cuda()).double()

        out = QPFunction(verbose=False)\
            (Q.double(), p.double(), G.double(), h.double(), e, e).float()

        return out[:,:1]


def run_task_net(X, Y, X_test, Y_test, params, is_nonlinear=False):

    # Training/validation split
    th_frac = 0.8
    inds = np.random.permutation(X.shape[0])
    train_inds = inds[:int(X.shape[0]*th_frac)]
    hold_inds =  inds[int(X.shape[0]*th_frac):]
    X_train, X_hold = X[train_inds, :], X[hold_inds, :]
    Y_train, Y_hold = Y[train_inds, :], Y[hold_inds, :]

    X_train_t = torch.Tensor(X_train).cuda()
    Y_train_t = torch.Tensor(Y_train).cuda()
    X_hold_t = torch.Tensor(X_hold).cuda()
    Y_hold_t = torch.Tensor(Y_hold).cuda()
    X_test_t = torch.Tensor(X_test).cuda()
    Y_test_t = torch.Tensor(Y_test).cuda()

    Y_train_int_t = torch.LongTensor(
        np.where(Y_train_t.cpu().numpy())[1]).cuda()
    Y_hold_int_t = torch.LongTensor(
        np.where(Y_hold_t.cpu().numpy())[1]).cuda()
    Y_test_int_t = torch.LongTensor(
        np.where(Y_test_t.cpu().numpy())[1]).cuda()

    d_ = Variable(torch.Tensor(params['d'])).cuda()

    # Expected inventory cost and solver for newsvendor scheduling problem
    cost = lambda Z, Y : (params['c_lin'] * Z + 0.5 * params['c_quad'] * (Z**2) +
                          params['b_lin'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0) +
                          0.5 * params['b_quad'] * (Y.mv(d_).view(-1,1)-Z).clamp(min=0)**2 +
                          params['h_lin'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0) +
                          0.5 * params['h_quad'] * (Z-Y.mv(d_).view(-1,1)).clamp(min=0)**2) \
                        .mean()
    newsvendor_solve = SolveNewsvendor(params).cuda()
    cost_news_fn = lambda x, y: cost(newsvendor_solve(x), y)

    nll = nn.NLLLoss().cuda()
    lam = 10.0  # regularization

    if is_nonlinear:
        # Non-linear model, use ADAM step size 1e-3
        layer_sizes = [X_train.shape[1], 200, 200, Y_train.shape[1]]
        layers = reduce(operator.add, [[nn.Linear(a,b), nn.BatchNorm1d(b), 
                                        nn.ReLU(), nn.Dropout(p=0.5)]
                          for a,b in zip(layer_sizes[0:-2], layer_sizes[1:-1])])
        layers += [nn.Linear(layer_sizes[-2], layer_sizes[-1]), nn.Softmax()]
        model = nn.Sequential(*layers).cuda()
        step_size = 1e-3
    else:
        # Linear model, use ADAM step size 1e-2
        model = nn.Sequential(
            nn.Linear(X_train.shape[1], Y_train.shape[1]),
            nn.Softmax()
        ).cuda()
        step_size = 1e-2

    opt = optim.Adam(model.parameters(), lr=step_size)

    # For early stopping
    hold_costs, test_costs = [], []
    num_stop_rounds = 20

    for i in range(1000):
        model.eval()
        test_cost = batch.get_cost(
            100, i, model, X_test_t, Y_test_t, cost_news_fn)
        test_nll = batch.get_cost_nll(
            100, i, model, X_test_t, Y_test_int_t, nll)

        hold_cost = batch.get_cost(
            100, i, model, X_hold_t, Y_hold_t, cost_news_fn)
        hold_nll  = batch.get_cost_nll(
            100, i, model, X_hold_t, Y_hold_int_t, nll)

        model.train()
        train_cost, train_nll = batch_train(150, i, X_train_t, Y_train_t, 
            Y_train_int_t, model, cost_news_fn, nll, opt, lam)

        print(i, train_cost.data[0], train_nll.data[0], test_cost.data[0], 
              test_nll.data[0], hold_cost.data[0], hold_nll.data[0])

        # Early stopping
        test_costs.append(test_cost.data[0])
        hold_costs.append(hold_cost.data[0])
        if i > 0 and i % num_stop_rounds == 0:
            idx = hold_costs.index(min(hold_costs))
            # Stop if current cost is worst in num_stop_rounds rounds
            if max(hold_costs) == hold_cost.data[0]:
                print(test_costs[idx])
                return(test_costs[idx])
            else:
                # Keep only "best" round
                hold_costs = [hold_costs[idx]]
                test_costs = [test_costs[idx]]

    # In case of no early stopping, return best run so far
    idx = hold_costs.index(min(hold_costs))
    return test_costs[idx]

def batch_train(batch_sz, epoch, X_train_t, Y_train_t, Y_train_int_t,
    model, cost_fn_news, nll, opt, lam):

    train_cost_agg = 0
    train_nll_agg = 0

    batch_data_, batch_targets_ = \
        batch.get_vars(batch_sz, X_train_t, Y_train_t)
    _, batch_targets_int_ = \
        batch.get_vars_scalar_out(batch_sz, X_train_t, Y_train_int_t)
    size = batch_sz

    for i in range(0, X_train_t.size(0), batch_sz):

        # Deal with potentially incomplete (last) batch
        if i + batch_sz  > X_train_t.size(0):
            size = X_train_t.size(0) - i
            batch_data_, batch_targets_ = batch.get_vars(
                size, X_train_t, Y_train_t)
            _, batch_targets_int_ = batch.get_vars_scalar_out(
                size, X_train_t, Y_train_int_t)

        batch_data_.data[:] = X_train_t[i:i+size]
        batch_targets_.data[:] = Y_train_t[i:i+size]
        batch_targets_int_.data[:] = Y_train_int_t[i:i+size]

        opt.zero_grad()
        preds = model(batch_data_)
        train_cost = cost_fn_news(preds, batch_targets_)
        train_nll  = nll(preds, batch_targets_int_)

        (train_cost + lam * train_nll).backward()
        opt.step()

        # Keep running average of losses
        train_cost_agg += \
            (train_cost - train_cost_agg) * batch_sz / (i + batch_sz)
        train_nll_agg += \
            (train_nll - train_nll_agg) * batch_sz / (i + batch_sz)

        print('Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.4f}'.format(
            epoch, i+batch_sz, X_train_t.size(0),
            float(i+batch_sz)/X_train_t.size(0)*100,
            train_cost.data[0]))

    return train_cost_agg, train_nll_agg