import os import numpy as np import torch import torch.nn.functional as F from torch import nn from collections import OrderedDict from torch.autograd import Variable def gram_matrix(y): (b, ch, h, w) = y.size() features = y.view(b, ch, w * h) features_t = features.transpose(1, 2) gram = features.bmm(features_t) / (ch * h * w) return gram def poly_lr_scheduler(optimizer, init_lr, iter, lr_decay_iter=1, max_iter=30000, power=0.9,): """Polynomial decay of learning rate :param init_lr is base learning rate :param iter is a current iteration :param lr_decay_iter how frequently decay occurs, default is 1 :param max_iter is number of maximum iterations :param power is a polymomial power """ if iter % lr_decay_iter or iter > max_iter: return optimizer for param_group in optimizer.param_groups: tmp = (1 - iter/max_iter)**power param_group['lr'] = init_lr*tmp def wct(content_feat, style_feat): content_feat = content_feat.data.cpu() content_feat = content_feat.squeeze(0).double() style_feat = style_feat.data.cpu() style_feat = style_feat.squeeze(0).double() C, W, H = content_feat.size() transfered = whiten_and_color(content_feat.view(C, -1), style_feat.view(C, -1)) transfered = transfered.view_as(content_feat).float().unsqueeze(0) return Variable(transfered).cuda() def whiten_and_color(cF,sF): cFSize = cF.size() c_mean = torch.mean(cF,1) # c x (h x w) c_mean = c_mean.unsqueeze(1).expand_as(cF) cF = cF - c_mean contentConv = torch.mm(cF,cF.t()).div(cFSize[1]-1) + torch.eye(cFSize[0]).double() c_u,c_e,c_v = torch.svd(contentConv,some=False) k_c = cFSize[0] for i in range(cFSize[0]): if c_e[i] < 0.00001: k_c = i break sFSize = sF.size() s_mean = torch.mean(sF,1) sF = sF - s_mean.unsqueeze(1).expand_as(sF) styleConv = torch.mm(sF,sF.t()).div(sFSize[1]-1) s_u,s_e,s_v = torch.svd(styleConv,some=False) k_s = sFSize[0] for i in range(sFSize[0]): if s_e[i] < 0.00001: k_s = i break c_d = (c_e[0:k_c]).pow(-0.5) step1 = torch.mm(c_v[:,0:k_c],torch.diag(c_d)) step2 = torch.mm(step1,(c_v[:,0:k_c].t())) whiten_cF = torch.mm(step2,cF) s_d = (s_e[0:k_s]).pow(0.5) targetFeature = torch.mm(torch.mm(torch.mm(s_v[:,0:k_s],torch.diag(s_d)),(s_v[:,0:k_s].t())),whiten_cF) targetFeature = targetFeature + s_mean.unsqueeze(1).expand_as(targetFeature) return targetFeature def calc_mean_std(feat, eps=1e-5): # eps is a small value added to the variance to avoid divide-by-zero. size = feat.data.size() assert (len(size) == 4) N, C = size[:2] feat_var = feat.view(N, C, -1).var(dim=2) + eps feat_std = feat_var.sqrt().view(N, C, 1, 1) feat_mean = feat.view(N, C, -1).mean(dim=2).view(N, C, 1, 1) return feat_mean, feat_std def adaptive_instance_normalization(content_feat, style_feat): assert (content_feat.data.size()[:2] == style_feat.data.size()[:2]) size = content_feat.data.size() style_mean, style_std = calc_mean_std(style_feat) content_mean, content_std = calc_mean_std(content_feat) normalized_feat = (content_feat - content_mean.expand( size)) / content_std.expand(size) return normalized_feat * style_std.expand(size) + style_mean.expand(size) def load_model_filter(model, snapshot, prefix=False): pretrained_dict = torch.load(snapshot) if prefix: new_state_dict = OrderedDict() for k, v in pretrained_dict.items(): name = k[7:] # remove `enc.` or `dec.` new_state_dict[name] = v pretrained_dict = new_state_dict model_dict = model.state_dict() # 1. filter out unnecessary keys pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict} # 2. overwrite entries in the existing state dict model_dict.update(pretrained_dict) # 3. load the new state dict model.load_state_dict(pretrained_dict) return model def check_mkdir(dir_name): if not os.path.exists(dir_name): os.mkdir(dir_name) def initialize_weights(*models): for model in models: for module in model.modules(): if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear): nn.init.kaiming_normal(module.weight) if module.bias is not None: module.bias.data.zero_() elif isinstance(module, nn.BatchNorm2d): module.weight.data.fill_(1) module.bias.data.zero_() def get_upsampling_weight(in_channels, out_channels, kernel_size): factor = (kernel_size + 1) // 2 if kernel_size % 2 == 1: center = factor - 1 else: center = factor - 0.5 og = np.ogrid[:kernel_size, :kernel_size] filt = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float64) weight[range(in_channels), range(out_channels), :, :] = filt return torch.from_numpy(weight).float() class CrossEntropyLoss2d(nn.Module): def __init__(self, weight=None, size_average=True, ignore_index=255): super(CrossEntropyLoss2d, self).__init__() self.nll_loss = nn.NLLLoss2d(weight, size_average, ignore_index) def forward(self, inputs, targets): return self.nll_loss(F.log_softmax(inputs), targets) def cross_entropy2d(input, target, weight=None, size_average=True, ignore_index=255): n, c, h, w = input.size() log_p = F.log_softmax(input, dim=1) log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c) log_p = log_p[target.view(n * h * w, 1).repeat(1, c) >= 0] log_p = log_p.view(-1, c) mask = target >= 0 target = target[mask] loss = F.nll_loss(log_p, target, ignore_index=ignore_index, weight=weight, size_average=False) if size_average: loss /= mask.data.sum() return loss def bootstrapped_cross_entropy2d(input, target, K, weight=None, size_average=True): batch_size = input.size()[0] def _bootstrap_xentropy_single(input, target, K, weight=None, size_average=True): n, c, h, w = input.size() log_p = F.log_softmax(input, dim=1) log_p = log_p.transpose(1, 2).transpose(2, 3).contiguous().view(-1, c) log_p = log_p[target.view(n * h * w, 1).repeat(1, c) >= 0] log_p = log_p.view(-1, c) mask = target >= 0 target = target[mask] loss = F.nll_loss(log_p, target, weight=weight, ignore_index=255, reduce=False, size_average=False) topk_loss, _ = loss.topk(K) reduced_topk_loss = topk_loss.sum() / K return reduced_topk_loss loss = 0.0 # Bootstrap from each image not entire batch for i in range(batch_size): loss += _bootstrap_xentropy_single(input=torch.unsqueeze(input[i], 0), target=torch.unsqueeze(target[i], 0), K=K, weight=weight, size_average=size_average) return loss / float(batch_size) def _fast_hist(label_pred, label_true, num_classes): mask = (label_true >= 0) & (label_true < num_classes) hist = np.bincount( num_classes * label_true[mask].astype(int) + label_pred[mask], minlength=num_classes ** 2).reshape(num_classes, num_classes) return hist ### T-SNE def Hbeta(D=np.array([]), beta=1.0): """ Compute the perplexity and the P-row for a specific value of the precision of a Gaussian distribution. """ # Compute P-row and corresponding perplexity P = np.exp(-D.copy() * beta) sumP = sum(P) H = np.log(sumP) + beta * np.sum(D * P) / sumP P = P / sumP return H, P def x2p(X=np.array([]), tol=1e-5, perplexity=30.0): """ Performs a binary search to get P-values in such a way that each conditional Gaussian has the same perplexity. """ # Initialize some variables print("Computing pairwise distances...") (n, d) = X.shape sum_X = np.sum(np.square(X), 1) D = np.add(np.add(-2 * np.dot(X, X.T), sum_X).T, sum_X) P = np.zeros((n, n)) beta = np.ones((n, 1)) logU = np.log(perplexity) # Loop over all datapoints for i in range(n): # Print progress if i % 500 == 0: print("Computing P-values for point %d of %d..." % (i, n)) # Compute the Gaussian kernel and entropy for the current precision betamin = -np.inf betamax = np.inf Di = D[i, np.concatenate((np.r_[0:i], np.r_[i+1:n]))] (H, thisP) = Hbeta(Di, beta[i]) # Evaluate whether the perplexity is within tolerance Hdiff = H - logU tries = 0 while np.abs(Hdiff) > tol and tries < 50: # If not, increase or decrease precision if Hdiff > 0: betamin = beta[i].copy() if betamax == np.inf or betamax == -np.inf: beta[i] = beta[i] * 2. else: beta[i] = (beta[i] + betamax) / 2. else: betamax = beta[i].copy() if betamin == np.inf or betamin == -np.inf: beta[i] = beta[i] / 2. else: beta[i] = (beta[i] + betamin) / 2. # Recompute the values (H, thisP) = Hbeta(Di, beta[i]) Hdiff = H - logU tries += 1 # Set the final row of P P[i, np.concatenate((np.r_[0:i], np.r_[i+1:n]))] = thisP # Return final P-matrix print("Mean value of sigma: %f" % np.mean(np.sqrt(1 / beta))) return P def pca(X=np.array([]), no_dims=50): """ Runs PCA on the NxD array X in order to reduce its dimensionality to no_dims dimensions. """ print("Preprocessing the data using PCA...") (n, d) = X.shape X = X - np.tile(np.mean(X, 0), (n, 1)) (l, M) = np.linalg.eig(np.dot(X.T, X)) Y = np.dot(X, M[:, 0:no_dims]) return Y def tsne(X=np.array([]), no_dims=2, initial_dims=50, perplexity=30.0, max_iter=1000): """ Runs t-SNE on the dataset in the NxD array X to reduce its dimensionality to no_dims dimensions. The syntaxis of the function is `Y = tsne.tsne(X, no_dims, perplexity), where X is an NxD NumPy array. """ # Check inputs if isinstance(no_dims, float): print("Error: array X should have type float.") return -1 if round(no_dims) != no_dims: print("Error: number of dimensions should be an integer.") return -1 # Initialize variables X = pca(X, initial_dims).real (n, d) = X.shape max_iter = max_iter initial_momentum = 0.5 final_momentum = 0.8 eta = 500 min_gain = 0.01 Y = np.random.randn(n, no_dims) dY = np.zeros((n, no_dims)) iY = np.zeros((n, no_dims)) gains = np.ones((n, no_dims)) # Compute P-values P = x2p(X, 1e-5, perplexity) P = P + np.transpose(P) P = P / np.sum(P) P = P * 4. # early exaggeration P = np.maximum(P, 1e-12) # Run iterations for iter in range(max_iter): # Compute pairwise affinities sum_Y = np.sum(np.square(Y), 1) num = -2. * np.dot(Y, Y.T) num = 1. / (1. + np.add(np.add(num, sum_Y).T, sum_Y)) num[range(n), range(n)] = 0. Q = num / np.sum(num) Q = np.maximum(Q, 1e-12) # Compute gradient PQ = P - Q for i in range(n): dY[i, :] = np.sum(np.tile(PQ[:, i] * num[:, i], (no_dims, 1)).T * (Y[i, :] - Y), 0) # Perform the update if iter < 20: momentum = initial_momentum else: momentum = final_momentum gains = (gains + 0.2) * ((dY > 0.) != (iY > 0.)) + \ (gains * 0.8) * ((dY > 0.) == (iY > 0.)) gains[gains < min_gain] = min_gain iY = momentum * iY - eta * (gains * dY) Y = Y + iY Y = Y - np.tile(np.mean(Y, 0), (n, 1)) # Compute current value of cost function if (iter + 1) % 10 == 0: C = np.sum(P * np.log(P / Q)) print("Iteration %d: error is %f" % (iter + 1, C)) # Stop lying about P-values if iter == 100: P = P / 4. # Return solution return Y def evaluate(predictions, gts, num_classes): hist = np.zeros((num_classes, num_classes)) for lp, lt in zip(predictions, gts): hist += _fast_hist(lp.flatten(), lt.flatten(), num_classes) # axis 0: gt, axis 1: prediction acc = np.diag(hist).sum() / hist.sum() acc_cls = np.diag(hist) / hist.sum(axis=1) acc_cls = np.nanmean(acc_cls) iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist)) mean_iu = np.nanmean(iu) freq = hist.sum(axis=1) / hist.sum() fwavacc = (freq[freq > 0] * iu[freq > 0]).sum() return acc, acc_cls, mean_iu, fwavacc, iu class AverageMeter(object): def __init__(self): self.reset() def reset(self): self.val = 0 self.avg = 0 self.sum = 0 self.count = 0 def update(self, val, n=1): self.val = val self.sum += val * n self.count += n self.avg = self.sum / self.count class PolyLR(object): def __init__(self, optimizer, curr_iter, max_iter, lr_decay): self.max_iter = float(max_iter) self.init_lr_groups = [] for p in optimizer.param_groups: self.init_lr_groups.append(p['lr']) self.param_groups = optimizer.param_groups self.curr_iter = curr_iter self.lr_decay = lr_decay def step(self): for idx, p in enumerate(self.param_groups): p['lr'] = self.init_lr_groups[idx] * (1 - self.curr_iter / self.max_iter) ** self.lr_decay class LogFile: def __init__(self, fl): open(fl,'w').close() self.fl = fl def log(self, log_str): with open(self.fl, 'a') as f: f.write(log_str+'\n')