python source code of support

from __future__ import division
from builtins import zip
from ektelo import util
from ektelo.matrix import EkteloMatrix
from functools import reduce
import math
import numpy as np
from scipy import sparse
from ektelo import workload

def cantor_pairing(a, b):
    """
    A function returning a unique positive integer for every pair (a,b) of positive integers
    """
    return (a+b)*(a+b+1)/2 + b
    
def _replace(vector, new_values):
    for i in range(len(vector)):
        vector[i] = new_values[ vector[i] ]
    return vector

def get_partition_vec(rank,n,cluster,closeRange=False):
    """ get the partition vector from clusters returned by partition algorithms
        rank: If the bins are sorted, give the rank of each item in the input list.
            Used by AHP partition. Set rank = None if not used.
        n: Length of vector in original domain
        cluster: Cluster/partition returned by partition algorithms
        closeRange: if set to True, ranges in clusters are close range. (DAWA partition)
            i.e. [a,b] indicates [a,a+1,...b-1,b]
            if set to False, ranges in clusters are default python representation. (AHP partition)
            i.e. [a,b] indicates [a,a+1,...b-1]
    """
    partition_vec_sorted = np.empty(n,int)
    assert cluster[0][0] == 0,"First bin of partition must start with 0"
    # assign groupID to elements in sorted list.
    for i in range(len(cluster)):
        if closeRange:
            assert cluster[-1][1] == n-1, " Last bin of partition must end with length of original data"
            partition_vec_sorted[cluster[i][0]:cluster[i][1]+1] = i
        else:
            assert cluster[-1][1] == n, " Last bin of partition must end with length of original data"
            partition_vec_sorted[cluster[i][0]:cluster[i][1]] = i
    # get index in sorted list for elements in original domain, then get groupID.
    if rank is None:
        partition_vec = partition_vec_sorted
    else:
        partition_vec = np.array([partition_vec_sorted[rank[i]] for i in range(n)] )

    return partition_vec


def update_corners(corner, groupID, row, start, end):
    ''' helper function for get_subdomain
        update corners coordinates for a certain group.
        return False if the domain is not rectangular
    '''
    # if it is the first ocurrence of the group
    # update the upper left and upper right corner
    if groupID not in corner:
        corner[groupID] = {'ul':(row, start),'ur':(row,end), 'll':(row, start),'lr':(row,end)}
    else:
        temp = corner[groupID]
        if row == temp['ll'][0]: # incontinous group on the upper line
            return False 

        # update the lower corners
        # make sure the columns match and rows are continous.
        if temp['ll'][1] == start and temp['lr'][1] == end and temp['ll'][0] == row-1:
            # move the lower corners one line lower
            corner[groupID]['ll'] = (temp['ll'][0]+1, temp['ll'][1])
            corner[groupID]['lr'] = (temp['lr'][0]+1, temp['lr'][1])
        else:
            return False

    return True



def get_subdomain_grid(mapping, domain_shape):
    '''
    Given a mapping, return the domain size of all the subdomain when it is 
    used by the SplitByPartition operator.
    The original domain needs to be 2D and the mapping should split the domain 
    to smaller grids. Non-rectangular subdomain shapes are not supported,
    None will be returned.

    '''
    assert len(domain_shape) == 2 , 'Only works for 2D domains'
    m, n = domain_shape
    # unflatten the mapping vector
    mapping = mapping.reshape(domain_shape)
    corners = {}
    # record corners of each group in one pass of the mapping vector
    for i in range(m):
        start = 0
        for j in range(n):
            if j+1 >= n or mapping[i][j] != mapping[i][j+1]:
                groupID = mapping[i][start]
                status = update_corners(corners, groupID, i, start, j)
                start = j+1
                if status == False:
                    return None

    # calculate subdomains from corners
    sub_domains = {}
    for g in corners:
        temp = corners[g]
        sub_domains[g] = (temp['ll'][0] - temp['ul'][0] + 1, temp['ur'][1] - temp['ul'][1] + 1)

    return sub_domains


def canonical_ordering(mapping):
    """ remap according to the canonical order.
     if bins are noncontiguous, use position of first occurrence.
     e.g. [3,4,1,1] => [1,2,3,3]; [3,4,1,1,0,1]=>[0,1,2,2,3,2]
    """
    unique, indices, inverse, counts = mapping_statistics(mapping)

    uniqueInverse, indexInverse = np.unique(inverse,return_index =True)

    indexInverse.sort()
    newIndex = inverse[indexInverse]
    tups = list(zip(uniqueInverse, newIndex)) 
    tups.sort(key=lambda x: x[1])
    u = np.array( [u for (u,i) in tups] )
    mapping = u[inverse].reshape(mapping.shape)

    return mapping


def mapping_statistics(mapping):
    return np.unique(mapping, return_index=True, return_inverse=True, return_counts=True)   




def reduction_matrix(mapping, canonical_order=False):
    """ Returns an m x n matrix R where n is the dimension of 
        the original data and m is the dimension of the reduced data.

        Reduces data vector x with R x
        Expands workload matrix W with W' R
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, indices, inverse, counts = mapping_statistics(mapping)

    if canonical_order:
        mapping = canonical_ordering(mapping)

    n = mapping.size
    m = unique.size
    data = np.ones(n)
    cols = np.arange(n)
    rows = inverse

    return EkteloMatrix(sparse.csr_matrix((data, (rows, cols)), shape=(m, n), dtype=int))

def expansion_matrix(mapping, canonical_order=False):
    """ Returns an n x m matrix E where n is the dimension of 
        the original data and m is the dimension of the reduced data.

        Expands data vector x with E x'
        Reduces workload matrix W with W E
    """
    assert mapping.ndim == 1, "Can only handle 1-dimesional mappings for now, domain should be flattened"

    unique, indices, inverse, counts = mapping_statistics(mapping)

    if canonical_order:
        mapping = canonical_ordering(mapping)

    n = mapping.size
    m = unique.size
    data = np.ones(n)
    cols = np.arange(n)
    rows = inverse

    R = sparse.csr_matrix((data, (rows, cols)), shape=(m, n), dtype=int)
    scale = sparse.spdiags(1.0 /counts, 0, m, m)

    return EkteloMatrix(R.T * scale)

def projection_matrix(mapping, idx):
    """ Returns m x n matrix P where n is the dimension of the 
        original data and m is the number of occurence of idx
        in mapping.

        :param mapping: vector with indices representing groups
        :param idx: index of group from which to create projection

        Projects vector x with P x and matrix W with W P^T
        Unprojects vector x with P^T x and matrix W with W P
    """
    mask = np.ma.masked_where(mapping!=idx, mapping).mask

    if np.all(~mask): # when all entries are False, a single False will be returned
        mask = np.array([False]*len(mapping))

    cols = np.where(~mask)[0]
    rows = np.arange(cols.size)
    vals = np.ones_like(rows)
    P = sparse.csr_matrix((vals, (rows, cols)), (rows.size, mask.size))

    return EkteloMatrix(P)

def combine(p1, p2):
    """ Returns p3, an (n+m) dimensional array of integers such that
        p3[i,j] = p3[i', j'] iff p1[i] = p1[i'] and p2[j] = p2[j']

        :param p1:  an n dimensional array of integers
        :param p2:  an m dimensional array of integers
    """
    _, i1 = np.unique(p1.flatten(), return_inverse=True)
    _, i2 = np.unique(p2.flatten(), return_inverse=True)
    lookup = np.arange(i1.size * i2.size).reshape(i1.size, i2.size)
    # note: cartesian product, but order is very important
    # this order works when flattening/reshaping is done in row-major form
    pairs = np.dstack(np.meshgrid(i1, i2, indexing='ij')).reshape(-1,2)
    flat = lookup[pairs[:,0], pairs[:,1]]

    return flat.reshape(p1.shape + p2.shape)


def combine_all(mappings):
    """ Returns an ndarray with each dimension corresponding to one
        of mapping.
    """
    return reduce(combine, mappings, np.ones((), dtype=int))


def extract_M(W):
    assert type(W) is sparse.csr_matrix, 'W must by csr_sparse'

    return W.getrow(W.nonzero()[0][0])


def complement(A, grid_size=None):
    '''return the queries on the complementary domain
    :param grid_size: The griding size of the new queris, if None, return total on the complementary domain
    Currently complementary domain are those indices with column norm(L1) 0.
    '''
    comp = []
    if isinstance(A, np.ndarray) is False:
        A = A.toarray()
    norm = np.linalg.norm(A,ord = 1,axis = 0)

    compl_size = len(norm) - np.count_nonzero(norm)
    grid_size = compl_size if grid_size is None else grid_size
    grid_num = int(math.ceil(compl_size/float(grid_size)))
    if grid_num==0:
        return None

    ind = 0
    for g in range(grid_num):
        q = np.zeros(len(norm))
        remain_in_group = grid_size
        while (remain_in_group>0) and (ind<len(norm)):
            if np.isclose(norm[ind],0.0):
                q[ind]=1
                remain_in_group-=1
            ind +=1
        comp.append(q)

    return sparse.csr_matrix(comp)