python source code of rasterizer

# Functions for rasterizing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pyfor import gisexport
import pyfor.metrics


class Grid:
    """The Grid object is a representation of a point cloud that has been sorted into X and Y dimensional bins. From \
    the Grid object we can derive other useful products, most importantly, :class:`.Raster` objects.
    """

    def __init__(self, cloud, cell_size):
        """
        Upon initialization, the parent cloud object's :attr:`data.points` attribute is sorted into bins in place. \ The
        columns 'bins_x' and 'bins_y' are appended. Other useful information, such as the resolution, number of rows \
        and columns are also stored.

        :param cloud: The "parent" cloud object.
        :param cell_size: The size of the cell for sorting in the units of the input cloud object.
        """

        self.cloud = cloud
        self.cell_size = cell_size

        min_x, max_x = self.cloud.data.min[0], self.cloud.data.max[0]
        min_y, max_y = self.cloud.data.min[1], self.cloud.data.max[1]

        self.m = int(np.ceil((max_y - min_y) / cell_size))
        self.n = int(np.ceil((max_x - min_x) / cell_size))

        self.cloud.data.points.loc[:, "bins_x"] = (np.floor((self.cloud.data.points["x"].values - min_x) / self.cell_size)).astype(np.int)
        self.cloud.data.points.loc[:, "bins_y"] = (np.floor((max_y - self.cloud.data.points["y"].values) / self.cell_size)).astype(np.int)

        self.cells = self.cloud.data.points.groupby(["bins_x", "bins_y"])

    def _update(self):
        self.cloud.data._update()
        self.__init__(self.cloud, self.cell_size)

    def raster(self, func, dim, **kwargs):
        """
        Generates an m x n matrix with values as calculated for each cell in func. This is a raw array without \
        missing cells interpolated. See self.interpolate for interpolation methods.

        :param func: A function string, i.e. "max" or a function itself, i.e. :func:`np.max`. This function must be \
        able to take a 1D array of the given dimension as an input and produce a single value as an output. This \
        single value will become the value of each cell in the array.
        :param dim: A dimension to calculate on.
        :return: A 2D numpy array where the value of each cell is the result of the passed function.
        """
        bin_summary = self.cells.agg({dim: func}, **kwargs).reset_index()
        array = np.full((self.m, self.n), np.nan)
        array[bin_summary["bins_y"], bin_summary["bins_x"]] = bin_summary[dim]
        return Raster(array, self)

    @property
    def empty_cells(self):
        """
        Retrieves the cells with no returns in self.data

        return: An N x 2 numpy array where each row cooresponds to the [y x] coordinate of the empty cell.
        """
        array = self.raster("count", "z").array
        emptys = np.argwhere(np.isnan(array))

        return emptys

    def interpolate(self, func, dim, interp_method="nearest"):
        """
        Interpolates missing cells in the grid. This function uses scipy.griddata as a backend. Please see \
        documentation for that function for more details.

        :param func: The function (or function string) to calculate an array on the gridded data.
        :param dim: The dimension (i.e. column name of self.cells) to cast func onto.
        :param interp_method: The interpolation method call for scipy.griddata, one of any: "nearest", "cubic", \
        "linear"

        :return: An interpolated array.
        """
        from scipy.interpolate import griddata

        # Get points and values that we already have
        cell_values = self.cells[dim].agg(func).reset_index()

        points = cell_values[["bins_x", "bins_y"]].values
        values = cell_values[dim].values
        X, Y = np.mgrid[1 : self.n + 1, 1 : self.m + 1]

        # TODO generally a slow approach
        interp_grid = griddata(points, values, (X, Y), method=interp_method).T

        return Raster(interp_grid, self)

    def metrics(self, func_dict, as_raster=False):
        """
        Calculates summary statistics for each grid cell in the Grid.

        :param func_dict: A dictionary containing keys corresponding to the columns of self.data and values that \
        correspond to the functions to be  called on those columns.
        :return: A pandas dataframe with the aggregated metrics.
        """

        # Aggregate on the function
        aggregate = self.cells.agg(func_dict)
        if as_raster == False:
            return aggregate
        else:
            rasters = []
            for column in aggregate:
                array = np.asarray(
                    aggregate[column].reset_index().pivot("bins_y", "bins_x")
                )
                raster = Raster(array, self)
                rasters.append(raster)
            # Get list of dimension names
            dims = [tup[0] for tup in list(aggregate)]
            # Get list of metric names
            metrics = [tup[1] for tup in list(aggregate)]
            return pd.DataFrame(
                {"dim": dims, "metric": metrics, "raster": rasters}
            ).set_index(["dim", "metric"])

    def standard_metrics(self, heightbreak=0):
        return pyfor.metrics.standard_metrics_grid(self, heightbreak=heightbreak)


class ImportedGrid(Grid):
    """
    ImportedGrid is used to normalize a parent cloud object with an arbitrary raster file.
    """

    def __init__(self, path, cloud):
        import rasterio

        self.in_raster = rasterio.open(path)

        # Check cell size
        cell_size_x, cell_size_y = (
            self.in_raster.transform[0],
            abs(self.in_raster.transform[4]),
        )
        if cell_size_x != cell_size_y:
            print("Cell sizes not equal of input raster, not supported.")
            raise ValueError
        else:
            cell_size = cell_size_x

        self.cloud = cloud
        self.cell_size = cell_size

        min_x, max_x = self.in_raster.bounds[0], self.in_raster.bounds[2]
        min_y, max_y = self.in_raster.bounds[1], self.in_raster.bounds[3]

        self.m = self.in_raster.height
        self.n = self.in_raster.width

        # Create bins
        bins_x = np.searchsorted(
            np.linspace(min_x, max_x, self.n), self.cloud.data.points["x"]
        )
        bins_y = np.searchsorted(
            np.linspace(min_y, max_y, self.m), self.cloud.data.points["y"]
        )

        self.cloud.data.points["bins_x"] = bins_x
        self.cloud.data.points["bins_y"] = bins_y
        self.cells = self.cloud.data.points.groupby(["bins_x", "bins_y"])

    def _update(self):
        self.cloud.data._update()


class Raster:
    def __init__(self, array, grid):
        from rasterio.transform import from_origin

        self.grid = grid
        self.cell_size = self.grid.cell_size

        self.array = array
        self._affine = from_origin(
            self.grid.cloud.data.min[0],
            self.grid.cloud.data.max[1],
            self.grid.cell_size,
            self.grid.cell_size,
        )

    @classmethod
    def from_rasterio(cls):
        pass

    def force_extent(self, bbox):
        """
        Sets `self._affine` and `self.array` to a forced bounding box. Useful for trimming edges off of rasters when
        processing buffered tiles. This operation is done in place.

        :param bbox: Coordinates of output raster as a tuple (min_x, max_x, min_y, max_y)
        """
        from rasterio.transform import from_origin

        new_left, new_right, new_bot, new_top = bbox

        m, n = self.array.shape[0], self.array.shape[1]

        # Maniupulate the array to fit the new affine transformation
        old_left, old_top = self.grid.cloud.data.min[0], self.grid.cloud.data.max[1]
        old_right, old_bot = (
            old_left + n * self.grid.cell_size,
            old_top - m * self.grid.cell_size,
        )

        left_diff, top_diff, right_diff, bot_diff = (
            old_left - new_left,
            old_top - new_top,
            old_right - new_right,
            old_bot - new_bot,
        )
        left_diff, top_diff, right_diff, bot_diff = (
            int(np.rint(left_diff / self.cell_size)),
            int(np.rint(top_diff / self.cell_size)),
            int(np.rint(right_diff / self.cell_size)),
            int(np.rint(bot_diff / self.cell_size)),
        )

        if left_diff > 0:
            # bbox left is outside of raster left, we need to add columns of nans
            emptys = np.empty((m, left_diff))
            emptys[:] = np.nan
            self.array = np.insert(self.array, 0, np.transpose(emptys), axis=1)
        elif left_diff != 0:
            # bbox left is inside of raster left, we need to remove left diff columns
            self.array = self.array[:, abs(left_diff) :]

        if top_diff < 0:
            # bbox top is outside of raster top, we need to add rows of nans
            emptys = np.empty((abs(top_diff), self.array.shape[1]))
            emptys[:] = np.nan
            self.array = np.insert(self.array, 0, emptys, axis=0)
        elif top_diff != 0:
            # bbox top is inside of raster top, we need to remove rows of nans
            self.array = self.array[abs(top_diff) :, :]

        if right_diff < 0:
            # bbox right is outside of raster right, we need to add columns of nans
            emptys = np.empty((self.array.shape[0], abs(right_diff)))
            emptys[:] = np.nan
            self.array = np.append(self.array, emptys, axis=1)
        elif right_diff != 0:
            # bbox right is inside raster right, we need to remove columns
            self.array = self.array[:, :-right_diff]

        if bot_diff > 0:
            # bbox bottom is outside of raster bottom, we need to add rows of nans
            emptys = np.empty((abs(bot_diff), self.array.shape[1]))
            emptys[:] = np.nan
            self.array = np.append(self.array, emptys, axis=0)
        elif bot_diff != 0:
            # bbox bottom is inside of raster bottom, we need to remove columns
            self.array = self.array[:bot_diff, :]

        # Handle the affine transformation
        new_affine = from_origin(
            new_left, new_top, self.grid.cell_size, self.grid.cell_size
        )
        self._affine = new_affine

    def plot(self, cmap="viridis", block=False, return_plot=False):
        """
        Default plotting method for the Raster object.
        """

        # TODO implement cmap
        fig = plt.figure()
        ax = fig.add_subplot(111)
        caz = ax.matshow(self.array)
        fig.colorbar(caz)
        ax.xaxis.tick_bottom()
        ax.set_xticks(np.linspace(0, self.grid.n, 3))
        ax.set_yticks(np.flip(np.linspace(0, self.grid.m, 3)))

        x_ticks, y_ticks = (
            np.rint(
                np.linspace(self.grid.cloud.data.min[0], self.grid.cloud.data.max[0], 3)
            ),
            np.rint(
                np.linspace(self.grid.cloud.data.min[1], self.grid.cloud.data.max[1], 3)
            ),
        )

        ax.set_xticklabels(x_ticks)
        ax.set_yticklabels(y_ticks)

        if return_plot == True:
            return ax

        else:
            plt.show(block=block)

    def pit_filter(self, kernel_size):
        """
        Filters pits in the raster. Intended for use with canopy height models (i.e. grid(0.5).interpolate("max", "z").
        This function modifies the raster array **in place**.
        
        :param kernel_size: The size of the kernel window to pass over the array. For example 3 -> 3x3 kernel window.
        """
        from scipy.signal import medfilt2d

        self.array = medfilt2d(self.array, kernel_size=kernel_size)

    def write(self, path):
        """
        Writes the raster to a geotiff. Requires the Cloud.crs attribute to be filled by a projection string (ideally \
        wkt or proj4).
        
        :param path: The path to write to.
        """

        if not self.grid.cloud.crs:
            from warnings import warn

            warn(
                "No coordinate reference system defined. Please set the .crs attribute of the Cloud object.",
                UserWarning,
            )

        gisexport.array_to_raster(self.array, self._affine, self.grid.cloud.crs, path)