''' Utilities and convenience functions for manipulating spatial data and spectral data cubes. Contains: * `as_array()` * `as_mask()` * `as_raster()` * `array_to_raster()` * `array_to_raster_clone()` * `binary_mask()` * `cfmask()` * `combine_dicts()` * `combine_masks()` * `composite()` * `copy_nodata()` * `density_slice()` * `dump_raster()` * `fill_nan_bandwise()` * `fill_nodata_bandwise()` * `get_coord_transform()` * `intersect_rasters()` * `mae()` * `mask_ledaps_qa()` * `mask_saturation()` * `partition()` * `pixel_to_geojson()` * `pixel_to_xy()` * `rmse()` * `spectra_at_idx()` * `spectra_at_xy()` * `stack_hdf_as_array()` * `subarray()` * `xy_to_pixel()` ''' import json import os import re import numpy as np from concurrent.futures import ProcessPoolExecutor from functools import partial from osgeo import gdal, gdalconst, gdal_array, gdalnumeric, ogr, osr def as_array(path, band_axis=True): ''' A convenience function for opening a raster as an array and accessing its spatial information; takes a single string argument. Arguments: path The path of the raster file to open as an array band_axis True to include a band axis, even for single-band rasters ''' ds = gdal.Open(path) arr = ds.ReadAsArray() gt = ds.GetGeoTransform() wkt = ds.GetProjection() ds = None # Make sure that single-band rasters have a band axis if band_axis and len(arr.shape) < 3: shp = arr.shape arr = arr.reshape((1, shp[0], shp[1])) return (arr, gt, wkt) def as_mask(path, nodata=-9999): ''' Converts all non-zero values in all bands to ones. Arguments: path The path of the raster file to open as a mask array nodata The NoData value; these areas not masked ''' rast, gt, wkt = as_array(path) # Create a baseline raster base = np.empty((1, rast.shape[-2], rast.shape[-1])) base.fill(False) # Case of multiband raster if rast.ndim == 3: # Update the mask for nonzero values in any band for i in range(rast.shape[0]): np.logical_or(base, (rast[i,...].ravel() > 0).reshape(rast[i,...].shape), out=base) # Repeat the value of one (1) across the bands np.place(rast, base.repeat(rast.shape[0], axis=0), (1,)) elif rast.ndim == 2: # Create a single band (dim-3 array) rast = rast.reshape((1, rast.shape[-2], rast.shape[-1])) # Update the mask for nonzero values in any band np.logical_or(base, (rast.ravel() > 0).reshape(rast.shape), out=base) # Repeat the value of one (1) across the bands np.place(rast, base, (1,)) else: raise ValueError('Number of array dimensions must be 2 or 3') # Replace the NoData values rast[rast == nodata] = 0 return (rast, gt, wkt) def as_raster(path): ''' A convenience function for opening a raster and accessing its spatial information; takes a single string argument. Arguments: path The path of the raster file to open as a gdal.Dataset ''' ds = gdal.Open(path) gt = ds.GetGeoTransform() wkt = ds.GetProjection() return (ds, gt, wkt) def array_to_raster(a, gt, wkt, xoff=None, yoff=None, dtype=None): ''' Creates a raster from a given array, with optional x- and y-offsets if the array was clipped. Arguments: a A NumPy array gt A GDAL GeoTransform tuple wkt Well-Known Text projection xoff The offset in the x-direction; should be provided when clipped yoff The offset in the y-direction; should be provided when clipped dtype The data type to coerce on the array ''' if dtype is not None: a = a.astype(dtype) try: rast = gdal_array.OpenNumPyArray(a) except AttributeError: # For backwards compatibility with older version of GDAL rast = gdal.Open(gdalnumeric.GetArrayFilename(a)) except: rast = gdal_array.OpenArray(a) rast.SetGeoTransform(gt) rast.SetProjection(wkt) if xoff is not None and yoff is not None: # Bit of a hack; essentially, re-create the raster but with the # correct X and Y offsets (don't know how to do this without the # use of CopyDatasetInfo()) return array_to_raster_clone(a, rast, xoff, yoff) return rast def array_to_raster_clone(a, proto, xoff=None, yoff=None): ''' Creates a raster from a given array and a prototype raster dataset, with optional x- and y-offsets if the array was clipped. Arguments: a A NumPy array proto A prototype dataset xoff The offset in the x-direction; should be provided when clipped yoff The offset in the y-direction; should be provided when clipped ''' try: rast = gdal_array.OpenNumPyArray(a) except AttributeError: # For backwards compatibility with older version of GDAL rast = gdal.Open(gdalnumeric.GetArrayFilename(a)) except: rast = gdal_array.OpenArray(a) kwargs = dict() if xoff is not None and yoff is not None: kwargs = dict(xoff=xoff, yoff=yoff) # Copy the projection info and metadata from a prototype dataset if type(proto) == str: proto = gdal.Open(proto) gdalnumeric.CopyDatasetInfo(proto, rast, **kwargs) return rast def binary_mask(rast, mask, nodata=-9999, invert=False): ''' Applies an arbitrary, binary mask (data in [0,1]) where pixels with a value of 1 are pixels to be masked out. Arguments: rast A gdal.Dataset or a NumPy array mask A gdal.Dataset or a NumPy array nodata The NoData value; defaults to -9999. invert Invert the mask? (tranpose meaning of 0 and 1); defaults to False. ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() if not isinstance(mask, np.ndarray): maskr = mask.ReadAsArray() else: maskr = mask.copy() if not np.alltrue(np.equal(rastr.shape[-2:], maskr.shape[-2:])): raise ValueError('Raster and mask do not have the same shape') # Convert Boolean arrays to ones and zeros if maskr.dtype == bool: maskr = maskr.astype(np.int0) # Transform into a "1-band" array and apply the mask if maskr.shape != rastr.shape: maskr = maskr.reshape((1, maskr.shape[-2], maskr.shape[-1]))\ .repeat(rastr.shape[0], axis=0) # Copy the mask across the "bands" # TODO Compare to place(), e.g., # np.place(rastr, mask.repeat(rastr.shape[0], axis=0), (nodata,)) # Mask out areas that match the mask (==1) if invert: rastr[maskr < 1] = nodata else: rastr[maskr > 0] = nodata return rastr def cfmask(mask, mask_values=(1,2,3,4,255), nodata=-9999): ''' Returns a binary mask according to the CFMask algorithm results for the image; mask has True for water, cloud, shadow, and snow (if any) and False everywhere else. More information can be found: https://landsat.usgs.gov/landsat-surface-reflectance-quality-assessment Landsat 4-7 Pre-Collection pixel_qa values to be masked: mask_values = (1, 2, 3, 4) Landsat 4-7 Collection 1 pixel_qa values to be masked (for "Medium" confidence): mask_values = (1, 68, 72, 80, 112, 132, 136, 144, 160, 176, 224) Landsat 8 Collection 1 pixel_qa values to be masked (for "Medium" confidence): mask_values = (1, 324, 328, 386, 388, 392, 400, 416, 432, 480, 832, 836, 840, 848, 864, 880, 900, 904, 912, 928, 944, 992, 1024) Arguments: mask A gdal.Dataset or a NumPy array mask_path The path to an EOS HDF4 CFMask raster mask_values The values in the mask that correspond to NoData pixels nodata The NoData value; defaults to -9999. ''' if not isinstance(mask, np.ndarray): maskr = mask.ReadAsArray() else: maskr = mask.copy() # Mask according to bit-packing described here: # https://landsat.usgs.gov/landsat-surface-reflectance-quality-assessment maskr = np.in1d(maskr.reshape((maskr.shape[0] * maskr.shape[1])), mask_values)\ .reshape((1, maskr.shape[0], maskr.shape[1])).astype(np.int0) return maskr def clean_mask(rast): ''' Clips the values in a mask to the interval [0, 1]; values larger than 1 become 1 and values smaller than 0 become 0. Arguments: rast An input gdal.Dataset or numpy.array instance ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() return np.clip(rastr, a_min=0, a_max=1) def combine_dicts(*dicts): ''' Combines two dictionaries that have lists as values. Arguments: dicts Two or more dictionaries with lists as values ''' d = dict() keys = set() for a_dict in dicts: keys = keys.union(a_dict.keys()) # Iterate through each input dictionary for key in keys: for a_dict in dicts: # Skip keys not used in this dictionary if key not in a_dict.keys(): continue if key not in d.keys(): # Create missing keys in the output dictionary d[key] = list() d[key].extend(a_dict[key]) return d def combine_masks(*masks, multiply=False): ''' All masks must have the same shape. When multiply=False, the combined mask takes on a value of 1 where any individual mask has 1; this produces a "greedy" mask. When multiply=True, the combined mask takes on a value of 0 where any individual mask has 0 (by multiplying individual values); this produces a "conservative" mask. If 1 is "bad" data and 0 is "good" data, this means the greedy mask would mask only those pixels that are "good" in all images, whereas the conservative mask would maks only those pixels that are "bad" in all images. Arguments: [masks ...] Any number of numpy.ndarrays, all of the same shape multiply True to create a superposition of zeros (by multiplying masks together) ''' base = np.zeros(masks[0].shape) if multiply: base = np.ones(masks[0].shape) for mask in masks: # Protect against someone, e.g., including True as a mask assert type(mask) == np.ndarray, 'Expected a numpy.ndarray type' # TODO Do this outside for loop? if multiply: base = np.multiply(base, mask) # TODO This takes up too much memory; try adding the masks together? else: base = np.where(mask > 0, mask, base) return base def composite( *rasters, target_band=1, reducer='median', nodata=-9999.0, dtype=np.float32, processes=1): ''' Composites multiple rasters in a single band. Compared to composite2() and composite3(), this function is extremely fast but does not support normalization and will not composite more than one band at a time. Arguments: rasters The input rasters to composite target_band The index of the band to composite reducer The name of the reducer function, either: "median", "min", "max", or "mean" nodata The NoData value to ignore in compositing dtype The data type to enforce in the output processes (Optional) Number of processes to use ''' if reducer not in ('median', 'min', 'max', 'mean'): raise ValueError('Invalid reducer name') shp = rasters[0].shape assert all(map(lambda x: x == shp, [r.shape for r in rasters])), 'Rasters must have the same shape' # For single-band rasters... if rasters[0].ndim < 3: shp = (1, shp[0], shp[1]) rasters = list(map(lambda r: r.reshape(shp), rasters)) # Stack the rasters in a continuous, single-band "tapestry" using # vstack(), then cut out the rasters concatenated in this way into # separate bands using reshape() b = target_band stack = np.vstack(map(lambda r: r[b,...], rasters))\ .reshape((len(rasters), shp[1] * shp[2])) # Insert nan into NoData locations stack = np.where(stack == nodata, np.nan, stack) reducer_func = partial(getattr(np, 'nan%s' % reducer), axis = 0) # Get index ranges for each process to work on work = partition(stack, num_processes = processes, axis = 1) with ProcessPoolExecutor(max_workers = processes) as executor: all_results = executor.map( # Work done pixel-wise reducer_func, [stack[:,i:j] for i, j in work]) # Stack each reduced band (and reshape to multi-band image) result = np.concatenate(list(all_results), axis = 0)\ .reshape((1, shp[1], shp[2])) return np.where(np.isnan(result), nodata, result) def composite2( reducers, *rasters, normalize='sum', nodata=-9999.0, dtype=np.float32): ''' NOTE: Uses masked arrays in NumPy and therefore is MUCH slower than the `composite3()` function, which is equivalent in output. Creates a multi-image (multi-date) composite from input rasters. The reducers argument specifies, in the order of the bands (endmembers), how to pick a value for that band in each pixel. If None is given, then the median value of that band from across the images is used for that pixel value. If None is specified as a reducer, the corresponding band(s) will be dropped. Combining None reducer(s) with a normalized sum effectively subtracts an endmember under the unity constraint. Arguments: reducers One of ('min', 'max', 'mean', 'median', None) for each endmember rasters One or more raster files to composite normalize True (by default) to normalize results by their sum nodata The NoData value (defaults to -9999) dtype The data type to coerce in the output array; very important if the desired output is float but NoData value is integer ''' shp = rasters[0].shape num_non_null_bands = shp[0] - len([b for b in reducers if b is None]) assert all(map(lambda x: x == shp, [r.shape for r in rasters])), 'Rasters must have the same shape' assert len(reducers) == shp[0], 'Must provide a reducer for each band (including None to drop the band)' # Swap the sequence of rasters for a sequence of bands, then collapse the X-Y axes stack = np.array(rasters).swapaxes(0, 1).reshape( shp[0], len(rasters), shp[-1]*shp[-2]) # Mask out NoData values stack_masked = np.ma.masked_where(stack == nodata, stack) # For each band (or endmember)... band_arrays = [] for i in range(shp[0]): if reducers[i] in ('min', 'max', 'median', 'mean'): band_arrays.append(getattr(np.ma, reducers[i])(stack_masked[i, ...], axis=0)) # Stack each reduced band (and reshape to multi-band image) final_stack = np.ma.vstack(band_arrays).reshape( (num_non_null_bands, shp[-2], shp[-1])) # Calculate a normalized sum (e.g., fractions must sum to one) if normalize is not None: constant = getattr(final_stack, normalize)(axis=0) # The sum across the bands constant.set_fill_value(1.0) # NaNs will be divided by 1.0 constant = np.ma.repeat(constant, num_non_null_bands, axis=0).reshape(final_stack.shape) # Divide the values in each band by the normalized sum across the bands if num_non_null_bands > 1: final_stack = final_stack / constant.swapaxes(0, 1) else: final_stack = final_stack / constant # NOTE: Essential to cast type, e.g., to float in case first pixel (i.e. top-left) is all NoData of an integer type final_stack.set_fill_value(dtype(nodata)) # Fill NoData for NaNs return final_stack.filled() def composite3( reducers, *rasters, normalize='sum', nodata=-9999.0, dtype=np.float32): ''' Creates a multi-image (multi-date) composite from input rasters. The reducers argument specifies, in the order of the bands (endmembers), how to pick a value for that band in each pixel. If None is given, then the median value of that band from across the images is used for that pixel value. Note that 'min,' 'max,' or 'mean' are each considerably faster than median or the starred versions because these methods are available on the NumPy array and do not require `apply_along_axis()` to be called. The median, 'min*' and 'max*' and 'mean*' reducers will remove NoData values before calculation. If None is specified as a reducer, the corresponding band(s) will be dropped. Combining None reducer(s) with a normalized sum effectively subtracts an endmember under the unity constraint. Arguments: reducers One of ('min', 'max', 'mean', 'min*', 'max*', 'mean*', 'median', None) for each endmember rasters One or more raster files to composite normalize True (by default) to normalize results by their sum nodata The NoData value (defaults to -9999) dtype The data type to coerce in the output array; very important if the desired output is float but NoData value is integer ''' shp = rasters[0].shape assert all(map(lambda x: x == shp, [r.shape for r in rasters])), 'Rasters must have the same shape' assert len(reducers) == shp[0] or len(reducers) == len(shp) - 1, 'Must provide a reducer for each endmember (including None for a median reduction)' # For single-band rasters... if len(shp) < 3: shp = (1, shp[0], shp[1]) rasters = list(map(lambda r: r.reshape(shp), rasters)) # For each band (or endmember)... num_non_null_bands = shp[0] - len([b for b in reducers if b is None]) band_arrays = [] for i in range(shp[0]): # Stack the rasters in a continuous, single-band "tapestry" using vstack(), then cut out the rasters concatenated in this way into separate bands using reshape() stack = np.vstack(map(lambda r: r[i,...], rasters))\ .reshape((len(rasters), shp[1], shp[2])) # Get the reducing function for this band and apply across images if reducers[i] in ('min', 'max', 'mean'): band_arrays.append(getattr(stack, reducers[i])(axis=0)) continue # Do not apply the more complex reducer_func below elif reducers[i] in ('min*', 'max*', 'mean*', 'median'): reducer_func_name = reducers[i].strip('*') # For None, skip this band (effectively subtracting it in a normalized sum) else: continue # If not all values are NoData, remove the NoData values before # calculating the median; otherwise, the answer is NoData reducer_func = lambda xy: getattr(np, reducer_func_name)([ a for a in xy if a != nodata ]) if not np.where(xy == nodata, True, False).all() else dtype(nodata) # NOTE: Essential to cast type, e.g., to float in case first pixel (i.e. top-left) is all NoData of an integer type band_arrays.append(np.apply_along_axis(reducer_func, 0, stack)) # Stack each reduced band (and reshape to multi-band image) final_stack = np.vstack(band_arrays).reshape( (num_non_null_bands, shp[-2], shp[-1])) # Calculate a normalized sum/ mean/ etc. (e.g., fractions must sum to one) if normalize is not None: # Again, essential to call dtype() to avoid erroneous type coercion normal_func = lambda x: x / getattr(x, normalize)() if not np.where(x == nodata, True, False).all() else np.repeat(dtype(nodata), num_non_null_bands) return np.apply_along_axis(normal_func, 0, final_stack) return final_stack def copy_nodata(source, target, nodata=-9999): ''' Copies the NoData values from a source raster or raster array into a target raster or raster array. That is, source's NoData values are embedded in target. This is useful, for instance, when you want to mask out dropped scanlines in a Landsat 7 image; these areas are NoData in the EOS HDF but they are not included in the QA mask. Arguments: source A gdal.Dataset or a NumPy array target A gdal.Dataset or a NumPy array nodata The NoData value to look for (and embed) ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(source, np.ndarray): source = source.ReadAsArray() if not isinstance(target, np.ndarray): target = target.ReadAsArray() else: target = target.copy() assert source.ndim == target.ndim, "Source and target rasters must have the same number of axes" if source.ndim == 3: assert source.shape[1:] == target.shape[1:], "Source and target rasters must have the same shape (not including band axis)" return np.where(source[0,...] == nodata, nodata, target) else: assert source.shape == target.shape, "Source and target rasters must have the same shape" return np.where(source == nodata, nodata, target) def density_slice(rast, rel=np.less_equal, threshold=1000, nodata=-9999): ''' Returns a density slice from a given raster. Arguments: rast A gdal.Dataset or a NumPy array rel A NumPy logic function; defaults to np.less_equal threshold An integer number ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() if (len(rastr.shape) > 2 and min(rastr.shape) > 1): raise ValueError('Expected a single-band raster array') return np.logical_and( rel(rastr, np.ones(rast.shape) * threshold), np.not_equal(rastr, np.ones(rast.shape) * nodata)).astype(np.int0) def dump_raster(rast, rast_path, driver='GTiff', gdt=None, nodata=None): ''' Creates a raster file from a given gdal.Dataset instance. Arguments: rast A gdal.Dataset; does NOT accept NumPy array rast_path The path of the output raster file driver The name of the GDAL driver to use (determines file type) gdt The GDAL data type to use, e.g., see gdal.GDT_Float32 nodata The NoData value; defaults to -9999. ''' if gdt is None: gdt = rast.GetRasterBand(1).DataType driver = gdal.GetDriverByName(driver) sink = driver.Create( rast_path, rast.RasterXSize, rast.RasterYSize, rast.RasterCount, int(gdt)) assert sink is not None, 'Cannot create dataset; there may be a problem with the output path you specified' sink.SetGeoTransform(rast.GetGeoTransform()) sink.SetProjection(rast.GetProjection()) for b in range(1, rast.RasterCount + 1): dat = rast.GetRasterBand(b).ReadAsArray() sink.GetRasterBand(b).WriteArray(dat) sink.GetRasterBand(b).SetStatistics(*map(np.float64, [dat.min(), dat.max(), dat.mean(), dat.std()])) if nodata is None: nodata = rast.GetRasterBand(b).GetNoDataValue() if nodata is None: nodata = -9999 sink.GetRasterBand(b).SetNoDataValue(np.float64(nodata)) sink.FlushCache() def fill_nodata_bandwise(arr, fill_values=None, nodata=-9999): ''' Fill NoData values in a raster array with the column means (default) or a user-specified vector of values. Arguments: arr The raster array with NoData values to fill fill_values A user-specified vector of values, one for each band nodata The NoData value to fill ''' arr2 = np.where(arr == -9999, np.nan, arr) return fill_nan_bandwise(arr2, fill_values = fill_values) def fill_nan_bandwise(arr, fill_values=None): ''' Fill np.nan values in a raster array with the column means (default) or a user-specified vector of values. Arguments: arr The raster array with np.nan values to fill fill_values A user-specified vector of values, one for each band ''' # Assumes band axis is band 0 shp = arr.shape arr2 = arr.copy() if arr.ndim > 2: arr2 = arr.reshape((shp[0], shp[1] * shp[2])) if fill_values is not None: assert len(fill_values) == shp[0], 'If specifying a vector of fill values, the length must equal the number of bands' fill_values = np.array(fill_values).reshape((shp[0],)) else: # Get band means fill_values = np.nanmean(arr2, axis = 1) # Get indices of nans idx = np.where(np.isnan(arr2)) # Place column means in the indices, arrays aligned with take() arr2[idx] = np.take(fill_values, idx[0]) return arr2.reshape(shp) def get_coord_transform(source_epsg, target_epsg): ''' Creates an OGR-framework coordinate transformation for use in projecting coordinates to a new coordinate reference system (CRS). Used as, e.g.: transform = get_coord_transform(source_epsg, target_epsg) transform.TransformPoint(x, y) Arguments: source_epsg The EPSG code for the source CRS target_epsg The EPSG code for the target CRS ''' # Develop a coordinate transformation, if desired transform = None source_ref = osr.SpatialReference() target_ref = osr.SpatialReference() source_ref.ImportFromEPSG(source_epsg) target_ref.ImportFromEPSG(target_epsg) return osr.CoordinateTransformation(source_ref, target_ref) def intersect_rasters( ref_rast_defn, src_rast_defn, nodata=-9999, gdt=gdalconst.GDT_Int32): ''' Projects the source raster so that its top-left corner is aligned with the reference raster. Then, clips or pads the source raster so that it has the same number of rows and columns (covers the same extent at the same grid resolution) as the reference raster. Arguments: ref_rast_defn A (raster, gt, wkt) tuple for the reference raster src_rast_defn A (raster, gt, wkt) tuple for the source raster; the raster to be changed nodata The NoData value to fill in where the reference raster is larger gdt The GDAL data type to use for the output raster NOTE: If the reference raster's top-left corner is far left and/or above that of the source raster, the interesect raster may contain no data from the original raster, i.e., an empty raster will result. ''' rast_ref, gt, wkt = ref_rast_defn rast_src, gt0, wkt0 = src_rast_defn # Create a new raster with the desired attributes width, height = (rast_ref.RasterXSize, rast_ref.RasterYSize) width0, height0 = (rast_src.RasterXSize, rast_src.RasterYSize) rast_out = gdal.GetDriverByName('MEM').Create('temp.file', width, height, rast_src.RasterCount, gdt) # Initialize and set the NoData value for i in range(1, rast_src.RasterCount + 1): b = rast_out.GetRasterBand(i) b.Fill(nodata) b.SetNoDataValue(nodata) # Set the desired geotransform, and projection rast_out.SetGeoTransform(gt) rast_out.SetProjection(wkt) # Re-project the source image; now the top-left corners are aligned gdal.ReprojectImage(rast_src, rast_out, wkt0, wkt, gdalconst.GRA_Bilinear) arr = rast_out.ReadAsArray() del rast_src # Delete original raster references del rast_ref del rast_out # Clip the extent of the src image if ref is smaller if arr.ndim > 2: ch, cw = arr.shape[1:] else: ch, cw = arr.shape if (width <= width0): cw = width # Clip src to ref extents if (height <= height0): ch = height if arr.ndim > 2: arr_out = arr[:,0:ch,0:cw] else: arr_out = arr[0:ch,0:cw] return array_to_raster(arr_out, gt, wkt) def mae(reference, predictions, idx=None, n=1): ''' Mean absolute error (MAE) for (p x n) raster arrays, where p is the number of bands and n is the number of pixels. Arguments: reference Raster array of reference ("truth" or measured) data predictions Raster array of predictions idx Optional array of indices at which to sample the arrays n A normalizing constant for residuals; e.g., the number of endmembers when calculating RMSE for modeled reflectance ''' if idx is None: r = reference.shape[1] residuals = reference - predictions else: r = len(idx) residuals = reference[:, idx] - predictions[:, idx] # Divide the MSE by the number of bands before taking the root return np.apply_along_axis(lambda x: np.divide(np.abs(x).sum(), n), 0, residuals) def mask_by_query(rast, query, invert=False, nodata=-9999): ''' Mask pixels (across bands) that match a query in any one band or all bands. For example: `query = rast[1,...] < -25` queries those pixels with a value less than -25 in band 2; these pixels would be masked (if `invert=False`). By default, the pixels that are queried are masked, but if `invert=True`, the query serves to select pixels NOT to be masked (`np.invert()` can also be called on the query before calling this function to achieve the same effect). Arguments: rast A gdal.Dataset or numpy.array instance query A NumPy boolean array representing the result of a query invert True to invert the query nodata The NoData value to apply in the masking ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() shp = rastr.shape if query.shape != rastr.shape: assert len(query.shape) == 2 or len(query.shape) == len(shp), 'Query must either be 2-dimensional (single-band) or have a dimensionality equal to the raster array' assert shp[-2] == query.shape[-2] and shp[-1] == query.shape[-1], 'Raster and query must be conformable arrays in two dimensions (must have the same extent)' # Transform query into a 1-band array and then into a multi-band array query = query.reshape((1, shp[-2], shp[-1])).repeat(shp[0], axis=0) # Mask out areas that match the query if invert: rastr[np.invert(query)] = nodata else: rastr[query] = nodata return rastr def mask_ledaps_qa(rast, mask, nodata=-9999): ''' Applies a given LEDAPS QA mask to a raster. It's unclear how these bit-packed QA values ought to be converted back into 16-bit binary numbers: "{0:b}".format(42).zfill(16) # Convert binary to decimal padded left? "{0:b}".format(42).ljust(16, '0') # Or convert ... padded right? The temporary solution is to use the most common (modal) value as the "clear" pixel classification and discard everything else. We'd like to just discard pixels above a certain value knowing that everything above this threshold has a certain bit-packed QA meanining. For example, mask pixels with QA values greater than or equal to 12287: int("1000000000000000", 2) == 32768 # Maybe clouds int("0010000000000000", 2) == 12287 # Maybe cirrus Similarly, we'd like to discard pixels at or below 4, as these small binary numbers correspond to dropped frames, desginated fill values, and/or terrain occlusion. Arguments: rast A gdal.Dataset or a NumPy array mask A gdal.Dataset or a NumPy array ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rast = rast.ReadAsArray() else: rastr = rast.copy() if not isinstance(mask, np.ndarray): maskr = mask.ReadAsArray() else: maskr = mask.copy() # Since the QA output is so unreliable (e.g., clouds are called water), # we take the most common QA bit-packed value and assume it refers to # the "okay" pixels mode = np.argmax(np.bincount(maskr.ravel())) assert mode > 4 and mode < 12287, 'The modal value corresponds to a known error value' maskr[np.isnan(maskr)] = 0 maskr[maskr != mode] = 0 maskr[maskr == mode] = 1 # Transform into a "1-band" array and apply the mask maskr = maskr.reshape((1, maskr.shape[0], maskr.shape[1]))\ .repeat(rastr.shape[0], axis=0) # Copy the mask across the "bands" rastr[maskr == 0] = nodata return rastr def pixel_to_geojson(pixel_pairs, gt=None, wkt=None, path=None, indent=2): ''' This method translates given pixel locations into longitude-latitude locations on a given dataset. Assumes decimal degrees output. Arguments: pixel_pairs The pixel pairings to be translated in the form [[x1, y1],[x2, y2]] gt [Optional] A GDAL GeoTransform tuple wkt [Optional] Projection information as Well-Known Text path The file location of the GeoTIFF ''' coords = pixel_to_xy(pixel_pairs, gt=gt, wkt=wkt, path=path, dd=True) doc = { 'type': 'GeometryCollection', 'geometries': [] } for pair in coords: doc['geometries'].append({ 'type': 'Point', 'coordinates': pair }) return json.dumps(doc, sort_keys=False, indent=indent) def partition(array, num_processes, axis=0): ''' Creates index ranges for partitioning an array to work on over multiple processes. Arguments: array The 2-dimensional array to paritition num_processes The number of processes desired ''' N = array.shape[axis] P = (num_processes + 1) # Number of breaks (number of partitions + 1) # Break up the indices into (roughly) equal parts partitions = list(zip(np.linspace(0, N, P, dtype=int)[:-1], np.linspace(0, N, P, dtype=int)[1:])) # Final range of indices should end +1 past last index for completeness work = partitions[:-1] work.append((partitions[-1][0], partitions[-1][1] + 1)) return work def pixel_to_xy(pixel_pairs, gt=None, wkt=None, path=None, dd=False): ''' Modified from code by Zachary Bears (zacharybears.com/using-python-to- translate-latlon-locations-to-pixels-on-a-geotiff/). This method translates given pixel locations into longitude-latitude locations on a given GeoTIFF. Arguments: pixel_pairs The pixel pairings to be translated in the form [[x1, y1],[x2, y2]] gt [Optional] A GDAL GeoTransform tuple wkt [Optional] Projection information as Well-Known Text path The file location of the GeoTIFF dd True to use decimal degrees for longitude-latitude (False is the default) NOTE: This method does not take into account pixel size and assumes a high enough image resolution for pixel size to be insignificant. ''' assert path is not None or (gt is not None and wkt is not None), 'Function requires either a reference dataset or a geotransform and projection' pixel_pairs = map(list, pixel_pairs) srs = osr.SpatialReference() # Create a spatial ref. for dataset if path is not None: ds = gdal.Open(path) # Load the image dataset gt = ds.GetGeoTransform() # Get geotransform of the dataset if dd: srs.ImportFromWkt(ds.GetProjection()) # Set up coord. transform. else: srs.ImportFromWkt(wkt) # Will use decimal-degrees if so specified if dd: ct = osr.CoordinateTransformation(srs, srs.CloneGeogCS()) # Go through all the point pairs and translate them to pixel pairings xy_pairs = [] for point in pixel_pairs: # Translate the pixel pairs into untranslated points lon = point[0] * gt[1] + gt[0] lat = point[1] * gt[5] + gt[3] if dd: (lon, lat, holder) = ct.TransformPoint(lon, lat) # Points to space xy_pairs.append((lon, lat)) # Add the point to our return array return xy_pairs def rmse(reference, predictions, idx=None, n=1, nodata=-9999): ''' RMSE for (p x n) raster arrays, where p is the number of bands and n is the number of pixels. RMSE is calculated after Powell et al. (2007) in Remote Sensing of the Environment. Arguments: reference Raster array of reference ("truth" or measured) data predictions Raster array of predictions idx Optional array of indices at which to sample the arrays n A normalizing constant for residuals; e.g., the number of endmembers when calculating RMSE for modeled reflectance ''' shp = reference.shape if idx is None: residuals = (reference - predictions).reshape((1, shp[-2], shp[-1])) else: idx = idx[reference[:,idx][0,:] != nodata] # Strip out NoData areas if len(reference.shape) == 3: residuals = reference.reshape( (shp[0], shp[1]*shp[2]))[:, idx] - predictions.reshape( (shp[0], shp[1]*shp[2]))[:, idx] else: residuals = reference[:, idx] - predictions[:, idx] # Divide the MSE by the number of bands before taking the root return np.sqrt( np.apply_along_axis(lambda x: np.divide(np.square(x).sum(), n), 0, residuals)) def saturation_mask(rast, saturation_value=10000, nodata=-9999): ''' Returns a binary mask that has True for saturated values (e.g., surface reflectance values greater than 16,000, however, SR values are only considered valid on the range [0, 10,000]) and False everywhere else. Arguments: rast A gdal.Dataset or NumPy array saturation_value The value beyond which pixels are considered saturated nodata The NoData value; defaults to -9999. ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() # Create a baseline "nothing is saturated in any band" raster mask = np.empty((1, rastr.shape[1], rastr.shape[2])) mask.fill(False) # Update the mask for saturation in any band for i in range(rastr.shape[0]): np.logical_or(mask, rastr[i,...] > saturation_value, out=mask) return mask def spectra_at_idx(hsi_cube, idx): ''' Returns the spectral profile of the pixels indicated by the indices provided. NOTE: Assumes an HSI cube (transpose of a GDAL raster). Arguments: hsi_cube An HSI cube (n x m x p) idx An array of indices that specify one or more pixels in a raster ''' return np.array([hsi_cube[p[0],p[1],:] for p in idx]) def spectra_at_xy(rast, xy, gt=None, wkt=None, dd=False): ''' Returns the spectral profile of the pixels indicated by the longitude- latitude pairs provided. Arguments: rast A gdal.Dataset or NumPy array xy An array of X-Y (e.g., longitude-latitude) pairs gt A GDAL GeoTransform tuple; ignored for gdal.Dataset wkt Well-Known Text projection information; ignored for gdal.Dataset dd Interpret the longitude-latitude pairs as decimal degrees Returns a (q x p) array of q endmembers with p bands. ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): gt = rast.GetGeoTransform() wkt = rast.GetProjection() rast = rast.ReadAsArray() # You would think that transposing the matrix can't be as fast as # transposing the coordinate pairs, however, it is. return spectra_at_idx(rast.transpose(), xy_to_pixel(xy, gt=gt, wkt=wkt, dd=dd)) def stack_hdf_as_array(path, bands=(1,2,3,4,5,7), tpl=None): ''' Stacks the layers of an HDF4 file assuming it contains subdata from EOS imagery (i.e., contains the prefix "HDF4_EOS:EOS_GRID:"); returns a tuple of (numpy.array, tuple) where the second argument is a GDAL GeoTransform. Assumes that the GeoTransform for all layers is the same (for downstream clipping purposes). The output array will have shape (b, m, n) where b is the number of bands. Arguments: path The path to a raster to be opened as a gdal.Dataset bands A tuple of the band numbers to include tpl The template for an HDF4 subdataset path ''' if not os.path.exists(path): raise ValueError('File path not found') # Note: Template 'HDF4_EOS:EOS_GRID:"%s":Grid:band%d' is for LT5/LE7 # surface reflectance data if tpl is None: tpl = 'HDF4_EOS:EOS_GRID:"%s":Grid:sr_band%d' subdata = [tpl % (path, b) for b in bands] # Open the specified bands layers = [gdal.Open(n) for n in subdata] arr = np.array([l.GetRasterBand(1).ReadAsArray() for l in layers]) gt = layers[0].GetGeoTransform() wkt = layers[0].GetProjection() for l in layers: l = None # Close layers return (arr, gt, wkt) def subarray(rast, filtered_value=-9999, indices=False): ''' Given a (p x m x n) raster (or array), returns a (p x z) subarray where z is the number of cases (pixels) that do not contain the filtered value (in any band, in the case of a multi-band image). Arguments: rast The input gdal.Dataset or a NumPy array filtered_value The value to remove from the raster array indices If True, return a tuple: (indices, subarray) ''' # Can accept either a gdal.Dataset or numpy.array instance if not isinstance(rast, np.ndarray): rastr = rast.ReadAsArray() else: rastr = rast.copy() shp = rastr.shape if len(shp) == 1: # If already raveled return rastr[rastr != filtered_value] if len(shp) == 2 or shp[0] == 1: # If a "single-band" image arr = rastr.reshape(1, shp[-2]*shp[-1]) return arr[arr != filtered_value] # For multi-band images arr = rastr.reshape(shp[0], shp[1]*shp[2]) idx = (arr != filtered_value).any(axis=0) if indices: # Return the indices as well rast_shp = (shp[-2], shp[-1]) return (np.indices(rast_shp)[:,idx.reshape(rast_shp)], arr[:,idx]) return arr[:,idx] def xy_to_pixel(xy_pairs, gt=None, wkt=None, path=None, dd=False): ''' Modified from code by Zachary Bears (zacharybears.com/using-python-to- translate-latlon-locations-to-pixels-on-a-geotiff/). This method translates given longitude-latitude pairs into pixel locations on a given GeoTIFF. Arguments: xy_pairs The X-Y coordinate pairs (e.g., decimal lat/lon pairs) to be translated in the form [[lon1, lat1], [lon2, lat2]] gt [Optional] A GDAL GeoTransform tuple wkt [Optional] Projection information as Well-Known Text path The file location of the GeoTIFF dd True to use decimal degrees for longitude-latitude (False is the default) NOTE: This method does not take into account pixel size and assumes a high enough image resolution for pixel size to be insignificant. Essentially, xy_pairs assumed to be pixel centroids but may be assigned to the nearest neighbor of the intended pixel. For this reason, for relevant applications, target pixels should be embedded in a matrix of similar pixels, e.g., a 90-by-90 meter area, for Landsat pixels. ''' assert path is not None or (gt is not None and wkt is not None), \ 'Function requires either a reference dataset or a geotransform and projection' xy_pairs = map(list, xy_pairs) srs = osr.SpatialReference() # Create a spatial ref. for dataset if path is not None: ds = gdal.Open(path) # Load the image dataset gt = ds.GetGeoTransform() # Get geotransform of the dataset if dd: srs.ImportFromWkt(ds.GetProjection()) # Set up coord. transform. else: srs.ImportFromWkt(wkt) # Set up coord. transform. # Will use decimal-degrees if so specified if dd: ct = osr.CoordinateTransformation(srs.CloneGeogCS(), srs) # Go through all the point pairs and translate them to lng-lat pairs pixel_pairs = [] for point in xy_pairs: if dd: # Change the point locations into the GeoTransform space (point[0], point[1], holder) = ct.TransformPoint(point[0], point[1]) # Translate the x and y coordinates into pixel values x = (point[0] - gt[0]) / gt[1] y = (point[1] - gt[3]) / gt[5] pixel_pairs.append((int(x), int(y))) # Add point to our return array return pixel_pairs