""" GBDX Vector Services Interface. Contact: nate.ricklin@digitalglobe.com """ #from __future__ import absolute_import from string import Template from builtins import object import six from imageio import imsave import base64 from io import BytesIO import dask.array as da import requests from shapely.wkt import loads as load_wkt from collections import OrderedDict import json, time, os from shapely.ops import cascaded_union from shapely.geometry import shape, box, mapping from gbdxtools.vector_layers import VectorGeojsonLayer, VectorTileLayer, \ ImageLayer from gbdxtools.map_templates import BaseTemplate from gbdxtools.auth import Auth class Vectors(object): default_index = 'vector-gbdx-alpha-catalog-v2-*' def __init__(self, **kwargs): ''' Construct the Vectors interface class Returns: An instance of the Vectors interface class. ''' interface = Auth(**kwargs) self.gbdx_connection = interface.gbdx_connection self.logger = interface.logger self.query_url = 'https://vector.geobigdata.io/insight-vector/api/vectors/query/items' self.query_index_url = 'https://vector.geobigdata.io/insight-vector/api/index/query/%s/items' self.query_page_url = 'https://vector.geobigdata.io/insight-vector/api/vectors/query/paging' self.query_index_page_url = 'https://vector.geobigdata.io/insight-vector/api/index/query/%s/paging' self.page_url = 'https://vector.geobigdata.io/insight-vector/api/vectors/paging' self.get_url = 'https://vector.geobigdata.io/insight-vector/api/vector/%s/' self.create_url = 'https://vector.geobigdata.io/insight-vector/api/vectors' self.aggregations_url = 'https://vector.geobigdata.io/insight-vector/api/aggregation' self.aggregations_by_index_url = 'https://vector.geobigdata.io/insight-vector/api/index/aggregation/%s' def create(self, vectors, index=None): """ Create a vectors in the vector service. Args: vectors: A single geojson vector or a list of geojson vectors. Item_type and ingest_source are required. index (str): optional index to write to, defaults to 'vector-user-provided' Returns: (dict): key 'SuccessfulItemIds' is a list of succesfully created feature URLs key 'errorMessages' is a list of failed feature error messages Example: >>> vectors.create( ... { ... "type": "Feature", ... "geometry": { ... "type": "Point", ... "coordinates": [1.0,1.0] ... }, ... "properties": { ... "text" : "item text", ... "name" : "item name", ... "item_type" : "type", ... "ingest_source" : "source", ... "attributes" : { ... "latitude" : 1, ... "institute_founded" : "2015-07-17", ... "mascot" : "moth" ... } ... } ... } ... ) """ if type(vectors) is dict: vectors = [vectors] # validate they all have item_type and ingest_source in properties for vector in vectors: if not 'properties' in list(vector.keys()): raise Exception('Vector does not contain "properties" field.') if not 'item_type' in list(vector['properties'].keys()): raise Exception('Vector does not contain "item_type".') if not 'ingest_source' in list(vector['properties'].keys()): raise Exception('Vector does not contain "ingest_source".') url = self.create_url if index is not None: url = '%s/%s/' % (url, index) r = self.gbdx_connection.post(url, data=json.dumps(vectors)) r.raise_for_status() return r.json() def create_from_wkt(self, wkt, item_type, ingest_source, index=None, **attributes): ''' Create a single vector in the vector service Args: wkt (str): wkt representation of the geometry item_type (str): item_type of the vector ingest_source (str): source of the vector attributes: a set of key-value pairs of attributes index (str): optional index to write to, defaults to 'vector-user-provided' Returns: (str): feature ID ''' # verify the "depth" of the attributes is single layer geojson = load_wkt(wkt).__geo_interface__ vector = { 'type': "Feature", 'geometry': geojson, 'properties': { 'item_type': item_type, 'ingest_source': ingest_source, 'attributes': attributes } } results = self.create(vector, index=index) if len(results['errorMessages']) == 0: item = results['successfulItemIds'][0] return item.split('/')[-1] else: raise Exception(results['errorMessages'][0]) def get(self, ID, index='vector-web-s'): '''Retrieves a vector. Not usually necessary because searching is the best way to find & get stuff. Args: ID (str): ID of the vector object index (str): Optional. Index the object lives in. defaults to 'vector-web-s' Returns: record (dict): A dict object identical to the json representation of the catalog record ''' url = self.get_url % index r = self.gbdx_connection.get(url + ID) r.raise_for_status() return r.json() def query(self, searchAreaWkt, query, count=100, ttl='10s', index=default_index): ''' Perform a vector services query using the QUERY API (https://gbdxdocs.digitalglobe.com/docs/vs-query-list-vector-items-returns-default-fields) ElasticSearch spatial indexing has some slop in it and can return some features that are near to but not overlapping the search geometry. If you need precise overlapping of the search API you will need to run a geometric check on each result. If the caller requests more than 1000 records and it's possible that it will take longer than the default TTL value to pull a single page of 1000 records into memory, it's possible to raise the TTL duration by setting the 'ttl' parameter to something higher than the default of 10 seconds. For example, to set the TTL to 30 seconds, use '30s'. For one minute, use '1m'. Args: searchAreaWkt: WKT Polygon of area to search query: Elastic Search query count: Maximum number of results to return, default is 100 ttl: Amount of time for each temporary vector page to exist Returns: List of vector results ''' if count < 1000: # issue a single page query search_area_polygon = load_wkt(searchAreaWkt) geojson = json.dumps(mapping(search_area_polygon)) params = { "q": query, "count": min(count,1000), } url = self.query_index_url % index if index else self.query_url r = self.gbdx_connection.post(url, data=geojson, params=params) r.raise_for_status() return r.json() else: return list(self.query_iteratively(searchAreaWkt, query, count, ttl, index)) def query_iteratively(self, searchAreaWkt, query, count=100, ttl='10s', index=default_index): ''' Perform a vector services query using the QUERY API (https://gbdxdocs.digitalglobe.com/docs/vs-query-list-vector-items-returns-default-fields) If iterating through a page of results results in seeing duplicate records consistently, it's possible that the query context TTL is expiring before the page is finished being processed by the caller. In that case, it's possible to raise the TTL duration by setting the 'ttl' parameter to something higher than the default of 10 seconds. For example, to set the TTL to 30 seconds, use '30s'. For one minute, use '1m'. Args: searchAreaWkt: WKT Polygon of area to search query: Elastic Search query count: Maximum number of results to return ttl: Amount of time for each temporary vector page to exist Returns: generator of vector results ''' search_area_polygon = load_wkt(searchAreaWkt) geojson = json.dumps(mapping(search_area_polygon)) params = { "q": query, "count": min(count,1000), "ttl": ttl, } # initialize paging request url = self.query_index_page_url % index if index else self.query_page_url r = self.gbdx_connection.post(url, params=params, data=geojson) r.raise_for_status() page = r.json() paging_id = page['next_paging_id'] item_count = int(page['item_count']) data = page['data'] num_results = 0 for vector in data: num_results += 1 if num_results > count: break yield vector if num_results == count: return # get vectors from each page while paging_id and item_count > 0 and num_results < count: headers = {'Content-Type':'application/x-www-form-urlencoded'} data = { "pagingId": paging_id, "ttl": ttl } r = self.gbdx_connection.post(self.page_url, headers=headers, data=data) r.raise_for_status() page = r.json() paging_id = page['next_paging_id'] item_count = int(page['item_count']) data = page['data'] for vector in data: num_results += 1 if num_results > count: break yield vector def aggregate_query(self, searchAreaWkt, agg_def, query=None, start_date=None, end_date=None, count=10, index=default_index): """Aggregates results of a query into buckets defined by the 'agg_def' parameter. The aggregations are represented by dicts containing a 'name' key and a 'terms' key holding a list of the aggregation buckets. Each bucket element is a dict containing a 'term' key containing the term used for this bucket, a 'count' key containing the count of items that match this bucket, and an 'aggregations' key containing any child aggregations. Args: searchAreaWkt (str): wkt representation of the geometry agg_def (str or AggregationDef): the aggregation definitions query (str): a valid Elasticsearch query string to constrain the items going into the aggregation start_date (str): either an ISO-8601 date string or a 'now' expression (e.g. "now-6d" or just "now") end_date (str): either an ISO-8601 date string or a 'now' expression (e.g. "now-6d" or just "now") count (int): the number of buckets to include in the aggregations (the top N will be returned) index (str): the index (or alias or wildcard index expression) to run aggregations against, set to None for the entire set of vector indexes Returns: results (list): A (usually single-element) list of dict objects containing the aggregation results. """ geojson = load_wkt(searchAreaWkt).__geo_interface__ aggs_str = str(agg_def) # could be string or AggregationDef params = { "count": count, "aggs": aggs_str } if query: params['query'] = query if start_date: params['start_date'] = start_date if end_date: params['end_date'] = end_date url = self.aggregations_by_index_url % index if index else self.aggregations_url r = self.gbdx_connection.post(url, params=params, json=geojson) r.raise_for_status() return r.json(object_pairs_hook=OrderedDict)['aggregations'] def tilemap(self, query, styles={}, bbox=[-180,-90,180,90], zoom=16, api_key=os.environ.get('MAPBOX_API_KEY', None), image=None, image_bounds=None, index="vector-user-provided", name="GBDX_Task_Output", **kwargs): """ Renders a mapbox gl map from a vector service query """ try: from IPython.display import display except: print("IPython is required to produce maps.") return assert api_key is not None, "No Mapbox API Key found. You can either pass in a token or set the MAPBOX_API_KEY environment variable." wkt = box(*bbox).wkt features = self.query(wkt, query, index=index) union = cascaded_union([shape(f['geometry']) for f in features]) lon, lat = union.centroid.coords[0] url = 'https://vector.geobigdata.io/insight-vector/api/mvt/{z}/{x}/{y}?'; url += 'q={}&index={}'.format(query, index); if styles is not None and not isinstance(styles, list): styles = [styles] map_id = "map_{}".format(str(int(time.time()))) map_data = VectorTileLayer(url, source_name=name, styles=styles, **kwargs) image_layer = self._build_image_layer(image, image_bounds) template = BaseTemplate(map_id, **{ "lat": lat, "lon": lon, "zoom": zoom, "datasource": json.dumps(map_data.datasource), "layers": json.dumps(map_data.layers), "image_layer": image_layer, "mbkey": api_key, "token": self.gbdx_connection.access_token }) template.inject() def map(self, features=None, query=None, styles=None, bbox=[-180,-90,180,90], zoom=10, center=None, image=None, image_bounds=None, cmap='viridis', api_key=os.environ.get('MAPBOX_API_KEY', None), **kwargs): """ Renders a mapbox gl map from a vector service query or a list of geojson features Args: features (list): a list of geojson features query (str): a VectorServices query styles (list): a list of VectorStyles to apply to the features bbox (list): a bounding box to query for features ([minx, miny, maxx, maxy]) zoom (int): the initial zoom level of the map center (list): a list of [lat, lon] used to center the map api_key (str): a valid Mapbox API key image (dict): a CatalogImage or a ndarray image_bounds (list): a list of bounds for image positioning Use outside of GBDX Notebooks requires a MapBox API key, sign up for free at https://www.mapbox.com/pricing/ Pass the key using the `api_key` keyword or set an environmental variable called `MAPBOX API KEY` cmap (str): MatPlotLib colormap to use for rendering single band images (default: viridis) """ try: from IPython.display import display except: print("IPython is required to produce maps.") return assert api_key is not None, "No Mapbox API Key found. You can either pass in a key or set the MAPBOX_API_KEY environment variable. Use outside of GBDX Notebooks requires a MapBox API key, sign up for free at https://www.mapbox.com/pricing/" if features is None and query is not None: wkt = box(*bbox).wkt features = self.query(wkt, query, index=None) elif features is None and query is None and image is None: print('Must provide either a list of features or a query or an image') return if styles is not None and not isinstance(styles, list): styles = [styles] geojson = {"type":"FeatureCollection", "features": features} if center is None and features is not None: union = cascaded_union([shape(f['geometry']) for f in features]) lon, lat = union.centroid.coords[0] elif center is None and image is not None: try: lon, lat = shape(image).centroid.coords[0] except: lon, lat = box(*image_bounds).centroid.coords[0] else: lat, lon = center map_id = "map_{}".format(str(int(time.time()))) map_data = VectorGeojsonLayer(geojson, styles=styles, **kwargs) image_layer = self._build_image_layer(image, image_bounds, cmap) template = BaseTemplate(map_id, **{ "lat": lat, "lon": lon, "zoom": zoom, "datasource": json.dumps(map_data.datasource), "layers": json.dumps(map_data.layers), "image_layer": image_layer, "mbkey": api_key, "token": 'dummy' }) template.inject() def _build_image_layer(self, image, image_bounds, cmap='viridis'): if image is not None: if isinstance(image, da.Array): if len(image.shape) == 2 or \ (image.shape[0] == 1 and len(image.shape) == 3): arr = image.compute() else: arr = image.rgb() coords = box(*image.bounds) else: assert image_bounds is not None, "Must pass image_bounds with ndarray images" arr = image coords = box(*image_bounds) b64 = self._encode_image(arr, cmap) return ImageLayer(b64, self._polygon_coords(coords)) else: return 'false'; def _polygon_coords(self, g): c = list(map(list, list(g.exterior.coords))) return [c[2], c[1], c[0], c[3]] def _encode_image(self, arr, cmap): io = BytesIO() imsave(io, arr, cmap=cmap) io.seek(0) img_str = base64.b64encode(io.getvalue()).decode() return 'data:image/{};base64,{}'.format('png', img_str) class AggregationDef(object): def __init__(self, agg_type=None, value=None, children=None): """Constructs an aggregation definition. Possible 'agg_type' values include: 'geohash', 'date_hist', 'terms', 'avg', 'sum', 'cardinality' , 'avg_geo_lat', 'avg_geo_lon'. The 'value' parameter is specific to whichever aggregation type is specified. For more, detail, please see the VectorServices aggregation REST API documentation. Args: agg_type(str): the aggregation type to define value(str): a value to supplement the type, often indicating how to divide up buckets children(str or AggregationDef): any child aggregations to be run on each bucket Returns: the created AggregationDef """ self.agg_type = agg_type self.value = value self.children = children def __repr__(self): """Creates a string representation of an aggregation definition suitable for use in VectorServices calls Returns: A string representation of an aggregation definition suitable for use in VectorServices calls """ if self.value: base = '%s:%s' % (self.agg_type, self.value) else: base = '%s' % self.agg_type if self.children: if isinstance(self.children, six.string_types): return '%s;%s' % (base, self.children) elif isinstance(self.children, AggregationDef): return '%s;%s' % (base, self.children.__repr__()) else: # assume it's iterable kids = [] for child in self.children: kids.append(child.__repr__()) kids_str = '(%s)' % ','.join(kids) return '%s;%s' % (base, kids_str) else: return base class GeohashAggDef(AggregationDef): def __init__(self, hash_length='3', **kwargs): super(GeohashAggDef, self).__init__('geohash', hash_length, **kwargs) class DateHistogramAggDef(AggregationDef): def __init__(self, bucket_period='M', **kwargs): super(DateHistogramAggDef, self).__init__('date_hist', bucket_period, **kwargs) class FieldBasedAggDef(AggregationDef): def __init__(self, agg_type, field=None, **kwargs): if not field: raise Exception('The "field" property cannot be empty.') super(FieldBasedAggDef, self).__init__(agg_type, field, **kwargs) class TermsAggDef(FieldBasedAggDef): def __init__(self, field=None, **kwargs): super(TermsAggDef, self).__init__('terms', field, **kwargs) class CardinalityAggDef(FieldBasedAggDef): def __init__(self, field=None): super(CardinalityAggDef, self).__init__('cardinality', field) class AvgAggDef(FieldBasedAggDef): def __init__(self, field=None): super(AvgAggDef, self).__init__('avg', field) class SumAggDef(FieldBasedAggDef): def __init__(self, field=None): super(SumAggDef, self).__init__('sum', field) class AvgGeoLatAggDef(AggregationDef): def __init__(self): super(AvgGeoLatAggDef, self).__init__('avg_geo_lat') class AvgGeoLonAggDef(AggregationDef): def __init__(self): super(AvgGeoLonAggDef, self).__init__('avg_geo_lon')