python source code of synthetic

import abc
import warnings
from functools import partial
from typing import Any, Dict, Iterable, List, Optional

import geopandas as gpd
import pandas as pd
import pyproj
from shapely.geometry import LineString, MultiPoint, Point, shape
from shapely.ops import linemerge, split, transform

from .toolkit import generate_random_name


def _generate_point_array_override(
        mp_array: Iterable[Point],
        route_shape: LineString,
        existing_graph_nodes: gpd.GeoDataFrame,
        stop_distance_distribution: float) -> Iterable[Point]:
    # TODO: Better parameterize the factor used with the distance distribution
    # Figure that we give a 10% "give" - this is, if a stop is within
    # 10% of the target segment distance, then it should be re-assigned
    # to that stop
    reasonable_distance = 0.5 * stop_distance_distribution

    # TODO: Better parameterize the buffer distance setting here
    # Find the nearby stops for the target line from those available
    buffer_distance = 10  # meters
    buffered = route_shape.buffer(buffer_distance)

    try:
        # Use spatial index if available
        egn_sindex = existing_graph_nodes.sindex
        possibles = list(egn_sindex.intersection(route_shape.bounds))
        existing_graph_nodes_sub = existing_graph_nodes.iloc[possibles]
        intersects_mask = existing_graph_nodes_sub.intersects(buffered)
        intersecting_stops_gdf = existing_graph_nodes_sub[intersects_mask]

    except Exception:
        # Otherwise skip the spatial index and just intersect
        # against all geometries in the GeoDataFrame
        intersects_mask = existing_graph_nodes.intersects(buffered)
        intersecting_stops_gdf = existing_graph_nodes[intersects_mask]

    # If we encounter a situation where there are no nearby nodes, bail
    # early and just retyurn the original input array
    if len(intersecting_stops_gdf) == 0:
        return mp_array

    # TODO: Right now we only consider the stops as points (shapes),
    #       but in the future should aim to reuse graph nodes that are
    #       the same location by passing along the node id
    intersecting_stops = intersecting_stops_gdf['geometry'].values

    mp_array_override = []
    for pt in mp_array:
        # TODO: This is can be super slow, is there a potentially
        #       faster path or is the fact that this is only a subset
        #       of all stop nodes mean that the number of points being
        #       evaluated is acceptably small?
        dists = intersecting_stops_gdf.distance(pt).values

        # Skip if the closest existing stop is still too far away
        if dists.min() > reasonable_distance:
            mp_array_override.append(pt)
            continue

        # TODO: This mask operation is slow - is there a faster way?
        dists_mask = dists <= dists.min()
        sub_g_nodes = intersecting_stops[dists_mask]

        # Handle edge cases where matches are not captured
        if len(sub_g_nodes) == 0:
            mp_array_override.append(pt)
            continue

        # Otherwise, we should pass the first nearest stop node
        # as the new stop location
        first_nearest = sub_g_nodes[0]

        # But still make sure that it is on the line because
        # we need to use the points to break up the line and get the
        # segment distances in the next step
        adj_projected = route_shape.project(first_nearest)
        adjusted_nearest = route_shape.interpolate(adj_projected,
                                                   normalized=True)
        mp_array_override.append(adjusted_nearest)

    return mp_array_override


def generate_meter_projected_chunks(
        route_shape: LineString,
        custom_stops: Optional[List[List[float]]]=None,
        stop_distance_distribution: int=None,
        from_proj='epsg:4326',
        to_proj='epsg:2163',
        existing_graph_nodes: Optional[pd.DataFrame]=None) -> List[LineString]:

    # Reproject 4326 lat/lon coordinates to equal area
    project = partial(
        pyproj.transform,
        # source coordinate system
        pyproj.Proj(init=from_proj, preserve_units=True),
        # destination coordinate system
        pyproj.Proj(init=to_proj, preserve_units=True))

    rs2 = transform(project, route_shape)  # apply projection

    # Two ways to break apart this route into chunks:
    #   1. Using custom stops as break points (this one takes precedence)
    #   2. Using a custom distance to segment out the route

    # In either case, we need to generate mp_array such that we have
    # target stops or "break points" for the route line shape

    # Path 1 if available
    if custom_stops is not None:
        mp_array = []
        for custom_stop in custom_stops:
            # Now reproject with cast point geometry
            custom_stop_proj = transform(project, Point(custom_stop))
            interp_stop = rs2.interpolate(rs2.project(custom_stop_proj))
            mp_array.append(interp_stop)

    # Otherwise we go with path 2
    else:
        # Sanity check (this should never occur due to checks in class init)
        if stop_distance_distribution is None:
            raise ValueError(('Auto stop assignment triggered, but '
                              'stop_distance_distribution is Nonetype'))

        # Divide the target route into roughly equal length segments
        # and get the number that would be needed to accomplish this
        stop_count = round(rs2.length / stop_distance_distribution)

        # Create the array of break points/joints
        mp_array = []
        for i in range(1, stop_count):
            fr = (i / stop_count)
            mp_array.append(rs2.interpolate(fr, normalized=True))

        # At this point, we have an array of what might be described as
        # "potential stops." From these stops, we want to look to see if there
        # are nearby stop alternatives that are existing stops used by the
        # current network graph. If there are, then we should use those
        # instead.
        if existing_graph_nodes is not None and len(existing_graph_nodes) > 0:
            mp_array_override = _generate_point_array_override(
                mp_array,
                rs2,
                existing_graph_nodes,
                stop_distance_distribution)

            # Now that mp_array_override has been fully populated,
            # can now override the original array holding the estimated
            # stop point locations
            mp_array = mp_array_override

    # Cast array as a Shapely object
    splitter = MultiPoint(mp_array)

    # 1 meter buffer to address floating point discrepencies
    chunks = split(rs2, splitter.buffer(1))

    # TODO: Potential for length errors with this 1 meter
    #       threshold check

    # Take chunks and merge in the small lines
    # from intersection inside of the buffered circles
    # and attach to nearest larger line
    clean_chunks = [chunks[0]]
    r = len(chunks)
    for c in range(1, r):
        latest = clean_chunks[-1]
        current = chunks[c]
        # Again, this is a week point of the buffer of
        # 1 meter method
        if latest.length <= 2:
            # Merge in the small chunks with the larger chunks
            clean_chunks[-1] = linemerge([latest, current])
        else:
            clean_chunks.append(current)

    return clean_chunks


def generate_stop_points(chunks: List[LineString]) -> List[Point]:
    all_points = []

    # First point is the first node on the first line
    first_chunk = chunks[0].coords
    first_pt = [Point(p) for p in first_chunk][0]
    all_points.append(first_pt)

    # Then for all other points, we get from the end of each line
    for chunk in chunks:
        last_pt = [Point(p) for p in chunk.coords][-1]
        all_points.append(last_pt)

    # Now we need to convert to a Shapely object
    ap_ma = MultiPoint(all_points)

    # So we can reproject back out of equal area to 4326
    project = partial(
        pyproj.transform,
        pyproj.Proj(init='epsg:2163'),  # source coordinate system
        pyproj.Proj(init='epsg:4326'))  # destination coordinate system

    ap_ma_reproj = transform(project, ap_ma)  # apply projection

    # Final step will be to pull out all points back into a list
    return [p for p in ap_ma_reproj]


def generate_stop_ids(stops_count: int) -> List[str]:
    shape_name = generate_random_name(5)
    stop_names = []
    for i in range(stops_count):
        stop_names.append('_'.join([shape_name, str(i)]))
    return stop_names


def generate_nodes_df(
        stop_ids: List[str],
        all_points: List[Point],
        headway: float,
        wait_time_cost_method: Any,
        mode: Optional[str]) -> pd.DataFrame:
    avg_costs = []
    stop_lats = []
    stop_lons = []
    mode_col = []

    default_avg_cost = headway / 2

    for point in all_points:
        avg_costs.append(default_avg_cost)
        stop_lats.append(point.y)
        stop_lons.append(point.x)

        # TODO: Improve addition of the mode list to the
        #       nodes dataframe
        if mode:
            mode_col.append([mode])
        else:
            # When no mode is provided just pass through empty list
            mode_col.append([])

    nodes_df = pd.DataFrame({
        'stop_id': stop_ids,
        'avg_cost': avg_costs,
        'stop_lat': stop_lats,
        'stop_lon': stop_lons,
        'modes': mode_col,
    })

    return nodes_df


def generate_edges_df(
        stop_ids: List[str],
        chunks: List[LineString],
        avg_speed: float) -> pd.DataFrame:
    from_stop_ids = []
    to_stop_ids = []
    edge_costs = []
    mode_col = []

    paired_nodes = list(zip(stop_ids[:-1], stop_ids[1:]))

    # Sanity check
    if not len(chunks) == len(paired_nodes):
        raise Exception('Chunking operation did not result '
                        'correct route shape subdivisions. '
                        '\nChunk len: {} \nPaired len: {}'.format(
                            str(len(chunks)),
                            str(len(paired_nodes))))

    for i, nodes in enumerate(paired_nodes):
        point_a = nodes[0]
        point_b = nodes[1]
        from_stop_ids.append(point_a)
        to_stop_ids.append(point_b)

        # Estimate the amount of time it would
        # take to traverse this portion of the
        # route path given the default speed
        l_km = chunks[i].length / 1000  # distance in km

        # Note: Average speed is to be supplied in kmph
        in_hours = round(l_km / avg_speed, 3)
        # Convert to seconds
        in_seconds = in_hours * 60 * 60
        edge_costs.append(in_seconds)

    edges_df = pd.DataFrame({
        'from_stop_id': from_stop_ids,
        'to_stop_id': to_stop_ids,
        'edge_cost': edge_costs,
    })

    return edges_df


def _validate_feature_properties(props: Dict) -> Dict:
    fresh_props = {}

    if 'headway' in props:
        fresh_props['headway'] = float(props['headway'])

    if 'average_speed' in props:
        fresh_props['average_speed'] = float(props['average_speed'])

    if 'bidirectional' in props:
        fresh_props['bidirectional'] = bool(props['bidirectional'])

    if 'mode' in props:
        # GTFS mode types are held as strings
        fresh_props['mode'] = str(props['mode'])

    # For this section, either the custom stops of the stop distance
    # value must be set
    if 'stops' in props:
        # Make sure that this value is supplied as a list
        if isinstance(props['stops'], list):
            fresh_props['custom_stops'] = props['stops']

    if 'stop_distance_distribution' in props:
        fresh_props['stop_dist'] = float(props['stop_distance_distribution'])

    # Sanity check; if both custom stops and stops distance are None
    # then we cannot proceed
    no_stops = 'custom_stops' not in fresh_props
    no_dist = 'stop_dist' not in fresh_props
    if no_stops and no_dist:
        raise ValueError('Synthetic network addition must have either '
                         'custom stops or stops distance default set.')

    if 'mode' not in fresh_props:
        # Mode id based on GTFS mode spec - routes supplied absent modes
        # may lead to undesired effects
        warnings.warn(
            'No mode id supplied for synthetic route, leaving blank.')

    return fresh_props


class SyntheticTransitLine(abc.ABC):
    """
    Represents a single synthetic transit lines custom attributes and shape.

    Derived from a single Feature in a TransitJSON GeoJSON FeatureCollection.
    """

    def __init__(
            self,
            feature: Dict[str, Any],
            wait_time_cost_method: Any,
            existing_graph_nodes: Optional[pd.DataFrame]=None):
        # All values have defaults built in; which are overridden when the
        # user supplies, through the TransitJSON, custom values for those
        # properties.
        feature_props = feature['properties']
        props = _validate_feature_properties(feature_props)

        self._mode = props.get('mode', None)

        # Headway measured in seconds (30 minutes to seconds)
        self._headway = props.get('headway', 30 * 60)
        self._wait_time_cost_method = wait_time_cost_method

        # Speed is measured in miles per hour
        self._average_speed = props.get('average_speed', 8)

        self._bidirectional = props.get('bidirectional', False)

        # Via the validation step; one of these two will be set
        custom_stops = props.get('custom_stops', None)
        self.used_custom_stops = custom_stops is not None

        # Note that stops distances are set in meters (e.g. 402 meters
        # is the equivalent of every 1/4 of a mile)
        stop_distance = props.get('stop_dist', 402)

        # We require this GeoJSON coordinate component to be valid format
        self._route_path = shape(feature['geometry'])

        # Generate reference geometry data, note (and this is confusing) but
        # chunks is in meter projection and all_pts is in web mercator
        # this is because we only need (from chunks) the length value
        # and do not actually preserve the geometry beyond these operations
        chunks = generate_meter_projected_chunks(
            route_shape=self._route_path,
            custom_stops=custom_stops,
            stop_distance_distribution=stop_distance,
            existing_graph_nodes=existing_graph_nodes)

        # Generate stops from each chunk and assign each a unique id
        all_pts = generate_stop_points(chunks)
        stop_ids = generate_stop_ids(len(all_pts))

        # Produce key graph components
        self._nodes = generate_nodes_df(
            stop_ids,
            all_pts,
            self._headway,
            self._wait_time_cost_method,
            self._mode)
        self._edges = generate_edges_df(
            stop_ids,
            chunks,
            self._average_speed)

    def get_nodes(self) -> pd.DataFrame:
        # Do this to prevent upstream mutation of the reference DataFrame
        return self._nodes.copy()

    def get_edges(self) -> pd.DataFrame:
        # Do this to prevent upstream mutation of the reference DataFrame
        return self._edges.copy()

    def get_bidrectional(self) -> bool:
        # Always attempt to avoid mutation
        return bool(self._bidirectional)

    nodes = property(get_nodes)
    edges = property(get_edges)
    bidirectional = property(get_bidrectional)


class SyntheticTransitNetwork(abc.ABC):
    """
    Holds a list of SyntheticTransitLine

    Below is an example of the most basic TransitJSON that could/should be \
    provided to peartree for modeling out a new transit line:

    {
        "type": "FeatureCollection",
        "features": [
            {
                "geometry": {
                    "type": "LineString",
                    "coordinates": [
                        [-82.7, 39.5], [-82.8, 39.6], [-82.9, 39.7] ]
                }, "properties": {
                        "average_speed": 16,
                        "headway": 600,
                        "stops": [ [-82.8, 39.6] ],
                }, "type": "Feature"
            }
        ],
    }
    """

    def __init__(
            self,
            feature_collection: Dict[str, Any],
            wait_time_cost_method: Any,
            existing_graph_nodes: Optional[pd.DataFrame]=None):
        # Initialize an empty list
        self._lines = []

        # For each Feature in the FeatureCollection group; add an additional
        # instantiated SyntheticTransitLine object
        for feature in feature_collection['features']:
            new_line = SyntheticTransitLine(
                feature,
                wait_time_cost_method,
                existing_graph_nodes)
            self._lines.append(new_line)

    def _create_all_lines_generator(self):
        for line in self._lines:
            yield line

    def get_all_lines(self) -> Iterable[SyntheticTransitLine]:
        return self._create_all_lines_generator()

    lines = property(get_all_lines)