Python source code of estimate_arrivals

Project: performance_tracker (GitHub Link)

performance_tracker-master
- src
  - actions.py
  - library
    - query_predictions.py
    - prepare_stop_list.py
    - process_vehicles.py
    - helpers
      - s3_resource.py
      - datetimefs.py
      - __init__.py
      - fs_resource.py
      - timing.py
    - process_schedule.py
    - preprocess_vehicles.py
    - get_schedule.py
    - upload_latest.py
    - get_vehicles.py
    - analysis
      - nextbus.py
      - __init__.py
    - __init__.py
    - context.py
    - record.py
    - estimate_arrivals.py
    - analyzer
      - visualizer.py
      - line.py
      - process_vehicles.py
      - calendar.py
      - summary.py
      - geoHelpers.py
      - analyze_estimates.py
      - stations.py
      - __init__.py
      - tracker.py
      - track.py
      - nextBusData.py
      - schedule.py
      - estimate_arrivals.py
  - data_migration
    - step_2.py
    - step_1.py
  - __init__.py
  - main.py
  - README.md
  - directions.md
- LICENSE
- sample_crontab
- data
  - GIS
    - stations
      - Stations_All_0316.shp
      - Stations_All_0316.dbf
      - Stations_All_0316.sbx
      - Stations_All_0316.shx
      - Stations_All_0316.prj
      - Stations_All_0316.sbn
    - track_edits.qgz
    - tracks
      - 803_lametro-rail
        - 803_Track_0316.prj
        - 803_Track_0316.cpg
        - 803_Track_0316.shx
        - 803_Track_0316.sbn
        - 803_Track_0316.sbx
        - 803_Track_0316.dbf
        - 803_Track_0316.shp
      - 806_lametro-rail
        - 806_Track_0316.shx
        - 806_Track_0316.sbx
        - 806_Track_0316.cpg
        - 806_Track_0316.shp.xml
        - 806_Track_0316.dbf
        - 806_Track_0316.sbn
        - 806_Track_0316.shp
        - 806_Track_0316.prj
      - 804_lametro-rail
        - 804_Track_0316.sbx
        - 804_Track_0316.shp
        - 804_Track_0316.sbn
        - 804_Track_0316.shx
        - 804_Track_0316.dbf
        - 804_Track_0316.cpg
        - 804_Track_0316.prj
      - 801_lametro-rail
        - 801_Track_0316.shp
        - 801_Track_0316.sbn
        - 801_Track_0316.shx
        - 801_Track_0316.sbx
        - 801_Track_0316.shp.xml
        - 801_Track_0316.cpg
        - 801_Track_0316.dbf
        - 801_Track_0316.prj
      - 802_805_lametro-rail
        - 802_805_Track_0316.dbf
        - 802_805_Track_0316.cpg
        - 802_805_Track_0316.sbx
        - 802_805_Track_0316.shp.xml
        - 802_805_Track_0316.prj
        - 802_805_Track_0316.shx
        - 802_805_Track_0316.sbn
        - 802_805_Track_0316.shp
  - line_info
    - 803
      - 803_0_stations.csv
      - 803_0.geojson
      - 803_1_stations.csv
      - 803_1.geojson
    - 806
      - 806_1.geojson
      - 806_0_stations.csv
      - 806_0.geojson
      - 806_1_stations.csv
    - 804
      - 804_1_stations.csv
      - 804_0.geojson
      - 804_1.geojson
      - 804_0_stations.csv
    - 805
      - 805_0.geojson
      - 805_0_stations.csv
      - 805_1_stations.csv
      - 805_1.geojson
    - 802
      - 802_1_stations.csv
      - 802_1.geojson
      - 802_0_stations.csv
      - 802_0.geojson
    - 801
      - 801_0_stations.csv
      - 801_1_stations.csv
      - 801_1.geojson
      - 801_0.geojson
- sample_env
- tasks
  - EVERY_MINUTE.sh
  - EVERY_6_HOURS.sh
  - EVERY_15_MINS.sh
- README.md
- tests
  - test_timing.py
  - test_summary.py
- AWS
  - AWS_Setup.sh
- requirements.txt
- Dockerfile
- .gitignore

import pandas as pd


def estimate_arrivals(trip_id, trip, stations, direction):
    """Estimate when one trip reached each station by linear interpolation.

    Observation rows and station rows are stacked and sorted by
    ``relative_position``. Every station row then derives a timestamp from
    the rows immediately before and after it, weighted by how far the
    station lies between their two positions.

    Neither ``trip`` nor ``stations`` is modified; all work is done on
    copies.

    Args:
        trip_id: Value stored in the ``trip_id`` column of every estimate.
        trip: DataFrame of observations for a single trip. Must provide
            ``relative_position`` and ``datetime`` columns.
        stations: DataFrame of stations. Must provide ``relative_position``
            and ``stop_id`` columns.
            NOTE(review): assumed to carry no ``datetime`` values of its
            own -- confirm against the caller.
        direction: Value stored in the ``direction_id`` column of every
            estimate.

    Returns:
        DataFrame holding the station rows for which a time could be
        interpolated. Besides the input columns it carries ``estimate``
        (always True), ``previous_pos``, ``next_pos``, ``previous_dt``,
        ``next_dt``, ``weight``, ``time_interpolation`` and the estimated
        ``datetime`` rounded to whole seconds. Stations that lack a
        timestamped row directly on both sides (for example at either end
        of the sorted rows) produce NaT and are dropped.
    """
    # Work on labelled copies so the caller's frames stay untouched;
    # ``stations`` in particular is passed in again for every trip.
    observed = trip.assign(estimate=False)
    targets = stations.assign(
        estimate=True, trip_id=trip_id, direction_id=direction
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
    combined = pd.concat([observed, targets], sort=False)
    # mergesort is stable, so when an observation and a station share a
    # position the observation (concatenated first) is ordered first.
    combined = combined.sort_values("relative_position", kind="mergesort")
    combined = combined.reset_index(drop=True)

    # Shift values to move adjacent position and date data into each row.
    combined["previous_pos"] = combined["relative_position"].shift()
    combined["next_pos"] = combined["relative_position"].shift(-1)
    combined["previous_dt"] = combined["datetime"].shift()
    combined["next_dt"] = combined["datetime"].shift(-1)

    # Explicit copy: the columns below are written onto this subset, which
    # would otherwise be a slice of ``combined``.
    select = combined[combined["estimate"].eq(True)].copy()

    # Fraction of the way from the previous row to the next row.
    select["weight"] = (select["relative_position"] - select["previous_pos"]) / (
        select["next_pos"] - select["previous_pos"]
    )
    select["time_interpolation"] = (
        select["next_dt"] - select["previous_dt"]
    ) * select["weight"]
    estimated = select["previous_dt"] + select["time_interpolation"]
    # "s" is the seconds alias; the upper-case "S" spelling is deprecated.
    select["datetime"] = pd.DatetimeIndex(estimated).round("s")
    # Plain column assignment replaces the column, so the downcast integer
    # dtype is kept instead of being cast back to the old column dtype.
    select["stop_id"] = pd.to_numeric(select["stop_id"], downcast="integer")

    # Some station estimates cannot be reliably estimated using this
    # technique and will have datetime = NaT, so we remove them.
    return select.dropna(subset=["datetime"])


def estimate_arrivals_by_trip(trips, stations, direction):
    """Run ``estimate_arrivals`` for every trip and stack the results.

    Args:
        trips: Iterable yielding ``(trip_id, trip)`` pairs.
            NOTE(review): presumably a pandas groupby over trip ids --
            confirm against the caller.
        stations: Passed unchanged to ``estimate_arrivals`` for each trip.
        direction: Passed unchanged to ``estimate_arrivals`` for each trip.

    Returns:
        The per-trip results concatenated with ``pd.concat``.

    Raises:
        ValueError: Raised by ``pd.concat`` when ``trips`` yields nothing.
    """
    per_trip_estimates = []
    for trip_id, trip in trips:
        arrivals = estimate_arrivals(trip_id, trip, stations, direction)
        per_trip_estimates.append(arrivals)
    return pd.concat(per_trip_estimates)