"""Routines used for extracting the raw FERC 714 data."""

import logging
import pathlib
import zipfile

import pandas as pd

import pudl.constants as pc

logger = logging.getLogger(__name__)

TABLE_FNAME = {
    "id_certification_ferc714": "Part 1 Schedule 1 - Identification Certification.csv",
    "gen_plants_ba_ferc714": "Part 2 Schedule 1 - Balancing Authority Generating Plants.csv",
    "demand_monthly_ba_ferc714": "Part 2 Schedule 2 - Balancing Authority Monthly Demand.csv",
    "net_energy_load_ba_ferc714": "Part 2 Schedule 3 - Balancing Authority Net Energy for Load.csv",
    "adjacency_ba_ferc714": "Part 2 Schedule 4 - Adjacent Balancing Authorities.csv",
    "interchange_ba_ferc714": "Part 2 Schedule 5 - Balancing Authority Interchange.csv",
    "lambda_hourly_ba_ferc714": "Part 2 Schedule 6 - Balancing Authority Hourly System Lambda.csv",
    "lambda_description_ferc714": "Part 2 Schedule 6 - System Lambda Description.csv",
    "description_pa_ferc714": "Part 3 Schedule 1 - Planning Area Description.csv",
    "demand_forecast_pa_ferc714": "Part 3 Schedule 2 - Planning Area Forecast Demand.csv",
    "demand_hourly_pa_ferc714": "Part 3 Schedule 2 - Planning Area Hourly Demand.csv",
    "respondent_id_ferc714": "Respondent IDs.csv",
}
"""Dictionary mapping PUDL tables to filenames within the FERC 714 zipfile."""

TABLE_ENCODING = {
    "id_certification_ferc714": "iso-8859-1",
    "gen_plants_ba_ferc714": "iso-8859-1",
    "demand_monthly_ba_ferc714": None,
    "net_energy_load_ba_ferc714": None,
    "adjacency_ba_ferc714": "iso-8859-1",
    "interchange_ba_ferc714": "iso-8859-1",
    "lambda_hourly_ba_ferc714": None,
    "lambda_description_ferc714": "iso-8859-1",
    "description_pa_ferc714": "iso-8859-1",
    "demand_forecast_pa_ferc714": None,
    "demand_hourly_pa_ferc714": None,
    "respondent_id_ferc714": None,
}
"""Dictionary describing the character encodings of the FERC 714 CSV files."""


def _get_zpath(pudl_table, pudl_settings):
    """Given a table and pudl_settings, return a Path to the requested file."""
    return zipfile.Path(
        pathlib.Path(pudl_settings["data_dir"],
                     "local/ferc714/ferc714.zip"),
        TABLE_FNAME[pudl_table]
    )


def extract(tables=pc.pudl_tables["ferc714"], pudl_settings=None):
    """
    Extract the raw FERC Form 714 dataframes from their original CSV files.

    Args:
        ferc714_tables (iterable): The set of tables to be extracted.
        pudl_settings (dict): A PUDL settings dictionary.

    Returns:
        dict: A dictionary of dataframes, with raw FERC 714 table names as the
        keys, and minimally processed pandas.DataFrame instances as the values.

    """
    raw_dfs = {}
    for table in tables:
        if table not in pc.pudl_tables["ferc714"]:
            raise ValueError(
                f"No extract function found for requested FERC Form 714 data "
                f"table {table}!"
            )
        logger.info(f"Reading {table} from CSV into pandas DataFrame.")
        with _get_zpath(table, pudl_settings).open() as f:
            raw_dfs[table] = pd.read_csv(f, encoding=TABLE_ENCODING[table])
    return raw_dfs