Python pandas.read_sql_table() Examples

The following are 30 code examples of pandas.read_sql_table(), drawn from open-source projects. The originating project and source file are noted above each example. You may also want to check out the other available functions and classes of the pandas module.
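For orientation before the examples: pandas.read_sql_table() reads an entire SQL table into a DataFrame given the table name and a SQLAlchemy connectable, and requires SQLAlchemy to be installed. A minimal, self-contained sketch (the table name and columns are invented for illustration):

import pandas as pd
from sqlalchemy import create_engine

# In-memory SQLite database. SQLAlchemy reuses a single connection for
# sqlite:///:memory:, so the table written below remains visible.
engine = create_engine("sqlite:///:memory:")
pd.DataFrame(
    {"trade_date": ["2020-01-01", "2020-01-02"], "price": [10.5, 11.0]}
).to_sql("trades", engine, index=False)

# Read the table back, loading only selected columns and parsing dates.
df = pd.read_sql_table(
    "trades",
    engine,
    columns=["trade_date", "price"],
    parse_dates=["trade_date"],
)
print(df.dtypes)  # trade_date is parsed as datetime64[ns]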
Example #1
Source File: load_data.py    From PowerGenome with MIT License
def load_ownership_eia860(pudl_engine, data_years=[2017]):

    cols = [
        "report_date",
        "utility_id_eia",
        "plant_id_eia",
        "generator_id",
        # "operational_status_code",
        "owner_utility_id_eia",
        "owner_name",
        "owner_state",
        "fraction_owned",
    ]
    ownership = pd.read_sql_table(
        "ownership_eia860", pudl_engine, columns=cols, parse_dates=["report_date"]
    )
    ownership = ownership.loc[ownership["report_date"].dt.year.isin(data_years)]

    return ownership 
Example #2
Source File: orf.py    From mikado with GNU Lesser General Public License v3.0
def serialize(self):
        """
        This method performs the parsing of the ORF file and the
        loading into the SQL database.
        """

        self.load_fasta()
        self.query_cache = pd.read_sql_table("query", self.engine, index_col="query_name", columns=["query_name", "query_id"])
        self.query_cache = self.query_cache.to_dict()["query_id"]
        self.initial_cache = (len(self.query_cache) > 0)

        if self.procs == 1:
            self.__serialize_single_thread()
        else:
            try:
                self.__serialize_multiple_threads()
            finally:
                pass 
Example #3
Source File: ferc1.py    From pudl with MIT License
def plants_small_ferc1(pudl_engine):
    """Pull a useful dataframe related to the FERC Form 1 small plants."""
    plants_small_df = (
        pd.read_sql_table("plants_small_ferc1", pudl_engine)
        .drop(['id'], axis="columns")
        .merge(pd.read_sql_table("utilities_ferc1", pudl_engine),
               on="utility_id_ferc1")
        .pipe(pudl.helpers.organize_cols, ['report_year',
                                           'utility_id_ferc1',
                                           'utility_id_pudl',
                                           'utility_name_ferc1',
                                           "plant_name_original",
                                           'plant_name_ferc1',
                                           "record_id"])
    )
    return plants_small_df 
Example #4
Source File: routes.py    From thewarden with MIT License
def tradedetails():
    if request.method == "GET":
        id = request.args.get("id")
        # if tradesonly is true then only look at buys and sells
        tradesonly = request.args.get("trades")
        df = pd.read_sql_table("trades", db.engine)
        # Filter only the trades for current user
        df = df[(df.user_id == current_user.username)]
        df = df[(df.trade_reference_id == id)]
        # Filter only buy and sells, ignore deposit / withdraw
        if tradesonly:
            df = df[(df.trade_operation == "B") | (df.trade_operation == "S")]
        # df['trade_date'] = pd.to_datetime(df['trade_date'])
        df.set_index("trade_reference_id", inplace=True)
        df.drop("user_id", axis=1, inplace=True)
        details = df.to_json()
        return details 
Example #5
Source File: nrelatb.py    From PowerGenome with MIT License
def fetch_atb_heat_rates(pudl_engine):
    """Get heat rate projections for power plants

    The data is originally from AEO; NREL does a linear interpolation between the
    current and final years.

    Parameters
    ----------
    pudl_engine : sqlalchemy.Engine
        A sqlalchemy connection for use by pandas

    Returns
    -------
    DataFrame
        Power plant heat rate data by year with columns:
        ['technology', 'tech_detail', 'basis_year', 'heat_rate']
    """

    heat_rates = pd.read_sql_table("technology_heat_rates_nrelatb", pudl_engine)

    return heat_rates 
Example #6
Source File: load_data.py    From PowerGenome with MIT License
def load_ipm_plant_region_map(pudl_engine):
    """Load the table associating each power plant to an IPM region

    Parameters
    ----------
    pudl_engine : sqlalchemy.Engine
        A sqlalchemy connection for use by pandas

    Returns
    -------
    dataframe
        All plants in the NEEDS database and their associated IPM region. Columns are
        plant_id_eia and region.
    """
    region_map_df = pd.read_sql_table(
        "plant_region_map_epaipm", con=pudl_engine, columns=["plant_id_eia", "region"]
    )

    return region_map_df 
Example #7
Source File: io.py    From modin with Apache License 2.0
def read_sql_table(
        cls,
        table_name,
        con,
        schema=None,
        index_col=None,
        coerce_float=True,
        parse_dates=None,
        columns=None,
        chunksize=None,
    ):
        ErrorMessage.default_to_pandas("`read_sql_table`")
        return cls.from_pandas(
            pandas.read_sql_table(
                table_name,
                con,
                schema=schema,
                index_col=index_col,
                coerce_float=coerce_float,
                parse_dates=parse_dates,
                columns=columns,
                chunksize=chunksize,
            )
        ) 
Example #8
Source File: data_download.py    From stock with Apache License 2.0
def download_all_stock_history_k_line():
    print('download all stock k-line start')

    try:
        engine = db.get_w_engine()
        df = pd.read_sql_table(STOCK_BASIC_TABLE, engine)
        codes = df[KEY_CODE].tolist()
        print('total stocks:{0}'.format(len(codes)))
        for code in codes:
            download_stock_kline_by_code(code)

        # codes = codes[::-1]
        #codes = r.lrange(INDEX_STOCK_BASIC, 0, -1)
        # pool = ThreadPool(processes=10)
        # pool.map(download_stock_kline_by_code, codes)
        # pool.close()
        # pool.join()

    except Exception as e:
        print(str(e))
    print('download all stock k-line finish') 
Example #9
Source File: test_packers.py    From elasticintel with GNU General Public License v3.0
def test_readonly_axis_zlib_to_sql(self):
        # GH11880
        if not _ZLIB_INSTALLED:
            pytest.skip('no zlib')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='zlib')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #10
Source File: test_packers.py    From elasticintel with GNU General Public License v3.0
def test_readonly_axis_blosc_to_sql(self):
        # GH11880
        if not _BLOSC_INSTALLED:
            pytest.skip('no blosc')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='blosc')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #11
Source File: run.py    From apicheck with Apache License 2.0
def run(running_config: RunningConfig):
    target = HDFStore(running_config.fout)
    df = pd.read_sql_table(
        "proxy_logs",
        "sqlite:///mydatabase.sqlite3",
        index_col='id'
    )

    request = json_to_columns(df, 'request')
    request["session"] = df["proxy_session_id"]
    response = json_to_columns(df, 'response')
    response["session"] = df["proxy_session_id"]
    request_headers = request['headers'].apply(pd.Series)
    response_headers = response['headers'].apply(pd.Series)
    request = request.drop(columns="headers")
    request_headers_norm = pd.melt(
        request_headers.reset_index(), id_vars=["id"], var_name="header"
    )
    request_headers_norm = request_headers_norm.dropna()
    request_headers_norm["type"] = "request"
    response = response.drop(columns="headers")
    response_headers_norm = pd.melt(
        response_headers.reset_index(), id_vars=["id"], var_name="header"
    )
    response_headers_norm = response_headers_norm.dropna()
    response_headers_norm["type"] = "response"
    headers_norm = pd.concat([request_headers_norm, response_headers_norm])
    target.put("request", request, format="table", data_columns=True)
    target.put("response", response, format="table", data_columns=True)
    target.put("headers", headers_norm, format="table", data_columns=True)
    target.close() 
Example #12
Source File: test_full_pipeline.py    From FlowKit with Mozilla Public License 2.0
def test_get_only_one_day(populated_test_data_table, run_task, all_tasks):
    """
    Test that only data for the one day is returned even if other data is present.
    """
    for task_id in all_tasks:
        return_code, result = run_task(
            dag_id="remote_table_dag", task_id=task_id, exec_date="2016-06-15"
        )
        print(
            f"Dag: remote_table_dag, task: {task_id}, exec date: 2016-06-15.\n\n{result}\n\n"
        )
    db_content = pd.read_sql_table(
        "calls_20160615", populated_test_data_table, "events"
    )
    assert len(db_content) == 1 
Example #13
Source File: load_data.py    From PowerGenome with MIT License
def load_utilities_eia(pudl_engine):

    utilities = pd.read_sql_table("utilities_eia", pudl_engine)

    return utilities 
Example #14
Source File: load_data.py    From PowerGenome with MIT License
def load_plants_860(pudl_engine, data_years=[2017]):

    plants = pd.read_sql_table(
        "plants_eia860", pudl_engine, parse_dates=["report_date"]
    )

    plants = plants.loc[plants["report_date"].dt.year.isin(data_years)]

    return plants 
Example #15
Source File: tabular_utils.py    From mikado with GNU Lesser General Public License v3.0
def get_queries(engine):
    queries = pd.read_sql_table("query", engine, index_col="query_name")
    queries.columns = ["qid", "qlength"]
    queries["qid"] = queries["qid"].astype(int)
    assert queries.qid.drop_duplicates().shape[0] == queries.shape[0]
    return queries 
Example #16
Source File: sql_upload.py    From ontask_b with MIT License
def _load_df_from_sqlconnection(
    conn_item: models.SQLConnection,
    run_params: Dict,
) -> pd.DataFrame:
    """Load a DF from a SQL connection.

    :param conn_item: SQLConnection object with the connection parameters.
    :param run_params: Dictionary with the execution parameters.
    :return: Data frame or raise an exception.
    """
    if conn_item.db_password:
        password = conn_item.db_password
    else:
        password = run_params['db_password']

    if conn_item.db_table:
        table_name = conn_item.db_table
    else:
        table_name = run_params['db_table']

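    # NOTE: "pandas" below appears to be OnTask's own dataops helper module
    # (the pandas library itself is imported as pd); create_db_engine and
    # detect_datetime_columns are OnTask helpers, not pandas APIs.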
    db_engine = pandas.create_db_engine(
        conn_item.conn_type,
        conn_item.conn_driver,
        conn_item.db_user,
        password,
        conn_item.db_host,
        conn_item.db_name)

    # Try to fetch the data
    data_frame = pd.read_sql_table(table_name, db_engine)

    # Remove the engine
    db_engine.dispose()

    # Strip white space from all string columns and try to convert to
    # datetime just in case
    return pandas.detect_datetime_columns(data_frame) 
Example #17
Source File: database.py    From ontask_b with MIT License
def load_table(
    table_name: str,
    columns: Optional[List[str]] = None,
    filter_exp: Optional[Dict] = None,
) -> Optional[pd.DataFrame]:
    """Load a Pandas data frame from the SQL DB.

    :param table_name: Table name
    :param columns: Optional list of columns to load (all if None is given)
    :param filter_exp: JSON expression to filter a subset of rows
    :return: data frame
    """
    if table_name not in connection.introspection.table_names():
        return None

    if settings.DEBUG:
        LOGGER.debug('Loading table %s', table_name)

    if columns or filter_exp:
        # A list of columns or a filter exp is given
        query, query_fields = sql.get_select_query_txt(
            table_name,
            column_names=columns,
            filter_formula=filter_exp)
        return pd.read_sql_query(
            query,
            OnTaskSharedState.engine,
            params=query_fields)

    # No special fields given, load the whole thing
    return pd.read_sql_table(table_name, OnTaskSharedState.engine) 
Example #18
Source File: ferc1.py    From pudl with MIT License
def purchased_power_ferc1(pudl_engine):
    """Pull a useful dataframe of FERC Form 1 Purchased Power data."""
    purchased_power_df = (
        pd.read_sql_table("purchased_power_ferc1", pudl_engine)
        .drop(['id'], axis="columns")
        .merge(pd.read_sql_table("utilities_ferc1", pudl_engine),
               on="utility_id_ferc1")
        .pipe(pudl.helpers.organize_cols, ["report_year",
                                           "utility_id_ferc1",
                                           "utility_id_pudl",
                                           "utility_name_ferc1",
                                           "seller_name",
                                           "record_id"])
    )
    return purchased_power_df 
Example #19
Source File: ferc1.py    From pudl with MIT License
def plants_pumped_storage_ferc1(pudl_engine):
    """Pull a dataframe of FERC Form 1 Pumped Storage plant data."""
    pumped_storage_df = (
        pd.read_sql_table("plants_pumped_storage_ferc1", pudl_engine)
        .drop(['id'], axis="columns")
        .merge(pudl.output.ferc1.plants_utils_ferc1(pudl_engine),
               on=["utility_id_ferc1", "plant_name_ferc1"])
        .pipe(pudl.helpers.organize_cols, ["report_year",
                                           "utility_id_ferc1",
                                           "utility_id_pudl",
                                           "utility_name_ferc1",
                                           "plant_name_ferc1",
                                           "record_id"])
    )
    return pumped_storage_df 
Example #20
Source File: tabular_utils.py    From mikado with GNU Lesser General Public License v3.0
def get_targets(engine):
    targets = pd.read_sql_table("target", engine, index_col="target_name")
    targets.columns = ["sid", "slength"]
    targets["sid"] = targets["sid"].astype(int)
    assert targets.sid.drop_duplicates().shape[0] == targets.shape[0]

    if targets[targets.slength.isna()].shape[0] > 0:
        raise KeyError("Unbound targets!")
    return targets 
Example #21
Source File: ferc1.py    From pudl with MIT License
def plants_hydro_ferc1(pudl_engine):
    """Pull a useful dataframe related to the FERC Form 1 hydro plants."""
    plants_hydro_df = (
        pd.read_sql_table("plants_hydro_ferc1", pudl_engine)
        .drop(['id'], axis="columns")
        .merge(plants_utils_ferc1(pudl_engine),
               on=["utility_id_ferc1", "plant_name_ferc1"])
        .pipe(pudl.helpers.organize_cols, ["report_year",
                                           "utility_id_ferc1",
                                           "utility_id_pudl",
                                           "utility_name_ferc1",
                                           "plant_name_ferc1",
                                           "record_id"])
    )
    return plants_hydro_df 
Example #22
Source File: test_packers.py    From vnpy_crypto with MIT License
def test_readonly_axis_zlib_to_sql(self):
        # GH11880
        if not _ZLIB_INSTALLED:
            pytest.skip('no zlib')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='zlib')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #23
Source File: routes.py    From thewarden with MIT License
def portfolio_tickers_json():
    if request.method == "GET":
        df = pd.read_sql_table("trades", db.engine)
        df = df[(df.user_id == current_user.username)]
        list_of_tickers = df.trade_asset_ticker.unique().tolist()
        try:
            list_of_tickers.remove(current_user.fx())
        except ValueError:
            pass
        return jsonify(list_of_tickers) 
Example #24
Source File: utils.py    From thewarden with MIT License
def transactions_fx():
    # Gets the transactions table and fills it with FX information
    # Note that it uses the exchange rate as of each transaction's date
    # Get all transactions from db and format
    df = pd.read_sql_table('trades', db.engine)
    df = df[(df.user_id == current_user.username)]
    # df = df[(df.trade_operation == "B") | (df.trade_operation == "S")]
    df['trade_date'] = pd.to_datetime(df['trade_date'])
    df = df.set_index('trade_date')
    # Ignore times in df to merge - keep only dates
    df.index = df.index.floor('d')
    df.index.rename('date', inplace=True)
    # The current fx needs no conversion, set to 1
    df[current_user.fx()] = 1
    # Need to get currencies into the df in order to normalize
    # let's load a list of currencies needed and merge
    list_of_fx = df.trade_currency.unique().tolist()
    # loop through currency list
    for currency in list_of_fx:
        if currency == current_user.fx():
            continue
        # Make a price request
        df[currency] = df.apply(find_fx, axis=1)
    # Now create a cash value in the preferred currency terms
    df['fx'] = df.apply(lambda x: x[x['trade_currency']], axis=1)
    df['cash_value_fx'] = df['cash_value'].astype(float) / df['fx'].astype(float)
    df['trade_fees_fx'] = df['trade_fees'].astype(float) / df['fx'].astype(float)
    df['trade_price_fx'] = df['trade_price'].astype(float) / df['fx'].astype(float)
    return (df) 
Example #25
Source File: utils.py    From thewarden with MIT License
def list_tickers():
    df = pd.read_sql_table('trades', db.engine)
    df = df[(df.user_id == current_user.username)]
    # remove the currencies from tickers
    df['is_currency'] = df['trade_asset_ticker'].apply(is_currency)
    df = df[df['is_currency'] == False]
    return (df.trade_asset_ticker.unique().tolist())


# ---------------- PANDAS HELPER FUNCTION --------------------------
# This is a helper for applying a function that returns multiple columns and
# concatenating the results into a dataframe (a minimal sketch of the pattern
# follows below).
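The helper's body is truncated in this excerpt. As a rough sketch of the pattern the comment describes (not the project's actual helper), pandas can do this natively with apply(..., result_type="expand"). The frame and column names below are invented for illustration:

import pandas as pd

df = pd.DataFrame({"value": [1, 2, 3]})

def double_and_square(row):
    # Returns two values per row; result_type="expand" spreads them
    # into separate columns of the resulting frame.
    return row["value"] * 2, row["value"] ** 2

df[["doubled", "squared"]] = df.apply(
    double_and_square, axis=1, result_type="expand"
)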
Example #26
Source File: routes.py    From thewarden with MIT License
def account_positions():
    transactions = Trades.query.filter_by(user_id=current_user.username)
    if transactions.count() == 0:
        return render_template("empty.html")
    df = pd.read_sql_table("trades", db.engine)
    df = df[(df.user_id == current_user.username)]
    df["trade_date"] = pd.to_datetime(df["trade_date"])

    account_table = df.groupby(["trade_account", "trade_asset_ticker"
                                ])[["trade_quantity"]].sum()
    # All accounts
    all_accounts = (account_table.query(
        "trade_asset_ticker != '" + current_user.fx() +
        "'").index.get_level_values("trade_account").unique().tolist())
    # Trim the account list only for accounts that currently hold a position
    account_table = account_table[account_table.trade_quantity != 0]
    # Remove accounts with USD only Positions
    account_table = account_table.query("trade_asset_ticker != 'USD'")

    # account_table = account_table['trade_asset_ticker' != 'USD']
    accounts = account_table.index.get_level_values(
        "trade_account").unique().tolist()
    tickers = (account_table.index.get_level_values(
        "trade_asset_ticker").unique().tolist())
    # if 'USD' in tickers:
    #     tickers.remove('USD')

    return render_template(
        "account_positions.html",
        title="Account Positions",
        accounts=accounts,
        tickers=tickers,
        account_table=account_table,
        all_accounts=all_accounts,
    ) 
Example #27
Source File: test_packers.py    From recruit with Apache License 2.0
def test_readonly_axis_blosc_to_sql(self):
        # GH11880
        if not _BLOSC_INSTALLED:
            pytest.skip('no blosc')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='blosc')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #28
Source File: test_packers.py    From recruit with Apache License 2.0
def test_readonly_axis_zlib_to_sql(self):
        # GH11880
        if not _ZLIB_INSTALLED:
            pytest.skip('no zlib')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='zlib')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #29
Source File: test_packers.py    From vnpy_crypto with MIT License
def test_readonly_axis_blosc_to_sql(self):
        # GH11880
        if not _BLOSC_INSTALLED:
            pytest.skip('no blosc')
        if not self._SQLALCHEMY_INSTALLED:
            pytest.skip('no sqlalchemy')
        expected = DataFrame({'A': list('abcd')})
        df = self.encode_decode(expected, compress='blosc')
        eng = self._create_sql_engine("sqlite:///:memory:")
        df.to_sql('test', eng, if_exists='append')
        result = pandas.read_sql_table('test', eng, index_col='index')
        result.index.names = [None]
        assert_frame_equal(expected, result) 
Example #30
Source File: ferc1.py    From pudl with MIT License
def fuel_by_plant_ferc1(pudl_engine, thresh=0.5):
    """Summarize FERC fuel data by plant for output.

    This is mostly a wrapper around
    :func:`pudl.transform.ferc1.fuel_by_plant_ferc1`
    which calculates some summary values on a per-plant basis (as indicated
    by ``utility_id_ferc1`` and ``plant_name_ferc1``) related to fuel
    consumption.

    Args:
        pudl_engine (sqlalchemy.engine.Engine): Engine for connecting to the
            PUDL database.
        thresh (float): Minimum fraction of fuel (cost and mmbtu) required in
            order for a plant to be assigned a primary fuel. Must be between
            0.5 and 1.0. default value is 0.5.

    Returns:
        pandas.DataFrame: A DataFrame with fuel use summarized by plant.

    """
    fbp_df = (
        pd.read_sql_table('fuel_ferc1', pudl_engine)
        .drop(['id'], axis="columns")
        .pipe(pudl.transform.ferc1.fuel_by_plant_ferc1, thresh=thresh)
        .merge(plants_utils_ferc1(pudl_engine),
               on=['utility_id_ferc1', 'plant_name_ferc1'])
        .pipe(pudl.helpers.organize_cols, ['report_year',
                                           'utility_id_ferc1',
                                           'utility_id_pudl',
                                           'utility_name_ferc1',
                                           'plant_id_pudl',
                                           'plant_name_ferc1'])
    )
    return fbp_df