Python pandas.to_numeric() Examples

The following code examples show how to use pandas.to_numeric(). They are taken from open-source Python projects.
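As a quick orientation before the project examples, here is a minimal sketch of the three errors modes (the inline results are illustrative):

import pandas as pd

s = pd.Series(["1", "-3.14", "apple"])
pd.to_numeric(s, errors="coerce")  # unparseable values become NaN: [1.0, -3.14, NaN]
pd.to_numeric(s, errors="ignore")  # returns the input unchanged when parsing fails
pd.to_numeric(s, errors="raise")   # raises ValueError: Unable to parse string "apple"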

Example 1
Project: gullikson-scripts   Author: kgullikson88   File: Mamajek_Table.py    MIT License 7 votes
def __init__(self, filename=TABLE_FILENAME):
        MS = SpectralTypeRelations.MainSequence()

        # Read in the table.
        colspecs=[[0,7], [7,14], [14,21], [21,28], [28,34], [34,40], [40,47], [47,55],
                  [55,63], [63,70], [70,78], [78,86], [86,94], [94,103], [103,110],
                  [110,116], [116,122], [122,130], [130,137], [137,144], [144,151],
                  [151,158]]
        mam_df = pd.read_fwf(filename, header=20, colspecs=colspecs, na_values=['...'])[:92]

        # Strip the * from the logAge column. Probably shouldn't but...
        mam_df['logAge'] = mam_df['logAge'].map(lambda s: s.strip('*') if isinstance(s, str) else s)  # str, not Python 2's basestring

        # Convert everything to floats
        for col in mam_df.columns:
            mam_df[col] = pd.to_numeric(mam_df[col], errors='ignore')

        # Add the spectral type number for interpolation
        mam_df['SpTNum'] = mam_df['SpT'].map(MS.SpT_To_Number)
        
        self.mam_df = mam_df 
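The column loop above works because errors='ignore' hands back the input unchanged whenever a column cannot be parsed. That option is deprecated in recent pandas, so a rough equivalent with try/except (assuming the same mam_df) would be:

for col in mam_df.columns:
    try:
        mam_df[col] = pd.to_numeric(mam_df[col])
    except (ValueError, TypeError):
        pass  # leave non-numeric columns as they are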
Example 2
Project: pymapd-examples   Author: omnisci   File: OKR_techsup_discourse.py    Apache License 2.0 6 votes
def main():
    dfcreds = get_credentials(keyfile)
    str_authentication = "&api_key=" + dfcreds['access_token'] + "&api_username=" + dfcreds['api_username']
    for url, fn in endpoints:
        url_get = url + str_authentication
        df = pd.read_json(url_get, orient="columns")
        # isolate the list
        cell = df.iloc[3, 0]
        # format and clean up the data
        df = pd.DataFrame.from_dict(cell)  # turn the list into a dataframe
        dfnew = pd.DataFrame(df, columns=["c1_timestamp", "c2_value"])  # set the column names
        dfnew["c1_timestamp"] = pd.to_datetime(df["x"])
        dfnew["c2_value"] = pd.to_numeric(df["y"])
        # write to csv
        print("writing csv to " + fn)
        dfnew.to_csv(fn, index=False, date_format="%Y-%m-%d") 
Example 3
Project: scicast   Author: iandriver   File: matrix_filter.py    MIT License 6 votes
def find_top_common_genes(self):
        top_common_list = []
        count = 0
        done = False
        log2_df_by_gene = self.log2_df_cell.transpose()
        log2_df2_gene = log2_df_by_gene.apply(pd.to_numeric,errors='coerce')
        log_mean = log2_df2_gene.mean(axis=0).sort_values(ascending=False)
        try:
            # reindex_axis was removed in pandas 1.0; reindex(..., axis=1) is the equivalent
            log2_sorted_gene = log2_df_by_gene.reindex(log2_df_by_gene.mean(axis=0).sort_values(ascending=False).index, axis=1)
        except ValueError:
            overlap_list = [item for item, count in collections.Counter(self.log2_df_cell.index).items() if count > 1]
            print(overlap_list, len(overlap_list))
            sys.exit('Error: Duplicate GeneIDs are present.')
        for gene in log2_sorted_gene.columns.tolist():
            if sum(genes < 1 for genes in log2_df_by_gene[gene])<6:
                if count < 20:
                    count+=1
                    top_common_list.append(gene)
            if count == 20:
                done = True
                break
        if done:
            return log2_df_by_gene[top_common_list].transpose()
        else:
            return [0] 
Example 4
Project: scicast   Author: iandriver   File: cluster.py    MIT License 6 votes
def find_top_common_genes(log2_df_by_cell, num_common=25):
    top_common_list = []
    count = 0
    done = False
    log2_df_by_gene = log2_df_by_cell.transpose()
    log2_df2_gene = log2_df_by_gene.apply(pd.to_numeric,errors='coerce')
    log_mean = log2_df2_gene.mean(axis=0).sort_values(ascending=False)
    try:
        # reindex_axis was removed in pandas 1.0; reindex(..., axis=1) is the equivalent
        log2_sorted_gene = log2_df_by_gene.reindex(log2_df_by_gene.mean(axis=0).sort_values(ascending=False).index, axis=1)
    except ValueError:
        overlap_list = [item for item, count in collections.Counter(log2_df_by_cell.index).items() if count > 1]
        print(overlap_list, len(overlap_list))
        sys.exit('Error: Duplicate GeneIDs are present.')
    for gene in log2_sorted_gene.columns.tolist():
        if sum(genes < 1 for genes in log2_df_by_gene[gene])<6:
            if count < num_common:
                count+=1
                top_common_list.append(gene)
        if count == num_common:
            done = True
            break
    if done:
        return log2_df_by_gene[top_common_list].transpose()
    else:
        return [0] 
Example 5
Project: scicast   Author: iandriver   File: matrix_filter.py    MIT License 6 votes
def find_top_common_genes(log2_df_by_cell, num_common=25):
        top_common_list = []
        count = 0
        done = False
        log2_df_by_gene = log2_df_by_cell.transpose()
        # pd.to_numeric only accepts 1-d input, so convert column by column
        log2_df2_gene = log2_df_by_gene.apply(pd.to_numeric, errors='coerce')
        log_mean = log2_df2_gene.mean(axis=0).sort_values(ascending=False)
        # reindex_axis was removed in pandas 1.0; reindex(..., axis=1) is the equivalent
        log2_sorted_gene = log2_df_by_gene.reindex(log2_df_by_gene.mean(axis=0).sort_values(ascending=False).index, axis=1)
        for gene in log2_sorted_gene.columns.tolist():
            if sum(genes < 1 for genes in log2_df_by_gene[gene])<6:
                if count < num_common:
                    count+=1
                    top_common_list.append(gene)
            if count == num_common:
                done = True
                break
        if done:
            return log2_df_by_gene[top_common_list].transpose()
        else:
            return [0] 
Example 6
Project: thewarden   Author: pxsocs   File: pricing.py    MIT License 6 votes
def df_fx(self, currency, fx_provider):
        try:
            # First get the df from this currency
            if currency != 'USD':
                fx = PriceData(currency, fx_provider)
                fx.df = fx.df.rename(columns={'close': 'fx_close'})
                fx.df["fx_close"] = pd.to_numeric(fx.df.fx_close,
                                                  errors='coerce')
                # Merge the two dfs:
                merge_df = pd.merge(self.df, fx.df, on='date', how='inner')
                merge_df['close'] = merge_df['close'].astype(float)
                merge_df['close_converted'] = merge_df['close'] * merge_df[
                    'fx_close']
                return (merge_df)
            else:  # If currency is USD no conversion is needed - prices are all in USD
                self.df['fx_close'] = 1
                self.df['close_converted'] = self.df['close'].astype(float)
                return (self.df)
        except Exception as e:
            self.errors.append(e)
            return (None) 
Example 7
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 6 votes
def test_error(self):
        s = pd.Series([1, -3.14, 'apple'])
        msg = 'Unable to parse string "apple" at position 2'
        with pytest.raises(ValueError, match=msg):
            to_numeric(s, errors='raise')

        res = to_numeric(s, errors='ignore')
        expected = pd.Series([1, -3.14, 'apple'])
        tm.assert_series_equal(res, expected)

        res = to_numeric(s, errors='coerce')
        expected = pd.Series([1, -3.14, np.nan])
        tm.assert_series_equal(res, expected)

        s = pd.Series(['orange', 1, -3.14, 'apple'])
        msg = 'Unable to parse string "orange" at position 0'
        with pytest.raises(ValueError, match=msg):
            to_numeric(s, errors='raise') 
Example 8
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 6 votes
def test_numeric_lists_and_arrays(self):
        # Test to_numeric with embedded lists and arrays
        df = pd.DataFrame(dict(
            a=[[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1]
        ))
        df['a'] = df['a'].apply(to_numeric)
        expected = pd.DataFrame(dict(
            a=[[3.14, 1.0], 1.6, 0.1],
        ))
        tm.assert_frame_equal(df, expected)

        df = pd.DataFrame(dict(
            a=[np.array([decimal.Decimal(3.14), 1.0]), 0.1]
        ))
        df['a'] = df['a'].apply(to_numeric)
        expected = pd.DataFrame(dict(
            a=[[3.14, 1.0], 0.1],
        ))
        tm.assert_frame_equal(df, expected) 
Example 9
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 6 votes
def test_numeric_dtypes(self):
        idx = pd.Index([1, 2, 3], name='xxx')
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, idx)

        res = pd.to_numeric(pd.Series(idx, name='xxx'))
        tm.assert_series_equal(res, pd.Series(idx, name='xxx'))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, idx.values)

        idx = pd.Index([1., np.nan, 3., np.nan], name='xxx')
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, idx)

        res = pd.to_numeric(pd.Series(idx, name='xxx'))
        tm.assert_series_equal(res, pd.Series(idx, name='xxx'))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, idx.values) 
Example 10
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 6 votes
def test_str(self):
        idx = pd.Index(['1', '2', '3'], name='xxx')
        exp = np.array([1, 2, 3], dtype='int64')
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, pd.Index(exp, name='xxx'))

        res = pd.to_numeric(pd.Series(idx, name='xxx'))
        tm.assert_series_equal(res, pd.Series(exp, name='xxx'))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, exp)

        idx = pd.Index(['1.5', '2.7', '3.4'], name='xxx')
        exp = np.array([1.5, 2.7, 3.4])
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, pd.Index(exp, name='xxx'))

        res = pd.to_numeric(pd.Series(idx, name='xxx'))
        tm.assert_series_equal(res, pd.Series(exp, name='xxx'))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, exp) 
Example 11
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 6 votes
def test_coerce_uint64_conflict(self):
        # see gh-17007 and gh-17125
        #
        # Still returns float despite the uint64-nan conflict,
        # which would normally force the casting to object.
        df = pd.DataFrame({"a": [200, 300, "", "NaN", 30000000000000000000]})
        expected = pd.Series([200, 300, np.nan, np.nan,
                              30000000000000000000], dtype=float, name="a")
        result = to_numeric(df["a"], errors="coerce")
        tm.assert_series_equal(result, expected)

        s = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
        expected = pd.Series([12345678901234567890,
                              1234567890, np.nan], dtype=float)
        result = to_numeric(s, errors="coerce")
        tm.assert_series_equal(result, expected)

        # For completeness, check against "ignore" and "raise"
        result = to_numeric(s, errors="ignore")
        tm.assert_series_equal(result, s)

        msg = "Unable to parse string"
        with pytest.raises(ValueError, match=msg):
            to_numeric(s, errors="raise") 
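The behavior pinned down here is easy to reproduce directly: a value above the int64 range can only coexist with NaN in a float column, so coercion falls back to float64 rather than object (illustrative output):

pd.to_numeric(pd.Series([200, "NaN", 30000000000000000000]), errors="coerce")
# 0    2.000000e+02
# 1             NaN
# 2    3.000000e+19
# dtype: float64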
Example 12
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License 5 votes
def _call_cluster_peaks(self, cluster, min_cluster_expr_frac,
                            min_block_overlap, min_max_block_expr_frac):
        cluster_entries = cluster["header"].strip().split('\t')
        cluster_expr = float(cluster_entries[5])
        cluster_strand = cluster_entries[4]
        cluster_replicon = cluster_entries[1]
        peak_df = pd.DataFrame()

        if len(cluster["blocks"]) == 1:
            block_entries = cluster["blocks"][0].strip().split('\t')
            peak_start = int(block_entries[2]) + 1
            peak_end = int(block_entries[3])
            peak_df = peak_df.append(pd.Series([peak_start, peak_end], index=[
                "peak_start", "peak_end"]), ignore_index=True)
        else:
            blocks = [block.strip().split('\t') for block in cluster["blocks"]]
            block_df = pd.DataFrame(
                blocks, columns=["blockNb", "blockChrom", "blockStart",
                                 "blockEnd", "blockStrand", "blockExpression",
                                 "readCount"])
            block_df[["blockNb", "blockStart", "blockEnd", "blockExpression",
                      "readCount"]] = block_df[
                    ["blockNb", "blockStart", "blockEnd", "blockExpression",
                     "readCount"]].apply(pd.to_numeric)
            peak_df = self._split_cluster_peaks(block_df, cluster_expr,
                                                peak_df, min_cluster_expr_frac,
                                                min_block_overlap,
                                                min_max_block_expr_frac)
        if peak_df.empty:
            return
        peak_df = peak_df.astype(np.int64)
        peak_df["peak_strand"] = cluster_strand
        self._replicon_dict[cluster_replicon]["peak_df"] = self._replicon_dict[
            cluster_replicon]["peak_df"].append(peak_df, ignore_index=True) 
Example 13
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes
def getStatoilTrainValLoaders(args):
    fixSeed(args)
    local_data = pd.read_json('/home/adodd202/train.json')

    local_data = shuffle(local_data)  # otherwise same validation set each time!
    local_data = local_data.reindex(np.random.permutation(local_data.index))

    local_data['band_1'] = local_data['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['band_2'] = local_data['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    local_data['inc_angle'] = pd.to_numeric(local_data['inc_angle'], errors='coerce')
    local_data['inc_angle'].fillna(0, inplace=True)

    band_1 = np.concatenate([im for im in local_data['band_1']]).reshape(-1, 75, 75)
    band_2 = np.concatenate([im for im in local_data['band_2']]).reshape(-1, 75, 75)
    # band_3=(band_1+band_2)/2
    local_full_img = np.stack([band_1, band_2], axis=1)

    train_imgs = XnumpyToTensor(local_full_img, args)
    train_targets = YnumpyToTensor(local_data['is_iceberg'].values, args)
    dset_train = TensorDataset(train_imgs, train_targets)

    local_train_ds, local_val_ds = trainTestSplit(dset_train, args.validationRatio)
    local_train_loader = torch.utils.data.DataLoader(local_train_ds, batch_size=args.batch_size, shuffle=False,
                                                     num_workers=args.workers)
    local_val_loader = torch.utils.data.DataLoader(local_val_ds, batch_size=args.batch_size, shuffle=False,
                                                   num_workers=args.workers)
    return local_train_loader, local_val_loader, local_train_ds, local_val_ds 
Example 14
Project: Kaggle-Statoil-Challenge   Author: adodd202   File: utils.py    MIT License 5 votes
def BinaryInference(local_model, args):
    if args.use_cuda:
        local_model.cuda()
    local_model.eval()
    df_test_set = pd.read_json('/home/adodd202/test.json')
    df_test_set['band_1'] = df_test_set['band_1'].apply(lambda x: np.array(x).reshape(75, 75))
    df_test_set['band_2'] = df_test_set['band_2'].apply(lambda x: np.array(x).reshape(75, 75))
    df_test_set['inc_angle'] = pd.to_numeric(df_test_set['inc_angle'], errors='coerce')
    # df_test_set.head(3)
    print(df_test_set.shape)
    columns = ['id', 'is_iceberg']
    df_pred = pd.DataFrame(data=np.zeros((0, len(columns))), columns=columns)
    # df_pred.id.astype(int)
    for index, row in df_test_set.iterrows():
        rwo_no_id = row.drop('id')
        band_1_test = (rwo_no_id['band_1']).reshape(-1, 75, 75)
        band_2_test = (rwo_no_id['band_2']).reshape(-1, 75, 75)
        # band_3_test = (band_1_test + band_2_test) / 2
        full_img_test = np.stack([band_1_test, band_2_test], axis=1)

        x_data_np = np.array(full_img_test, dtype=np.float32)
        if args.use_cuda:
            X_tensor_test = Variable(torch.from_numpy(x_data_np).cuda())  # Note the conversion for pytorch
        else:
            X_tensor_test = Variable(torch.from_numpy(x_data_np))  # Note the conversion for pytorch

        # X_tensor_test=X_tensor_test.view(1, trainX.shape[1]) # does not work with 1d tensors
        predicted_val = (local_model(X_tensor_test).data).float()  # probabilities
        p_test = predicted_val.cpu().numpy().item()  # otherwise we get an array, we need a single float

        df_pred = df_pred.append({'id': row['id'], 'is_iceberg': p_test}, ignore_index=True)

    return df_pred 
Example 15
Project: pymapd-examples   Author: omnisci   File: parsing_utils.py    Apache License 2.0 5 votes
def format_int_col(df, col_list):
    if col_list != {}:
        for col in col_list: df[col] = pd.to_numeric(df[col], downcast='integer') 
Example 16
Project: pymapd-examples   Author: omnisci   File: parsing_utils.py    Apache License 2.0 5 votes
def format_flt_col(df, col_list):
    if col_list != {}:
        for col in col_list: df[col] = pd.to_numeric(df[col], downcast='float') 
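The two helpers above differ only in the downcast target. downcast picks the smallest dtype of the requested family that can hold the data without loss, for example:

pd.to_numeric(pd.Series([1, 2, 3]), downcast="integer")   # int8
pd.to_numeric(pd.Series([1, 2, 3]), downcast="unsigned")  # uint8
pd.to_numeric(pd.Series([1.0, 2.0]), downcast="float")    # float32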
Example 17
Project: gullikson-scripts   Author: kgullikson88   File: HelperFunctions.py    MIT License 5 votes
def read_observed_targets(target_filename=OBS_TARGET_FNAME):
    """
    Reads the observed targets excel file into a pandas dataframe
    :param target_filename: The filename to read. Has a very specific format!
    :return:
    """
    sample_names = ['identifier', 'RA/DEC (J2000)', 'plx', 'Vmag', 'Kmag', 'vsini', 'SpT', 'configuration',
                    'Instrument',
                    'Date',
                    'Temperature', 'Velocity', 'vsini_sec', '[Fe/H]', 'Significance', 'Sens_min', 'Sens_any',
                    'Comments',
                    'Rank', 'Keck', 'VLT', 'Gemini', 'Imaging_Detecton']

    def plx_convert(s):
        try:
            return float(s)
        except ValueError:
            return np.nan

    # the original used the pre-0.21 keyword "sheetname"
    sample = pd.read_excel(target_filename, sheet_name=0, na_values=['     ~'], names=sample_names,
                           converters=dict(plx=plx_convert))
    sample = sample.reset_index(drop=True)[1:]

    # Convert everything to floats
    for col in sample.columns:
        sample[col] = pd.to_numeric(sample[col], errors='ignore')

    return sample 
Example 18
Project: fmlpy   Author: crazywiden   File: bars.py    MIT License 5 votes
def volume_bar(data, size):
    '''
    Calculate HOLC for a certain volume
    @requires:
        pandas
    @parameters:
        data: input data with time, price and volume
        size: input volume bar, integer
    @return:
        dataframe (start_t, end_t, start_idx, end_idx, high, low, close, open)
    '''
    data = _preprocess(data, True)
    if not isinstance(size, int):
        raise TypeError("Size should be an integer")
    data.loc[:, 'vol'] = pd.to_numeric(data['vol'])
    data.loc[:, 'cumsum'] = data['vol'].cumsum()
    data['cumsum'] = data['cumsum'] // size
    data = data.assign(idx=list(range(data.shape[0])))
    data['idx'] = data['idx'].astype('int64')
    aggregated_data = data.groupby('cumsum').agg({'time': ['first', 'last'], 'idx': ['first', 'last'], 
        'price': ['min', 'max', 'first', 'last']})
    aggregated_data.columns = [' '.join(col).strip() for col in aggregated_data.columns.values]
    aggregated_data.columns = ['start_t', 'end_t', 'start_idx', 'end_idx','low', 'high', 'open', 'close']
    result = aggregated_data.reset_index()
    result = result.drop(['cumsum'], axis=1)
    result = result.dropna()
    return result 
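A hypothetical call, assuming _preprocess has already produced the time, price and vol columns the function expects:

bars = volume_bar(tick_data, size=1000)  # tick_data: raw trades DataFrame
# each resulting row covers roughly 1000 units of traded volume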
Example 19
Project: fmlpy   Author: crazywiden   File: bars.py    MIT License 5 votes
def dollar_bar(data, bar):
    '''
    Calculate HOLC for a certain dollar
    @requires:
        pandas
    @parameters:
        data: input data with time, price and volume
        bar: input dollar bar, integer
    @return:
        dataframe (start_t, end_t, start_idx, end_idx, high, low, close, open)
    '''
    data = _preprocess(data, True)
    if not isinstance(bar, int):
        raise TypeError("Dollar bar should be an integer")
    data.loc[:, 'vol'] = pd.to_numeric(data['vol'])
    data['dollar'] = data['price'] * data['vol']
    data.loc[:, 'cumsum'] = data['dollar'].cumsum()
    data['cumsum'] = data['cumsum'] // bar
    data = data.assign(idx=list(range(data.shape[0])))
    data['idx'] = data['idx'].astype('int64')
    aggregated_data = data.groupby('cumsum').agg({'time': ['first', 'last'], 'idx': ['first', 'last'],
        'price': ['min', 'max', 'first', 'last']})
    aggregated_data.columns = [' '.join(col).strip() for col in aggregated_data.columns.values]
    aggregated_data.columns = ['start_t', 'end_t', 'start_idx', 'end_idx','low', 'high', 'open', 'close']
    result = aggregated_data.reset_index()
    result = result.drop(['cumsum'], axis=1)
    result = result.dropna()
    return result 
Example 20
Project: scicast   Author: iandriver   File: matrix_filter.py    MIT License 5 votes
def log2_oulierfilter(df_by_cell, plot=False):
        log2_df = np.log2(df_by_cell+1)
        top_log2 = find_top_common_genes(log2_df)
        if all(top_log2) != 0:
            # pd.to_numeric only accepts 1-d input, so convert column by column
            log2_df2 = log2_df.apply(pd.to_numeric, errors='coerce')
            log_mean = top_log2.mean(axis=0).sort_values(ascending=False)
            # reindex_axis was removed in pandas 1.0; reindex(..., axis=1) is the equivalent
            log2_sorted = top_log2.reindex(top_log2.mean(axis=0).sort_values(ascending=False).index, axis=1)
            xticks = []
            keep_col= []
            log2_cutoff = np.average(np.average(log2_sorted))-2*np.average(np.std(log2_sorted))
            for col, m in zip(log2_sorted.columns.tolist(),log2_sorted.mean()):
                if m > log2_cutoff:
                    keep_col.append(col)
                    xticks.append(col+' '+str("%.2f" % m))
            excluded_cells = [x for x in log2_sorted.columns.tolist() if x not in keep_col]
            filtered_df_by_cell = df_by_cell[keep_col]
            filtered_df_by_gene = filtered_df_by_cell.transpose()
            filtered_log2 = np.log2(filtered_df_by_cell[filtered_df_by_cell>0])
            if plot:
                ax = sns.boxplot(data=filtered_log2, whis= .75, notch=True)
                ax = sns.stripplot(x=filtered_log2.columns.values, y=filtered_log2.mean(axis=0), size=4, jitter=True, edgecolor="gray")
                xtickNames = plt.setp(ax, xticklabels=xticks)
                plt.setp(xtickNames, rotation=90, fontsize=9)
                plt.show()
                plt.clf()
                sns.distplot(filtered_log2.mean())
                plt.show()
            log2_expdf_cell = np.log2(filtered_df_by_cell+1)
            log2_expdf_gene = log2_expdf_cell.transpose()
            return log2_expdf_cell, log2_expdf_gene
        else:
            print("no common genes found")
            return log2_df, log2_df.transpose() 
Example 21
Project: performance_tracker   Author: metro-ontime   File: process_vehicles.py    GNU General Public License v3.0 5 votes
def process_raw_vehicles(df, track):
    df = df.drop_duplicates(
        subset=["report_time", "latitude", "longitude", "vehicle_id"]
    )
    df = df[df["predictable"] == True]

    df["latitude"] = pd.to_numeric(df.latitude)
    df["longitude"] = pd.to_numeric(df.longitude)
    df = toGDF(df)

    mask_0 = (df["direction"] == 0) | (df["direction"] == 90)
    mask_1 = (df["direction"] == 180) | (df["direction"] == 270)
    df_0 = df.loc[mask_0]
    df_0 = df_0.assign(direction_id = 0)
    df_1 = df.loc[mask_1]
    df_1 = df_1.assign(direction_id = 1)
    df_0["relative_position"] = findRelativePositions(df_0, track[0])
    df_1["relative_position"] = findRelativePositions(df_1, track[1])
    df = pd.concat([df_0, df_1])

    df["datetime"] = pd.to_datetime(df["report_time"], utc=True)
    df["datetime_local_iso8601"] = df.report_time.apply(
        lambda dt: pendulum.parse(dt, tz="UTC")
        .in_tz("America/Los_Angeles")
        .to_iso8601_string()
    )
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df = getTrips(df)
    df = df.reset_index(drop=True)  # necessary both before and after getTrips
    df["datetime"] = df["datetime_local_iso8601"]
    df = df[["datetime", "trip_id", "direction_id", "relative_position"]]
    return df 
Example 22
Project: performance_tracker   Author: metro-ontime   File: estimate_arrivals.py    GNU General Public License v3.0 5 votes
def estimate_arrivals(trip_id, trip, stations, direction):
    trip.loc[:, "estimate"] = False
    stations.loc[:, "estimate"] = True
    trip_est = stations
    trip_est.loc[:, "trip_id"] = trip_id
    trip_est.loc[:, "direction_id"] = direction
    combined = trip.append(trip_est)
    combined = combined.sort_values("relative_position")
    combined = combined.reset_index(drop=True)
    # shift vals to move adjacent position and date data into each row
    combined.loc[:, "previous_pos"] = combined.relative_position.shift()
    combined.loc[:, "next_pos"] = combined.relative_position.shift(-1)
    combined.loc[:, "previous_dt"] = combined.datetime.shift()
    combined.loc[:, "next_dt"] = combined.datetime.shift(-1)
    select = combined[combined["estimate"] == True]
    select.loc[:, "weight"] = (select.relative_position - select.previous_pos) / (
        select.next_pos - select.previous_pos
    )
    select.loc[:, "time_interpolation"] = (
        select.next_dt - select.previous_dt
    ) * select.weight
    select.loc[:, "datetime"] = select.previous_dt + select.time_interpolation
    select.loc[:, "datetime"] = pd.DatetimeIndex(select.datetime).round("S")
    select.loc[:, "stop_id"] = pd.to_numeric(select.stop_id, downcast="integer")
    # Some station estimates cannot be reliably estimated using this
    # technique and will have datetime = NaT, so we remove them.
    select = select.dropna(subset=["datetime"])
    return select 
Example 23
Project: respy   Author: OpenSourceEconomics   File: shared.py    MIT License 5 votes
def downcast_to_smallest_dtype(series):
    """Downcast the dtype of a :class:`pandas.Series` to the lowest possible dtype.

    Be aware that NumPy integers silently overflow, which is why conversion to low dtypes
    should be done after calculations. For example, using :class:`np.uint8` for an array
    and squaring the elements leads to silent overflows for numbers higher than 255.

    For more information on the boundaries, see the NumPy documentation under
    https://docs.scipy.org/doc/numpy-1.17.0/user/basics.types.html.

    """
    # We can skip integer as "unsigned" and "signed" will find the same dtypes.
    _downcast_options = ["unsigned", "signed", "float"]

    if series.dtype.name == "category":
        min_dtype = "category"

    elif series.dtype == bool:  # np.bool in the original; the alias was removed in NumPy 1.24
        min_dtype = np.dtype("uint8")

    else:
        min_dtype = np.dtype("float64")

        for dc_opt in _downcast_options:
            dtype = pd.to_numeric(series, downcast=dc_opt).dtype

            if dtype.itemsize == 1 and dtype.name.startswith("u"):
                min_dtype = dtype
                break
            elif dtype.itemsize == min_dtype.itemsize and dtype.name.startswith("u"):
                min_dtype = dtype
            elif dtype.itemsize < min_dtype.itemsize:
                min_dtype = dtype
            else:
                pass

    return series.astype(min_dtype) 
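A hypothetical call, to make the selection logic concrete: "unsigned" is tried first and wins ties at the same itemsize, so small non-negative integers end up unsigned:

s = pd.Series([0, 1, 255])
downcast_to_smallest_dtype(s).dtype  # uint8 (itemsize 1, kept via the early break)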
Example 24
Project: student-resources   Author: djgroen   File: make-animation.py    BSD 3-Clause "New" or "Revised" License 5 votes
def read_csv_to_df():
    # Reads data from data directory
    df_list = []
    
    num_files = len(glob.glob('%s/agents.*.csv' % data_path))
    for i in range(1,num_files+1):
        file_path = '%s/agents.%s.csv' % (data_path, i)
        print(file_path)
        dataframe = pd.read_csv(file_path, index_col='#id')
        dataframe = dataframe.apply(pd.to_numeric)  # apply returns a new frame; the original discarded the result
        df_list.append(dataframe)
    return df_list 
Example 25
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_empty(input_kwargs, result_kwargs):
    # see gh-16302
    ser = Series([], dtype=object)
    result = to_numeric(ser, **input_kwargs)

    expected = Series([], **result_kwargs)
    tm.assert_series_equal(result, expected) 
Example 26
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_series(last_val):
    ser = Series(["1", "-3.14", last_val])
    result = to_numeric(ser)

    expected = Series([1, -3.14, 7])
    tm.assert_series_equal(result, expected) 
Example 27
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_series_numeric(data):
    ser = Series(data, index=list("ABCD"), name="EFG")

    result = to_numeric(ser)
    tm.assert_series_equal(result, ser) 
Example 28
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_error(data, msg):
    ser = Series(data)

    with pytest.raises(ValueError, match=msg):
        to_numeric(ser, errors="raise") 
Example 29
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_ignore_error(errors, exp_data):
    ser = Series([1, -3.14, "apple"])
    result = to_numeric(ser, errors=errors)

    expected = Series(exp_data)
    tm.assert_series_equal(result, expected) 
Example 30
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_list():
    ser = ["1", "-3.14", "7"]
    res = to_numeric(ser)

    expected = np.array([1, -3.14, 7])
    tm.assert_numpy_array_equal(res, expected) 
Example 31
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_list_numeric(data, arr_kwargs):
    result = to_numeric(data)
    expected = np.array(data, **arr_kwargs)
    tm.assert_numpy_array_equal(result, expected) 
Example 32
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_numeric(kwargs):
    data = [1, -3.14, 7]

    ser = Series(data, **kwargs)
    result = to_numeric(ser)

    expected = Series(data)
    tm.assert_series_equal(result, expected) 
Example 33
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_numeric_df_columns(columns):
    # see gh-14827
    df = DataFrame(
        dict(
            a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), "0.1"],
            b=[1.0, 2.0, 3.0, 4.0],
        )
    )

    expected = DataFrame(dict(a=[1.2, 3.14, np.inf, 0.1], b=[1.0, 2.0, 3.0, 4.0]))

    df_copy = df.copy()
    df_copy[columns] = df_copy[columns].apply(to_numeric)

    tm.assert_frame_equal(df_copy, expected) 
Example 34
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_numeric_embedded_arr_likes(data, exp_data):
    # Test to_numeric with embedded lists and arrays
    df = DataFrame(dict(a=data))
    df["a"] = df["a"].apply(to_numeric)

    expected = DataFrame(dict(a=exp_data))
    tm.assert_frame_equal(df, expected) 
Example 35
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_type_check(errors):
    # see gh-11776
    df = DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
    kwargs = dict(errors=errors) if errors is not None else dict()
    error_ctx = pytest.raises(TypeError, match="1-d array")

    with error_ctx:
        to_numeric(df, **kwargs) 
Example 36
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_scalar(val, signed, transform):
    val = -val if signed else val
    assert to_numeric(transform(val)) == float(val) 
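As the test exercises, to_numeric also accepts plain scalars and returns a scalar (illustrative outputs):

pd.to_numeric("3.14")  # 3.14
pd.to_numeric("42")    # 42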
Example 37
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_really_large_scalar(large_val, signed, transform, errors):
    # see gh-24910
    kwargs = dict(errors=errors) if errors is not None else dict()
    val = -large_val if signed else large_val

    val = transform(val)
    val_is_string = isinstance(val, str)

    if val_is_string and errors in (None, "raise"):
        msg = "Integer out of range. at position 0"
        with pytest.raises(ValueError, match=msg):
            to_numeric(val, **kwargs)
    else:
        expected = float(val) if (errors == "coerce" and val_is_string) else val
        tm.assert_almost_equal(to_numeric(val, **kwargs), expected) 
Example 38
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_really_large_in_arr(large_val, signed, transform, multiple_elts, errors):
    # see gh-24910
    kwargs = dict(errors=errors) if errors is not None else dict()
    val = -large_val if signed else large_val
    val = transform(val)

    extra_elt = "string"
    arr = [val] + multiple_elts * [extra_elt]

    val_is_string = isinstance(val, str)
    coercing = errors == "coerce"

    if errors in (None, "raise") and (val_is_string or multiple_elts):
        if val_is_string:
            msg = "Integer out of range. at position 0"
        else:
            msg = 'Unable to parse string "string" at position 1'

        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
    else:
        result = to_numeric(arr, **kwargs)

        exp_val = float(val) if (coercing and val_is_string) else val
        expected = [exp_val]

        if multiple_elts:
            if coercing:
                expected.append(np.nan)
                exp_dtype = float
            else:
                expected.append(extra_elt)
                exp_dtype = object
        else:
            exp_dtype = float if isinstance(exp_val, (int, float)) else object

        tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) 
Example 39
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_really_large_in_arr_consistent(large_val, signed, multiple_elts, errors):
    # see gh-24910
    #
    # Even if we discover that we have to hold float, does not mean
    # we should be lenient on subsequent elements that fail to be integer.
    kwargs = dict(errors=errors) if errors is not None else dict()
    arr = [str(-large_val if signed else large_val)]

    if multiple_elts:
        arr.insert(0, large_val)

    if errors in (None, "raise"):
        index = int(multiple_elts)
        msg = "Integer out of range. at position {index}".format(index=index)

        with pytest.raises(ValueError, match=msg):
            to_numeric(arr, **kwargs)
    else:
        result = to_numeric(arr, **kwargs)

        if errors == "coerce":
            expected = [float(i) for i in arr]
            exp_dtype = float
        else:
            expected = arr
            exp_dtype = object

        tm.assert_almost_equal(result, np.array(expected, dtype=exp_dtype)) 
Example 40
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_numeric_dtypes(data, transform_assert_equal):
    transform, assert_equal = transform_assert_equal
    data = transform(data)

    result = to_numeric(data)
    assert_equal(result, data) 
Example 41
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_str(data, exp, transform_assert_equal):
    transform, assert_equal = transform_assert_equal
    result = to_numeric(transform(data))

    expected = transform(exp)
    assert_equal(result, expected) 
Example 42
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_datetime_like(tz_naive_fixture, transform_assert_equal):
    transform, assert_equal = transform_assert_equal
    idx = pd.date_range("20130101", periods=3, tz=tz_naive_fixture)

    result = to_numeric(transform(idx))
    expected = transform(idx.asi8)
    assert_equal(result, expected) 
Example 43
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_timedelta(transform_assert_equal):
    transform, assert_equal = transform_assert_equal
    idx = pd.timedelta_range("1 days", periods=3, freq="D")

    result = to_numeric(transform(idx))
    expected = transform(idx.asi8)
    assert_equal(result, expected) 
Example 44
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_period(transform_assert_equal):
    transform, assert_equal = transform_assert_equal

    idx = pd.period_range("2011-01", periods=3, freq="M", name="")
    inp = transform(idx)

    if isinstance(inp, Index):
        result = to_numeric(inp)
        expected = transform(idx.asi8)
        assert_equal(result, expected)
    else:
        # TODO: PeriodDtype, so support it in to_numeric.
        pytest.skip("Missing PeriodDtype support in to_numeric") 
Example 45
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_downcast_invalid_cast():
    # see gh-13352
    data = ["1", 2, 3]
    invalid_downcast = "unsigned-integer"
    msg = "invalid downcasting method provided"

    with pytest.raises(ValueError, match=msg):
        to_numeric(data, downcast=invalid_downcast) 
Example 46
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_errors_invalid_value():
    # see gh-26466
    data = ["1", 2, 3]
    invalid_error_value = "invalid"
    msg = "invalid error value specified"

    with pytest.raises(ValueError, match=msg):
        to_numeric(data, errors=invalid_error_value) 
Example 47
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_downcast_basic(data, kwargs, exp_dtype):
    # see gh-13352
    result = to_numeric(data, **kwargs)
    expected = np.array([1, 2, 3], dtype=exp_dtype)
    tm.assert_numpy_array_equal(result, expected) 
Example 48
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_signed_downcast(data, signed_downcast):
    # see gh-13352
    smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
    expected = np.array([1, 2, 3], dtype=smallest_int_dtype)

    res = to_numeric(data, downcast=signed_downcast)
    tm.assert_numpy_array_equal(res, expected) 
Example 49
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_ignore_downcast_invalid_data():
    # If we can't successfully cast the given
    # data to a numeric dtype, do not bother
    # with the downcast parameter.
    data = ["foo", 2, 3]
    expected = np.array(data, dtype=object)

    res = to_numeric(data, errors="ignore", downcast="unsigned")
    tm.assert_numpy_array_equal(res, expected) 
Example 50
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_ignore_downcast_cannot_convert_float(data, expected, downcast):
    # Cannot cast to an integer (signed or unsigned)
    # because we have a float number.
    res = to_numeric(data, downcast=downcast)
    tm.assert_numpy_array_equal(res, expected) 
Example 51
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_downcast_not8bit(downcast, expected_dtype):
    # the smallest integer dtype need not be np.(u)int8
    data = ["256", 257, 258]

    expected = np.array([256, 257, 258], dtype=expected_dtype)
    res = to_numeric(data, downcast=downcast)
    tm.assert_numpy_array_equal(res, expected) 
Example 52
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_downcast_limits(dtype, downcast, min_max):
    # see gh-14404: test the limits of each downcast.
    series = to_numeric(Series(min_max), downcast=downcast)
    assert series.dtype == dtype 
Example 53
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_coerce_uint64_conflict(data, exp_data):
    # see gh-17007 and gh-17125
    #
    # Still returns float despite the uint64-nan conflict,
    # which would normally force the casting to object.
    result = to_numeric(Series(data), errors="coerce")
    expected = Series(exp_data, dtype=float)
    tm.assert_series_equal(result, expected) 
Example 54
Project: FX-RER-Value-Extraction   Author: tsKenneth   File: test_numeric.py    MIT License 5 votes
def test_non_coerce_uint64_conflict(errors, exp):
    # see gh-17007 and gh-17125
    #
    # For completeness.
    ser = Series(["12345678901234567890", "1234567890", "ITEM"])

    if isinstance(exp, str):
        with pytest.raises(ValueError, match=exp):
            to_numeric(ser, errors=errors)
    else:
        result = to_numeric(ser, errors=errors)
        tm.assert_series_equal(result, ser) 
Example 55
Project: sthlm-bostad-vis   Author: ashwinvis   File: sssb.py    GNU General Public License v3.0 5 votes
def make_df_hist(self, store_deltas=True):
        col1 = self.cache_timestamp.strftime("%c")
        col2 = "No. of Applications"

        credit_days = self.df["Credit days"].str.split()
        df_tmp = credit_days.apply(pd.Series)
        df_tmp.columns = [col1, col2]

        # Format and change dtype
        series1 = df_tmp[col1].apply(pd.to_numeric)
        series2 = df_tmp[col2].str.lstrip("(").str.rstrip("st)").apply(pd.to_numeric)
        if self.df_hist is None:
            self.df_hist = pd.DataFrame({col2: series2, "Start": series1})
        else:
            self.df_hist = self.df_hist.T.dropna().T
            if store_deltas:
                self.df_hist[col2] = series2
                delta = series1 - self.df_hist.T.sum() + series2
                self.df_hist[col1] = delta
                if all(delta == 0):
                    return False
            else:
                self.df_hist[col1] = series1
                self.df_hist[col2] = series2

        return True 
Example 56
Project: car-park-prediction   Author: codeformuenster   File: utils_features.py    MIT License 5 votes
def engineer_features(df):
    """Feature engineering for regression."""
    # engineer simple features
    df['datetime'] = pd.to_datetime(df.timestamp)  # TODO: can be removed?
    df['cap'] = pd.to_numeric(df.free)
    df['time'] = df.datetime.dt.time  # time of the day
    df['date'] = df.datetime.dt.date  # date
    df['year'] = df.datetime.dt.year  # year
    df['month'] = df.datetime.dt.month  # month number
    df['weekday'] = df.datetime.dt.weekday  # weekday number
    df['weekend'] = (df.datetime.dt.weekday > 5).astype(int)  # weekend flag
    df['hour'] = df.datetime.dt.hour  # hour
    df['minute'] = df.datetime.dt.minute  # minute

    # lag features
    # TODO # 30 minutes ago (interpolated)  # TODO: speed up interpolation

    # engineer external features
    # TODO # 'bank holiday North-Rhine Westfalia'
    # TODO # 'bank holiday Niedersachsen'
    # TODO # 'bank holiday Netherlands'
    # TODO # avg. temperature of day
    # TODO # rain probability of day
    # TODO # X coordinate car park
    # TODO # Y coordinate car park
    # TODO # Send
    # TODO # Events from event API?
    # TODO # football match?
    # update database

    return df 
Example 57
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_empty(self):
        # see gh-16302
        s = pd.Series([], dtype=object)

        res = to_numeric(s)
        expected = pd.Series([], dtype=np.int64)

        tm.assert_series_equal(res, expected)

        # Original issue example
        res = to_numeric(s, errors='coerce', downcast='integer')
        expected = pd.Series([], dtype=np.int8)

        tm.assert_series_equal(res, expected) 
Example 58
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_series(self):
        s = pd.Series(['1', '-3.14', '7'])
        res = to_numeric(s)
        expected = pd.Series([1, -3.14, 7])
        tm.assert_series_equal(res, expected)

        s = pd.Series(['1', '-3.14', 7])
        res = to_numeric(s)
        tm.assert_series_equal(res, expected) 
Example 59
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_series_numeric(self):
        s = pd.Series([1, 3, 4, 5], index=list('ABCD'), name='XXX')
        res = to_numeric(s)
        tm.assert_series_equal(res, s)

        s = pd.Series([1., 3., 4., 5.], index=list('ABCD'), name='XXX')
        res = to_numeric(s)
        tm.assert_series_equal(res, s)

        # bool is regarded as numeric
        s = pd.Series([True, False, True, True],
                      index=list('ABCD'), name='XXX')
        res = to_numeric(s)
        tm.assert_series_equal(res, s) 
Example 60
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_error_seen_bool(self):
        s = pd.Series([True, False, 'apple'])
        msg = 'Unable to parse string "apple" at position 2'
        with pytest.raises(ValueError, match=msg):
            to_numeric(s, errors='raise')

        res = to_numeric(s, errors='ignore')
        expected = pd.Series([True, False, 'apple'])
        tm.assert_series_equal(res, expected)

        # coerces to float
        res = to_numeric(s, errors='coerce')
        expected = pd.Series([1., 0., np.nan])
        tm.assert_series_equal(res, expected) 
Example 61
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_list_numeric(self):
        s = [1, 3, 4, 5]
        res = to_numeric(s)
        tm.assert_numpy_array_equal(res, np.array(s, dtype=np.int64))

        s = [1., 3., 4., 5.]
        res = to_numeric(s)
        tm.assert_numpy_array_equal(res, np.array(s))

        # bool is regarded as numeric
        s = [True, False, True, True]
        res = to_numeric(s)
        tm.assert_numpy_array_equal(res, np.array(s)) 
Example 62
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_numeric(self):
        s = pd.Series([1, -3.14, 7], dtype='O')
        res = to_numeric(s)
        expected = pd.Series([1, -3.14, 7])
        tm.assert_series_equal(res, expected)

        s = pd.Series([1, -3.14, 7])
        res = to_numeric(s)
        tm.assert_series_equal(res, expected)

        # GH 14827
        df = pd.DataFrame(dict(
            a=[1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'],
            b=[1.0, 2.0, 3.0, 4.0],
        ))
        expected = pd.DataFrame(dict(
            a=[1.2, 3.14, np.inf, 0.1],
            b=[1.0, 2.0, 3.0, 4.0],
        ))

        # Test to_numeric over one column
        df_copy = df.copy()
        df_copy['a'] = df_copy['a'].apply(to_numeric)
        tm.assert_frame_equal(df_copy, expected)

        # Test to_numeric over multiple columns
        df_copy = df.copy()
        df_copy[['a', 'b']] = df_copy[['a', 'b']].apply(to_numeric)
        tm.assert_frame_equal(df_copy, expected) 
Example 63
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_all_nan(self):
        s = pd.Series(['a', 'b', 'c'])
        res = to_numeric(s, errors='coerce')
        expected = pd.Series([np.nan, np.nan, np.nan])
        tm.assert_series_equal(res, expected) 
Example 64
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_type_check(self, errors):
        # see gh-11776
        df = pd.DataFrame({"a": [1, -3.14, 7], "b": ["4", "5", "6"]})
        kwargs = dict(errors=errors) if errors is not None else dict()
        error_ctx = pytest.raises(TypeError, match="1-d array")

        with error_ctx:
            to_numeric(df, **kwargs) 
Example 65
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_datetime_like(self, tz_naive_fixture):
        idx = pd.date_range("20130101", periods=3,
                            tz=tz_naive_fixture, name="xxx")
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, pd.Index(idx.asi8, name="xxx"))

        res = pd.to_numeric(pd.Series(idx, name="xxx"))
        tm.assert_series_equal(res, pd.Series(idx.asi8, name="xxx"))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, idx.asi8) 
Example 66
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_timedelta(self):
        idx = pd.timedelta_range('1 days', periods=3, freq='D', name='xxx')
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))

        res = pd.to_numeric(pd.Series(idx, name='xxx'))
        tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx'))

        res = pd.to_numeric(idx.values)
        tm.assert_numpy_array_equal(res, idx.asi8) 
Example 67
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_period(self):
        idx = pd.period_range('2011-01', periods=3, freq='M', name='xxx')
        res = pd.to_numeric(idx)
        tm.assert_index_equal(res, pd.Index(idx.asi8, name='xxx'))

        # TODO: enable when we can support native PeriodDtype
        # res = pd.to_numeric(pd.Series(idx, name='xxx'))
        # tm.assert_series_equal(res, pd.Series(idx.asi8, name='xxx')) 
Example 68
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_downcast_basic(self, data):
        # see gh-13352
        invalid_downcast = "unsigned-integer"
        msg = "invalid downcasting method provided"

        with pytest.raises(ValueError, match=msg):
            pd.to_numeric(data, downcast=invalid_downcast)

        expected = np.array([1, 2, 3], dtype=np.int64)

        # Basic function tests.
        res = pd.to_numeric(data)
        tm.assert_numpy_array_equal(res, expected)

        res = pd.to_numeric(data, downcast=None)
        tm.assert_numpy_array_equal(res, expected)

        # Basic dtype support.
        smallest_uint_dtype = np.dtype(np.typecodes["UnsignedInteger"][0])

        # Support below np.float32 is rare and far between.
        float_32_char = np.dtype(np.float32).char
        smallest_float_dtype = float_32_char

        expected = np.array([1, 2, 3], dtype=smallest_uint_dtype)
        res = pd.to_numeric(data, downcast="unsigned")
        tm.assert_numpy_array_equal(res, expected)

        expected = np.array([1, 2, 3], dtype=smallest_float_dtype)
        res = pd.to_numeric(data, downcast="float")
        tm.assert_numpy_array_equal(res, expected) 
Example 69
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_signed_downcast(self, data, signed_downcast):
        # see gh-13352
        smallest_int_dtype = np.dtype(np.typecodes["Integer"][0])
        expected = np.array([1, 2, 3], dtype=smallest_int_dtype)

        res = pd.to_numeric(data, downcast=signed_downcast)
        tm.assert_numpy_array_equal(res, expected) 
Example 70
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_ignore_downcast_invalid_data(self):
        # If we can't successfully cast the given
        # data to a numeric dtype, do not bother
        # with the downcast parameter.
        data = ["foo", 2, 3]
        expected = np.array(data, dtype=object)

        res = pd.to_numeric(data, errors="ignore",
                            downcast="unsigned")
        tm.assert_numpy_array_equal(res, expected) 
Example 71
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_ignore_downcast_neg_to_unsigned(self):
        # Cannot cast to an unsigned integer
        # because we have a negative number.
        data = ["-1", 2, 3]
        expected = np.array([-1, 2, 3], dtype=np.int64)

        res = pd.to_numeric(data, downcast="unsigned")
        tm.assert_numpy_array_equal(res, expected) 
Example 72
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_ignore_downcast_cannot_convert_float(
            self, data, expected, downcast):
        # Cannot cast to an integer (signed or unsigned)
        # because we have a float number.
        res = pd.to_numeric(data, downcast=downcast)
        tm.assert_numpy_array_equal(res, expected) 
Example 73
Project: recruit   Author: Frank-qlu   File: test_numeric.py    Apache License 2.0 5 votes
def test_downcast_limits(self, dtype, downcast, min_max):
        # see gh-14404: test the limits of each downcast.
        series = pd.to_numeric(pd.Series(min_max), downcast=downcast)
        assert series.dtype == dtype 
Example 74
Project: ESA   Author: mzy2240   File: test_saw.py    MIT License 5 votes
def test_change_gen_voltage_set_points(self):
        """Set all generator voltages to 1, and ensure the command
        sticks.
        """
        # https://www.powerworld.com/WebHelp/#MainDocumentation_HTML/ChangeParametersMultipleElement_Sample_Code_Python.htm%3FTocPath%3DAutomation%2520Server%2520Add-On%2520(SimAuto)%7CAutomation%2520Server%2520Functions%7C_____9
        # Start by converting our generator data to a list of lists.
        value_list = self.gen_v_pu.values.tolist()

        # Loop over the values, set to 1.
        # noinspection PyTypeChecker
        for v in value_list:
            # Set voltage at 1.
            v[-1] = 1.0

        # Send in the command.
        # noinspection PyTypeChecker
        result = saw_14.ChangeParametersMultipleElement(
            ObjectType='gen', ParamList=self.params, ValueList=value_list)

        self.assertIsNone(result)

        # Check results.
        gen_v = saw_14.GetParametersMultipleElement(
            ObjectType='gen', ParamList=self.params)

        # Our present results should not be the same as the original.
        try:
            pd.testing.assert_frame_equal(gen_v, self.gen_v_pu)
        except AssertionError:
            # Frames are not equal. Success.
            pass
        else:
            self.fail('DataFrames are equal, but they should not be.')

        # Our current results should have all 1's for the GenRegPUVolt
        # column.
        # actual = pd.to_numeric(gen_v['GenRegPUVolt']).values
        actual = pd.to_numeric(gen_v['GenVoltSet']).values
        expected = np.array([1.0] * actual.shape[0])

        np.testing.assert_array_equal(actual, expected) 
Example 75
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License 4 votes
def generate_combined_bed_file(self):
        # execute read conversion in parallel
        print("** Converting reads to bed format for {} libraries...".format(
            len(self._exp_lib_list)), flush=True)
        exp_lib_dict = {lib_name: self._lib_dict[lib_name] for lib_name in
                        self._exp_lib_list}
        t_start = time()
        with futures.ProcessPoolExecutor(
                max_workers=self._max_proc) as executor:
            future_to_lib_name = {
                executor.submit(lib.merge_reads):
                lib.lib_name for lib in exp_lib_dict.values()}
            for future in futures.as_completed(future_to_lib_name):
                lib_name = future_to_lib_name[future]
                try:
                    self._lib_dict[lib_name].replicon_dict = future.result()
                except Exception as exc:
                    print("{} generated an exception: {}".format(lib_name, exc),
                          flush=True)
        for replicon in sorted(self._replicon_dict):
            self._replicon_dict[replicon]["reads"] = pd.Series()
            for lib_name, lib in exp_lib_dict.items():
                self._replicon_dict[replicon]["reads"] = self._replicon_dict[
                    replicon]["reads"].add(lib.replicon_dict[replicon][
                        "reads"], fill_value=0)
            self._replicon_dict[replicon]["reads"] = self._replicon_dict[
                replicon]["reads"].reset_index(name="count")
            split_index = pd.DataFrame(list(self._replicon_dict[replicon][
                "reads"]["index"].str.split(',')), columns=[
                    "start", "end", "strand"])
            split_index.loc[:, ["start", "end"]] = split_index.loc[
                :, ["start", "end"]].apply(pd.to_numeric)
            del self._replicon_dict[replicon]["reads"]["index"]
            self._replicon_dict[replicon]["reads"] = split_index.join(
                self._replicon_dict[replicon]["reads"]).sort_values(
                    ["strand", "start", "end"], ascending=[False, True, True])
            self._replicon_dict[replicon]["reads"]["replicon"] = replicon
            self._replicon_dict[replicon]["reads"]["tag_id"] = (
                self._replicon_dict[replicon]["reads"].index + 1).map(
                'tag_{:.0f}'.format)
            self._replicon_dict[replicon]["reads"] = self._replicon_dict[
                replicon]["reads"].loc[:,
                                       ["replicon",
                                        "start",
                                        "end",
                                        "tag_id",
                                        "count",
                                        "strand"]]
            # create blockbuster input folder if it does not exist
            self._blockbuster_input_folder = "{}/blockbuster_input".format(
                    self._output_folder)
            if not exists(self._blockbuster_input_folder):
                makedirs(self._blockbuster_input_folder)
            self._replicon_dict[replicon]["reads"].to_csv(
                "{}/{}_sorted_reads_for_blockbuster.bed".format(
                    self._blockbuster_input_folder, replicon),
                sep='\t', header=False, index=False, encoding='utf-8')
        t_end = time()
        print("Reads converted to bed format in {} seconds.\n".format(
            t_end-t_start), flush=True) 
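The pd.to_numeric step above converts the split start/end strings column-wise via DataFrame.apply. A minimal sketch with invented coordinates:

import pandas as pd

# Stand-in for split_index: coordinates arrive as strings after str.split.
split_index = pd.DataFrame({"start": ["10", "200"],
                            "end": ["50", "240"],
                            "strand": ["+", "-"]})

# apply runs pd.to_numeric once per selected column.
converted = split_index[["start", "end"]].apply(pd.to_numeric)
print(converted.dtypes)  # start and end are now int64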
Example 76
Project: airqdata   Author: dr-1   File: luftdaten.py    GNU General Public License v3.0 4 votes
def get_metadata(self, **retrieval_kwargs):
        """Get sensor metadata and current measurements from cache or
        luftdaten.info API.

        Args:
            retrieval_kwargs: keyword arguments to pass to retrieve
                function

        Warns:
            UserWarning if sensor does not appear to be online
        """

        # Get and cache metadata and measurements of past five minutes
        filename = os.path.basename(self.metadata_url.rstrip("/")) + ".json"
        filepath = os.path.join(cache_dir, filename)
        parsed = retrieve(cache_file=filepath,
                          url=self.metadata_url,
                          label=("sensor {} metadata from luftdaten.info"
                                 .format(self.sensor_id)),
                          call_rate_limiter=call_rate_limiter,
                          **retrieval_kwargs)

        try:
            metadata = (parsed
                        .drop(columns=["sensordatavalues", "timestamp"])
                        .iloc[0])
        except (ValueError, AttributeError):
            warnings.warn("Sensor metadata could not be retrieved")
        else:
            metadata.name = "metadata"
            self.metadata = metadata

            # Extract metadata into corresponding properties
            self.sensor_type = metadata["sensor.sensor_type.name"]
            self.lat = float(metadata["location.latitude"])
            self.lon = float(metadata["location.longitude"])
            self.label = "at " + utils.label_coordinates(self.lat, self.lon)

            # Extract most current measurements
            current = parsed["sensordatavalues"].iloc[-1]
            current = (json_normalize(current)
                       .replace({"P1": "pm10", "P2": "pm2.5"})
                       .set_index("value_type")["value"])
            current = (pd.to_numeric(current)
                       .replace([999.9, 1999.9], pd.np.nan))
            self.current_measurements = dict(current)
            self.phenomena = list(current.index)
            self.units = {phenomenon: UNITS[phenomenon]
                          for phenomenon in UNITS
                          if phenomenon in self.phenomena} 
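The measurement cleanup at the end is the relevant pd.to_numeric step: string readings are converted and the sensors' error sentinels (999.9 and 1999.9) become NaN. A sketch with invented readings:

import numpy as np
import pandas as pd

# Stand-in for the indexed sensordatavalues: readings arrive as strings.
current = pd.Series({"pm10": "999.9", "pm2.5": "12.3"})

# Convert to floats, then map the error sentinels to NaN.
current = pd.to_numeric(current).replace([999.9, 1999.9], np.nan)
print(current)  # pm10 becomes NaN, pm2.5 stays 12.3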
Example 77
Project: airqdata   Author: dr-1   File: luftdaten.py    GNU General Public License v3.0 4 votes
def search_proximity(lat=50.848, lon=4.351, radius=8):
    """Find sensors within given radius from a location.

    Args:
        lat: latitude of the center of search, in decimal degrees
        lon: longitude of the center of search, in decimal degrees
        radius: maximum distance from center, in kilometers

    Default values are the approximate center and radius of Brussels.

    Returns:
        Dataframe of matching sensors, listing sensor types, locations
        and distances in kilometers from the search center, indexed by
        sensor ID

    Raises:
        requests.HTTPError if request failed
    """
    url = (API_ENDPOINTS["proximity search pattern"]
           .format(lat=lat, lon=lon, radius=radius))
    call_rate_limiter()
    response = requests.get(url)
    response.raise_for_status()
    sensors = json_normalize(response.json())
    if len(sensors) == 0:
        sensors = pd.DataFrame(columns=["sensor_type", "latitude", "longitude",
                                        "distance"])
        sensors.index.name = "sensor_id"
        return sensors
    sensors = (sensors[["sensor.id", "sensor.sensor_type.name",
                        "location.latitude", "location.longitude"]]
               .rename(columns={"sensor.id": "sensor_id",
                                "sensor.sensor_type.name": "sensor_type",
                                "location.latitude": "latitude",
                                "location.longitude": "longitude"}))
    for col in "latitude", "longitude":
        sensors[col] = pd.to_numeric(sensors[col], downcast="float")
    sensors.set_index("sensor_id", inplace=True)

    # Drop duplicates - sensors appear once for each measurement in past 5 mins
    sensors = sensors[~sensors.index.duplicated()]

    # Calculate distances from search center and sort by those distances
    sensors["distance"] = sensors.apply(lambda x:
                                        utils.haversine(lat, lon,
                                                        float(x["latitude"]),
                                                        float(x["longitude"])),
                                        axis=1)
    sensors.sort_values("distance", inplace=True)

    return sensors 
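downcast="float" is used here purely to save memory: the coordinates fit comfortably in float32. A one-line sketch with invented values:

import pandas as pd

# Coordinate strings parse to float64; downcast="float" shrinks the result
# to float32, halving memory at the cost of some precision.
lat = pd.Series(["50.8503", "50.8467"])
print(pd.to_numeric(lat, downcast="float").dtype)  # float32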
Example 78
Project: scicast   Author: iandriver   File: cluster.py    MIT License 4 votes
def log2_outlierfilter(df_by_cell, plot=False, already_log2=False):
    if not already_log2:
        log2_df = np.log2(df_by_cell+1)

    else:
        log2_df = df_by_cell
    top_log2 = find_top_common_genes(log2_df)
    if all(top_log2) != 0:
        log2_df2 = log2_df.apply(pd.to_numeric, errors='coerce')
        log_mean = top_log2.mean(axis=0).sort_values(ascending=False)
        log2_sorted = top_log2.reindex_axis(top_log2.mean(axis=0).sort_values(ascending=False).index, axis=1)
        xticks = []
        keep_col = []
        log2_cutoff = np.average(np.average(log2_sorted)) - 2 * np.average(np.std(log2_sorted))
        for col, m in zip(log2_sorted.columns.tolist(),log2_sorted.mean()):
            if m > log2_cutoff:
                keep_col.append(col)
                xticks.append(col+' '+str("%.2f" % m))
        excluded_cells = [x for x in log2_sorted.columns.tolist() if x not in keep_col]
        filtered_df_by_cell = df_by_cell[keep_col]
        filtered_df_by_gene = filtered_df_by_cell.transpose()
        if not already_log2:
            filtered_log2 = np.log2(filtered_df_by_cell[filtered_df_by_cell>0])
        else:
            filtered_log2 = filtered_df_by_cell[filtered_df_by_cell>0]
        if plot:
            ax = sns.boxplot(data=filtered_log2, whis=.75, notch=True)
            ax = sns.stripplot(x=filtered_log2.columns.values, y=filtered_log2.mean(axis=0), size=4, jitter=True, edgecolor="gray")
            xtickNames = plt.setp(ax, xticklabels=xticks)
            plt.setp(xtickNames, rotation=90, fontsize=9)
            plt.show()
            plt.clf()
            sns.distplot(filtered_log2.mean())
            plt.show()
        if not already_log2:
            log2_expdf_cell = np.log2(filtered_df_by_cell+1)
        else:
            log2_expdf_cell = filtered_df_by_cell
        log2_expdf_gene = log2_expdf_cell.transpose()
        return log2_expdf_cell, log2_expdf_gene
    else:
        print("no common genes found")
        return log2_df, log2_df.transpose() 
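The errors='coerce' call near the top of the function is what makes the filter robust to stray non-numeric cells. A sketch with a made-up miniature matrix:

import pandas as pd

# Stand-in for a small expression matrix with a few corrupt entries.
df = pd.DataFrame({"cell_1": ["1.5", "bad", "3.0"],
                   "cell_2": ["2.0", "4.0", "x"]})

# errors="coerce" turns unparseable cells into NaN instead of raising,
# so column means can still be computed (NaN is skipped by default).
numeric_df = df.apply(pd.to_numeric, errors="coerce")
print(numeric_df.mean(axis=0))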
Example 79
Project: recruit   Author: Frank-qlu   File: categorical.py    Apache License 2.0 4 votes
def _from_inferred_categories(cls, inferred_categories, inferred_codes,
                                  dtype, true_values=None):
        """
        Construct a Categorical from inferred values.

        For inferred categories (`dtype` is None) the categories are sorted.
        For explicit `dtype`, the `inferred_categories` are cast to the
        appropriate type.

        Parameters
        ----------
        inferred_categories : Index
        inferred_codes : Index
        dtype : CategoricalDtype or 'category'
        true_values : list, optional
            If none are provided, the default ones are
            "True", "TRUE", and "true."

        Returns
        -------
        Categorical
        """
        from pandas import Index, to_numeric, to_datetime, to_timedelta

        cats = Index(inferred_categories)
        known_categories = (isinstance(dtype, CategoricalDtype) and
                            dtype.categories is not None)

        if known_categories:
            # Convert to a specialized type with `dtype` if specified.
            if dtype.categories.is_numeric():
                cats = to_numeric(inferred_categories, errors="coerce")
            elif is_datetime64_dtype(dtype.categories):
                cats = to_datetime(inferred_categories, errors="coerce")
            elif is_timedelta64_dtype(dtype.categories):
                cats = to_timedelta(inferred_categories, errors="coerce")
            elif dtype.categories.is_boolean():
                if true_values is None:
                    true_values = ["True", "TRUE", "true"]

                cats = cats.isin(true_values)

        if known_categories:
            # Recode from observation order to dtype.categories order.
            categories = dtype.categories
            codes = _recode_for_categories(inferred_codes, cats, categories)
        elif not cats.is_monotonic_increasing:
            # Sort categories and recode for unknown categories.
            unsorted = cats.copy()
            categories = cats.sort_values()

            codes = _recode_for_categories(inferred_codes, unsorted,
                                           categories)
            dtype = CategoricalDtype(categories, ordered=False)
        else:
            dtype = CategoricalDtype(cats, ordered=False)
            codes = inferred_codes

        return cls(codes, dtype=dtype, fastpath=True) 
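A sketch of the numeric-categories branch, with an invented index of inferred category strings:

import pandas as pd

# errors="coerce" maps unparseable category labels to NaN.
inferred_categories = pd.Index(["1", "2", "bad"])
print(pd.to_numeric(inferred_categories, errors="coerce"))
# -> a float index holding [1.0, 2.0, nan]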
Example 80
Project: recruit   Author: Frank-qlu   File: cast.py    Apache License 2.0 4 votes
def soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                         coerce=False, copy=True):
    """ if we have an object dtype, try to coerce dates and/or numbers """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        # GH 20380, when datetime is beyond year 2262, hence outside
        # bound of nanosecond-resolution 64-bit integers.
        try:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=datetime)
        except OutOfBoundsDatetime:
            pass

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(),
                                                  coerce_numeric=True)
            # If everything is NaN, do not alter the input
            values = converted if not isna(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            pass

    return values
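A sketch of the numeric coercion branch (reached with coerce=True and only numeric enabled), with invented object values:

import numpy as np
import pandas as pd

# Object values go straight to to_numeric with errors="coerce":
# parseable entries become floats, everything else becomes NaN.
values = np.array(["1", "2.5", "apple"], dtype=object)
print(pd.to_numeric(values, errors="coerce"))  # [1.  2.5  nan]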