Python pandas.to_numeric() Examples

The following are 30 code examples of pandas.to_numeric(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: toolbox.py    From xalpha with MIT License 7 votes vote down vote up
def __init__(self, code, start=None, end=None):
        """
        Load the daily "teb-" series for an index, derive columns, then fit.

        :param code: str. Index code, e.g. SH000016
        :param start: passed through unchanged to ``xu.get_daily``
        :param end: passed through unchanged to ``xu.get_daily``
        """
        frame = xu.get_daily("teb-" + code, start=start, end=end)
        # Both raw columns must be numeric before any arithmetic is done.
        for raw in ("e", "b"):
            frame[raw] = pd.to_numeric(frame[raw])
        frame["lnb"] = np.log(frame["b"])
        frame["lne"] = np.log(frame["e"])
        frame["roe"] = frame["e"] / frame["b"] * 100
        # Whole days elapsed since the date in the first row.
        elapsed = frame["date"] - frame["date"].iloc[0]
        frame["date_count"] = elapsed.apply(lambda delta: int(delta.days))
        self.df = frame
        self.fit(verbose=False)
Example #2
Source File: universal.py    From xalpha with MIT License 6 votes vote down vote up
def __init__(
        self, code, name=None, start=None, end=None, rate=0, col="close", **kws
    ):
        """
        Build the price table for ``code`` and store trading settings.

        :param code: identifier passed to ``get_rt`` and ``get_daily``.
        :param name: display name; when falsy it is looked up via
            ``get_rt(code)["name"]`` and falls back to ``code`` on failure.
        :param start: start of the daily data; None is one year ago.
        :param end: end of the daily data; None is yesterday.
        :param rate: stored unchanged on ``self.rate``.
        :param col: column of the daily data used as the value series.
        :param kws: optional ``round_label`` (default 0), ``dividend_label``
            (default 0) and ``value_label`` (default 1).
        """
        if not name:
            try:
                name = get_rt(code)["name"]
            except Exception:
                # Best-effort lookup: any ordinary failure falls back to the
                # code itself.  ``Exception`` (not a bare ``except``) so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                name = code
        self.name = name
        self.code = code
        self.start = start  # None is one year ago
        self.end = end  # None is yesterday
        df = get_daily(code, start=start, end=end)
        df[col] = pd.to_numeric(df[col])  # in case the col is not float
        df["totvalue"] = df[col]
        # Value relative to the first row of the series.
        df["netvalue"] = df[col] / df.iloc[0][col]
        self.price = df
        self.round_label = kws.get("round_label", 0)
        self.dividend_label = kws.get("dividend_label", 0)
        self.value_label = kws.get("value_label", 1)  # redeem by amount by default
        self.specialdate = []
        self.fenhongdate = []
        self.zhesuandate = []
        self.rate = rate
Example #3
Source File: pricing.py    From thewarden with MIT License 6 votes vote down vote up
def df_fx(self, currency, fx_provider):
        """Return the price frame with ``fx_close`` and ``close_converted``.

        For 'USD' the columns are added to ``self.df`` in place (rate fixed
        at 1) and ``self.df`` is returned.  For any other currency the FX
        prices are loaded through ``PriceData(currency, fx_provider)``,
        inner-joined with ``self.df`` on 'date', and the merged frame is
        returned.  Any exception is appended to ``self.errors`` and None is
        returned instead.
        """
        try:
            if currency == 'USD':
                # Prices are all in USD already - no conversion is needed.
                self.df['fx_close'] = 1
                self.df['close_converted'] = self.df['close'].astype(float)
                return self.df

            # Load the FX series; unparseable rates become NaN.
            fx_prices = PriceData(currency, fx_provider)
            fx_prices.df = fx_prices.df.rename(columns={'close': 'fx_close'})
            fx_prices.df["fx_close"] = pd.to_numeric(
                fx_prices.df.fx_close, errors='coerce')

            # Keep only the dates present in both frames.
            merged = pd.merge(self.df, fx_prices.df, on='date', how='inner')
            merged['close'] = merged['close'].astype(float)
            merged['close_converted'] = merged['close'] * merged['fx_close']
            return merged
        except Exception as exc:
            self.errors.append(exc)
            return None
Example #4
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_error(self):
        """Check the three ``errors`` modes on a Series holding a bad string."""
        mixed = pd.Series([1, -3.14, 'apple'])

        # 'raise' reports the offending string and its position.
        with pytest.raises(
                ValueError,
                match='Unable to parse string "apple" at position 2'):
            to_numeric(mixed, errors='raise')

        # 'ignore' hands the input back unchanged.
        untouched = to_numeric(mixed, errors='ignore')
        tm.assert_series_equal(untouched, pd.Series([1, -3.14, 'apple']))

        # 'coerce' replaces the unparseable entry with NaN.
        coerced = to_numeric(mixed, errors='coerce')
        tm.assert_series_equal(coerced, pd.Series([1, -3.14, np.nan]))

        # The first bad entry is the one reported.
        bad_first = pd.Series(['orange', 1, -3.14, 'apple'])
        with pytest.raises(
                ValueError,
                match='Unable to parse string "orange" at position 0'):
            to_numeric(bad_first, errors='raise')
Example #5
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_numeric_lists_and_arrays(self):
        """Apply ``to_numeric`` cell by cell when cells hold lists or arrays."""
        # Cells: a list containing a Decimal, a lone Decimal, a plain float.
        with_list = pd.DataFrame({
            'a': [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
        })
        with_list['a'] = with_list['a'].apply(to_numeric)
        tm.assert_frame_equal(
            with_list,
            pd.DataFrame({'a': [[3.14, 1.0], 1.6, 0.1]}),
        )

        # Same idea with a NumPy array as the container cell.
        with_array = pd.DataFrame({
            'a': [np.array([decimal.Decimal(3.14), 1.0]), 0.1],
        })
        with_array['a'] = with_array['a'].apply(to_numeric)
        tm.assert_frame_equal(
            with_array,
            pd.DataFrame({'a': [[3.14, 1.0], 0.1]}),
        )
Example #6
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_numeric_dtypes(self):
        """Numeric Index, Series and ndarray inputs come back unchanged."""
        int_index = pd.Index([1, 2, 3], name='xxx')
        float_index = pd.Index([1., np.nan, 3., np.nan], name='xxx')

        for index in (int_index, float_index):
            # Index in, equal Index out.
            tm.assert_index_equal(pd.to_numeric(index), index)

            # Series in, equal Series out.
            from_series = pd.to_numeric(pd.Series(index, name='xxx'))
            tm.assert_series_equal(from_series, pd.Series(index, name='xxx'))

            # Raw ndarray in, equal ndarray out.
            tm.assert_numpy_array_equal(
                pd.to_numeric(index.values), index.values)
Example #7
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_str(self):
        """Numeric strings parse to int64 or float for Index/Series/ndarray."""
        cases = (
            (['1', '2', '3'], np.array([1, 2, 3], dtype='int64')),
            (['1.5', '2.7', '3.4'], np.array([1.5, 2.7, 3.4])),
        )
        for texts, parsed in cases:
            index = pd.Index(texts, name='xxx')

            tm.assert_index_equal(
                pd.to_numeric(index), pd.Index(parsed, name='xxx'))

            from_series = pd.to_numeric(pd.Series(index, name='xxx'))
            tm.assert_series_equal(from_series, pd.Series(parsed, name='xxx'))

            tm.assert_numpy_array_equal(pd.to_numeric(index.values), parsed)
Example #8
Source File: test_numeric.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_coerce_uint64_conflict(self):
        """Coercion with uint64-sized values and NaN yields a float result."""
        # see gh-17007 and gh-17125
        #
        # Still returns float despite the uint64-nan conflict,
        # which would normally force the casting to object.
        frame = pd.DataFrame(
            {"a": [200, 300, "", "NaN", 30000000000000000000]})
        coerced = to_numeric(frame["a"], errors="coerce")
        tm.assert_series_equal(
            coerced,
            pd.Series([200, 300, np.nan, np.nan, 30000000000000000000],
                      dtype=float, name="a"),
        )

        texts = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
        coerced = to_numeric(texts, errors="coerce")
        tm.assert_series_equal(
            coerced,
            pd.Series([12345678901234567890, 1234567890, np.nan],
                      dtype=float),
        )

        # For completeness, check against "ignore" and "raise"
        tm.assert_series_equal(to_numeric(texts, errors="ignore"), texts)

        with pytest.raises(ValueError, match="Unable to parse string"):
            to_numeric(texts, errors="raise")
Example #9
Source File: test_protobuf.py    From pygraphistry with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_metadata_no_nan(self, mock_etl2, mock_open):
        """Plot edges whose extra columns mix NaN with typed values.

        The checks on the uploaded dataset are currently disabled (see the
        commented-out block at the end); only the plot call is exercised.
        """
        edges = triangleEdges.copy()
        # An all-NaN column, passed through to_numeric.
        edges['testNone'] = triangleNodes.a1.map(lambda x: numpy.nan)
        edges['testNone'] = pandas.to_numeric(edges.testNone, errors='ignore')

        # Odd ``a1`` values become NaN; even ones get a filler of each type.
        fillers = (
            ('testInt', 0),
            ('testFloat', 0.5),
            ('testString', 'foo'),
            ('testBool', True),
        )
        for column, filler in fillers:
            # ``filler=filler`` binds the current value to each lambda.
            edges[column] = triangleNodes.a1.map(
                lambda x, filler=filler: numpy.nan if x%2 == 1 else filler)

        graphistry.bind(source='src', destination='dst', node='id').plot(edges)
        # First positional argument of the recorded mock_etl2 call.
        dataset = mock_etl2.call_args[0][0]

        #for attrib in ['testInt', 'testFloat', 'testString', 'testBool', 'testNone']:
        #    for entry in list(dataset['attributes']['edges'][attrib]['aggregations'].values()):
        #        if entry is None or isinstance(entry, str):
        #            pass
        #        else:
        #            self.assertFalse(numpy.isnan(entry))
Example #10
Source File: plotter.py    From pygraphistry with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _sanitize_dataset(self, edges, nodes, nodeid):
        """Return cleaned ``(edge_list, node_list)`` frames.

        Edges lose rows with a missing source or destination.  When ``nodes``
        is None a node table is built from the unique edge endpoints under
        the column ``nodeid``; otherwise the given table is validated.  Nodes
        lose rows with a missing or duplicated ``nodeid``.  In both frames
        object-dtype columns are passed through ``pandas.to_numeric`` with
        ``errors='ignore'``.
        """
        self._check_bound_attribs(edges, ['source', 'destination'], 'Edge')
        endpoints = [self._source, self._destination]
        elist = edges.reset_index(drop=True).dropna(subset=endpoints)

        edge_objects = elist.select_dtypes(include=[numpy.object_])
        elist[edge_objects.columns] = edge_objects.apply(
            pandas.to_numeric, errors='ignore')

        if nodes is not None:
            self._check_bound_attribs(nodes, ['node'], 'Vertex')
        else:
            # No node table supplied: derive one from the edge endpoints.
            endpoint_ids = pandas.concat(
                [edges[self._source], edges[self._destination]],
                ignore_index=True)
            nodes = pandas.DataFrame()
            nodes[nodeid] = endpoint_ids.drop_duplicates()

        nlist = nodes.reset_index(drop=True)
        nlist = nlist.dropna(subset=[nodeid])
        nlist = nlist.drop_duplicates(subset=[nodeid])

        node_objects = nlist.select_dtypes(include=[numpy.object_])
        nlist[node_objects.columns] = node_objects.apply(
            pandas.to_numeric, errors='ignore')

        return (elist, nlist)
Example #11
Source File: data_loader.py    From PADME with MIT License 6 votes vote down vote up
def get_user_specified_features(df, featurizer, verbose=True):
  """Extract and merge user specified features.

  Merge features included in dataset provided by user
  into final features dataframe

  Three types of featurization here:

    1) Molecule featurization
      -) Smiles string featurization
      -) Rdkit MOL featurization
    2) Complex featurization
      -) PDB files for interacting molecules.
    3) User specified featurizations.

  The columns named by ``featurizer.feature_fields`` are converted to
  numeric dtype in place on ``df`` and returned as an array.

  Args:
    df: DataFrame containing the columns in ``featurizer.feature_fields``.
    featurizer: object exposing a ``feature_fields`` attribute.
    verbose: forwarded to ``log`` together with the timing message.

  Returns:
    The values of the feature columns after numeric conversion.
  """
  time1 = time.time()
  df[featurizer.feature_fields] = df[featurizer.feature_fields].apply(
      pd.to_numeric)
  # DataFrame.as_matrix(columns=...) was removed in pandas 1.0; selecting the
  # columns and taking .values gives the same array on old and new pandas.
  X_shard = df[featurizer.feature_fields].values
  time2 = time.time()
  log("TIMING: user specified processing took %0.3f s" % (time2 - time1),
      verbose)
  return X_shard
Example #12
Source File: test_numeric.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_error(self):
        """Check the three ``errors`` modes on a Series holding a bad string."""
        mixed = pd.Series([1, -3.14, 'apple'])

        # 'raise' reports the offending string and its position.
        with tm.assert_raises_regex(
                ValueError, 'Unable to parse string "apple" at position 2'):
            to_numeric(mixed, errors='raise')

        # 'ignore' hands the input back unchanged.
        untouched = to_numeric(mixed, errors='ignore')
        tm.assert_series_equal(untouched, pd.Series([1, -3.14, 'apple']))

        # 'coerce' replaces the unparseable entry with NaN.
        coerced = to_numeric(mixed, errors='coerce')
        tm.assert_series_equal(coerced, pd.Series([1, -3.14, np.nan]))

        # The first bad entry is the one reported.
        bad_first = pd.Series(['orange', 1, -3.14, 'apple'])
        with tm.assert_raises_regex(
                ValueError, 'Unable to parse string "orange" at position 0'):
            to_numeric(bad_first, errors='raise')
Example #13
Source File: test_numeric.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_numeric_lists_and_arrays(self):
        """Apply ``to_numeric`` cell by cell when cells hold lists or arrays."""
        # Cells: a list containing a Decimal, a lone Decimal, a plain float.
        list_cells = pd.DataFrame({
            'a': [[decimal.Decimal(3.14), 1.0], decimal.Decimal(1.6), 0.1],
        })
        list_cells['a'] = list_cells['a'].apply(to_numeric)
        want = pd.DataFrame({'a': [[3.14, 1.0], 1.6, 0.1]})
        tm.assert_frame_equal(list_cells, want)

        # Same idea with a NumPy array as the container cell.
        array_cells = pd.DataFrame({
            'a': [np.array([decimal.Decimal(3.14), 1.0]), 0.1],
        })
        array_cells['a'] = array_cells['a'].apply(to_numeric)
        want = pd.DataFrame({'a': [[3.14, 1.0], 0.1]})
        tm.assert_frame_equal(array_cells, want)
Example #14
Source File: test_numeric.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_numeric_dtypes(self):
        """Numeric Index, Series and ndarray inputs come back unchanged."""
        samples = (
            pd.Index([1, 2, 3], name='xxx'),
            pd.Index([1., np.nan, 3., np.nan], name='xxx'),
        )
        for sample in samples:
            # Index in, equal Index out.
            tm.assert_index_equal(pd.to_numeric(sample), sample)

            # Series in, equal Series out.
            converted = pd.to_numeric(pd.Series(sample, name='xxx'))
            tm.assert_series_equal(converted, pd.Series(sample, name='xxx'))

            # Raw ndarray in, equal ndarray out.
            converted = pd.to_numeric(sample.values)
            tm.assert_numpy_array_equal(converted, sample.values)
Example #15
Source File: test_numeric.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_str(self):
        """Numeric strings parse to int64 or float for Index/Series/ndarray."""
        scenarios = [
            (['1', '2', '3'], np.array([1, 2, 3], dtype='int64')),
            (['1.5', '2.7', '3.4'], np.array([1.5, 2.7, 3.4])),
        ]
        for raw, want in scenarios:
            labelled = pd.Index(raw, name='xxx')

            got = pd.to_numeric(labelled)
            tm.assert_index_equal(got, pd.Index(want, name='xxx'))

            got = pd.to_numeric(pd.Series(labelled, name='xxx'))
            tm.assert_series_equal(got, pd.Series(want, name='xxx'))

            got = pd.to_numeric(labelled.values)
            tm.assert_numpy_array_equal(got, want)
Example #16
Source File: test_numeric.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_coerce_uint64_conflict(self):
        """Coercion with uint64-sized values and NaN yields a float result."""
        # see gh-17007 and gh-17125
        #
        # Still returns float despite the uint64-nan conflict,
        # which would normally force the casting to object.
        frame = pd.DataFrame(
            {"a": [200, 300, "", "NaN", 30000000000000000000]})
        got = to_numeric(frame["a"], errors="coerce")
        want = pd.Series([200, 300, np.nan, np.nan, 30000000000000000000],
                         dtype=float, name="a")
        tm.assert_series_equal(got, want)

        texts = pd.Series(["12345678901234567890", "1234567890", "ITEM"])
        got = to_numeric(texts, errors="coerce")
        want = pd.Series([12345678901234567890, 1234567890, np.nan],
                         dtype=float)
        tm.assert_series_equal(got, want)

        # For completeness, check against "ignore" and "raise"
        tm.assert_series_equal(to_numeric(texts, errors="ignore"), texts)

        with tm.assert_raises_regex(ValueError, "Unable to parse string"):
            to_numeric(texts, errors="raise")
Example #17
Source File: consensus.py    From Comparative-Annotation-Toolkit with Apache License 2.0 6 votes vote down vote up
def load_metrics_from_db(db_path, tx_mode, aln_mode):
    """
    Loads the alignment metrics for the mRNA/CDS alignments of transMap/AugustusTM/TMR

    The long-form metrics table is pivoted to one row per AlignmentId with one
    column per classifier.  Four coverage/identity columns are converted to
    numeric, and OriginalIntrons is parsed from a comma-separated string into
    a list of ints (empty list when missing).

    :param db_path: path handed to tools.sqlInterface.start_session
    :param tx_mode: second-level key into tools.sqlInterface.tables
    :param aln_mode: first-level key into tools.sqlInterface.tables
    :return: DataFrame of metrics, one row per AlignmentId
    """
    session = tools.sqlInterface.start_session(db_path)
    # try/finally so the session is released even if loading or parsing fails
    try:
        metrics_table = tools.sqlInterface.tables[aln_mode][tx_mode]['metrics']
        metrics_df = tools.sqlInterface.load_metrics(metrics_table, session)
        # unstack flattens the long-form data structure
        metrics_df = metrics_df.set_index(['AlignmentId', 'classifier']).unstack('classifier')
        # keep only the classifier level of the two-level column labels
        metrics_df.columns = [col[1] for col in metrics_df.columns]
        metrics_df = metrics_df.reset_index()
        cols = ['AlnCoverage', 'AlnGoodness', 'AlnIdentity', 'PercentUnknownBases']
        metrics_df[cols] = metrics_df[cols].apply(pd.to_numeric)
        metrics_df['OriginalIntrons'] = metrics_df['OriginalIntrons'].fillna('')
        # ''.split(',') gives [''], so an empty first field means "no values"
        metrics_df['OriginalIntrons'] = [list(map(int, x)) if len(x[0]) > 0 else [] for x in
                                         metrics_df['OriginalIntrons'].str.split(',').tolist()]
        metrics_df['OriginalIntronsPercent'] = metrics_df['OriginalIntrons'].apply(calculate_vector_support, resolve_nan=1)
        return metrics_df
    finally:
        session.close()
Example #18
Source File: vector_to_cube.py    From geocube with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _format_series_data(data_series):
    """
    Convert a series into a rasterizeable format where this function knows how.

    Datetime series become float64 numbers (a warning is logged), category
    series become their integer category codes, and every other series is
    returned untouched.

    Parameters
    ----------
    data_series: :obj:`geopandas.GeoSeries`
        The series to be converted.

    Returns
    -------
    :obj:`geopandas.GeoSeries`: The series that was converted if possible.

    """
    dtype_name = str(data_series.dtype)

    if dtype_name == "category":
        return data_series.cat.codes

    if "datetime" not in dtype_name:
        return data_series

    data_series = pandas.to_numeric(data_series).astype(numpy.float64)
    get_logger().warning(
        f"The series '{data_series.name}' was converted from a date to a number to "
        "rasterize the data. To load the data back in as a date, "
        "use 'pandas.to_datetime()'."
    )
    return data_series
Example #19
Source File: pre_submission.py    From MPContribs with MIT License 5 votes vote down vote up
def run(mpfile):
    """Collect the "*_01_DIV.txt" tables under Data/Figure 4 into one table.

    Each file contributes one column: its second data column plus a
    per-file offset, taken as an absolute value and scaled by
    ``1000.0 / 0.045``.  The result is attached to the first id of
    ``mpfile`` under the name "JV|dark".  With no matching files the table
    passed on is None.
    """
    identifier = mpfile.ids[0]
    xcol, ycol = "V [V]", "J {}°C {} [mA/cm²]"
    header = "\t".join([xcol, ycol])
    # (file-name fragment, offset) pairs; the first fragment found wins.
    offsets = (
        ("fwd_dB_p3", -6.70273000e-11),
        ("rev_dB_p3", 4.49694000e-10),
        ("fwd_dG_p6", -8.90037000e-11),
        ("rev_dG_p6", 8.42196000e-10),
    )
    pattern = os.path.join("Data", "Figure 4", "*_01_DIV.txt")

    full_df = None
    for fn in sorted(glob(pattern)):
        name = os.path.splitext(os.path.basename(fn))[0]
        with open(fn, "r") as f:
            # Prepend a header row so the columns get names.
            body = "\n".join([header, f.read()])

        parsed = read_csv(body, sep="\t")
        numeric = parsed.apply(to_numeric, errors="coerce")
        df = numeric.sort_values(by=[xcol])
        if full_df is None:
            # The first file supplies the shared x column.
            full_df = df[xcol].to_frame()

        offset = next(
            (value for fragment, value in offsets if fragment in name), 0.0
        )

        # The text between the 4th character and "CZnO" fills the first
        # placeholder of the column label.
        temp = name[4:].split("CZnO", 1)[0]
        direction = "fwd" if "fwd" in name else "rev"
        col = ycol.format(temp, direction)
        full_df[col] = (df[ycol] + offset).abs() * 1000.0 / 0.045

    mpfile.add_data_table(identifier, full_df, "JV|dark")
Example #20
Source File: pre_submission.py    From MPContribs with MIT License 5 votes vote down vote up
def get_concentration_functions(composition_table_dict):
    """Build one concentration interpolator per element column.

    ``composition_table_dict`` must provide a "meta" entry and a "data"
    entry; "data" is loaded with ``Table.from_dict``.  Every table column
    not named in ``meta`` is treated as an element whose values are divided
    by 100.

    When ``meta["X"] == "category"`` the returned callable for an element is
    ``f(a, b)``: ``a`` is the X category formatted as "{:06.2f}" and ``b`` is
    the Y value fed to a 1-D interpolation for that category.  Otherwise the
    returned object is a 2-D interpolation over X and Y.

    Returns a ``RecursiveDict`` mapping element name to its interpolator.
    """

    meta = composition_table_dict["meta"]
    composition_table = Table.from_dict(composition_table_dict["data"])
    elements = [col for col in composition_table.columns if col not in meta]
    x = composition_table["X"].values
    y = composition_table["Y"].values
    cats = composition_table["X"].unique()
    concentration, conc, d, y_c, functions = {}, {}, {}, {}, RecursiveDict()

    for el in elements:
        concentration[el] = to_numeric(composition_table[el].values) / 100.0
        conc[el], d[el], y_c[el] = {}, {}, {}

        if meta["X"] == "category":
            for i in cats:
                k = "{:06.2f}".format(float(i))
                # rows belonging to category i
                y_c[el][k] = to_numeric(y[where(x == i)])
                conc[el][k] = to_numeric(concentration[el][where(x == i)])
                d[el][k] = interp1d(y_c[el][k], conc[el][k])

            # el=el binds the current element; a plain closure would see
            # only the last value of the loop variable.
            functions[el] = lambda a, b, el=el: d[el][a](b)

        else:
            # float() on an array with more than one element raises
            # TypeError, so convert element-wise instead.
            # NOTE(review): assumes ``.values`` yields NumPy arrays - confirm.
            functions[el] = interp2d(
                x.astype(float), y.astype(float), concentration[el]
            )

    return functions
Example #21
Source File: process_vehicles.py    From performance_tracker with GNU General Public License v3.0 5 votes vote down vote up
def process_raw_vehicles(df, track):
    """Clean raw vehicle reports and reduce them to per-trip positions.

    Duplicate and non-predictable reports are dropped, coordinates are made
    numeric, and rows are split by ``direction`` (0/90 -> direction_id 0,
    180/270 -> direction_id 1; other values are discarded).  Each half gets
    a ``relative_position`` computed against ``track[0]`` / ``track[1]``.
    Trips are assigned by ``getTrips`` and the returned frame holds only
    datetime (local ISO 8601 string), trip_id, direction_id and
    relative_position.
    """
    dedupe_keys = ["report_time", "latitude", "longitude", "vehicle_id"]
    df = df.drop_duplicates(subset=dedupe_keys)
    df = df[df["predictable"] == True]

    for coord in ("latitude", "longitude"):
        df[coord] = pd.to_numeric(df[coord])
    df = toGDF(df)

    heading = df["direction"]
    dir_0 = df.loc[(heading == 0) | (heading == 90)].assign(direction_id=0)
    dir_1 = df.loc[(heading == 180) | (heading == 270)].assign(direction_id=1)
    dir_0["relative_position"] = findRelativePositions(dir_0, track[0])
    dir_1["relative_position"] = findRelativePositions(dir_1, track[1])
    df = pd.concat([dir_0, dir_1])

    def to_local_iso(stamp):
        # Parse as UTC, then render in Los Angeles local time.
        parsed = pendulum.parse(stamp, tz="UTC")
        return parsed.in_tz("America/Los_Angeles").to_iso8601_string()

    df["datetime"] = pd.to_datetime(df["report_time"], utc=True)
    df["datetime_local_iso8601"] = df.report_time.apply(to_local_iso)

    # The index must be reset both before and after getTrips.
    df = getTrips(df.reset_index(drop=True))
    df = df.reset_index(drop=True)

    # Only the local-time string is kept in the output.
    df["datetime"] = df["datetime_local_iso8601"]
    return df[["datetime", "trip_id", "direction_id", "relative_position"]]
Example #22
Source File: estimate_arrivals.py    From performance_tracker with GNU General Public License v3.0 5 votes vote down vote up
def estimate_arrivals(trip_id, trip, stations, direction):
    """Estimate when a trip reached each station by linear interpolation.

    Observed rows (``trip``) and station rows (``stations``) are merged and
    ordered by ``relative_position``.  For each station row the time is
    interpolated between the neighbouring rows' ``datetime`` values, weighted
    by position, and rounded to the second.

    Note: ``trip`` gains an ``estimate`` column and ``stations`` gains
    ``estimate``, ``trip_id`` and ``direction_id`` columns in place.

    :param trip_id: value written to the ``trip_id`` column of station rows.
    :param trip: DataFrame with ``relative_position`` and ``datetime``.
    :param stations: DataFrame with ``relative_position`` and ``stop_id``.
    :param direction: value written to ``direction_id`` of station rows.
    :return: the station rows with an estimated ``datetime``; rows whose
        estimate is NaT are dropped.
    """
    trip.loc[:, "estimate"] = False
    stations.loc[:, "estimate"] = True
    trip_est = stations
    trip_est.loc[:, "trip_id"] = trip_id
    trip_est.loc[:, "direction_id"] = direction
    # DataFrame.append was removed in pandas 2.0; pd.concat is the
    # equivalent that works on old and new versions alike.
    combined = pd.concat([trip, trip_est])
    combined = combined.sort_values("relative_position")
    combined = combined.reset_index(drop=True)
    # shift vals to move adjacent position and date data into each row
    combined.loc[:, "previous_pos"] = combined.relative_position.shift()
    combined.loc[:, "next_pos"] = combined.relative_position.shift(-1)
    combined.loc[:, "previous_dt"] = combined.datetime.shift()
    combined.loc[:, "next_dt"] = combined.datetime.shift(-1)
    # Explicit copy: the rows are modified below, and writing into a
    # filtered frame triggers SettingWithCopyWarning otherwise.
    select = combined[combined["estimate"] == True].copy()
    select.loc[:, "weight"] = (select.relative_position - select.previous_pos) / (
        select.next_pos - select.previous_pos
    )
    select.loc[:, "time_interpolation"] = (
        select.next_dt - select.previous_dt
    ) * select.weight
    select.loc[:, "datetime"] = select.previous_dt + select.time_interpolation
    # Lower-case "s": the upper-case "S" alias is deprecated in newer pandas.
    select.loc[:, "datetime"] = pd.DatetimeIndex(select.datetime).round("s")
    select.loc[:, "stop_id"] = pd.to_numeric(select.stop_id, downcast="integer")
    # Some station estimates cannot be reliably estimated using this
    # technique and will have datetime = NaT, so we remove them.
    select = select.dropna(subset=["datetime"])
    return select
Example #23
Source File: plot.py    From sumo-rl with MIT License 5 votes vote down vote up
def plot_df(df, color, xaxis, yaxis, init_time=0, ma=1, acc=False, label=''):
    """Plot the per-``xaxis`` mean of ``yaxis`` with a +/- one std band.

    ``df[yaxis]`` is converted to numeric in place (unparseable entries
    become NaN).  When ``ma > 1`` both mean and std are passed through
    ``moving_average`` with that window.  ``init_time`` and ``acc`` are
    accepted but not used by this function.
    """
    df[yaxis] = pd.to_numeric(df[yaxis], errors='coerce')  # convert NaN string to NaN value

    # Select the column before aggregating: averaging every column just to
    # keep one is wasteful and raises on non-numeric columns in pandas >= 2.0.
    grouped = df.groupby(xaxis)[yaxis]
    mean = grouped.mean()
    std = grouped.std()
    # Group keys, taken before smoothing can replace the indexed Series.
    x = mean.index.values
    if ma > 1:
        mean = moving_average(mean, ma)
        std = moving_average(std, ma)

    plt.plot(x, mean, label=label, color=color, linestyle=next(dashes_styles))
    plt.fill_between(x, mean + std, mean - std, alpha=0.25, color=color, rasterized=True)

    #plt.ylim([0,200])
    #plt.xlim([40000, 70000])
Example #24
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_empty(self):
        """An empty object Series converts to int64, or int8 when downcast."""
        # see gh-16302
        empty = pd.Series([], dtype=object)

        tm.assert_series_equal(
            to_numeric(empty), pd.Series([], dtype=np.int64))

        # Original issue example
        downcast = to_numeric(empty, errors='coerce', downcast='integer')
        tm.assert_series_equal(downcast, pd.Series([], dtype=np.int8))
Example #25
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_series(self):
        """String and mixed string/int Series parse to the same numbers."""
        all_text = pd.Series(['1', '-3.14', '7'])
        parsed = to_numeric(all_text)
        expected = pd.Series([1, -3.14, 7])
        tm.assert_series_equal(parsed, expected)

        # Last element is a real int rather than a string.
        mixed = pd.Series(['1', '-3.14', 7])
        tm.assert_series_equal(to_numeric(mixed), expected)
Example #26
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_series_numeric(self):
        """Int, float and bool Series come back equal to the input."""
        samples = (
            [1, 3, 4, 5],
            [1., 3., 4., 5.],
            # bool is regarded as numeric
            [True, False, True, True],
        )
        for values in samples:
            series = pd.Series(values, index=list('ABCD'), name='XXX')
            tm.assert_series_equal(to_numeric(series), series)
Example #27
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_error_seen_bool(self):
        """Check the three ``errors`` modes when bools precede a bad string."""
        flags = pd.Series([True, False, 'apple'])

        with pytest.raises(
                ValueError,
                match='Unable to parse string "apple" at position 2'):
            to_numeric(flags, errors='raise')

        untouched = to_numeric(flags, errors='ignore')
        tm.assert_series_equal(untouched, pd.Series([True, False, 'apple']))

        # coerces to float
        coerced = to_numeric(flags, errors='coerce')
        tm.assert_series_equal(coerced, pd.Series([1., 0., np.nan]))
Example #28
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_list_numeric(self):
        """Plain Python lists of numbers convert to matching ndarrays."""
        ints = [1, 3, 4, 5]
        floats = [1., 3., 4., 5.]
        # bool is regarded as numeric
        bools = [True, False, True, True]

        cases = (
            (ints, np.array(ints, dtype=np.int64)),
            (floats, np.array(floats)),
            (bools, np.array(bools)),
        )
        for values, expected in cases:
            tm.assert_numpy_array_equal(to_numeric(values), expected)
Example #29
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_numeric(self):
        """Numeric input converts for Series and per-column DataFrame use."""
        expected = pd.Series([1, -3.14, 7])
        # Object dtype first, then the inferred dtype.
        for series_kwargs in ({'dtype': 'O'}, {}):
            source = pd.Series([1, -3.14, 7], **series_kwargs)
            tm.assert_series_equal(to_numeric(source), expected)

        # GH 14827
        mixed = pd.DataFrame({
            'a': [1.2, decimal.Decimal(3.14), decimal.Decimal("infinity"), '0.1'],
            'b': [1.0, 2.0, 3.0, 4.0],
        })
        wanted = pd.DataFrame({
            'a': [1.2, 3.14, np.inf, 0.1],
            'b': [1.0, 2.0, 3.0, 4.0],
        })

        # Test to_numeric over one column, then over multiple columns
        for target in ('a', ['a', 'b']):
            work = mixed.copy()
            work[target] = work[target].apply(to_numeric)
            tm.assert_frame_equal(work, wanted)
Example #30
Source File: test_numeric.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_all_nan(self):
        """With ``errors='coerce'`` a Series of non-numbers becomes all NaN."""
        letters = pd.Series(['a', 'b', 'c'])
        coerced = to_numeric(letters, errors='coerce')
        tm.assert_series_equal(coerced, pd.Series([np.nan, np.nan, np.nan]))