Python pandas.to_numeric() Examples

The following code examples show how to use pandas.to_numeric(). They are extracted from open-source Python projects.

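Before diving into the examples, here is a minimal sketch of the function itself (the Series contents below are illustrative, not taken from any of the projects): pandas.to_numeric() converts its argument to a numeric dtype, and the errors parameter controls what happens to values that cannot be parsed.

import pandas as pd

s = pd.Series(['1.0', '2', '-3', 'apple'])

# errors='raise' (the default) raises a ValueError on 'apple'.
# errors='coerce' turns unparseable values into NaN:
print(pd.to_numeric(s, errors='coerce'))      # 1.0, 2.0, -3.0, NaN

# errors='ignore' returns the input unchanged when any value fails to parse
# (note: this option is deprecated in recent pandas releases):
print(pd.to_numeric(s, errors='ignore'))      # the original object-dtype Series

# downcast shrinks the result to the smallest dtype that can hold the values:
print(pd.to_numeric(pd.Series([1, 2, 3]), downcast='integer'))   # dtype int8

# The pattern used throughout the examples below: apply the function
# column-wise so each DataFrame column is converted independently.
df = pd.DataFrame({'a': ['1', '2'], 'b': ['x', 'y']})
df = df.apply(pd.to_numeric, errors='ignore')  # 'a' becomes int64, 'b' stays object
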
Example 1
Project: actions-for-actions   Author: gsig   File: charades.py
def load_submission(self, submission_file):
        loc_submission = pd.read_csv(submission_file, header=None)
        build_proc_sub = loc_submission[0].str.split(' ').values.tolist()
        assert len(build_proc_sub[0]) == self.n_classes + len(self.submission_columns)
        proc_sub = pd.DataFrame.from_records(build_proc_sub, columns=[self.submission_columns + list(range(self.n_classes))])
        if self.subset is not None:
            if type(proc_sub['frame_id'].values[0]) is np.ndarray:
                mask = [True if x[0] in self.subset else False for x in proc_sub['frame_id'].values]
            else:
                # old pandas version
                mask = [True if x in self.subset else False for x in proc_sub['frame_id'].values]
            proc_sub = proc_sub[mask]
            assert np.any(np.array(mask))
        num_proc_sub = proc_sub.apply(pd.to_numeric, errors='ignore')
        grouped_by_vid = num_proc_sub
        self.submission = grouped_by_vid 
Example 2
Project: TADPOLE   Author: noxtoby   File: TADPOLE_D1_D2.py
def checkFSXvalsAgainstADNIMERGE(tadpoleDF, mriADNI1FileFSX, otherSSvisCodeStr, ssNameTag,
                                 ignoreMissingCols = False):
  nrRows, nrCols = tadpoleDF.shape
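  # ssDF (the FreeSurfer spreadsheet) is presumably built from mriADNI1FileFSX
  # earlier in the original source; this excerpt omits that step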
  colListOtherSS = list(ssDF.columns.values)
  colListTadpoleDF = list(tadpoleDF.columns.values)

  tadpoleDF[['Hippocampus', 'ST29SV%s' % ssNameTag, 'ST88SV%s' % ssNameTag]] = \
    tadpoleDF[['Hippocampus', 'ST29SV%s' % ssNameTag, 'ST88SV%s' % ssNameTag]].apply(pd.to_numeric, errors='coerce')


  tadpoleDF['HIPPOSUM'] = tadpoleDF['ST29SV%s' % ssNameTag] + tadpoleDF['ST88SV%s' % ssNameTag]
  for r in range(nrRows):

    valsNan = np.isnan(tadpoleDF['Hippocampus'][r]) or (np.isnan(tadpoleDF['ST29SV%s' % ssNameTag][r]) and \
                 np.isnan(tadpoleDF['ST88SV%s' % ssNameTag][r]))
    if valsNan:
      continue

    valsNotEq = tadpoleDF['Hippocampus'][r] != (tadpoleDF['ST29SV%s' % ssNameTag][r] + tadpoleDF['ST88SV%s' % ssNameTag][r])
    if valsNotEq:
      print("entries don't match\n ", tadpoleDF[['RID','VISCODE', 'Hippocampus', 'ST29SV%s' % ssNameTag,\
        'ST88SV%s' % ssNameTag, 'HIPPOSUM']].iloc[r])

  # Conclusion: the reason why entries above don't match is because UCSFFSX has duplicate entries for the same subject and viscode. 
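As a hedged aside (this is not part of the project code), such duplicates could be confirmed directly in pandas, assuming the FreeSurfer spreadsheet ssDF carries RID and VISCODE columns:

dup_mask = ssDF.duplicated(subset=['RID', 'VISCODE'], keep=False)
print(ssDF.loc[dup_mask, ['RID', 'VISCODE']].sort_values(['RID', 'VISCODE']))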
Example 3
Project: betterself   Author: jeffshek   File: dataframe_builders.py
def build_dataframe(self):
        if not self.values.exists():
            return pd.DataFrame()

        # Am I really a programmer or just a lego assembler?
        # Pandas makes my life at least 20 times easier.
        df = pd.DataFrame.from_records(self.values, index=self.index_column)

        # make the columns and labels prettier
        if self.rename_columns:
            df = df.rename(columns=self.column_mapping)

        df.index.name = TIME_COLUMN_NAME
        try:
            df.index = df.index.tz_convert(self.user.pytz_timezone)
        except AttributeError:
            # an AttributeError means the index is a plain Index rather than a
            # DatetimeIndex, i.e. only dates (and not times) were passed
            df.index = pd.DatetimeIndex(df.index, tz=self.user.pytz_timezone)

        # cast it as numerics if possible, otherwise if we're dealing with strings, ignore
        df = df.apply(pd.to_numeric, errors='ignore')

        return df 
Example 4
Project: pyprocessmacro   Author: QuentinAndre   File: models.py
def _cond_ind_effects_wrapper(self):
        """
        A wrapper for the conditional indirect effects.
        :return: pd.DataFrame
            A DataFrame of effects, se, llci, and ulci, for the conditional indirect effects.
        """
        symb_to_var = self._symb_to_var
        results = self.estimation_results
        rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
        cols_stats = ["Effect", "Boot SE", "BootLLCI", "BootULCI"]

        mod_values = self._moderators_values
        med_values = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
        values = med_values + mod_values

        rows_levels = np.array([i for i in product(*values)])
        cols_levels = ["Mediator"] + [symb_to_var.get(x, x) for x in self._moderators_symb]

        rows = np.concatenate([rows_levels, rows_stats], axis=1)
        cols = cols_levels + cols_stats
        df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
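        # note: args=["ignore"] passes errors="ignore" to pd.to_numeric positionally,
        # so each column is converted only when it parses cleanly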
        return df.apply(pd.to_numeric, args=["ignore"]) 
Example 5
Project: pyprocessmacro   Author: QuentinAndre   File: models.py
def _simple_ind_effects_wrapper(self):
        """
        A wrapper for the indirect effects (and for total/contrast effects if specified)
        :return: pd.DataFrame
            A DataFrame of effects, se, llci, and ulci, for the simple/total/constrasts of indirect effects.
        """
        symb_to_var = self._symb_to_var
        results = self.estimation_results
        rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T

        med_names = [symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]
        rows_levels = []
        if self._options["total"]:
            rows_levels += ["TOTAL"]
        rows_levels += med_names
        if self._options["contrast"]:
            contrasts = ["Contrast: {} vs. {}".format(a, b) for a, b in combinations(med_names, 2)]
            rows_levels += contrasts
        rows_levels = np.array(rows_levels).reshape(-1, 1)

        rows = np.concatenate([rows_levels, rows_stats], axis=1)
        cols = ["", "Effect", "Boot SE", "BootLLCI", "BootULCI"]
        df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
        return df.apply(pd.to_numeric, args=["ignore"]) 
Example 6
Project: pyprocessmacro   Author: QuentinAndre   File: models.py
def _PMM_index_wrapper(self):
        """
        A wrapper for the Partial Moderated Mediation index.
        :return: pd.DataFrame
            A DataFrame of effects, se, llci, and ulci, for the PMM index.
        """
        symb_to_var = self._symb_to_var
        results = self._PMM_index()
        rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
        cols_stats = ["Index", "Boot SE", "LLCI", "ULCI"]

        mod_names = [[symb_to_var.get(i, i) for i in self._moderators_symb]]
        med_names = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
        values = mod_names + med_names
        rows_levels = np.array([i for i in product(*values)])
        cols_levels = ["Moderator", "Mediator"]

        rows = np.concatenate([rows_levels, rows_stats], axis=1)
        cols = cols_levels + cols_stats
        df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
        return df.apply(pd.to_numeric, args=["ignore"]) 
Example 7
Project: pyprocessmacro   Author: QuentinAndre   File: models.py
def _MMM_index_wrapper(self):
        """
        A wrapper for the Moderated Moderated Mediation index.
        :return: pd.DataFrame
            A DataFrame of effects, se, llci, and ulci, for the CMM index.
        """
        symb_to_var = self._symb_to_var
        results = self._MMM_index()
        rows_stats = np.array([results["effect"], results["se"], results["llci"], results["ulci"]]).T
        cols_stats = ["Index", "Boot SE", "BootLLCI", "BootULCI"]

        med_names = [[symb_to_var.get('m{}'.format(i + 1), 'm{}'.format(i + 1)) for i in range(self._n_meds)]]
        rows_levels = np.array([i for i in product(*med_names)])
        cols_levels = ["Mediator"]

        rows = np.concatenate([rows_levels, rows_stats], axis=1)
        cols = cols_levels + cols_stats
        df = pd.DataFrame(rows, columns=cols, index=[""] * rows.shape[0])
        return df.apply(pd.to_numeric, args=["ignore"]) 
Example 8
Project: ML_algorithm   Author: luoshao23   File: DST_unsupervised.py
def clean_data(DT_df, attributes):
    """data preprocessing"""
    # DT_df = DT_df.drop(drop_cols, axis=1)
    DT_df["fs_scan_amt_pre"] = DT_df["fs_scan_amt_pre"].astype(float)
    DT_df["fs_scan_amt_pos"] = DT_df["fs_scan_amt_pos"].astype(float)
    DT_df["fs_scan_amt_pos_PF"] = DT_df["fs_scan_amt_pos_PF"].astype(float)
    DT_df["dyn_margin_amt_pre"] = DT_df["dyn_margin_amt_pre"].astype(float)
    DT_df["dyn_margin_amt_pos"] = DT_df["dyn_margin_amt_pos"].astype(float)
    DT_df["dyn_margin_amt_pos_PF"] = DT_df[
        "dyn_margin_amt_pos_PF"].astype(float)
    DT_df["ctl_grp_ind"] = DT_df["ctl_grp_ind"].astype(int)
    DT_df["mailer_version_id"] = DT_df["mailer_version_id"].astype(int)
    DT_df["tcm_redeem_md"] = pd.to_numeric(DT_df["tcm_redeem_md"])
    for attr in attributes:
        DT_df[attr] = DT_df[attr].astype(int)

    fields = attributes + ["fs_scan_amt_pre", "fs_scan_amt_pos", "fs_scan_amt_pos_PF", "dyn_margin_amt_pre", "dyn_margin_amt_pos", "dyn_margin_amt_pos_PF",
                           "ctl_grp_ind", "mailer_version_id", "tcm_redeem_md", "xtra_card_nbr"]
    DT_df = DT_df[fields]
    return DT_df 
Example 9
Project: atropos   Author: jdidion   File: show_job_info.py
def _get_table(self, column, is_size=True):
        cols = list(range(5))
        cols.append(self.header.index(column))
        header = [self.header[c] for c in cols]
        rows = [
            [row[c] for c in cols]
            for row in self.rows
        ]
        if is_size:
            for row in rows:
                row[5] = parse_size(row[5])
        table = pd.DataFrame.from_records(rows, columns=header)
        table = table.rename(columns={ 
            'prog' : 'Program',
            'prog2' : 'Program2',
            'threads' : 'Threads',
            'dataset' : 'Dataset',
            'qcut' : 'Quality',
        })
        table['Threads'] = pd.to_numeric(table['Threads'])
        table['Dataset'] = pd.Categorical(table['Dataset'])
        table['Program'] = pd.Categorical(table['Program'])
        table['Program2'] = pd.Categorical(table['Program2'])
        return table 
Example 10
Project: gullikson-scripts   Author: kgullikson88   File: Mamajek_Table.py
def __init__(self, filename=TABLE_FILENAME):
        MS = SpectralTypeRelations.MainSequence()

        # Read in the table.
        colspecs=[[0,7], [7,14], [14,21], [21,28], [28,34], [34,40], [40,47], [47,55],
                  [55,63], [63,70], [70,78], [78,86], [86,94], [94,103], [103,110],
                  [110,116], [116,122], [122,130], [130,137], [137,144], [144,151],
                  [151,158]]
        mam_df = pd.read_fwf(filename, header=20, colspecs=colspecs, na_values=['...'])[:92]

        # Strip the * from the logAge column. Probably shouldn't but...
        mam_df['logAge'] = mam_df['logAge'].map(lambda s: s.strip('*') if isinstance(s, basestring) else s)

        # Convert everything to floats
        for col in mam_df.columns:
            mam_df[col] = pd.to_numeric(mam_df[col], errors='ignore')

        # Add the spectral type number for interpolation
        mam_df['SpTNum'] = mam_df['SpT'].map(MS.SpT_To_Number)
        
        self.mam_df = mam_df 
Example 11
Project: Comparative-Annotation-Toolkit   Author: ComparativeGenomicsToolkit   File: consensus.py
def load_metrics_from_db(db_path, tx_mode, aln_mode):
    """
    Loads the alignment metrics for the mRNA/CDS alignments of transMap/AugustusTM/TMR
    """
    session = tools.sqlInterface.start_session(db_path)
    metrics_table = tools.sqlInterface.tables[aln_mode][tx_mode]['metrics']
    metrics_df = tools.sqlInterface.load_metrics(metrics_table, session)
    # unstack flattens the long-form data structure
    metrics_df = metrics_df.set_index(['AlignmentId', 'classifier']).unstack('classifier')
    metrics_df.columns = [col[1] for col in metrics_df.columns]
    metrics_df = metrics_df.reset_index()
    cols = ['AlnCoverage', 'AlnGoodness', 'AlnIdentity', 'PercentUnknownBases']
    metrics_df[cols] = metrics_df[cols].apply(pd.to_numeric)
    metrics_df['OriginalIntrons'] = metrics_df['OriginalIntrons'].fillna('')
    metrics_df['OriginalIntrons'] = [list(map(int, x)) if len(x[0]) > 0 else [] for x in
                                     metrics_df['OriginalIntrons'].str.split(',').tolist()]
    metrics_df['OriginalIntronsPercent'] = metrics_df['OriginalIntrons'].apply(calculate_vector_support, resolve_nan=1)
    session.close()
    return metrics_df 
Example 12
Project: MAP-IT   Author: alexmarder   File: routing_table.py
def create_routing_table(bgp=None, ixp_prefixes=None, ixp_asns=None, bgp_compression='infer'):
    log.info('Creating IP2AS tool.')
    if bgp_compression == 'infer' and bgp.startswith('http'):
        bgp_compression = infer_compression(bgp, 'infer')
    if not isinstance(ixp_prefixes, pd.DataFrame):
        ixp_prefixes = set(pd.read_csv(ixp_prefixes, comment='#', index_col=0).index.unique()) if ixp_prefixes is not None else set()
    if not isinstance(ixp_asns, pd.DataFrame):
        ixp_asns = set(pd.read_csv(ixp_asns, comment='#', index_col=0).index.unique()) if ixp_asns is not None else set()
    if not isinstance(bgp, pd.DataFrame):
        bgp_original = pd.read_table(bgp, comment='#', names=['Address', 'Prefixlen', 'ASN'], compression=bgp_compression)
        bgp = bgp_original[~bgp_original.ASN.str.contains(',|_')].copy()
        bgp['ASN'] = pd.to_numeric(bgp.ASN)
    rt = RoutingTable()
    for address, prefixlen, asn in bgp[~bgp.ASN.isin(ixp_asns)].itertuples(index=False):
        rt.add_prefix(asn.item(), address, prefixlen)
    for address, prefixlen, asn in bgp[bgp.ASN.isin(ixp_asns)].itertuples(index=False):
        rt.add_ixp(address, prefixlen)
    for prefix in ixp_prefixes:
        rt.add_ixp(prefix)
    rt.add_private()
    rt.add_multicast()
    rt.add_default()
    return rt 
Example 13
Project: cmapPy   Author: cmap   File: parse_gct.py
def assemble_row_metadata(full_df, num_col_metadata, num_data_rows, num_row_metadata):
    # Extract values
    row_metadata_row_inds = range(num_col_metadata + 1, num_col_metadata + num_data_rows + 1)
    row_metadata_col_inds = range(1, num_row_metadata + 1)
    row_metadata = full_df.iloc[row_metadata_row_inds, row_metadata_col_inds]

    # Create index from the first column of full_df (after the filler block)
    row_metadata.index = full_df.iloc[row_metadata_row_inds, 0]

    # Create columns from the top row of full_df (before cids start)
    row_metadata.columns = full_df.iloc[0, row_metadata_col_inds]

    # Rename the index name and columns name
    row_metadata.index.name = row_index_name
    row_metadata.columns.name = row_header_name

    # Convert metadata to numeric if possible
    row_metadata = row_metadata.apply(lambda x: pd.to_numeric(x, errors="ignore"))

    return row_metadata 
Example 14
Project: cmapPy   Author: cmap   File: parse_gct.py
def assemble_col_metadata(full_df, num_col_metadata, num_row_metadata, num_data_cols):

    # Extract values
    col_metadata_row_inds = range(1, num_col_metadata + 1)
    col_metadata_col_inds = range(num_row_metadata + 1, num_row_metadata + num_data_cols + 1)
    col_metadata = full_df.iloc[col_metadata_row_inds, col_metadata_col_inds]

    # Transpose so that samples are the rows and headers are the columns
    col_metadata = col_metadata.T

    # Create index from the top row of full_df (after the filler block)
    col_metadata.index = full_df.iloc[0, col_metadata_col_inds]

    # Create columns from the first column of full_df (before rids start)
    col_metadata.columns = full_df.iloc[col_metadata_row_inds, 0]

    # Rename the index name and columns name
    col_metadata.index.name = column_index_name
    col_metadata.columns.name = column_header_name

    # Convert metadata to numeric if possible
    col_metadata = col_metadata.apply(lambda x: pd.to_numeric(x, errors="ignore"))

    return col_metadata 
Example 15
Project: pyiem   Author: rheineke   File: session.py
def _orderbook_tag_frame(text):
    # This function can be removed if this pandas feature request is implemented
    # https://github.com/pandas-dev/pandas/issues/14608
    table_str = _table_text(text)
    root = etree.fromstring(table_str)
    table_body = root.find('tbody')
    index = []
    data = defaultdict(list)
    # Iterator of tr objects
    qty_path = "td[@class='change-cell quantity']"
    tr_iter = table_body.iter(tag='tr')
    for tr in tr_iter:
        index.append(tr.find(path='td').text.strip())
        # Quantity Held
        pos = pd.to_numeric(tr.find(path=qty_path).attrib['value'])
        data[iem.QUANTITY_HELD].append(pos)
        # Your Bids
        data[iem.YOUR_BIDS].append(_num_open_orders(tr, 'yourBidsCell'))
        # Your Asks
        data[iem.YOUR_ASKS].append(_num_open_orders(tr, 'yourAsksCell'))

    return pd.DataFrame(data=data, index=index) 
Example 16
Project: memex_ad_features   Author: giantoak   File: run_daily.py
def apply_ht_scores(dataframe):
    # Load the ht score dataframe
    ht_scores = pandas.read_csv('{0}ht_scores.csv'.format(config['result_data']), index_col=0)
    dataframe['phone'] = dataframe['phone'].map(lambda x: re.sub('[^0-9]', '', str(x)))
    # Make the column a numeric column for merging
    dataframe['phone'] = pandas.to_numeric(dataframe['phone'])
    final = dataframe.merge(ht_scores, how='left', left_on='phone', right_index=True)

    # Drop the content column and drop the index column
    final.drop('content', axis=1, inplace=True)

    if os.path.isfile('{0}ad_chars_final.csv'.format(config['result_data'])):
        lock.acquire()
        print('lock has been set for file {0}'.format(file))
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), mode='a', header=False, encoding='utf-8')
        lock.release()
        print('lock has been released for file {0}'.format(file))
    else:
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), header=True, encoding='utf-8') 
Example 17
Project: memex_ad_features   Author: giantoak   File: create_location_files.py
def apply_ht_scores(dataframe):
    # Load the ht score dataframe
    ht_scores = pandas.read_csv('{0}ht_scores.csv'.format(config['result_data']), index_col=0)
    dataframe['phone'] = dataframe['phone'].map(lambda x: re.sub('[^0-9]', '', str(x)))
    # Make the column a numeric column for merging
    #dataframe['phone'] = pandas.to_numeric(dataframe['phone'])
    final = dataframe.merge(ht_scores, how='left', left_on='phone', right_index=True)

    # Drop the content column and drop the index column
    final.drop('content', axis=1, inplace=True)

    if os.path.isfile('{0}ad_chars_final.csv'.format(config['result_data'])):
        lock.acquire()
        print('lock has been set for file {0}'.format(file))
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), mode='a', header=False, encoding='utf-8', index=False)
        lock.release()
    else:
        final.to_csv('{0}ad_chars_final.csv'.format(config['result_data']), header=True, encoding='utf-8', index=False) 
Example 18
Project: PIEFACE   Author: jcumby   File: calcellipsoid.py
def makeDataFrame(phases):
    """ Return Pandas DataFrame object, with CIF files as index and ellipsoid parameters as columns (hierarchical by centre atom)"""
    
    import pandas as pd
    from pieface.readcoords import Crystal
    
    if isinstance(phases, dict):
        if isinstance( phases[phases.keys()[0]], Crystal):      # We are reading a dict of Crystals: convert to nested dict first
            alldata = makenesteddict(phases)
        elif isinstance( phases[phases.keys()[0]], dict ):      # Looking at a dict of dicts: assume correct for pandas...
            alldata = phases
            
        d = dict([ (i, pd.DataFrame(alldata[i]).set_index('files')) for i in alldata.keys() ])        # Make dict of DataFrames
        
        frame = pd.concat(d, axis=1)
        
        if len(frame.index) == 1:   # We're looking at a single cif file - unstack DataFrame with atoms as index
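            # (DataFrame.ix is deprecated in newer pandas; .loc/.iloc replace it)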
            return frame.ix[frame.index[0]].unstack().apply(pd.to_numeric, errors='ignore')        # Need to convert back to float/int when unstacking
        else:
            return frame
    else:
        raise TypeError("Unknown data format for conversion to DataFrame (expected dict)") 
Example 19
Project: chemcoord   Author: mcocdawc   File: _cartesian_class_core.py
def _return_appropiate_type(self, selected):
        if isinstance(selected, pd.Series):
            frame = pd.DataFrame(selected).T
            if self._required_cols <= set(frame.columns):
                selected = frame.apply(pd.to_numeric, errors='ignore')
            else:
                return selected

        if (isinstance(selected, pd.DataFrame)
                and self._required_cols <= set(selected.columns)):
            molecule = self.__class__(selected)
            molecule.metadata = self.metadata.copy()
            molecule._metadata = copy.deepcopy(self._metadata)
            return molecule
        else:
            return selected 
Example 20
Project: pybroom   Author: tritemio   File: pybroom.py
def _augment_lmfit_modelresult(result):
    """Tidy data values and fitted model from `lmfit.model.ModelResult`.
    """
    columns = ['x', 'data', 'best_fit', 'residual']
    d = pd.DataFrame(index=range(result.ndata), columns=columns)
    for col in columns[1:]:
        d.loc[:, col] = getattr(result, col)

    independent_vars = result.model.independent_vars
    if len(independent_vars) == 1:
        independent_var = independent_vars[0]
    else:
        msg = ('Only 1 independent variable is currently supported.\n'
               'Found independent variables: %s' % str(independent_vars))
        raise NotImplementedError(msg)

    x_array = result.userkws[independent_var]
    d.loc[:, 'x'] = x_array

    if len(result.components) > 1:
        comp_names = [c.name for c in result.components]
        for cname, comp in zip(comp_names, result.components):
            d.loc[:, cname] = comp.eval(x=d.x, **result.values)
    return d.apply(pd.to_numeric, errors='ignore') 
Example 21
Project: equipy   Author: kallinikator   File: Stock.py
def __init__(self, symbol, *args):
        super().__init__()

        self.data = pd.read_csv(open(r"Stock_Data/{}.csv".format(symbol)))
        self.data = self.data.apply(pd.to_numeric, errors="ignore")
        self.data.index = self.data["Quarter end"]
        self.name = symbol

        if self.data["Price"].dtype in (int, float) and self.data["Cumulative dividends per share"].dtype in (int, float):
            self.data["Value"] = self.data["Price"] + self.data["Cumulative dividends per share"]
            # Calculation of the estimated return
            self.data["Estimated Return"] = self.data["Value"].pct_change()
            # Calculation of the standard deviation
            self.data["Standard Deviation"] = self.data["Value"].std()
        else:
            self.complete_pricelist = False 
Example 22
Project: uscensus   Author: nkrishnaswami   File: model.py
def __call__(self, fields, geo_for, geo_in=None, cache=NopCache()):
        """Special method to make API object invocable.

        Arguments:
          * fields: list of variables to return.
          * geo_* fields must be given as dictionaries, eg:
            `{'county': '*'}`
          * cache: cache in which to store results. Not cached by default.
        """
        params = {
            'get': ','.join(fields),
            'key': self.key,
            'for': self._geo2str(geo_for),
        }
        if geo_in:
            params['in'] = self._geo2str(geo_in)

        j = fetchjson(self.endpoint, cache, self.session, params=params)
        ret = pd.DataFrame(data=j[1:], columns=j[0])
        for field in fields:
            if self.variables[field].get('predicateType') == 'int':
                ret[field] = pd.to_numeric(ret[field])
        return ret 
Example 23
Project: Test-stock-prediction-algorithms   Author: timestocome   File: LoadAndMatchDates.py
def read_data(file_name):

    stock = pd.read_csv(file_name, parse_dates=True, index_col=0)     
    n_samples = len(stock)
    
    # ditch samples with NAN values
    stock = stock.dropna(axis=0)

    # flip order from newest to oldest to oldest to newest
    #stock = stock.iloc[::-1]

    # trim data
    stock = stock[['Open']]

    # convert object to floats
    stock['Open'] = pd.to_numeric(stock['Open'], errors='coerce')

    # all stock is needed to walk back dates for testing hold out data
    return stock


#############################################################################################
# load and combine stock indexes, matching the dates 
Example 24
Project: erna   Author: fact-project   File: qsub.py
def get_qstat_as_df():
    """Get the current users output of qstat as a DataFrame.
    """
    user = os.environ.get("USER")
    try:
        ret = subprocess.Popen(
            ["qstat", "-u", str(user)],
            stdout=subprocess.PIPE,
        )
        df = pd.read_csv(ret.stdout, delimiter=r"\s+")
        # drop the first line since it is just one long line
        df = df.drop(df.index[0]).copy()
        # convert objects to numeric otherwise numbers are strings
        df["JOBID"] = pd.to_numeric(df["job-ID"], errors='coerce')
        # df.set_index("JOBID")
        df = df.drop('job-ID', 1)

    except ValueError:
        logger.exception("No jobs in queues for user {}".format(user))
        df = pd.DataFrame()
    return df 
Example 25
Project: Informed-Finance-Canary   Author: Darthone   File: stockData.py
def get_data_from_google(ticker_sym, start, end):
    """ Returns a data frame of data for a given stock between two dates """
    url = "https://www.google.com/finance/historical?q=%s&startdate=%s&enddate=%s&output=csv" % (ticker_sym, start, end)
    s = requests.get(url).content
    df = pd.read_csv(io.StringIO(s.decode('utf-8')))
    df['Date'] = pd.to_datetime(df['Date'])
    df['epoch'] = (df['Date'] - datetime(1970,1,1)).dt.total_seconds() * 1000
    df = df.set_index('Date')  # set_index returns a new frame; assign it back
    df['Adj_Close'] = df['Close'] # google's api doesn't provide an adjusted close, so just assume it's the same
    cols = ['High', 'Low', 'Volume', 'Open', 'Close', 'Adj_Close']
    for c in cols: # cast columns to numeric
        df[c] = pd.to_numeric(df[c])
    return df.iloc[::-1] # reverse the dataframe so index 0 is the earliest date

#@memoize
#def get_data_for_sym(ticker_sym, start, end):
#    return list(reversed(get_data_for_sym_from_yahoo(ticker_sym, start, end)))
#	#res = StockFeature.select().where(Relationship.from_user == self)) 
Example 26
Project: pdVCF   Author: superDross   File: vcf2dataframe.py
def calc_AB(vcf):
    ''' Calculate allele balance for all samples in a given 
        pdVCF. Also converts DP & GQ to numeric type.
    
    Args:
        vcf: pdVCF with genotype information extracted
        
    Notes:
        ONLY WORKS FOR BIALLELIC VARIANTS
    '''
    sam = vcf.columns.levels[0][0]
    # '.' must be matched literally: with the default regex=True an unescaped
    # '.' matches every character and would zero out the whole column
    vcf[sam,'DP'] = pd.to_numeric(vcf[sam,'DP'].str.replace('.', '0', regex=False)) # bcftools places '.' in empty fields
    vcf[sam,'GQ'] = pd.to_numeric(vcf[sam,'GQ'].str.replace('.', '0', regex=False))
    AD = vcf.xs('AD', level=1, axis=1).unstack().str.split(",", n=2)
    DP = vcf.xs('DP', level=1, axis=1).unstack()
    AB = round(pd.to_numeric(AD.str[1]) / pd.to_numeric(DP), 2)
    vcf[sam, 'AB'] = AB.tolist()
    return vcf 
Example 27
Project: pastas   Author: pastas   File: project.py
def update_distances(self):
        """
        Calculate the distances between the observed series and the stresses.

        Returns
        -------
        distances: pandas.DataFrame
            pandas dataframe with the distances between the oseries (index)
            and the stresses (columns).

        """
        # Make sure these are values, even when actually objects.
        xo = pd.to_numeric(self.oseries.x)
        xt = pd.to_numeric(self.stresses.x)
        yo = pd.to_numeric(self.oseries.y)
        yt = pd.to_numeric(self.stresses.y)

        xh, xi = np.meshgrid(xt, xo)
        yh, yi = np.meshgrid(yt, yo)

        self.distances = pd.DataFrame(np.sqrt((xh - xi) ** 2 + (yh - yi) ** 2),
                                      index=self.oseries.index,
                                      columns=self.stresses.index) 
Example 28
Project: soundDB   Author: gjoseph92   File: parsers.py
def parse(self, entry):      
        data = pd.read_csv(str(entry),
                           engine= "c",
                           sep= "\t",
                           parse_dates= False,
                           index_col= [0, 1])

        data.index.names = ["date", "srcid"]

        # Check for AMT bug that adds row of ('nvsplDate', 'Total_All') with all 0s, drop if exists
        if data.index[-1][0] == 'nvsplDate':
            data = data.iloc[:-1, :]

        ## Pandas cannot seem to handle a MultiIndex with dates;
        ## slicing syntax becomes even crazier, and often doesn't even work.
        ## So date conversion is disabled for now.

        # # Convert dates
        # datetimes = data.index.get_level_values('date').to_datetime()
        # data.index.set_levels(datetimes, level= 'date', inplace= True)

        # Ensure MultiIndex sortedness
        data.sortlevel(inplace= True)
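        # (DataFrame.sortlevel is deprecated in newer pandas; sort_index replaces it)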

        return data.apply(pd.to_numeric, raw= True, errors= "coerce") 
Example 29
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py
def to_numeric(self,columns):
        '''
        Args
            columns (string or list):
                column names needed to be converted
        Returns
            -
        '''
        if isinstance(columns,str):
            self.data_df[columns] = pd.to_numeric(self.data_df[columns],errors='coerce')
        elif isinstance(columns,list):
            for column in columns:
                self.data_df[column] = pd.to_numeric(self.data_df[column],errors='coerce')

    # rename certain columns 
Example 30
Project: ssbio   Author: SBRG   File: itasserprop.py   (MIT License)
def df_coach_bsites(self):
        df_cols = ['site_num', 'c_score', 'cluster_size', 'algorithm',
                   'pdb_template_id', 'pdb_template_chain', 'pdb_ligand',
                   'binding_location_coords', 'c_score_method', 'binding_residues',
                   'ligand_cluster_counts']

        bsites_inf_df = pd.DataFrame.from_records(self.coach_bsites, columns=df_cols).drop_duplicates().reset_index(drop=True)

        if bsites_inf_df.empty:
            log.warning('Empty dataframe')
            return bsites_inf_df
        else:
            bsites_inf_df['c_score'] = pd.to_numeric(bsites_inf_df.c_score, errors='coerce')
            bsites_inf_df['cluster_size'] = pd.to_numeric(bsites_inf_df.cluster_size, errors='coerce')
            return ssbio.utils.clean_df(bsites_inf_df) 
Example 31
Project: ssbio   Author: SBRG   File: itasserprop.py   (MIT License)
def df_coach_go(self):
        cols = ['go_id', 'go_term', 'c_score']

        go_all_df = pd.DataFrame()

        for go_list in [self.coach_go_mf, self.coach_go_cc, self.coach_go_bp]:
            go_df = pd.DataFrame.from_records(go_list, columns=cols).drop_duplicates().reset_index(drop=True)
            go_df['c_score'] = pd.to_numeric(go_df.c_score, errors='coerce')

            if go_all_df.empty:
                go_all_df = go_df
            else:
                go_all_df = go_all_df.append(go_df)  # append returns a new frame; assign it back

        return go_all_df 
Example 32
Project: ssbio   Author: SBRG   File: itasserprop.py   (MIT License)
def parse_coach_ec_df(infile):
    """Parse the EC.dat output file of COACH and return a dataframe of results

    EC.dat contains the predicted EC number and active residues.
        The columns are: PDB_ID, TM-score, RMSD, Sequence identity,
        Coverage, Confidence score, EC number, and Active site residues

    Args:
        infile (str): Path to EC.dat

    Returns:
        DataFrame: Pandas DataFrame summarizing EC number predictions

    """

    ec_df = pd.read_table(infile, delim_whitespace=True,
                          names=['pdb_template', 'tm_score', 'rmsd', 'seq_ident', 'seq_coverage',
                                 'c_score', 'ec_number', 'binding_residues'])

    ec_df['pdb_template_id'] = ec_df['pdb_template'].apply(lambda x: x[:4])
    ec_df['pdb_template_chain'] = ec_df['pdb_template'].apply(lambda x: x[4])

    ec_df = ec_df[['pdb_template_id', 'pdb_template_chain', 'tm_score', 'rmsd',
                   'seq_ident', 'seq_coverage', 'c_score', 'ec_number', 'binding_residues']]
    ec_df['c_score'] = pd.to_numeric(ec_df.c_score, errors='coerce')

    return ec_df 
Example 33
Project: pydov   Author: DOV-Vlaanderen   File: dovseries.py   (MIT License)
def _get_peilmetingen_df(self):
        """"""
        doc_df = pd.DataFrame(list(self.get_peilmetingen()),
                              columns=["grondwaterlocatie",
                                       "filternummer",
                                       "datum",
                                       "diepte",
                                       "methode",
                                       "betrouwbaarheid"])
        doc_df["datum"] = pd.to_datetime(doc_df["datum"])
        doc_df["diepte"] = pd.to_numeric(doc_df["diepte"])
        doc_df = doc_df.set_index("datum")
        return doc_df 
Example 34
Project: pydov   Author: DOV-Vlaanderen   File: dovseries.py   (MIT License)
def _get_observaties_df(self):
        """"""
        doc_df = pd.DataFrame(list(self.get_observaties()),
                              columns=["grondwaterlocatie",
                                       "filternummer",
                                       "monsternummer",
                                       "datum",
                                       "parameter",
                                       "waarde",
                                       "eenheid",
                                       "betrouwbaarheid"])
        doc_df["datum"] = pd.to_datetime(doc_df["datum"])
        doc_df["waarde"] = pd.to_numeric(doc_df["waarde"])
        return doc_df 
Example 35
Project: pauvre   Author: conchoecia   File: functions.py
def filter_fastq_length_meanqual(df, min_len, max_len,
                                 min_mqual, max_mqual):
    querystring = "length >= {0} and meanQual >= {1}".format(min_len, min_mqual)
    if max_len != None:
        querystring += " and length <= {}".format(max_len)
    if max_mqual != None:
        querystring += " and meanQual <= {}".format(max_mqual)
    print("Keeping reads that satisfy: {}".format(querystring), file=stderr)
    filtdf = df.query(querystring)
    #filtdf["length"] = pd.to_numeric(filtdf["length"], errors='coerce')
    #filtdf["meanQual"] = pd.to_numeric(filtdf["meanQual"], errors='coerce')
    return filtdf 
Example 36
Project: q2-diversity   Author: qiime2   File: _visualizer.py
def bioenv(output_dir: str, distance_matrix: skbio.DistanceMatrix,
           metadata: qiime2.Metadata) -> None:
    # convert metadata to numeric values where applicable, drop the non-numeric
    # values, and then drop samples that contain NaNs
    df = metadata.to_dataframe()
    df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

    # filter categorical columns
    pre_filtered_cols = set(df.columns)
    df = df.select_dtypes([numpy.number]).dropna()
    filtered_categorical_cols = pre_filtered_cols - set(df.columns)

    # filter 0 variance numerical columns
    pre_filtered_cols = set(df.columns)
    df = df.loc[:, df.var() != 0]
    filtered_zero_variance_cols = pre_filtered_cols - set(df.columns)

    # filter the distance matrix to exclude samples that were dropped from
    # the metadata, and keep track of how many samples survived the filtering
    # so that information can be presented to the user.
    initial_dm_length = distance_matrix.shape[0]
    distance_matrix = distance_matrix.filter(df.index, strict=False)
    filtered_dm_length = distance_matrix.shape[0]

    result = skbio.stats.distance.bioenv(distance_matrix, df)
    result = q2templates.df_to_html(result)

    index = os.path.join(TEMPLATES, 'bioenv_assets', 'index.html')
    q2templates.render(index, output_dir, context={
        'initial_dm_length': initial_dm_length,
        'filtered_dm_length': filtered_dm_length,
        'filtered_categorical_cols': ', '.join(filtered_categorical_cols),
        'filtered_zero_variance_cols': ', '.join(filtered_zero_variance_cols),
        'result': result}) 
Example 37
Project: quantrocket-client   Author: quantrocket-llc   File: zipline.py
def from_csv(cls, filepath_or_buffer):

        # Import pandas lazily since it can take a moment to import
        try:
            import pandas as pd
        except ImportError:
            raise ImportError("pandas must be installed to use ZiplineBacktestResult")

        zipline_result = cls()

        results = pd.read_csv(
            filepath_or_buffer,
            parse_dates=["date"],
            index_col=["dataframe", "index", "date", "column"])["value"]

        # Extract returns
        returns = results.loc["returns"].unstack()
        returns.index = returns.index.droplevel(0).tz_localize("UTC")
        zipline_result.returns = returns["returns"].astype(float)

        # Extract positions
        positions = results.loc["positions"].unstack()
        positions.index = positions.index.droplevel(0).tz_localize("UTC")
        zipline_result.positions = positions.astype(float)

        # Extract transactions
        transactions = results.loc["transactions"].unstack()
        transactions.index = transactions.index.droplevel(0).tz_localize("UTC")
        zipline_result.transactions = transactions.apply(pd.to_numeric, errors='ignore')

        # Extract benchmark returns
        benchmark_returns = results.loc["benchmark"].unstack()
        benchmark_returns.index = benchmark_returns.index.droplevel(0).tz_localize("UTC")
        zipline_result.benchmark_returns = benchmark_returns["benchmark"].astype(float)

        # Extract performance dataframe
        perf = results.loc["perf"].unstack()
        perf.index = perf.index.droplevel(0).tz_localize("UTC")
        zipline_result.perf = perf.apply(pd.to_numeric, errors='ignore')

        return zipline_result 
Example 38
Project: Python-Scripts-Repo-on-Data-Science   Author: qalhata   File: DataClean_GS_Analysis5.py
def check_null_or_valid(row_data):
    """Function that takes a row of data,
    drops all missing values,
    and checks if all remaining values are greater than or equal to 0
    """
    no_na = row_data.dropna()[1:-1]
    numeric = pd.to_numeric(no_na)
    ge0 = numeric >= 0
    return ge0

# Check whether the first column is 'Life expectancy' 
Example 39
Project: desert-mirage   Author: valentour   File: desert_mirage_lib.py
def eliminate_invalids(df, cols):
    """Eliminate invalid data in ``cols`` of ``df``."""
    numdf = df.drop(cols, axis=1).join(df[cols].apply(pd.to_numeric,
                                                      errors='coerce'))
    numdf = numdf[~numdf[cols].isnull().apply(np.any, axis=1)]
    return numdf 
Example 40
Project: desert-mirage   Author: valentour   File: desert_mirage_lib.py
def partial_convert_only_numerics(df):
    """Convert ``df`` numeric cols and try to coerce any errors encountered."""
    col_dict = df_cols_by_type(df)
    partial_convert = partial(pd.to_numeric, errors='coerce')
    df[col_dict['numeric']] = df[col_dict['numeric']].apply(partial_convert)  # assign back; apply does not modify in place
    return df

# Useful one-liners.
# df.select_dtypes(include=['bool'])
# list(df.select_dtypes(include=['bool']).columns) 
Example 41
Project: PyOnSSET   Author: KTH-dESA   File: onsset.py
def condition_df(self):
        """
        Do any initial data conditioning that may be required.
        """

        logging.info('Ensure that columns that are supposed to be numeric are numeric')
        self.df[SET_GHI] = pd.to_numeric(self.df[SET_GHI], errors='coerce')
        self.df[SET_WINDVEL] = pd.to_numeric(self.df[SET_WINDVEL], errors='coerce')
        self.df[SET_NIGHT_LIGHTS] = pd.to_numeric(self.df[SET_NIGHT_LIGHTS], errors='coerce')
        self.df[SET_ELEVATION] = pd.to_numeric(self.df[SET_ELEVATION], errors='coerce')
        self.df[SET_SLOPE] = pd.to_numeric(self.df[SET_SLOPE], errors='coerce')
        self.df[SET_LAND_COVER] = pd.to_numeric(self.df[SET_LAND_COVER], errors='coerce')
        self.df[SET_GRID_DIST_CURRENT] = pd.to_numeric(self.df[SET_GRID_DIST_CURRENT], errors='coerce')
        self.df[SET_GRID_DIST_PLANNED] = pd.to_numeric(self.df[SET_GRID_DIST_PLANNED], errors='coerce')
        self.df[SET_SUBSTATION_DIST] = pd.to_numeric(self.df[SET_SUBSTATION_DIST], errors='coerce')
        self.df[SET_ROAD_DIST] = pd.to_numeric(self.df[SET_ROAD_DIST], errors='coerce')
        self.df[SET_HYDRO_DIST] = pd.to_numeric(self.df[SET_HYDRO_DIST], errors='coerce')
        self.df[SET_HYDRO] = pd.to_numeric(self.df[SET_HYDRO], errors='coerce')
        self.df[SET_SOLAR_RESTRICTION] = pd.to_numeric(self.df[SET_SOLAR_RESTRICTION], errors='coerce')

        logging.info('Replace null values with zero')
        self.df.fillna(0, inplace=True)

        logging.info('Sort by country, Y and X')
        self.df.sort_values(by=[SET_COUNTRY, SET_Y, SET_X], inplace=True)

        logging.info('Add columns with location in degrees')
        project = Proj('+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs')

        def get_x(row):
            x, y = project(row[SET_X] * 1000, row[SET_Y] * 1000, inverse=True)
            return x

        def get_y(row):
            x, y = project(row[SET_X] * 1000, row[SET_Y] * 1000, inverse=True)
            return y

        self.df[SET_X_DEG] = self.df.apply(get_x, axis=1)
        self.df[SET_Y_DEG] = self.df.apply(get_y, axis=1) 
Example 42
Project: VC3D   Author: AlexanderWard1   File: addViscous_Bowcutt.py
def saveSlice_CSV(self, outputFilename=outputFilename, xSlice=[], ySlice=[], zSlice=[]):
        """ Take a slice and save it to csv """
        outputFilename += '_slice.csv'
        
#        # This defines how 'narrow' a slice we want. Why am I writing this if ParaView will do it fark
#        tol = 1e-2
#        
#        # Pre allocate empty DF here?
#        slicedData = pd.DataFrame()
#        
#        if not xSlice:
#            # We have some slices along x to make
#            for point in xSlice:
#                # we want to slice at all of these points
#                > xSlice[point] - tol
#            self.flowData.transpose().loc[(self.flowData.transpose()["x"] > 0.599 & self.flowData.transpose()["x"] < 0.601 &  self.flowData.transpose()["z"] == 0), "cf"]
#        elif not ySlice:
#            # Slices along y to take
#        elif not zSlice:
#            # And slices along z
        
        flowData = self.flowData.apply(pd.to_numeric, errors='ignore')
        
        slicedData_indices = (flowData["z"] > -0.01) & (flowData["z"] < 0.01)
        
        slicedData = flowData.loc[slicedData_indices]
        
        slicedData.to_csv(outputFilename, sep=',', index=0, index_label=0)
        
        print "Slices saved in", outputFilename 
Example 43
Project: VC3D   Author: AlexanderWard1   File: addViscous.py
def saveSlice_CSV(self, outputFilename=outputFilename, xSlice=[], ySlice=[], zSlice=[]):
        """ Take a slice and save it to csv """
        outputFilename += '_slice.csv'
        
#        # This defines how 'narrow' a slice we want. Why am I writing this if ParaView will do it fark
#        tol = 1e-2
#        
#        # Pre allocate empty DF here?
#        slicedData = pd.DataFrame()
#        
#        if not xSlice:
#            # We have some slices along x to make
#            for point in xSlice:
#                # we want to slice at all of these points
#                > xSlice[point] - tol
#            self.flowData.transpose().loc[(self.flowData.transpose()["x"] > 0.599 & self.flowData.transpose()["x"] < 0.601 &  self.flowData.transpose()["z"] == 0), "cf"]
#        elif not ySlice:
#            # Slices along y to take
#        elif not zSlice:
#            # And slices along z
        
        flowData = self.flowData.apply(pd.to_numeric, errors='ignore')
        
        slicedData_indices = (flowData["y"] > 0.598) & (flowData["y"] < 0.602) & (flowData["z"] == 0)
        
        slicedData = flowData.loc[slicedData_indices]
        
        slicedData.to_csv(outputFilename, sep=',', index=0, index_label=0)
        
        print "Slices saved in", outputFilename 
Example 44
Project: singlecell-dash   Author: czbiohub   File: common.py
def maybe_to_numeric(series):
    try:
        return pd.to_numeric(series)
    except ValueError:
        return series 
Example 45
Project: SSieve   Author: davidimprovz   File: core.py
def createPriceHistoryReport(self, stock):
        """
        Calls get10YrPriceHistory() to package a price history report into a PANDAS dataframe, then cleans and returns the data.

        This function will acquire a price history for the provided symbol, which must be a string and a valid stock symbol
        along with the symbol's exchange, e.g., ('MMM', 'NYSE'). The get10YrPriceHistory() function requires the exchange.
        
        After the data is loaded, the function adds a Symbol field to the price history for tracking in the database, reindexes 
        and renames some fields, properly formats the dates into datetime fields, and converts prices from strings to floats.

        Returns the report as a PANDAS dataframe if successful, otherwise a tuple (False, error message).

        Example Usage: createPriceHistoryReport(('MMM', 'NYSE'))
        """
        try:
            # get the raw data from morningstar    
            price_history = self.get10YrPriceHistory(stock)
            
            if isinstance(price_history, pd.DataFrame): # the price_history has to exist, or else return the err msg of the function called
                
                price_history['Symbol'] = stock[0]
                # reorganize header order
                price_history = price_history.reindex(columns=['Symbol','Date','Open','High','Low','Close','Volume'])
                # rename the Date column for easier processing through SQLite's Date functionality
                price_history.rename(columns={'Date':'Reference'}, inplace=True)
                # convert all dates to ISO formatted yyyy-mm-dd strings
                price_history['Reference'] = price_history['Reference'].apply(lambda x: time.strftime("%Y-%m-%d", time.strptime(x, "%m/%d/%Y")))
                
                # convert volumes to numeric after stripping thousands separators;
                # un-parseable entries (e.g. the odd unicode '???' value) become NaN via errors='coerce'
                price_history['Volume'] = pd.to_numeric(price_history['Volume'].str.replace(',',''), errors='coerce')
                # set index b/f db commit so no duplicate numeric index columns
                price_history.set_index(['Symbol'], inplace=True)
            
            return price_history

        except Exception as e:
            return (False, e)

    # get10YrPriceHistory
    # ******************* # 
Example 46
Project: actions-for-actions   Author: gsig   File: charades.py
def load_groundtruth(self):
        gt_labels = pd.read_csv(self.data_path)
        if self.subset is not None:
            mask = [True if x in self.subset else False for x in gt_labels['id'].values]
            gt_labels = gt_labels[mask]
            assert np.any(np.array(mask))
        gt_labels['length'] = pd.to_numeric(gt_labels['length'])
        gt_labels['actions'].fillna('', inplace=True)
        self.gt_labels = gt_labels 
Example 47
Project: georges   Author: chernals   File: tracking.py
def read_madx_tracking(file):
    """Read a MAD-X Tracking onetable=true file to a dataframe."""
    column_names = ['ID', 'TURN', 'X', 'PX', 'Y', 'PY', 'T', 'PT', 'S', 'E']
    data = pd.read_csv(file, skiprows=MADX_TRACKING_SKIP_ROWS, delim_whitespace=True, names=column_names)
    return data.apply(pd.to_numeric, errors="ignore").dropna() 
Example 48
Project: qiime2   Author: qiime2   File: metadata.py
def to_dataframe(self, cast_numeric=False):
        df = self._dataframe.copy()

        if cast_numeric:
            df = df.apply(lambda x: pd.to_numeric(x, errors='ignore'))

        return df