Python pandas.DataFrame() Examples

The following are code examples showing how to use pandas.DataFrame(). They are taken from open-source Python projects. You can vote up the examples you like or vote down the ones you don't.

Example 1
Project: ILTIS   Author: grg2rsr   File: gioIO.py    GNU General Public License v2.0 6 votes vote down vote up
def write_gloDatamix(Meta,Data,outpath):
    """Write a .gloDatamix file from metadata and data.

    Inverse of read_gloDatamix(): one row per measurement, the metadata
    columns first, then one ``dataN`` column per time point.

    Parameters
    ----------
    Meta : pd.DataFrame
        metadata, one row per row of Data
    Data : np.ndarray
        2d numeric array, shape (nRows, nTimepoints)
    outpath : str
        path of the file to write
    """
    # header: metadata column names followed by data0 .. dataN-1
    labels = '\t'.join(Meta.columns.tolist()
                       + ['data' + str(number) for number in range(Data.shape[1])])

    # 'U20' (not 'S20') so the joined fields are str, not bytes, on py3 --
    # joining str metadata with bytes data raised TypeError before.
    # The context manager guarantees the handle is closed even on error.
    with open(outpath, 'w') as fh:
        fh.write(labels)
        fh.write('\n')
        for i in range(Data.shape[0]):
            # str() each metadata field so non-string cells also join cleanly
            metainfo = [str(v) for v in Meta.loc[i].tolist()]
            data = Data[i, :].astype('U20').tolist()
            fh.write('\t'.join(metainfo + data))
            fh.write('\n')
Example 2
Project: nistats   Author: nistats   File: glm_reporter.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def _dataframe_to_html(df, precision, **kwargs):
    """ Makes HTML table from provided dataframe.
    Removes HTML5 non-compliant attributes (ex: `border`).

    Parameters
    ----------
    df: pandas.Dataframe
        Dataframe to be converted into HTML table.

    precision: int
        The display precision for float values in the table.

    **kwargs: keyworded arguments
        Supplies keyworded arguments for func: pandas.Dataframe.to_html()

    Returns
    -------
    html_table: String
        Code for HTML table.
    """
    with pd.option_context('display.precision', precision):
        html_table = df.to_html(**kwargs)
    html_table = html_table.replace('border="1" ', '')
    return html_table 
Example 3
Project: code-for-the-world   Author: jennirinker   File: io.py    MIT License 6 votes vote down vote up
def read_selig(path):
    """Read a Selig-style airfoil file.

    Parameters
    -----------
    path : str
        Path to the Selig-style .dat file.

    Returns
    -------
    air_df : pd.DataFrame
        Dataframe containing the x- and y-coordinates of the airfoil,
        in columns 'x' and 'y'.
    """
    # sep=r'\s+' replaces delim_whitespace=True, which was deprecated in
    # pandas 2.2 and removed in 3.0; the parsing behaviour is identical
    air_df = pd.read_csv(path, sep=r'\s+', header=0)
    air_df.columns = ['x', 'y']
    return air_df
Example 4
Project: prophet   Author: facebook   File: forecaster.py    MIT License 6 votes vote down vote up
def add_group_component(self, components, name, group):
        """Adds a component with given name that contains all of the components
        in group.

        Parameters
        ----------
        components: pd.DataFrame with columns 'col' and 'component'.
        name: Name of new group component.
        group: List of components that form the group.

        Returns
        -------
        Dataframe with components, with the group rows appended.
        """
        new_comp = components[components['component'].isin(set(group))].copy()
        group_cols = new_comp['col'].unique()
        if len(group_cols) > 0:
            new_comp = pd.DataFrame({'col': group_cols, 'component': name})
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # drop-in replacement and preserves the old row ordering
            components = pd.concat([components, new_comp])
        return components
Example 5
Project: prophet   Author: facebook   File: forecaster.py    MIT License 6 votes vote down vote up
def predictive_samples(self, df):
        """Draw samples from the posterior predictive distribution.

        Parameters
        ----------
        df: Dataframe with dates for predictions (column ds), and capacity
            (column cap) if logistic growth.

        Returns
        -------
        Dictionary with keys "trend" and "yhat" containing
        posterior predictive samples for that component.
        """
        # work on a copy so the caller's frame is never modified
        prepared = self.setup_dataframe(df.copy())
        return self.sample_posterior_predictive(prepared)
Example 6
Project: prophet   Author: facebook   File: forecaster.py    MIT License 6 votes vote down vote up
def predict_uncertainty(self, df):
        """Compute prediction intervals for yhat and trend.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        Dataframe with '<name>_lower' / '<name>_upper' interval columns
        for each of yhat and trend.
        """
        sim_values = self.sample_posterior_predictive(df)

        # tail percentiles of the simulated samples on each side
        lo_p = 100 * (1.0 - self.interval_width) / 2
        hi_p = 100 * (1.0 + self.interval_width) / 2

        series = {}
        for key in ('yhat', 'trend'):
            series[key + '_lower'] = np.nanpercentile(
                sim_values[key], lo_p, axis=1)
            series[key + '_upper'] = np.nanpercentile(
                sim_values[key], hi_p, axis=1)

        return pd.DataFrame(series)
Example 7
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 6 votes vote down vote up
def add_group_component(self, components, name, group):
        """Adds a component with given name that contains all of the components
        in group.

        Parameters
        ----------
        components: pd.DataFrame with columns 'col' and 'component'.
        name: Name of new group component.
        group: List of components that form the group.

        Returns
        -------
        Dataframe with components, with the group rows appended.
        """
        new_comp = components[components['component'].isin(set(group))].copy()
        group_cols = new_comp['col'].unique()
        if len(group_cols) > 0:
            new_comp = pd.DataFrame({'col': group_cols, 'component': name})
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # drop-in replacement and preserves the old row ordering
            components = pd.concat([components, new_comp])
        return components
Example 8
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 6 votes vote down vote up
def predictive_samples(self, df):
        """Draw samples from the posterior predictive distribution.

        Parameters
        ----------
        df: Dataframe with dates for predictions (column ds), and capacity
            (column cap) if logistic growth.

        Returns
        -------
        Dictionary with keys "trend" and "yhat" containing
        posterior predictive samples for that component.
        """
        # copy first so the caller's frame is left untouched
        prepared = self.setup_dataframe(df.copy())
        return self.sample_posterior_predictive(prepared)
Example 9
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 6 votes vote down vote up
def predict_uncertainty(self, df):
        """Compute prediction intervals for yhat and trend.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        Dataframe with '<name>_lower' / '<name>_upper' interval columns
        for each of yhat and trend.
        """
        sim_values = self.sample_posterior_predictive(df)

        # tail percentiles of the simulated samples on each side
        lo_p = 100 * (1.0 - self.interval_width) / 2
        hi_p = 100 * (1.0 + self.interval_width) / 2

        series = {}
        for key in ('yhat', 'trend'):
            series[key + '_lower'] = np.nanpercentile(
                sim_values[key], lo_p, axis=1)
            series[key + '_upper'] = np.nanpercentile(
                sim_values[key], hi_p, axis=1)

        return pd.DataFrame(series)
Example 10
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 6 votes vote down vote up
def add_group_component(self, components, name, group):
        """Adds a component with given name that contains all of the components
        in group.

        Parameters
        ----------
        components: pd.DataFrame with columns 'col' and 'component'.
        name: Name of new group component.
        group: List of components that form the group.

        Returns
        -------
        Dataframe with components, with the group rows appended.
        """
        new_comp = components[components['component'].isin(set(group))].copy()
        group_cols = new_comp['col'].unique()
        if len(group_cols) > 0:
            new_comp = pd.DataFrame({'col': group_cols, 'component': name})
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # drop-in replacement and preserves the old row ordering
            components = pd.concat([components, new_comp])
        return components
Example 11
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 6 votes vote down vote up
def predictive_samples(self, df):
        """Draw samples from the posterior predictive distribution.

        Parameters
        ----------
        df: Dataframe with dates for predictions (column ds), and capacity
            (column cap) if logistic growth.

        Returns
        -------
        Dictionary with keys "trend" and "yhat" containing
        posterior predictive samples for that component.
        """
        # copy before setup so the caller's frame is never mutated
        prepared = self.setup_dataframe(df.copy())
        return self.sample_posterior_predictive(prepared)
Example 12
Project: opt_trade   Author: ejpjapan   File: spx_data_update.py    MIT License 6 votes vote down vote up
def scrape_sp5_div_yield():
    """Scrape the monthly S&P 500 dividend yield table from www.multpl.com.

    Fetches the first HTML table on the page, parses its date column,
    strips the '%' / 'estimate' markers from the values and returns the
    yield resampled to month-start ('MS') frequency.

    :rtype: pd.Series indexed by date.  NOTE(review): despite the original
        ``pd.Dataframe`` annotation, selecting a single column below makes
        the returned object a Series.
    """
    url = 'http://www.multpl.com/s-p-500-dividend-yield/table?f=m'
    # Package the request, send the request and catch the response: r
    raw_html_tbl = pd.read_html(url)
    # the dividend-yield data is assumed to be the first table on the page
    dy_df = raw_html_tbl[0]
    # Clear dataframe: promote the first row to column headers, then drop it
    dy_df.columns = dy_df.iloc[0]
    dy_df = dy_df.drop([0])
    # first column holds dates formatted like 'Jan 1, 2019' (see format=)
    dy_df[dy_df.columns[0]] = pd.to_datetime(dy_df.loc[:, dy_df.columns[0]],
                                             format='%b %d, %Y')
    dy_df = dy_df.set_index(dy_df.columns[0])
    # keep only the first remaining column -> Series from here on
    dy_df = dy_df[dy_df.columns[0]]
    # values carry '%' and sometimes an 'estimate' marker; strip both
    spx_dividend_yld = pd.to_numeric(dy_df.str.replace('%', '').str.replace('estimate', '').str.strip())
    # reverse to chronological order (site lists newest first, presumably)
    spx_dividend_yld = spx_dividend_yld.reindex(spx_dividend_yld.index[::-1])
    spx_dividend_yld = spx_dividend_yld.resample('MS').bfill()
    return spx_dividend_yld
Example 13
Project: kryptos   Author: produvia   File: manager.py    MIT License 6 votes vote down vote up
def plot(self, results, pos, skip_indicators=False, **kw):
        """Plot recorded external data and registered indicators.

        Called by a Strategy object once after algo execution has finished;
        the plotting of each indicator is delegated to the underlying
        Indicator objects.

        Arguments:
            results {pandas.DataFrame} -- Catalyst algo results of all recorded data
            pos {int} -- 3 digit integer used to represent matplotlib subplot position (ex. 212)
            skip_indicators {bool} -- when True, only the raw columns are drawn
            **kw -- forwarded to viz.plot_column
        """
        y_label = self.name
        for column in self.columns:
            viz.plot_column(results, column, pos, label=column,
                            y_label=y_label, **kw)

        if not skip_indicators:
            self.plot_dataset_indicators(results, pos)
        plt.legend()
Example 14
Project: datamonster-api   Author: adaptivemgmt   File: datamonster.py    MIT License 6 votes vote down vote up
def _avro_to_df(self, avro_buffer, data_types):
        """Deserialize an avro buffer into (schema metadata, DataFrame).

        Columns declared with data_type 'date' in *data_types* are parsed
        to datetimes; every other column is passed through unchanged.

        returns: (schema, pandas.DataFrame)

        Raises:
            DataMonsterError: when the writer schema carries no 'structure'
            metadata.
        """

        def _parse_row(row):
            parsed = {}
            for col in data_types:
                col_name = col["name"]
                value = row[col_name]
                if col["data_type"] == "date":
                    value = pandas.to_datetime(value)
                parsed[col_name] = value
            return parsed

        reader = fastavro.reader(six.BytesIO(avro_buffer))
        metadata = reader.writer_schema.get("structure", ())

        if not metadata:
            raise DataMonsterError(
                "DataMonster does not currently support this request"
            )

        frame = pandas.DataFrame.from_records([_parse_row(r) for r in reader])
        return metadata, frame
Example 15
Project: QUANTAXIS   Author: QUANTAXIS   File: QADataStruct.py    MIT License 6 votes vote down vote up
def __init__(self, DataFrame):
        """Wrap one or multiple days of stock transaction (tick) data.

        Arguments:
            DataFrame {pd.DataFrame} -- tick data; expected to carry a
                'price' column plus one of 'vol'/'volume' when 'amount'
                is absent -- TODO confirm expected schema against callers
        """

        # marker used to distinguish this data-struct flavour
        self.type = 'stock_transaction'


        # NOTE(review): self.data aliases the caller's DataFrame here, so
        # the 'amount' column added below also mutates the input object
        self.data = DataFrame
        if 'amount' not in DataFrame.columns:
            if 'vol' in DataFrame.columns:
                # amount = vol * price * 100 (presumably 100 shares per lot
                # -- confirm the unit convention)
                self.data['amount'] = self.data.vol * self.data.price * 100
            elif 'volume' in DataFrame.columns:
                self.data['amount'] = self.data.volume * self.data.price * 100
        if '_id' in DataFrame.columns:
            # strip the MongoDB document id; drop() returns a new frame, so
            # from here on self.data no longer aliases the input
            self.data = self.data.drop(["_id"], axis=1)
        self.mongo_coll = DATABASE.stock_transaction
Example 16
Project: QUANTAXIS   Author: QUANTAXIS   File: QADataStruct.py    MIT License 6 votes vote down vote up
def __init__(self, DataFrame):
        """Wrap one or multiple days of index transaction (tick) data.

        Arguments:
            DataFrame {pd.DataFrame} -- tick data; expected to carry a
                'price' column plus one of 'vol'/'volume' when 'amount'
                is absent -- TODO confirm expected schema against callers
        """

        # marker used to distinguish this data-struct flavour
        self.type = 'index_transaction'

        # NOTE(review): self.data aliases the caller's DataFrame here, so
        # the 'amount' column added below also mutates the input object
        self.data = DataFrame
        if 'amount' not in DataFrame.columns:
            if 'vol' in DataFrame.columns:
                # amount = vol * price * 100 (presumably 100 units per lot
                # -- confirm the unit convention)
                self.data['amount'] = self.data.vol * self.data.price * 100
            elif 'volume' in DataFrame.columns:
                self.data['amount'] = self.data.volume * self.data.price * 100
        if '_id' in DataFrame.columns:
            # strip the MongoDB document id; drop() returns a new frame, so
            # from here on self.data no longer aliases the input
            self.data = self.data.drop(["_id"], axis=1)
        self.mongo_coll = DATABASE.index_transaction
Example 17
Project: pyiron   Author: pyiron   File: generic.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def get_jobs(self, recursive=True, columns=None):
        """
        Internal function to return the jobs as dictionary rather than a pandas.DataFrame

        Args:
            recursive (bool): search subprojects [True/False]
            columns (list): by default only the columns ['id', 'project'] are selected, but the user can select a subset
                            of ['id', 'status', 'chemicalformula', 'job', 'subjob', 'project', 'projectpath',
                            'timestart', 'timestop', 'totalcputime', 'computer', 'hamilton', 'hamversion', 'parentid',
                            'masterid']

        Returns:
            dict: columns are used as keys and point to a list of the corresponding values
        """
        # a FileTable backend answers the query itself; any other backend
        # goes through the module-level get_jobs() helper
        if isinstance(self.db, FileTable):
            return self.db.get_jobs(
                project=self.project_path, recursive=recursive, columns=columns
            )
        return get_jobs(
            database=self.db,
            sql_query=self.sql_query,
            user=self.user,
            project_path=self.project_path,
            recursive=recursive,
            columns=columns,
        )
Example 18
Project: pyiron   Author: pyiron   File: generic.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def queue_table(self, project_only=True, recursive=True, full_table=False):
        """
        Display the queuing system table as pandas.DataFrame

        Args:
            project_only (bool): Query only for jobs within the current project - True by default
            recursive (bool): Include jobs from sub projects
            full_table (bool): Whether to show the entire pandas table

        Returns:
            pandas.DataFrame: Output from the queuing system - optimized for the Sun grid engine
        """
        # resolve the relevant job ids first, then delegate to the
        # module-level queue_table() helper
        job_ids = self.get_job_ids(recursive=recursive)
        return queue_table(
            job_ids=job_ids, project_only=project_only, full_table=full_table
        )
Example 19
Project: pyiron   Author: pyiron   File: generic.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def queue_table_global(self, full_table=False):
        """
        Display the global queuing system table as pandas.DataFrame

        Args:
            full_table (bool): Whether to show the entire pandas table

        Returns:
            pandas.DataFrame: Output from the queuing system - optimized for the Sun grid engine
        """
        df = queue_table(job_ids=[], project_only=False, full_table=full_table)
        if len(df) == 0 or self.db is None:
            return None
        # pyiron job names look like 'pi_<db id>.sh'; strip the wrapping to
        # recover the database id and look each job up
        rows = []
        for queue_ID in df["jobname"]:
            job_name = str(queue_ID)
            if job_name.startswith("pi_"):
                db_id = int(job_name.replace("pi_", "").replace(".sh", ""))
                rows.append(self.db.get_item_by_id(db_id))
        return pandas.DataFrame(rows)
Example 20
Project: pyiron   Author: pyiron   File: hdfio.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def get_from_table(self, path, name):
        """
        Look up a single parameter value from a stored table.

        The object at *path* is expected to be dict-like with list-valued
        entries "Parameter" and "Value" aligned by position.

        Args:
            path (str): relative path to the data object
            name (str): parameter key

        Returns:
            dict, list, float, int: the value associated to the specific parameter key

        Raises:
            ValueError: if *name* is not among the stored parameters
        """
        table = self.get(path)
        parameters = table["Parameter"]
        if name not in parameters:
            raise ValueError("Unknown name: {0}".format(name))
        # values are aligned with parameters by position
        return table["Value"][parameters.index(name)]
Example 21
Project: pyiron   Author: pyiron   File: parallel.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def output_to_pandas(self, sort_by=None, h5_path="output"):
        """
        Convert output of all child jobs to a pandas Dataframe object.

        Args:
            sort_by (str): sort the output using pandas.DataFrame.sort_values(by=sort_by)
            h5_path (str): select child output to include - default='output'

        Returns:
            pandas.DataFrame: output as dataframe
        """
        # TODO: The output to pandas function should no longer be required
        # pull every node under h5_path into the internal output cache
        with self.project_hdf5.open(h5_path) as hdf:
            for node in hdf.list_nodes():
                self._output[node] = hdf[node]
        frame = pandas.DataFrame(self._output)
        return frame if sort_by is None else frame.sort_values(by=sort_by)

    # TODO: make it more general and move it then into genericJob 
Example 22
Project: SNPmatch   Author: Gregor-Mendel-Institute   File: snp_genotype.py    MIT License 6 votes vote down vote up
def get_sq_diversity_np(snps, acc_ix=None):
    """Sequence diversity between accessions (one-vs-all or pairwise).

    Parameters
    ----------
    snps : pd.DataFrame
        genotype matrix, markers x accessions; values apparently coded
        0 / 0.5 / 1 (see the equality checks below) -- TODO confirm coding
    acc_ix : int, np.ndarray or None
        int -> return a 1d np.ndarray of diversity of that accession
        against every other; np.ndarray of column indices (or None for
        all columns) -> return a symmetric pd.DataFrame of pairwise
        diversities computed via allel.sequence_diversity.
    """
    assert type(snps) is pd.core.frame.DataFrame, "please provide pd.Dataframe as input"
    if isinstance(acc_ix, numbers.Integral):
        assert acc_ix < snps.shape[1], "index of a reference to get sq diversity for all the other"
        kin_mat = np.zeros(snps.shape[1], dtype=float)
        for i in range(snps.shape[1]):
            if i == acc_ix:
                # diversity of the reference accession with itself is 0
                kin_mat[acc_ix] = 0
            else:
                t_s = snps.iloc[:,[acc_ix,i]]
                # per-marker allele counts: 2*(# == 0), 2*(# == 0.5), 2*(# == 1)
                # NOTE(review): the 0.5 (het?) count is also doubled --
                # confirm this matches the intended allele-count encoding
                kin_mat[i] = allel.sequence_diversity(range(snps.shape[0]), allel.AlleleCountsArray(np.column_stack((np.sum(t_s == 0, axis =1) * 2, np.sum(t_s == 0.5, axis =1) * 2, np.sum(t_s == 1, axis =1) * 2))))
        return(kin_mat)
    if acc_ix is None:
        # default: pairwise diversity over all accessions
        acc_ix = np.arange(snps.shape[1])
    assert type(acc_ix) is np.ndarray, "provide an index for samples to get pairwise scores"
    kin_mat = pd.DataFrame(0, index = acc_ix, columns = acc_ix, dtype = float)
    for i,j in itertools.combinations(acc_ix, 2):
        t_k = allel.sequence_diversity(range(snps.shape[0]), allel.AlleleCountsArray(np.column_stack((np.sum(snps.iloc[:,[i,j]] == 0, axis =1) * 2, np.sum(snps.iloc[:,[i,j]] == 0.5, axis =1) * 2, np.sum(snps.iloc[:,[i,j]] == 1, axis =1) * 2))))
        #t_k = np.sum(snps.iloc[:,i] == snps.iloc[:,j])/float(snps.shape[0])
        # fill both triangles so the returned matrix is symmetric
        kin_mat.loc[i,j] = t_k
        kin_mat.loc[j,i] = t_k
    return(kin_mat)
Example 23
Project: immuno-probs   Author: penuts7644   File: test_anchor_locator.py    GNU General Public License v3.0 6 votes vote down vote up
def test_anchor_locator(gene, motif, expected):
    """Test if correct indices of conserved motif regions are returned.

    Parameters
    ----------
    gene : str
        A gene identifier, either V or J, specifying the alignment's origin.
    motif : str
        A custom motif string to use for the search (None uses the default).
    expected : pandas.DataFrame
        The expected output pandas.DataFrame with correct columns and values.

    Raises
    -------
    AssertionError
        If the performed test failed.

    """
    locator = AnchorLocator(alignment=create_alignment(), gene=gene)
    # include the custom motif only when one was supplied
    args = (1,) if motif is None else (1, motif)
    result = locator.get_indices_motifs(*args).head()
    assert (result == expected).all().all()
Example 24
Project: immuno-probs   Author: penuts7644   File: test_adaptive_sequence_convertor.py    GNU General Public License v3.0 6 votes vote down vote up
def test_find_longest_substring(full, partial, expected):
    """Test if correct substring is returned.

    Parameters
    ----------
    full : str
        A full length sequence string.
    partial : str
        A partial length sequence string to compare against the full length
        sequence.
    expected : pandas.DataFrame
        The expected output pandas.DataFrame with correct columns and values.

    Raises
    -------
    AssertionError
        If the performed test failed.

    """
    convertor = AdaptiveSequenceConvertor()
    assert convertor.find_longest_substring(full, partial) == expected
Example 25
Project: EQanalytics   Author: AntonMu   File: Location.py    MIT License 6 votes vote down vote up
def seperate_house_numbers(df, column = 'address', city ='SAN FRANCISCO'):
    """Split an address column into house number and street, and tag a city.

    (Function name kept as ``seperate`` [sic] for backward compatibility.)

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe that contains the address column.
    column : str
        Name of the column to split (default 'address').
    city : str
        Value written into the new 'CITY' column.

    Returns
    -------
    pd.DataFrame
        The same dataframe with added NUMBER, STREET and CITY columns.
    """
    # Separate street and number: split on the first space only.  ``n``
    # must be passed by keyword -- it became keyword-only in pandas 2.0
    # (positional use was deprecated in 1.4)
    df[['NUMBER', 'STREET']] = pd.DataFrame(
        df[column].str.split(' ', n=1).tolist(),
        columns=['NUMBER', 'STREET'])
    df['CITY'] = city
    return df
# Split the odd numbers into seperate columns 
Example 26
Project: xalpha   Author: refraction-ray   File: indicator.py    MIT License 5 votes vote down vote up
def comparison(self, date=yesterdayobj()):
        """Normalized net-value comparison of the aim against its benchmark.

        :param date: include prices up to and including this date
            (default: yesterday)
        :returns: tuple of two pd.DataFrame, the first is for aim and the
            second is for the benchmark index; all netvalues are normalized
            and set equal 1.00 on the self.start date
        """
        # .copy() so the normalization below does not raise
        # SettingWithCopyWarning or write through to the underlying
        # price tables
        partp = self.price[self.price["date"] <= date].copy()
        partm = self.bmprice[self.bmprice["date"] <= date].copy()
        normp = partp.iloc[0].netvalue
        normm = partm.iloc[0].netvalue
        partp["netvalue"] = partp["netvalue"] / normp
        partm["netvalue"] = partm["netvalue"] / normm
        return (partp, partm)
Example 27
Project: ILTIS   Author: grg2rsr   File: gioIO.py    GNU General Public License v2.0 5 votes vote down vote up
def read_gloDatamix(path,nTags=None,tagList=None):
    """Read a .gloDatamix file into metadata (pd.DataFrame) and data (np.array).

    Data is a flat 2d representation of nROI*nMeasurements x time, the order
    in the first dim is cycle first through ROIs and then through
    measurements.

    Parameters
    ----------
    path : str
        path of the .gloDatamix file
    nTags : int, optional
        number of metadata columns; inferred from the header (fields not
        starting with 'data') when neither nTags nor tagList is given
    tagList : list, optional
        explicit metadata column names; only its None-ness is checked,
        matching the original behaviour

    Returns
    -------
    (pd.DataFrame, np.ndarray)
        metadata table and numeric data block
    """
    # local import: the sp.sum/zeros/array numpy aliases this code used
    # were removed from scipy, numpy provides identical functions
    import numpy as np

    # context manager: the original leaked the file handle
    with open(path, 'r') as fh:
        lines = fh.readlines()

    if tagList is None and nTags is None:
        # infer nTags: header fields not named 'data<N>' are metadata
        nTags = np.sum([field[:4] != 'data' for field in lines[0].strip().split('\t')])  # this breaks with Ana style gloDatamix

    # read data into np.array and metadata into pd.DataFrame
    Data = np.zeros((len(lines) - 1, len(lines[0].split('\t')) - nTags))

    for i, line in enumerate(lines):
        if i == 0:
            tags = line.split('\t')[:nTags]
            Meta = pd.DataFrame(columns=tags, index=range(len(lines) - 1))
        else:
            Meta.iloc[i - 1] = line.split('\t')[:nTags]
            Data[i - 1, :] = np.array(line.split('\t')[nTags:], dtype='float32')

    return Meta, Data
Example 28
Project: toolkit   Author: SciLensProject   File: csv_fetcher.py    GNU General Public License v3.0 5 votes vote down vote up
def _fetch(self, **kwargs):
        """
        Implements the main logic of the class. It reads csv file and return in to pd Dataframe.

        :param kwargs: -separator: str. the delimiter of the file
        :return: pd.Dataframe
        """
        return self._read_csv(kwargs.get('separator', None)) 
Example 29
Project: toolkit   Author: SciLensProject   File: csv_fetcher.py    GNU General Public License v3.0 5 votes vote down vote up
def _read_csv(self, separator):
        """
        Reads a csv file and returns the pd Dataframe with its content

        :param separator: str, the delimiter of the file.
        :return: pd.Dataframe
        """
        df = pd.read_csv(self.path, sep=separator)

        return df 
Example 30
Project: toolkit   Author: SciLensProject   File: twitter_fetcher.py    GNU General Public License v3.0 5 votes vote down vote up
def _fetch(self, **kwargs):
        """
        Implements the main logic of the class.

        :param kwargs: - url: str
                       - sleep_time: int
        :return: pd.Dataframe
        """
        return pd.DataFrame(self._scrap_twitter_replies(kwargs['url'], kwargs['sleep_time'])) 
Example 31
Project: toolkit   Author: SciLensProject   File: cwur_fetcher.py    GNU General Public License v3.0 5 votes vote down vote up
def _fetch(self, **kwargs):
        """
        Implements the main logic of the class.

        :param kwargs: - year: str
        :return: pd.Dataframe
        """
        return self._scrap_cwur(kwargs['year']) 
Example 32
Project: nistats   Author: nistats   File: glm_reporter.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _dmtx_to_svg_url(design_matrices):
    """ Render each fitted design matrix as an HTML snippet embedding an
    SVG image URL, for insertion into an HTML template.

    NOTE(review): the original summary mentioned a FirstLevelModel /
    SecondLevelModel object, but the argument is the list of design-matrix
    DataFrames themselves.

    Parameters
    ----------
    design_matrices: List[pd.DataFrame]
        Design matrices computed in the model.

    Returns
    -------
    svg_url_design_matrices: String
        Concatenation of the per-matrix HTML snippets.
    """
    html_design_matrices = []
    # each matrix is rendered through this HTML template file
    dmtx_template_path = os.path.join(HTML_TEMPLATE_ROOT_PATH,
                                      'design_matrix_template.html'
                                      )
    with open(dmtx_template_path) as html_template_obj:
        dmtx_template_text = html_template_obj.read()

    # sessions are numbered from 1 in the plot titles
    for dmtx_count, design_matrix in enumerate(design_matrices, start=1):
        dmtx_text_ = string.Template(dmtx_template_text)
        dmtx_plot = plot_design_matrix(design_matrix)
        dmtx_title = 'Session {}'.format(dmtx_count)
        plt.title(dmtx_title, y=0.987)
        dmtx_plot = _resize_plot_inches(dmtx_plot, height_change=.3)
        url_design_matrix_svg = plot_to_svg(dmtx_plot)
        # prevents sphinx-gallery & jupyter from scraping & inserting plots
        plt.close()
        # safe_substitute leaves unknown placeholders untouched (no KeyError)
        dmtx_text_ = dmtx_text_.safe_substitute(
                {'design_matrix': url_design_matrix_svg,
                 'dmtx_title': dmtx_title,
                 }
                )
        html_design_matrices.append(dmtx_text_)
    svg_url_design_matrices = ''.join(html_design_matrices)
    return svg_url_design_matrices
Example 33
Project: scattertext   Author: JasonKessler   File: TermDocMatrixWithoutCategories.py    Apache License 2.0 5 votes vote down vote up
def apply_ranker(self, term_ranker, use_non_text_features):
        '''Run a term ranker over this corpus and return its rank table.

        Parameters
        ----------
        term_ranker : TermRanker
            ranker class/factory, called with this object
        use_non_text_features : bool
            when True, rank non-text (metadata) features instead of terms

        Returns
        -------
        pd.DataFrame
        '''
        ranker = term_ranker(self)
        if use_non_text_features:
            ranker = ranker.use_non_text_features()
        return ranker.get_ranks()
Example 34
Project: scattertext   Author: JasonKessler   File: TermCategoryFrequencies.py    Apache License 2.0 5 votes vote down vote up
def apply_ranker(self, term_ranker):
	'''Return the term-category frequency table.

	Parameters
	----------
	term_ranker : TermRanker
		Ignored; kept for interface compatibility.

	Returns
	-------
	pd.DataFrame
	'''
	# the ranker argument is deliberately unused here
	return self.get_term_category_frequencies(None)
Example 35
Project: prophet   Author: facebook   File: forecaster.py    MIT License 5 votes vote down vote up
def predict_seasonal_components(self, df):
        """Predict seasonality components, holidays, and added regressors.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        pd.DataFrame with one column per component, plus '<name>_lower' /
        '<name>_upper' interval columns when uncertainty_samples is set.
        """
        seasonal_features, _, component_cols, _ = (
            self.make_all_seasonality_features(df)
        )
        if self.uncertainty_samples:
            # tail percentiles for the interval; only defined (and only
            # used) when uncertainty sampling is enabled
            lower_p = 100 * (1.0 - self.interval_width) / 2
            upper_p = 100 * (1.0 + self.interval_width) / 2

        X = seasonal_features.values
        data = {}
        for component in component_cols.columns:
            # mask the betas so only this component's features contribute
            beta_c = self.params['beta'] * component_cols[component].values

            comp = np.matmul(X, beta_c.transpose())
            if component in self.component_modes['additive']:
                # additive components are rescaled back by y_scale
                comp *= self.y_scale
            # point estimate: mean over the sampled parameter draws
            data[component] = np.nanmean(comp, axis=1)
            if self.uncertainty_samples:
                data[component + '_lower'] = np.nanpercentile(
                    comp, lower_p, axis=1,
                )
                data[component + '_upper'] = np.nanpercentile(
                    comp, upper_p, axis=1,
                )
        return pd.DataFrame(data)
Example 36
Project: prophet   Author: facebook   File: forecaster.py    MIT License 5 votes vote down vote up
def make_future_dataframe(self, periods, freq='D', include_history=True):
        """Build a dataframe of dates to predict on.

        Parameters
        ----------
        periods: Int number of periods to forecast forward.
        freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
        include_history: Boolean to include the historical dates in the data
            frame for predictions.

        Returns
        -------
        pd.DataFrame with a single 'ds' column that extends forward from the
        end of self.history for the requested number of periods.
        """
        if self.history_dates is None:
            raise Exception('Model must be fit before this can be used.')
        last_date = self.history_dates.max()
        # request one extra period, then discard the start date itself and
        # trim to exactly `periods` entries
        future = pd.date_range(start=last_date, periods=periods + 1, freq=freq)
        future = future[future > last_date][:periods]

        if include_history:
            future = np.concatenate((np.array(self.history_dates), future))

        return pd.DataFrame({'ds': future})
Example 37
Project: featuretools   Author: FeatureLabs   File: entity.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def df(self):
        '''Dataframe providing the data for the entity.

        Looked up from the entity's backing ``data`` mapping.
        '''
        backing = self.data
        return backing["df"]
Example 38
Project: featuretools   Author: FeatureLabs   File: entity.py    BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def _vals_to_series(self, instance_vals, variable_id):
        """
        instance_vals may be a pd.Dataframe, a pd.Series, a list, a single
        value, or None. This function always returns a Series or None.
        """
        if instance_vals is None:
            return None

        # If this is a single value, make it a list
        if not hasattr(instance_vals, '__iter__'):
            instance_vals = [instance_vals]

        # convert iterable to pd.Series
        if type(instance_vals) == pd.DataFrame:
            out_vals = instance_vals[variable_id]
        elif type(instance_vals) == pd.Series:
            out_vals = instance_vals.rename(variable_id)
        else:
            out_vals = pd.Series(instance_vals)

        # no duplicates or NaN values
        out_vals = out_vals.drop_duplicates().dropna()

        # want index to have no name for the merge in query_by_values
        out_vals.index.name = None

        return out_vals 
Example 39
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def predict(self, df=None):
        """Predict using the prophet model.

        Parameters
        ----------
        df: pd.DataFrame with dates for predictions (column ds), and capacity
            (column cap) if logistic growth. If not provided, predictions are
            made on the history.

        Raises
        ------
        ValueError
            If ``df`` is provided but has no rows.

        Returns
        -------
        A pd.DataFrame with the forecast components.
        """
        if df is None:
            df = self.history.copy()
        else:
            if df.shape[0] == 0:
                raise ValueError('Dataframe has no rows.')
            # Copy first so setup_dataframe cannot mutate the caller's frame.
            df = self.setup_dataframe(df.copy())

        df['trend'] = self.predict_trend(df)
        seasonal_components = self.predict_seasonal_components(df)
        intervals = self.predict_uncertainty(df)

        # Drop columns except ds, cap, floor, and trend
        cols = ['ds', 'trend']
        if 'cap' in df:
            cols.append('cap')
        if self.logistic_floor:
            cols.append('floor')
        # Add in forecast components
        df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1)
        # yhat = trend scaled by the multiplicative terms, plus the additive
        # terms (both produced by predict_seasonal_components above).
        df2['yhat'] = (
            df2['trend'] * (1 + df2['multiplicative_terms'])
            + df2['additive_terms']
        )
        return df2
Example 40
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def predict_seasonal_components(self, df):
        """Predict seasonality components, holidays, and added regressors.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        Dataframe with each seasonal component's mean and its lower/upper
        uncertainty bounds.
        """
        seasonal_features, _, component_cols, _ = (
            self.make_all_seasonality_features(df)
        )
        # Percentile bounds of the configured uncertainty interval.
        lower_p = 100 * (1.0 - self.interval_width) / 2
        upper_p = 100 * (1.0 + self.interval_width) / 2

        features = seasonal_features.values
        components = {}
        for name in component_cols.columns:
            # Zero out the betas that do not belong to this component, then
            # project the features through every row of params['beta'].
            masked_beta = self.params['beta'] * component_cols[name].values
            draws = np.matmul(features, masked_beta.transpose())
            if name in self.component_modes['additive']:
                # Additive components are rescaled back to the data scale.
                draws *= self.y_scale
            components[name] = np.nanmean(draws, axis=1)
            components[name + '_lower'] = np.nanpercentile(draws, lower_p, axis=1)
            components[name + '_upper'] = np.nanpercentile(draws, upper_p, axis=1)
        return pd.DataFrame(components)
Example 41
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def make_future_dataframe(self, periods, freq='D', include_history=True):
        """Simulate the trend using the extrapolated generative model.

        Parameters
        ----------
        periods: Int number of periods to forecast forward.
        freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
        include_history: Boolean to include the historical dates in the data
            frame for predictions.

        Returns
        -------
        pd.DataFrame that extends forward from the end of self.history for
        the requested number of periods.
        """
        if self.history_dates is None:
            raise Exception('Model must be fit before this can be used.')
        history_end = self.history_dates.max()
        # Ask for one extra period: pd.date_range includes the start date,
        # which duplicates the last historical date and is filtered out next.
        future = pd.date_range(start=history_end, periods=periods + 1, freq=freq)
        future = future[future > history_end][:periods]
        if include_history:
            future = np.concatenate((np.array(self.history_dates), future))
        return pd.DataFrame({'ds': future})
Example 42
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def predict(self, df=None):
        """Predict using the prophet model.

        Parameters
        ----------
        df: pd.DataFrame with dates for predictions (column ds), and capacity
            (column cap) if logistic growth. If not provided, predictions are
            made on the history.

        Returns
        -------
        A pd.DataFrame with the forecast components.
        """
        if df is None:
            df = self.history.copy()
        elif df.shape[0] == 0:
            raise ValueError('Dataframe has no rows.')
        else:
            # Copy so the caller's frame is never mutated.
            df = self.setup_dataframe(df.copy())

        df['trend'] = self.predict_trend(df)
        seasonal = self.predict_seasonal_components(df)
        uncertainty = self.predict_uncertainty(df)

        # Keep only ds, trend, and the capacity columns that apply.
        keep = ['ds', 'trend']
        if 'cap' in df:
            keep.append('cap')
        if self.logistic_floor:
            keep.append('floor')
        # Stitch the forecast components onto the kept columns.
        forecast = pd.concat((df[keep], uncertainty, seasonal), axis=1)
        forecast['yhat'] = (
            forecast['trend'] * (1 + forecast['multiplicative_terms'])
            + forecast['additive_terms']
        )
        return forecast
Example 43
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def predict_seasonal_components(self, df):
        """Predict seasonality components, holidays, and added regressors.

        Parameters
        ----------
        df: Prediction dataframe.

        Returns
        -------
        Dataframe with seasonal components: for each component, the mean
        plus ``_lower``/``_upper`` percentile columns.
        """
        seasonal_features, _, component_cols, _ = (
            self.make_all_seasonality_features(df)
        )
        # Percentiles that bracket the configured uncertainty interval.
        lower_p = 100 * (1.0 - self.interval_width) / 2
        upper_p = 100 * (1.0 + self.interval_width) / 2

        feature_matrix = seasonal_features.values
        out = {}
        for comp_name in component_cols.columns:
            # Mask the betas down to this component's columns, then project
            # the features through each row of params['beta'].
            beta_masked = self.params['beta'] * component_cols[comp_name].values
            samples = np.matmul(feature_matrix, beta_masked.transpose())
            if comp_name in self.component_modes['additive']:
                # Additive effects are converted back to the data scale.
                samples *= self.y_scale
            out[comp_name] = np.nanmean(samples, axis=1)
            out[comp_name + '_lower'] = np.nanpercentile(samples, lower_p, axis=1)
            out[comp_name + '_upper'] = np.nanpercentile(samples, upper_p, axis=1)
        return pd.DataFrame(out)
Example 44
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def sample_model(self, df, seasonal_features, iteration, s_a, s_m):
        """Simulate observations from the extrapolated generative model.

        Parameters
        ----------
        df: Prediction dataframe.
        seasonal_features: pd.DataFrame of seasonal features.
        iteration: Int sampling iteration to use parameters from.
        s_a: Indicator vector for additive components
        s_m: Indicator vector for multiplicative components

        Returns
        -------
        Dataframe with trend and yhat, each like df['t'].
        """
        trend = self.sample_predictive_trend(df, iteration)
        beta = self.params['beta'][iteration]
        features = seasonal_features.values

        # Additive effects are brought back to the data scale; multiplicative
        # effects act as relative factors on the trend.
        additive = np.matmul(features, beta * s_a) * self.y_scale
        multiplicative = np.matmul(features, beta * s_m)

        # Observation noise, also rescaled to the data scale.
        sigma = self.params['sigma_obs'][iteration]
        noise = np.random.normal(0, sigma, df.shape[0]) * self.y_scale

        yhat = trend * (1 + multiplicative) + additive + noise
        return pd.DataFrame({'yhat': yhat, 'trend': trend})
Example 45
Project: ALGORITHM-TRADING-AND-STOCK-PREDICTION-USING-MACHINE-LEARNING   Author: kennedyCzar   File: forecaster.py    MIT License 5 votes vote down vote up
def make_future_dataframe(self, periods, freq='D', include_history=True):
        """Simulate the trend using the extrapolated generative model.

        Parameters
        ----------
        periods: Int number of periods to forecast forward.
        freq: Any valid frequency for pd.date_range, such as 'D' or 'M'.
        include_history: Boolean to include the historical dates in the data
            frame for predictions.

        Raises
        ------
        Exception
            If the model has not been fit (``self.history_dates`` is None).

        Returns
        -------
        pd.DataFrame that extends forward from the end of self.history for the
        requested number of periods.
        """
        if self.history_dates is None:
            raise Exception('Model must be fit before this can be used.')
        last_date = self.history_dates.max()
        dates = pd.date_range(
            start=last_date,
            periods=periods + 1,  # An extra in case we include start
            freq=freq)
        dates = dates[dates > last_date]  # Drop start if equals last_date
        dates = dates[:periods]  # Return correct number of periods

        if include_history:
            # Prepend the historical dates so the frame also covers the
            # fitted window.
            dates = np.concatenate((np.array(self.history_dates), dates))

        return pd.DataFrame({'ds': dates})
Example 46
Project: SimpleSQLite   Author: thombashi   File: core.py    MIT License 5 votes vote down vote up
def select_as_dataframe(self, table_name, columns=None, where=None, extra=None):
        """
        Get data in the database and return fetched data as a
        :py:class:`pandas.DataFrame` instance.

        :param str table_name: |arg_select_table_name|
        :param list columns: |arg_select_as_xx_columns|
            Defaults to every attribute of the table.
        :param str where: |arg_select_where|
        :param str extra: |arg_select_extra|
        :return: Table data as a :py:class:`pandas.DataFrame` instance
            (empty when the query produces no result).
        :rtype: pandas.DataFrame
        :raises simplesqlite.NullDatabaseConnectionError:
            |raises_check_connection|
        :raises simplesqlite.TableNotFoundError:
            |raises_verify_table_existence|
        :raises simplesqlite.OperationalError: |raises_operational_error|

        :Example:
            :ref:`example-select-as-dataframe`

        .. note::
            ``pandas`` package required to execute this method.
        """

        # Deferred import: pandas is only required when this method is used.
        import pandas

        if columns is None:
            columns = self.fetch_attr_names(table_name)

        result = self.select(
            select=AttrList(columns), table_name=table_name, where=where, extra=extra
        )

        # No result object from select(): return an empty DataFrame rather
        # than None so callers can treat the return value uniformly.
        if result is None:
            return pandas.DataFrame()

        return pandas.DataFrame(result.fetchall(), columns=columns)
Example 47
Project: opt_trade   Author: ejpjapan   File: spx_data_update.py    MIT License 5 votes vote down vote up
def get_vix():
    """Fetch one year of daily VIX index bars from Interactive Brokers and
    merge them into the locally stored VIX history file.

    :return: close prices of the full merged VIX history
    """
    ibw = IbWrapper()
    ib = ibw.ib
    vix = Index('VIX')
    cds = ib.reqContractDetails(vix)

    # contracts = [cd.contract for cd in cds]
    # Request one year of daily TRADES bars for the first matching contract.
    bars = ib.reqHistoricalData(cds[0].contract,
                                endDateTime='',
                                durationStr='1 Y',
                                barSizeSetting='1 day',
                                whatToShow='TRADES',
                                useRTH=True,
                                formatDate=1)
    ib.disconnect()
    # Convert the bar list to a date-indexed frame, keeping only OHLC.
    vix = util.df(bars)
    vix = vix.set_index('date')
    vix = vix[['open', 'high', 'low', 'close']]

    vix_history = read_feather(str(UpdateSP500Data.TOP_LEVEL_PATH / 'vix_history'))

    # combine_first prefers the freshly downloaded rows and falls back to
    # the stored history for dates absent from the new download; the merged
    # result is persisted back to the same feather file.
    full_hist = vix.combine_first(vix_history)
    write_feather(full_hist, str(UpdateSP500Data.TOP_LEVEL_PATH / 'vix_history'))
    return full_hist['close']
Example 48
Project: opt_trade   Author: ejpjapan   File: spx_data_update.py    MIT License 5 votes vote down vote up
def get_sp5_dividend_yield():
    """Fetch the S&P 500 dividend yield history from Quandl.

    Pulls the ``MULTPL/SP500_DIV_YIELD_MONTH`` dataset at a monthly collapse
    and resamples it to month-start frequency, back-filling so every month
    start carries the next observed yield.

    :return: dividend-yield data resampled to month-start ('MS') frequency
    """
    quandl.ApiConfig.api_key = quandle_api()
    spx_dividend_yld = quandl.get('MULTPL/SP500_DIV_YIELD_MONTH', collapse='monthly')
    # Align observations onto month-start dates for downstream joins.
    spx_dividend_yld = spx_dividend_yld.resample('MS').bfill()
    # NOTE(review): a commented-out fallback to scrape_sp5_div_yield() on
    # Quandl failure was removed here as dead code; restore real error
    # handling if the fallback is still wanted.
    return spx_dividend_yld
Example 49
Project: kryptos   Author: produvia   File: manager.py    MIT License 5 votes vote down vote up
def current_data(self, date):
        """Return the dataset entry stored for the provided date.

        Arguments:
            date {pandas.tslib.Timestamp} -- label looked up in the
                dataset's index

        Returns:
            The ``self.df.loc`` slice for that label.
        """
        snapshot = self.df.loc[date]
        return snapshot
Example 50
Project: kryptos   Author: produvia   File: manager.py    MIT License 5 votes vote down vote up
def calculate(self, context):
        """Calls for calculation of indicators currently registered with the DataManager

        This method is called by a Strategy object at every algo iteration.
        The outputs and calculation for each indicator is handled and stored by
        the underlying Indicator objects.

        Arguments:
            context {pd.Dataframe} -- Catalyst peristent algo context object
        """
        self.current_date = context.blotter.current_dt.date()

        # Assuming only use of basic indicators for now
        # Basic indicators accept a series as opposed to a df with technical indicators
        for i in self._indicators:
            for col in self._indicator_map[i.name]:
                self.log.debug("Calculating {} for {}".format(i.name, col))
                try:
                    col_vals = self.df_to_date(self.current_date)[col]
                    i.calculate(col_vals)
                    i.record()
                except KeyError:
                    msg = """{} is set as the column for {}, but it is not found in the dataset.
                    Does the config look right?:
                    {}
                        """.format(
                        col, i.name, json.dumps(self.serialize(), indent=2)
                    )
                    e = Exception(msg)
                    self.log.exception(e)
                    raise e