Python pandas.DataFrame() Examples

The following are 30 code examples showing how to use pandas.DataFrame(). These examples are extracted from open source projects. You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example.

You may want to check out the right sidebar which shows the related API usage.

You may also want to check out all available functions/classes of the module pandas, or try the search function.

Example 1
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 7 votes vote down vote up
def average_true_range(df, n):
    """Append an exponentially smoothed true-range column to ``df``.

    The true range of row ``i + 1`` is
    ``max(High[i + 1], Close[i]) - min(Low[i + 1], Close[i])``; the first
    row is given a true range of 0.  Rows are looked up by label with
    ``df.loc``, counting from 0 up to the last index label.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Close' columns
    :param n: span and minimum number of periods of the exponential
        moving average
    :return: pandas.DataFrame, ``df`` joined with a new 'ATR_<n>' column
    """
    true_ranges = [0]
    for prev in range(df.index[-1]):
        cur = prev + 1
        prev_close = df.loc[prev, 'Close']
        upper = max(df.loc[cur, 'High'], prev_close)
        lower = min(df.loc[cur, 'Low'], prev_close)
        true_ranges.append(upper - lower)
    smoothed = pd.Series(true_ranges).ewm(span=n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='ATR_' + str(n)))
Example 2
Project: indras_net   Author: gcallah   File: display_methods.py    License: GNU General Public License v3.0 6 votes vote down vote up
def create_lines(self, x, varieties):
        """
        Build the data portion of a line plot, one group of rows per variety.

        Every variety name is appended to ``self.legend`` as a side effect.

        :param x: sequence of x values shared by all varieties
        :param varieties: mapping of variety name to a dict holding at
            least a "data" sequence (the y values); the dict is also handed
            to ``get_color``
        :return: pandas.DataFrame with columns "x", "y", "color", "var";
            an empty DataFrame when ``varieties`` is empty
        """
        x_array = np.array(x)  # identical for every variety, so build it once
        frames = []
        for i, var in enumerate(varieties):
            self.legend.append(var)
            frames.append(pd.DataFrame({"x": x_array,
                                        "y": np.array(varieties[var]["data"]),
                                        "color": get_color(varieties[var], i),
                                        "var": var}))
        if not frames:
            # pd.concat refuses an empty list; keep the empty-frame result.
            return pd.DataFrame()
        # DataFrame.append was removed in pandas 2.0; one concat at the end
        # also avoids re-copying the accumulated rows on every iteration.
        return pd.concat(frames, ignore_index=True, sort=False)
Example 3
Project: indras_net   Author: gcallah   File: display_methods.py    License: GNU General Public License v3.0 6 votes vote down vote up
def create_scats(self, varieties):
        """
        Build ``self.scats``, the scatter-plot data for every variety.

        ``self.scats`` ends up with the columns "x", "y", "color", "marker"
        and "var".  Every variety name is appended to ``self.legend``.  If a
        variety's x and y arrays differ in length, a debug message is logged
        and processing stops; ``self.scats`` then holds only the varieties
        handled before the mismatch.

        :param varieties: mapping of variety name to its description, as
            consumed by ``self.get_arrays``, ``get_color`` and ``get_marker``
        :return: None
        """
        self.scats = pd.DataFrame(columns=["x", "y", "color", "marker", "var"])
        frames = []
        for i, var in enumerate(varieties):
            self.legend.append(var)
            (x_array, y_array) = self.get_arrays(varieties, var)
            if len(x_array) <= 0:  # no data to graph!
                # Create a single "position" for an agent that cannot be
                # seen.  This seems to fix the issue of colors being
                # mismatched when a group has no agents.
                x_array = [-1]
                y_array = [-1]
            elif len(x_array) != len(y_array):
                logging.debug("Array length mismatch in scatter plot")
                break
            frames.append(pd.DataFrame({"x": pd.Series(x_array),
                                        "y": pd.Series(y_array),
                                        "color": get_color(varieties[var], i),
                                        "marker": get_marker(varieties[var], i),
                                        "var": var}))
        if frames:
            # DataFrame.append was removed in pandas 2.0; concatenate once.
            self.scats = pd.concat(frames, ignore_index=True, sort=False)
Example 4
Project: svviz   Author: svviz   File: runTests.py    License: MIT License 6 votes vote down vote up
def run(which):
    """Run the selected test suites and report pass/info/timing per suite.

    :param which: collection of suite names to run ("chrom_ends", "demos",
        "counts", "rendering"); an empty collection runs every suite
    :return: None; the summary table is printed and handed to
        ``saveTimingInfo``
    """
    print("running all tests...")
    summary = pandas.DataFrame(columns=["pass", "info", "timing"])
    run_everything = len(which) == 0

    # (summary row, test callable, label passed to _runTest), in run order:
    # chromosome ends, demos, ref/alt/amb count regression, render regression.
    suites = (
        ("chrom_ends", runTestIssues, "issues"),
        ("demos", testDemos.run, "demos"),
        ("counts", runTestCounts, "counts"),
        ("rendering", rendertest.run, "rendering"),
    )
    for row, test_func, label in suites:
        if run_everything or row in which:
            summary.loc[row] = _runTest(test_func, label)

    def _as_duration(seconds):
        # Render whole seconds as H:MM:SS text.
        return "{}".format(datetime.timedelta(seconds=int(seconds)))

    summary["timing"] = summary["timing"].apply(_as_duration)
    print(summary)

    saveTimingInfo(summary)
Example 5
Project: backtrader-cn   Author: pandalibin   File: utils.py    License: GNU General Public License v3.0 6 votes vote down vote up
def write_daily_alert(cls, symbol, stock_id, action):
        """
        Store one daily stock alert in the daily-alert library.

        The alert is saved as a single-row DataFrame with the columns
        'stock' and 'action'.  It is appended when ``symbol`` already exists
        in the library and written as a new symbol otherwise.

        :param symbol: symbol name inside the library
        :param stock_id: stock identifier stored in the 'stock' column
        :param action: action stored in the 'action' column
            (the original docstring gives 'buy/sell' as an example)
        :return: None
        """
        lib = get_or_create_library(conf.DAILY_STOCK_ALERT_LIBNAME)

        alert = {'stock': stock_id, 'action': action}
        frame = pd.DataFrame([alert], columns=alert.keys())

        store = lib.append if symbol in lib.list_symbols() else lib.write
        store(symbol, frame)
Example 6
Project: backtrader-cn   Author: pandalibin   File: test_datas_utils.py    License: GNU General Public License v3.0 6 votes vote down vote up
def _test_strip_unused_cols(self):
        """Check that ``strip_unused_cols`` removes the named columns from the frame."""
        data = pd.DataFrame({
            'name': ['tom', 'jack'],
            'age': [24, 56],
            'gender': ['male', 'male'],
            'address': ['cn', 'us']
        })
        data.index = pd.date_range(start='2017-01-01', periods=2)

        origin_cols = ['name', 'age', 'gender', 'address']
        unused_cols = ['address', 'gender']
        new_cols = ['name', 'age']

        # list.sort() sorts in place and returns None, so comparing its
        # return values always passed.  sorted() returns the sorted list and
        # makes the assertions actually compare column names.
        self.assertEqual(sorted(data.columns), sorted(origin_cols))

        bdu.Utils.strip_unused_cols(data, *unused_cols)

        self.assertEqual(sorted(data.columns), sorted(new_cols))
Example 7
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def ppsr(df):
    """Add pivot point, support and resistance columns to ``df``.

    The pivot point PP is the mean of High, Low and Close; R1-R3 and S1-S3
    are derived from PP, High and Low.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Close' columns
    :return: pandas.DataFrame, ``df`` joined with the columns
        PP, R1, S1, R2, S2, R3, S3
    """
    high, low, close = df['High'], df['Low'], df['Close']
    pivot = (high + low + close) / 3
    levels = pd.DataFrame({
        'PP': pivot,
        'R1': 2 * pivot - low,
        'S1': 2 * pivot - high,
        'R2': pivot + high - low,
        'S2': pivot - high + low,
        'R3': high + 2 * (pivot - low),
        'S3': low - 2 * (high - pivot),
    })
    return df.join(levels)
Example 8
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def trix(df, n):
    """Append the TRIX column: rate of change of a triple-smoothed Close.

    Close is smoothed three times with an exponential moving average of
    span ``n``; each row then holds the relative change of that value from
    the previous row.  The first row is NaN.  Rows are addressed by integer
    label, counting from 0 up to the last index label.

    :param df: pandas.DataFrame with a 'Close' column
    :param n: span and minimum number of periods of each smoothing pass
    :return: pandas.DataFrame, ``df`` joined with a new 'Trix_<n>' column
    """
    smoothed = df['Close']
    for _ in range(3):
        smoothed = smoothed.ewm(span=n, min_periods=n).mean()

    rates = [np.nan]
    rates.extend(
        (smoothed[i + 1] - smoothed[i]) / smoothed[i]
        for i in range(df.index[-1])
    )
    return df.join(pd.Series(rates, name='Trix_' + str(n)))
Example 9
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def vortex_indicator(df, n):
    """Append the Vortex Indicator column to ``df``.

    Vortex Indicator described here:
        http://www.vortexindicator.com/VFX_VORTEX.PDF

    For each pair of consecutive rows a true range and a vortex movement
    are computed (both 0 for the first row); the indicator is the ratio of
    their ``n``-row rolling sums.  Rows are addressed by label with
    ``df.loc``, counting from 0 up to the last index label.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Close' columns
    :param n: rolling window length
    :return: pandas.DataFrame, ``df`` joined with a new 'Vortex_<n>' column
    """
    steps = range(df.index[-1])

    def true_range(i):
        prev_close = df.loc[i, 'Close']
        return max(df.loc[i + 1, 'High'], prev_close) - min(df.loc[i + 1, 'Low'], prev_close)

    def vortex_move(i):
        up = abs(df.loc[i + 1, 'High'] - df.loc[i, 'Low'])
        down = abs(df.loc[i + 1, 'Low'] - df.loc[i, 'High'])
        return up - down

    tr = pd.Series([0] + [true_range(i) for i in steps])
    vm = pd.Series([0] + [vortex_move(i) for i in steps])
    ratio = vm.rolling(n).sum() / tr.rolling(n).sum()
    return df.join(pd.Series(ratio, name='Vortex_' + str(n)))
Example 10
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def true_strength_index(df, r, s):
    """Append the True Strength Index (TSI) column to ``df``.

    The one-row change of Close and its absolute value are each smoothed
    twice (exponential moving averages of span ``r`` then ``s``); TSI is
    the ratio of the two smoothed series.

    :param df: pandas.DataFrame with a 'Close' column
    :param r: span and minimum number of periods of the first smoothing
    :param s: span and minimum number of periods of the second smoothing
    :return: pandas.DataFrame, ``df`` joined with a new 'TSI_<r>_<s>' column
    """
    def double_smooth(series):
        first_pass = series.ewm(span=r, min_periods=r).mean()
        return first_pass.ewm(span=s, min_periods=s).mean()

    momentum = df['Close'].diff(1)
    ratio = double_smooth(momentum) / double_smooth(momentum.abs())
    return df.join(pd.Series(ratio, name='TSI_' + str(r) + '_' + str(s)))
Example 11
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def money_flow_index(df, n):
    """Append the rolling money-flow ratio column to ``df``.

    The typical price is the mean of High, Low and Close.  A row's positive
    money flow is typical price times Volume when the typical price rose
    from the previous row, else 0 (always 0 for the first row).  The column
    is the ``n``-row rolling mean of positive flow divided by total flow.
    Rows are addressed by integer label, counting from 0 up to the last
    index label.

    :param df: pandas.DataFrame with 'High', 'Low', 'Close', 'Volume' columns
    :param n: rolling window length and minimum number of periods
    :return: pandas.DataFrame, ``df`` joined with a new 'MFI_<n>' column
    """
    typical = (df['High'] + df['Low'] + df['Close']) / 3

    def positive_flow(i):
        if typical[i + 1] > typical[i]:
            return typical[i + 1] * df.loc[i + 1, 'Volume']
        return 0

    pos_flow = pd.Series([0] + [positive_flow(i) for i in range(df.index[-1])])
    total_flow = typical * df['Volume']
    ratio = pos_flow / total_flow
    smoothed = ratio.rolling(n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='MFI_' + str(n)))
Example 12
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def on_balance_volume(df, n):
    """Append a rolling mean of signed volume to ``df``.

    Each row after the first contributes +Volume when Close rose from the
    previous row, -Volume when it fell and 0 when it was unchanged; the
    first row contributes 0.  The column is the ``n``-row rolling mean of
    these contributions.  Rows are addressed by label with ``df.loc``,
    counting from 0 up to the last index label.

    :param df: pandas.DataFrame with 'Close' and 'Volume' columns
    :param n: rolling window length and minimum number of periods
    :return: pandas.DataFrame, ``df`` joined with a new 'OBV_<n>' column
    """
    signed_volume = [0]
    for prev in range(df.index[-1]):
        cur = prev + 1
        change = df.loc[cur, 'Close'] - df.loc[prev, 'Close']
        if change > 0:
            signed_volume.append(df.loc[cur, 'Volume'])
        elif change < 0:
            signed_volume.append(-df.loc[cur, 'Volume'])
        elif change == 0:
            signed_volume.append(0)
        else:
            # The change is NaN (a missing Close).  Emit NaN so the list
            # keeps one entry per row; appending nothing here would shift
            # every later value onto the wrong row.
            signed_volume.append(float('nan'))
    averaged = pd.Series(signed_volume).rolling(n, min_periods=n).mean()
    return df.join(pd.Series(averaged, name='OBV_' + str(n)))
Example 13
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def coppock_curve(df, n):
    """Append the Coppock Curve column to ``df``.

    Two rates of change of Close are computed with lags
    ``int(n * 11 / 10) - 1`` and ``int(n * 14 / 10) - 1``; their sum is
    smoothed with an exponential moving average of span ``n``.

    :param df: pandas.DataFrame with a 'Close' column
    :param n: base period; scales both lags and is the span and minimum
        number of periods of the smoothing
    :return: pandas.DataFrame, ``df`` joined with a new 'Copp_<n>' column
    """
    close = df['Close']

    def rate_of_change(scale):
        lag = int(n * scale / 10) - 1
        return close.diff(lag) / close.shift(lag)

    combined = rate_of_change(11) + rate_of_change(14)
    smoothed = combined.ewm(span=n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='Copp_' + str(n)))
Example 14
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def keltner_channel(df, n):
    """Append the Keltner Channel middle, upper and lower columns to ``df``.

    Each band is the ``n``-row rolling mean of a weighted combination of
    High, Low and Close.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Close' columns
    :param n: rolling window length and minimum number of periods
    :return: pandas.DataFrame, ``df`` joined with the columns
        'KelChM_<n>', 'KelChU_<n>' and 'KelChD_<n>'
    """
    high, low, close = df['High'], df['Low'], df['Close']
    bands = (
        ('KelChM_', (high + low + close) / 3),
        ('KelChU_', (4 * high - 2 * low + close) / 3),
        ('KelChD_', (-2 * high + 4 * low + close) / 3),
    )
    for prefix, raw in bands:
        averaged = raw.rolling(n, min_periods=n).mean()
        df = df.join(pd.Series(averaged, name=prefix + str(n)))
    return df
Example 15
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def ultimate_oscillator(df):
    """Append the Ultimate Oscillator column to ``df``.

    For each pair of consecutive rows a true range and a buying pressure
    are computed (both 0 for the first row).  The oscillator is the sum of
    the buying-pressure/true-range ratios of the 7-, 14- and 28-row rolling
    sums, weighted 4, 2 and 1.  Rows are addressed by label with ``df.loc``,
    counting from 0 up to the last index label.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Close' columns
    :return: pandas.DataFrame, ``df`` joined with a new 'Ultimate_Osc' column
    """
    true_ranges = [0]
    buying_pressure = [0]
    for prev in range(df.index[-1]):
        cur = prev + 1
        prev_close = df.loc[prev, 'Close']
        lower = min(df.loc[cur, 'Low'], prev_close)
        true_ranges.append(max(df.loc[cur, 'High'], prev_close) - lower)
        buying_pressure.append(df.loc[cur, 'Close'] - lower)
    tr = pd.Series(true_ranges)
    bp = pd.Series(buying_pressure)

    def weighted_ratio(weight, window):
        return weight * bp.rolling(window).sum() / tr.rolling(window).sum()

    oscillator = weighted_ratio(4, 7) + weighted_ratio(2, 14) + weighted_ratio(1, 28)
    return df.join(pd.Series(oscillator, name='Ultimate_Osc'))
Example 16
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    License: MIT License 6 votes vote down vote up
def donchian_channel(df, n):
    """Calculate donchian channel of given pandas data frame.

    For each window start ``i`` the channel width is the highest High minus
    the lowest Low over the labels ``i .. i + n - 1``.  The widths are
    preceded by ``n - 1`` zeros and the whole series is then shifted down
    by ``n - 1`` rows before it is joined, so rows without a value are NaN.
    Rows are addressed by integer label, counting from 0 up to the last
    index label.

    :param df: pandas.DataFrame with 'High' and 'Low' columns
    :param n: window length in rows
    :return: pandas.DataFrame, ``df`` joined with a new 'Donchian_<n>' column
    """
    widths = [0] * (n - 1)
    # Same iteration set as "while i + n - 1 < df.index[-1]".
    for start in range(df.index[-1] - n + 1):
        # .ix was removed in pandas 1.0.  On an integer index it sliced by
        # label with both ends included, which is exactly what .loc does.
        window = slice(start, start + n - 1)
        widths.append(df['High'].loc[window].max() - df['Low'].loc[window].min())

    donchian_chan = pd.Series(widths, name='Donchian_' + str(n))
    donchian_chan = donchian_chan.shift(n - 1)
    return df.join(donchian_chan)
Example 17
Project: MPContribs   Author: materialsproject   File: tdata.py    License: MIT License 6 votes vote down vote up
def render(self, total_records=None):
        """Return the HTML that renders this table with the BackGrid JS library.

        The markup is four placeholder ``<div>`` elements with freshly
        generated UUIDs as ids, followed by a ``<script>`` that calls
        ``render_table`` with the table data and a config object.

        :param total_records: record count placed in the config; defaults
            to the number of rows (``self.shape[0]``)
        :return: HTML string
        """
        # if project given, this will result in an overview table of contributions
        # TODO check for index column in df other than the default numbering
        table_json = json.dumps(self.to_backgrid_dict())

        config = {
            "total_records": self.shape[0] if total_records is None else total_records,
            "uuids": [str(uuid.uuid4()) for _ in range(4)],
        }
        if self.tid:
            config.update(tid=self.tid, per_page=self.per_page)
        else:
            config["project"] = self.project
        config.update(api_key=self.api_key, ncols=self.ncols, filters=self.filters)
        config_json = json.dumps(config)

        ids = config["uuids"]
        # (template, index into ids) in the order the divs are emitted.
        placeholders = (
            ('<div class="col-md-6" id="{}"></div>', 0),
            ('<div class="pull-right" id="{}"></div>', 3),
            ('<div id="{}" style="width:100%;"></div>', 1),
            ('<div id="{}"></div>', 2),
        )
        html = "".join(template.format(ids[k]) for template, k in placeholders)
        html += f"<script>render_table({{table: {table_json}, config: {config_json}}})</script>"
        return html
Example 18
Project: MPContribs   Author: materialsproject   File: pre_submission.py    License: MIT License 6 votes vote down vote up
def get_table(results, letter):
    """Combine measured values and a fitted curve into one table indexed by δ.

    ``results`` is indexed positionally: items 0-2 become the "δ",
    "Δ<letter>" and "Δ<letter>ₑᵣᵣ" columns; items 3 and 4 are the x and y
    values of the fit.  Fit points are kept only within 15% of the δ span
    beyond the first and last δ value.

    :param results: sequence of at least five array-like items
        (NOTE(review): items 3 and 4 are compared and mask-indexed, so they
        presumably are numpy arrays or pandas Series - confirm with callers)
    :param letter: suffix used to build the "Δ<letter>" column names
    :return: table sorted by δ with the columns δ, Δ<letter>,
        Δ<letter>ₑᵣᵣ and "Δ<letter> Fit"; missing cells are empty strings
    """
    delta_col = "δ"
    y = "Δ{}".format(letter)
    err_col = y + "ₑᵣᵣ"
    fit_col = y + " Fit"

    measured = Table(
        RecursiveDict([(delta_col, results[0]), (y, results[1]), (err_col, results[2])])
    )
    x0, x1 = map(float, measured[delta_col].iloc[[0, -1]])
    pad = 0.15 * (x1 - x0)
    in_range = (results[3] > x0 - pad) & (results[3] < x1 + pad)
    fit_x, fit_y = results[3][in_range], results[4][in_range]
    measured.set_index(delta_col, inplace=True)

    fitted = pd.DataFrame(RecursiveDict([(delta_col, fit_x), (fit_col, fit_y)]))
    fitted.set_index(delta_col, inplace=True)

    merged = pd.concat([measured, fitted], sort=True).sort_index().reset_index()
    merged = merged.rename(columns={"index": delta_col}).fillna("")
    return merged[[delta_col, y, err_col, fit_col]]
Example 19
Project: pylivy   Author: acroz   File: test_integration.py    License: MIT License 6 votes vote down vote up
def test_session(integration_url, capsys, session_kind, params):
    """Integration test: drive one Livy session through run, error and read.

    Requires a reachable Livy server at ``integration_url``.  ``params``
    supplies the code snippets and expected outputs used below.
    """

    # Fail fast when the Livy server cannot be reached.
    assert livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:

        assert session.state == SessionState.IDLE

        # Output of executed code must arrive on stdout, with nothing on stderr.
        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        # Create the dataframe; its output is read only to clear the capture
        # buffer before the next comparison.
        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        # Failing code must raise SparkRuntimeError.
        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        # Reading "df" back must give a single "value" column holding 0..99.
        expected = pandas.DataFrame({"value": range(100)})
        assert session.read("df").equals(expected)

    # Checked after the with block has exited.
    assert session_stopped(integration_url, session.session_id) 
Example 20
Project: models   Author: kipoi   File: dataloader_m.py    License: MIT License 6 votes vote down vote up
def prepro_pos_table(pos_tables):
    """Extract the unique positions per chromosome and sort them.

    :param pos_tables: one table or a list of tables, each with the columns
        'chromo' and 'pos'
    :return: pandas.DataFrame with the columns 'chromo' and 'pos', holding
        each (chromo, pos) pair once, sorted by chromo then pos; ``None``
        when an empty list is given
    """
    tables = pos_tables if isinstance(pos_tables, list) else [pos_tables]

    def unique_positions(group):
        return pd.DataFrame({'pos': np.unique(group['pos'])})

    merged = None
    for table in tables:
        if merged is not None:
            table = pd.concat([merged, table])
        deduped = table.groupby('chromo').apply(unique_positions).reset_index()
        merged = deduped[['chromo', 'pos']].sort_values(['chromo', 'pos'])
    return merged
Example 21
Project: models   Author: kipoi   File: gather.py    License: MIT License 6 votes vote down vote up
def get_df(vcf_file, model_name):
    """Load model predictions from a VCF file, keyed by a variant uid.

    The uid is "<chr>:<pos>:<ref>:<alt>".  The variant description columns
    and every column whose name contains "rID" are dropped from the
    prediction table; the remaining columns are reformatted (or averaged
    for the "labranchor" model) and the variants are deduplicated.

    :param vcf_file: path handed to ``KipoiVCFParser``
    :param model_name: model name; "labranchor" selects
        ``average_labranchor``, anything else ``refmt_col``
    :return: tuple ``(df, meta_info)``: the prediction table and the
        variant description table restricted to the rows of ``df``
    """
    variant_columns = ["variant_chr", "variant_pos", "variant_ref", "variant_alt", "variant_id"]
    df = pd.DataFrame(list(KipoiVCFParser(vcf_file)))
    # Explicit copy: a column is added below, and assigning into a frame
    # sliced from df triggers pandas' SettingWithCopyWarning.
    meta_info = df[variant_columns].copy()
    meta_info["variant_uid"] = (
        df["variant_chr"].astype(str) + ':' + df["variant_pos"].astype(str)
        + ':' + df["variant_ref"] + ':' + df["variant_alt"]
    )
    df.index = meta_info["variant_uid"]
    meta_info.index = meta_info["variant_uid"]
    df = df[[col for col in df.columns
             if col not in variant_columns and "rID" not in col]]
    col_types = ["_LOGIT_REF", "_LOGIT_ALT", "_REF", "_ALT", "_DIFF", "_LOGIT"]
    if model_name == "labranchor":
        df = average_labranchor(df, model_name, col_types)
    else:
        df.columns = [refmt_col(col, model_name, col_types) for col in df.columns]
    # clump variants together
    df = deduplicate_vars(df)
    # subset meta_info like df and add variant_uid as common ID
    meta_info = meta_info.loc[df.index, :]
    return df, meta_info
Example 22
Project: models   Author: kipoi   File: gather.py    License: MIT License 6 votes vote down vote up
def get_df(vcf_file, model_name):
    """Load model predictions from a VCF file, keyed by a variant uid.

    The uid is "<chr>:<pos>:<ref>:<alt>".  The variant description columns
    and every column whose name contains "rID" are dropped from the
    prediction table; the remaining columns are reformatted (or averaged
    for the "labranchor" model) and the variants are deduplicated.

    :param vcf_file: path handed to ``KipoiVCFParser``
    :param model_name: model name; "labranchor" selects
        ``average_labranchor``, anything else ``refmt_col``
    :return: tuple ``(df, meta_info)``: the prediction table and the
        variant description table restricted to the rows of ``df``
    """
    variant_columns = ["variant_chr", "variant_pos", "variant_ref", "variant_alt", "variant_id"]
    df = pd.DataFrame(list(KipoiVCFParser(vcf_file)))
    # Explicit copy: a column is added below, and assigning into a frame
    # sliced from df triggers pandas' SettingWithCopyWarning.
    meta_info = df[variant_columns].copy()
    meta_info["variant_uid"] = (
        df["variant_chr"].astype(str) + ':' + df["variant_pos"].astype(str)
        + ':' + df["variant_ref"] + ':' + df["variant_alt"]
    )
    df.index = meta_info["variant_uid"]
    meta_info.index = meta_info["variant_uid"]
    df = df[[col for col in df.columns
             if col not in variant_columns and "rID" not in col]]
    col_types = ["_LOGIT_REF", "_LOGIT_ALT", "_REF", "_ALT", "_DIFF", "_LOGIT"]
    if model_name == "labranchor":
        df = average_labranchor(df, model_name, col_types)
    else:
        df.columns = [refmt_col(col, model_name, col_types) for col in df.columns]
    # clump variants together
    df = deduplicate_vars(df)
    # subset meta_info like df and add variant_uid as common ID
    meta_info = meta_info.loc[df.index, :]
    return df, meta_info
Example 23
Project: svviz   Author: svviz   File: summarystats.py    License: MIT License 5 votes vote down vote up
def display(self):
        """Print the statistics as a pivot table, or as plain text on failure.

        With pandas available, ``self.stats`` (rows laid out as in
        ``self.header``) is pivoted to one row per (variant, sample, allele)
        and one column per key.  If pandas cannot be imported or the pivot
        fails, ``str(self)`` is printed instead.  Note that the pandas
        display options ``width`` and ``max_rows`` are changed globally.
        """
        try:
            import pandas
            pandas.options.display.width = 250
            pandas.options.display.max_rows = 400
            df = pandas.DataFrame(self.stats, columns=self.header)
            print(df.pivot_table(values="value", index=["variant","sample","allele"], columns="key"))
        except Exception:
            # Best-effort fallback.  Catch Exception rather than everything
            # so KeyboardInterrupt and SystemExit still propagate.
            print(str(self))
Example 24
Project: svviz   Author: svviz   File: testDemos.py    License: MIT License 5 votes vote down vote up
def run():
    """Run the svviz demo commands and print how long each one took.

    :return: tuple ``(True, "")``
    """
    commands = ["svviz demo 1 -a --no-web",
                "svviz demo 2 -a --no-web",
                "svviz demo 2 -a --no-web --auto-export",
                "svviz demo 3 -a --no-web"]

    def _timed(command):
        # Wall-clock seconds spent in one app.run invocation.
        started = time.time()
        app.run(command.split(" "))
        return time.time() - started

    times = [_timed(command) for command in commands]

    print(pandas.DataFrame({"command":commands, "time (s)":times}))

    return (True, "")
Example 25
Project: incubator-spot   Author: apache   File: dns_oa.py    License: Apache License 2.0 5 votes vote down vote up
def _ingest_summary(self):
        """Store the per-minute DNS record counts for ``self._date``.

        Queries the per-``frame_time`` record counts for the day, reduces
        each ``frame_time`` to its hour and minute, sums the counts per
        minute and inserts the result into ``dns_ingest_summary``.  Nothing
        is inserted when the query returns no rows.

        ``self._date`` is read as "YYYYMMDD" (sliced as [:4], [4:6], [6:]).
        """
        # get date parameters.
        yr = self._date[:4]
        mn = self._date[4:6]
        dy = self._date[6:]

        self._logger.info("Getting ingest summary data for the day")

        ingest_summary_cols = ["date", "total"]

        query_to_load = ("""
            SELECT frame_time, COUNT(*) as total FROM {0}.{1}
            WHERE y={2} AND m={3} AND d={4} AND unix_tstamp IS NOT NULL
            AND frame_time IS NOT NULL AND frame_len IS NOT NULL
            AND dns_qry_name IS NOT NULL AND ip_src IS NOT NULL
            AND (dns_qry_class IS NOT NULL AND dns_qry_type IS NOT NULL
            AND dns_qry_rcode IS NOT NULL ) GROUP BY frame_time;
        """).format(self._db,self._table_name, yr, mn, dy)

        results = impala.execute_query_as_list(query_to_load)
        df = pd.DataFrame(results)

        # Forms a new dataframe splitting the minutes from the time column
        rows = []
        for _, val in df.iterrows():
            # The clock part is the 4th space-separated token of frame_time
            # once double spaces have been collapsed.
            clock = val['frame_time'].replace("  ", " ").split(" ")[3].split(":")
            minute = "{0}-{1}-{2} {3}:{4}".format(
                yr, mn, dy, clock[0].zfill(2), clock[1].zfill(2))
            total = int(val['total']) if not math.isnan(val['total']) else 0
            rows.append([minute, total])
        df_new = pd.DataFrame(rows, columns=ingest_summary_cols)

        #Groups the data by minute
        sf = df_new.groupby(by=['date'])['total'].sum()
        df_per_min = pd.DataFrame({'date':sf.index, 'total':sf.values})

        # The original appended df_per_min to an always-empty frame with
        # DataFrame.append (removed in pandas 2.0); df_per_min alone holds
        # the same rows.  to_records is called with index=False by keyword:
        # the second positional argument is column_dtypes in current pandas
        # and False is not a valid dtype.
        df_final = df_per_min.to_records(index=False)

        if len(df_final) > 0:
            query_to_insert=("""
                INSERT INTO {0}.dns_ingest_summary PARTITION (y={1}, m={2}, d={3}) VALUES {4};
            """).format(self._db, yr, mn, dy, tuple(df_final))
            impala.execute_query(query_to_insert)
Example 26
Project: Financial-NLP   Author: Coldog2333   File: calculate_nlp_vector.py    License: Apache License 2.0 5 votes vote down vote up
def calculate_nlpv(min_count,size,saveflag):
    """Compute the NLP vectors of the common words and optionally save them.

    Loads the word2vec model selected by ``min_count`` and ``size``, keeps
    the common words that the model knows, and computes their vectors with
    ``nlp.nlp_vector`` (falling back to ``nlp.safe_nlp_vector`` on error).

    :param min_count: min_count value used in the model and output names
    :param size: size value used in the model and output names
    :param saveflag: when truthy, write the transposed vectors to a CSV
        whose columns are ``<label>w2v`` for every label followed by
        ``<label>wn`` for every label
    :return: None
    """
    tag = str(min_count) + '_' + str(size)
    nlp = NLP()
    nlp.load_model(model_place + tag + '\\wiki_nlp_' + tag + '.model')

    words = []
    # The context manager closes the file; the original never closed it.
    with open(common_words_filename, 'r', encoding='utf-8') as fp:
        for line in fp:
            word = line.strip('\n')
            try:
                # Check whether the word is in the word2vec model.
                nlp.model.wv[word]
            except KeyError:
                continue
            words.append(word)

    try:
        vector = nlp.nlp_vector(words)
    except Exception:
        # Fallback kept from the original; Exception instead of a bare
        # except so KeyboardInterrupt and SystemExit still propagate.
        vector = nlp.safe_nlp_vector(words)

    if saveflag:
        names = ([label + 'w2v' for label in nlp.Label_index]
                 + [label + 'wn' for label in nlp.Label_index])
        text = pd.DataFrame(columns=names, data=vector.T)
        # Location of the generated csv file.
        text.to_csv(save_result_place + 'common_words_vector_' + tag + '.csv')
Example 27
Project: Financial-NLP   Author: Coldog2333   File: Senti.py    License: Apache License 2.0 5 votes vote down vote up
def score_of_common_words(self, Min_count, Size, saveflag=1, savefilename=''):
        """
        Calculate scores of common words, and save the results as you like.

        The vector table read from ``self.nlp_vector_filename`` has one
        leading index column followed by two equally sized groups of label
        columns (``<label>w2v``, then ``<label>wn``).  For every row the
        labels chosen by ``self.get_topn_topm`` are scored as the mean of
        ``w2v similarity * self.nlp.Label_dict[label]``.

        p.s. please make sure you have set savefilename.

        :param Min_count: passed to ``self.set_model_parameters``
        :param Size: passed to ``self.set_model_parameters``
        :param saveflag: when truthy, write the scores, indexed by the
            words read from ``self.valid_word_filename``, to ``savefilename``
        :param savefilename: path of the gbk-encoded CSV to write
        :return: None
        """
        self.set_model_parameters(Min_count, Size)
        table=pd.read_csv(self.nlp_vector_filename)
        # NOTE: an earlier variant took table.abs() here (absolute value of
        # the cosine similarity); it is disabled in this version.
        result=['']*table.shape[0]
        score=[0]*table.shape[0]
        # Integer division: the value is used as a slice bound, and "/"
        # yields a float in Python 3.
        label_num=(table.shape[1]-1)//2
        for i in range(table.shape[0]):
            w2v=table.iloc[i,1:label_num+1]
            # The original called len() on label_num, which is a number and
            # raised TypeError; the second group is the next label_num columns.
            wn=table.iloc[i,label_num+1:label_num*2+1]
            result[i]=self.get_topn_topm(w2v, wn, n=9, m=3) # this is a string Index
            for reword in result[i]:
                score[i]+=table.loc[i, reword+'w2v']*self.nlp.Label_dict[reword]
            score[i]/=len(result[i])

        if saveflag:
            try:
                with open(self.valid_word_filename,'r',encoding='utf-8') as fp:
                    txtlist=fp.readlines()
            except UnicodeDecodeError:
                # Not valid UTF-8: retry with gbk, as the original did.
                with open(self.valid_word_filename,'r',encoding='gbk') as fp:
                    txtlist=fp.readlines()
            valid_words=[t.split('\n')[0] for t in txtlist]
            rawdata=pd.DataFrame(score, valid_words)
            # 'gkb' in the original is not a codec name (LookupError); the
            # fallback above shows 'gbk' was meant.
            rawdata.to_csv(savefilename,encoding='gbk')
Example 28
Project: Financial-NLP   Author: Coldog2333   File: Senti.py    License: Apache License 2.0 5 votes vote down vote up
def calculate_scores_of_all(self, saveflag=0, savefilename=''):
        """Score every date found in ``self.article_dir``.

        Dates for which ``self.score_of_date`` raises are skipped.

        :param saveflag: when truthy, also write the (date, score) pairs to
            ``savefilename`` as CSV
        :param savefilename: path of the CSV to write
        :return: tuple ``(all_date_score, dates)``: the list of
            (date, score) pairs that could be scored and the full listing
            of ``self.article_dir``
        """
        dates = os.listdir(self.article_dir)
        all_date_score=[]
        for date in dates:
            try:
                score, _ = self.score_of_date(date)
            except Exception:
                # Skip dates that cannot be scored.  Exception instead of a
                # bare except so Ctrl-C still stops a long run.
                continue
            all_date_score.append((date,score))
        if saveflag:
            rawdata=pd.DataFrame(all_date_score)
            rawdata.to_csv(savefilename)
        return all_date_score,dates
Example 29
Project: backtrader-cn   Author: pandalibin   File: utils.py    License: GNU General Public License v3.0 5 votes vote down vote up
def split_data(cls, data, percent=0.3):
        """
        Split the data into training data and test data.

        The first ``floor(len(data) * percent)`` rows are the training
        data; the remaining rows are the test data.

        :param data(DataFrame): data to be split.
        :param percent(float): percent of data used as training data.
        :return: training data(DataFrame) and testing data(DataFrame)
        """

        train_rows = math.floor(len(data) * percent)

        # Slice from train_rows onward instead of taking the last
        # "rows - train_rows" rows: when that count is 0, iloc[-0:] is
        # iloc[0:] and returned the whole frame as test data.
        return data.iloc[:train_rows], data.iloc[train_rows:]
Example 30
Project: backtrader-cn   Author: pandalibin   File: utils.py    License: GNU General Public License v3.0 5 votes vote down vote up
def get_best_params(cls, al_results):
        """
        Pick the result with the largest total return rate.

        :param al_results(list): all the optional params and corresponding
            analysis data; every entry needs a 'total_return_rate' value.
        :return: best params and corresponding analysis data(dict)
        """
        ranked = pd.DataFrame.from_dict(al_results).sort_values(
            'total_return_rate', ascending=False)
        best_row = ranked.iloc[0]
        return best_row.to_dict()