Python pandas.Series() Examples

The following are code examples for showing how to use pandas.Series(). They are from open source Python projects. You can vote up the examples you like or vote down the ones you don't like.

Example 1
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def average_true_range(df, n):
    """Compute the Average True Range (ATR) and join it onto *df*.

    True range for row i (i >= 1) is
    ``max(High[i], Close[i-1]) - min(Low[i], Close[i-1])``; the first row
    is seeded with 0, matching the original loop implementation.  The ATR
    is an exponentially weighted mean (span=n) of the true range.

    :param df: pandas.DataFrame with 'High', 'Low', 'Close' columns and a
        default integer (Range) index
    :param n: span / minimum period of the exponential moving average
    :return: pandas.DataFrame -- *df* with an 'ATR_n' column joined on
    """
    prev_close = df['Close'].shift(1)
    # Vectorised true range: element-wise max/min against the previous
    # close.  DataFrame.max/min skip NaN, so row 0 falls back to
    # High - Low before being overwritten with the 0 seed below.
    tr = (pd.concat([df['High'], prev_close], axis=1).max(axis=1)
          - pd.concat([df['Low'], prev_close], axis=1).min(axis=1))
    tr.iloc[0] = 0  # original seeds TR_l with a leading 0
    TR_s = pd.Series(tr.values)
    ATR = pd.Series(TR_s.ewm(span=n, min_periods=n).mean(), name='ATR_' + str(n))
    df = df.join(ATR)
    return df
Example 2
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def ppsr(df):
    """Calculate Pivot Points, Supports and Resistances for given data

    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    # Classic pivot: mean of high, low and close.
    pivot = pd.Series((df['High'] + df['Low'] + df['Close']) / 3)
    levels = {
        'PP': pivot,
        'R1': pd.Series(2 * pivot - df['Low']),
        'S1': pd.Series(2 * pivot - df['High']),
        'R2': pd.Series(pivot + df['High'] - df['Low']),
        'S2': pd.Series(pivot - df['High'] + df['Low']),
        'R3': pd.Series(df['High'] + 2 * (pivot - df['Low'])),
        'S3': pd.Series(df['Low'] - 2 * (df['High'] - pivot)),
    }
    return df.join(pd.DataFrame(levels))
Example 3
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def trix(df, n):
    """Calculate TRIX for given data.

    TRIX is the one-period rate of change of a triple exponentially
    smoothed close.

    :param df: pandas.DataFrame with a 'Close' column and a default
        integer (Range) index
    :param n: span / minimum period of each smoothing pass
    :return: pandas.DataFrame -- *df* with a 'Trix_n' column joined on
    """
    EX1 = df['Close'].ewm(span=n, min_periods=n).mean()
    EX2 = EX1.ewm(span=n, min_periods=n).mean()
    EX3 = EX2.ewm(span=n, min_periods=n).mean()
    # diff / shift computes (EX3[i] - EX3[i-1]) / EX3[i-1] with a leading
    # NaN -- exactly what the original element-wise loop produced.
    Trix = pd.Series((EX3.diff() / EX3.shift(1)).values, name='Trix_' + str(n))
    df = df.join(Trix)
    return df
Example 4
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def vortex_indicator(df, n):
    """Calculate the Vortex Indicator for given data.

    Vortex Indicator described here:
        http://www.vortexindicator.com/VFX_VORTEX.PDF
    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    # Per-row true range, seeded with 0 for the first row.
    tr_values = [0]
    for idx in range(df.index[-1]):
        high_side = max(df.loc[idx + 1, 'High'], df.loc[idx, 'Close'])
        low_side = min(df.loc[idx + 1, 'Low'], df.loc[idx, 'Close'])
        tr_values.append(high_side - low_side)
    # Vortex movement: |High - prior Low| minus |Low - prior High|.
    vm_values = [0]
    for idx in range(df.index[-1]):
        upward = abs(df.loc[idx + 1, 'High'] - df.loc[idx, 'Low'])
        downward = abs(df.loc[idx + 1, 'Low'] - df.loc[idx, 'High'])
        vm_values.append(upward - downward)
    vm_sum = pd.Series(vm_values).rolling(n).sum()
    tr_sum = pd.Series(tr_values).rolling(n).sum()
    VI = pd.Series(vm_sum / tr_sum, name='Vortex_' + str(n))
    return df.join(VI)
Example 5
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def true_strength_index(df, r, s):
    """Calculate True Strength Index (TSI) for given data.

    :param df: pandas.DataFrame
    :param r: 
    :param s: 
    :return: pandas.DataFrame
    """
    # One-period close change and its magnitude, each double-smoothed.
    momentum = pd.Series(df['Close'].diff(1))
    abs_momentum = abs(momentum)
    smooth_once = pd.Series(momentum.ewm(span=r, min_periods=r).mean())
    abs_smooth_once = pd.Series(abs_momentum.ewm(span=r, min_periods=r).mean())
    smooth_twice = pd.Series(smooth_once.ewm(span=s, min_periods=s).mean())
    abs_smooth_twice = pd.Series(abs_smooth_once.ewm(span=s, min_periods=s).mean())
    TSI = pd.Series(smooth_twice / abs_smooth_twice,
                    name='TSI_' + str(r) + '_' + str(s))
    return df.join(TSI)
Example 6
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def money_flow_index(df, n):
    """Calculate Money Flow Index and Ratio for given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    # Positive money flow: typical price * volume on up-ticks, else 0.
    positive_flow = [0]
    for idx in range(df.index[-1]):
        if typical_price[idx + 1] > typical_price[idx]:
            positive_flow.append(typical_price[idx + 1] * df.loc[idx + 1, 'Volume'])
        else:
            positive_flow.append(0)
    total_flow = typical_price * df['Volume']
    ratio = pd.Series(pd.Series(positive_flow) / total_flow)
    MFI = pd.Series(ratio.rolling(n, min_periods=n).mean(), name='MFI_' + str(n))
    return df.join(MFI)
Example 7
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def on_balance_volume(df, n):
    """Calculate On-Balance Volume for given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    obv_values = [0]
    for idx in range(df.index[-1]):
        delta = df.loc[idx + 1, 'Close'] - df.loc[idx, 'Close']
        # Signed volume: +Volume on up days, -Volume on down days, 0 flat.
        # (elif chain mirrors the original mutually-exclusive ifs, including
        # appending nothing when delta is NaN)
        if delta > 0:
            obv_values.append(df.loc[idx + 1, 'Volume'])
        elif delta == 0:
            obv_values.append(0)
        elif delta < 0:
            obv_values.append(-df.loc[idx + 1, 'Volume'])
    obv_series = pd.Series(obv_values)
    OBV_ma = pd.Series(obv_series.rolling(n, min_periods=n).mean(),
                       name='OBV_' + str(n))
    return df.join(OBV_ma)
Example 8
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def coppock_curve(df, n):
    """Calculate Coppock Curve for given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    # The two rate-of-change lookbacks are 11/10 and 14/10 of n (minus one).
    span_short = int(n * 11 / 10) - 1
    span_long = int(n * 14 / 10) - 1
    roc_short = df['Close'].diff(span_short) / df['Close'].shift(span_short)
    roc_long = df['Close'].diff(span_long) / df['Close'].shift(span_long)
    smoothed = (roc_short + roc_long).ewm(span=n, min_periods=n).mean()
    Copp = pd.Series(smoothed, name='Copp_' + str(n))
    return df.join(Copp)
Example 9
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def keltner_channel(df, n):
    """Calculate Keltner Channel for given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    high, low, close = df['High'], df['Low'], df['Close']
    # Middle, upper and lower bands: rolling means of three price blends.
    bands = [
        ('KelChM_', (high + low + close) / 3),
        ('KelChU_', (4 * high - 2 * low + close) / 3),
        ('KelChD_', (-2 * high + 4 * low + close) / 3),
    ]
    for prefix, raw in bands:
        band = pd.Series(raw.rolling(n, min_periods=n).mean(),
                         name=prefix + str(n))
        df = df.join(band)
    return df
Example 10
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def ultimate_oscillator(df):
    """Calculate Ultimate Oscillator for given data.

    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    true_range = [0]
    buying_pressure = [0]
    for idx in range(df.index[-1]):
        prev_close = df.loc[idx, 'Close']
        low_side = min(df.loc[idx + 1, 'Low'], prev_close)
        true_range.append(max(df.loc[idx + 1, 'High'], prev_close) - low_side)
        buying_pressure.append(df.loc[idx + 1, 'Close'] - low_side)
    bp = pd.Series(buying_pressure)
    tr = pd.Series(true_range)
    # Weighted blend of BP/TR ratios over 7-, 14- and 28-period windows.
    UltO = pd.Series(4 * bp.rolling(7).sum() / tr.rolling(7).sum()
                     + 2 * bp.rolling(14).sum() / tr.rolling(14).sum()
                     + bp.rolling(28).sum() / tr.rolling(28).sum(),
                     name='Ultimate_Osc')
    return df.join(UltO)
Example 11
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 6 votes vote down vote up
def donchian_channel(df, n):
    """Calculate donchian channel of given pandas data frame.

    Channel width over each n-row window: max(High) - min(Low).  The
    published original used ``Series.ix``, which was removed from pandas;
    ``iloc`` below reproduces the same inclusive n-row window on the
    default integer index.

    :param df: pandas.DataFrame with 'High'/'Low' columns and a default
        integer (Range) index
    :param n: window length in rows
    :return: pandas.DataFrame -- *df* with a shifted 'Donchian_n' column
    """
    dc_l = [0] * (n - 1)  # leading pad, as in the original zero-fill loop

    i = 0
    while i + n - 1 < df.index[-1]:
        # .ix[i:i + n - 1] was label-based and end-inclusive; on a default
        # RangeIndex, .iloc[i:i + n] is the identical n-row slice.
        dc = df['High'].iloc[i:i + n].max() - df['Low'].iloc[i:i + n].min()
        dc_l.append(dc)
        i += 1

    donchian_chan = pd.Series(dc_l, name='Donchian_' + str(n))
    donchian_chan = donchian_chan.shift(n - 1)
    return df.join(donchian_chan)
Example 12
Project: DataComp   Author: Cojabi   File: utils.py    Apache License 2.0 6 votes vote down vote up
def get_cat_frequencies(series):
    """
    Counts the occurrences for each factor of a categorical variable and calculates the relative frequencies.

    :param series: Iterable storing the realisations of a categorical random variable / feature.
    :return freqs: Pandas Series storing the relative frequencies using the corresponding factor as index
    :return total: Total number of realisations of the categorical variable
    :return counts: Pandas Series storing the counts using the corresponding factor as index
    """
    # Counter tallies each factor; wrapping it in a Series keeps the
    # factor as index.
    counts = pd.Series(Counter(series))
    total = counts.sum()
    freqs = counts / total
    return freqs, total, counts
Example 13
Project: AlitaNet   Author: iFe1er   File: utils.py    MIT License 6 votes vote down vote up
def multihot_padder(col, sep='|', padding_len=None):
    """Pad a Series of delimiter-separated integer strings into a 2-D array.

    Fixes a bug in the original implementation, which always split on the
    literal '|' and silently ignored the *sep* argument.

    :param col: pd.Series of strings such as ``'1|2|3'``
    :param sep: delimiter between the integer tokens (default ``'|'``)
    :param padding_len: target row width; inferred as the longest row when
        falsy, otherwise each row is truncated to this many tokens
    :return: ``(result, padding_len)`` where *result* is a float ndarray of
        shape ``(len(col), padding_len)``, zero-padded on the right
    """
    assert isinstance(col, pd.Series)

    if not padding_len:
        tokens = col.apply(lambda x: np.array([int(i) for i in x.split(sep)]))
        lens = np.array([len(i) for i in tokens])
        padding_len = max(lens)
    else:
        tokens = col.apply(lambda x: np.array([int(i) for i in x.split(sep)][:padding_len]))
        lens = np.array([len(i) for i in tokens])

    print("Padding Len: %s"%padding_len)
    # Mask selects the leading lens[row] slots of each row; the concatenated
    # token values are scattered into exactly those slots (row-major order).
    mask=np.arange(padding_len)<lens.reshape([-1,1])
    result=np.zeros([col.shape[0],padding_len])
    result[mask]=np.concatenate(tokens.values)  # flatten tokens into one vector and fill in
    return result,padding_len
Example 14
Project: featkit   Author: ryadzenine   File: test_categorical.py    MIT License 6 votes vote down vote up
def test_other_label(self):
        # Table-driven check of ThresholdLabelBinarizer.other_label: factors
        # outside the nb_cls most frequent classes should be replaced by the
        # "replace" value; works for both string and int labels.
        tests = [
            {"serie": ["1", "2", "2", "0", "0", "0"],
             "replace": "3",
             "nb_cls": 3,
             "result": ["1", "2", "2", "0", "0", "0"]},
            {"serie": ["1", "2", "2", "0", "0", "0"],
             "replace": "3",
             "nb_cls": 2,
             "result": ["3", "2", "2", "0", "0", "0"]},
            {"serie": [1, 2, 2, 0, 0, 0],
             "replace": 3,
             "nb_cls": 2,
             "result": [3, 2, 2, 0, 0, 0]}]
        for case in tests:
            # NOTE(review): assertTrue is called with two positional args, so
            # the second Series is treated as the failure *message*, not as a
            # value to compare -- this assertion can only fail if other_label
            # returns something falsy.  assertEqual / assert_series_equal is
            # presumably intended; left unchanged here.
            self.assertTrue(
                ThresholdLabelBinarizer.other_label(Series(case["serie"]), case["nb_cls"], case["replace"]),
                Series(case["result"]))
Example 15
Project: didi_competition   Author: Heipiao   File: operate_load_poi_data.py    MIT License 6 votes vote down vote up
def remove_error_poi_each_line(line_data):
    """Drop malformed POI tokens from one parsed line.

    Keeps the 32-character district hash and every token that starts with
    the ``<digits>#<digits>:<digits>`` shape; everything else is filtered
    out.  Returns the surviving tokens wrapped in a single-element Series.
    """
    standard_style = re.compile(r"\d+#\d+:\d+")

    # Filter instead of removing from a copy while iterating; re.match
    # anchors at the start of the token, as in the original.
    kept = [poi for poi in list(line_data[0])
            if len(poi) == 32 or standard_style.match(poi)]
    return pd.Series([kept])

# the input line_data is a pandas Series!!
Example 16
Project: CIMtools   Author: stsouko   File: fragmentor.py    GNU General Public License v3.0 6 votes vote down vote up
def __parse_svm(svm_file, head_dict):
        """Parse a sparse SVM-format fragment file into a dense frame.

        :param svm_file: path-like object with an ``open()`` method; each
            line is ``<label> k1:v1 k2:v2 ...`` with integer keys/values
        :param head_dict: maps 1-based column number -> column name; keys
            above ``len(head_dict)`` are outside the known header
        :return: tuple ``(DataFrame, Series)`` -- the dense vectors with
            absent fragments filled with 0, and a boolean Series that is
            False for lines with a non-zero value beyond the header
            (presumably marking them outside the applicability domain --
            confirm against callers)
        """
        head_size = len(head_dict)
        vector, ad = [], []
        with svm_file.open() as sf:
            for frag in sf:
                _, *x = frag.split()  # first token is the label; keep the rest
                ad.append(True)
                tmp = {}  # X vector
                for i in x:
                    k, v = i.split(':')
                    k, v = int(k), int(v)
                    if k <= head_size:
                        tmp[head_dict[k]] = v
                    elif v != 0:
                        # non-zero fragment outside the header: flag the
                        # whole line and stop parsing it
                        ad[-1] = False
                        break
                vector.append(tmp)

        return DataFrame(vector, columns=list(head_dict.values())).fillna(0), Series(ad)
Example 17
Project: loman   Author: janushendersonassetallocation   File: computeengine.py    BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def to_df(self):
        """
        Get a dataframe containing the states and value of all nodes of computation

        ::

            >>> comp = loman.Computation()
            >>> comp.add_node('foo', value=1)
            >>> comp.add_node('bar', value=2)
            >>> comp.to_df()
                           state  value  is_expansion
            bar  States.UPTODATE      2           NaN
            foo  States.UPTODATE      1           NaN
        """
        # One row per node, ordered by topological sort of the DAG.
        df = pd.DataFrame(index=nx.topological_sort(self.dag))
        # get_node_attributes returns {node: value}; pd.Series aligns those
        # onto the index, leaving NaN for nodes without the attribute.
        df[NodeAttributes.STATE] = pd.Series(nx.get_node_attributes(self.dag, NodeAttributes.STATE))
        df[NodeAttributes.VALUE] = pd.Series(nx.get_node_attributes(self.dag, NodeAttributes.VALUE))
        # Per-node 'timing' dicts expand to one column per timing field; the
        # left join keeps nodes that have no timing information.
        df_timing = pd.DataFrame.from_dict(nx.get_node_attributes(self.dag, 'timing'), orient='index')
        df = pd.merge(df, df_timing, left_index=True, right_index=True, how='left')
        return df
Example 18
Project: PEAKachu   Author: tbischler   File: window.py    ISC License 5 votes vote down vote up
def _convert_to_data_frame(self):
        """Build ``self._window_df`` from the per-replicon window lists.

        Produces one row per (replicon, strand, window) with one count
        column per library, removes windows with zero counts in every
        library, then dispatches to the configured statistical
        preprocessing ('gtest' or 'deseq').
        """
        self._window_df = pd.DataFrame()
        for replicon in sorted(self._replicon_dict):
            for strand in ["+", "-"]:
                # add window positions to data frame
                row_number = len(self._replicon_dict[replicon]["window_list"])
                df = pd.concat([
                    pd.Series([replicon] * row_number),
                    pd.Series([strand] * row_number),
                    pd.Series([window[0]+1 for window in
                               self._replicon_dict[
                                   replicon]["window_list"]]),
                    pd.Series([window[1] for window in
                               self._replicon_dict[
                        replicon]["window_list"]])], axis=1)
                df.columns = ["replicon", "strand", "w_start", "w_end"]
                # add library counts to data frame
                for lib_name, lib in self._lib_dict.items():
                    df[lib_name] = (pd.Series(lib.replicon_dict[
                        replicon]["window_counts"].loc[:, strand]))
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # pd.concat is the modern replacement.
                self._window_df = self._window_df.append(df,
                                                         ignore_index=True)
            # window_list is no longer needed; free the memory eagerly
            del self._replicon_dict[replicon]["window_list"]
        # remove windows without expression in any library
        print("Removing empty windows from DataFrame with {} rows...".format(
            len(self._window_df.index)), flush=True)
        t_start = time()
        self._window_df = self._window_df.loc[
            (self._window_df.loc[:, self._lib_names_list].sum(axis=1) > 0), :]
        t_end = time()
        print("Removal took {} seconds. DataFrame contains now {} rows.".
              format((t_end-t_start), len(self._window_df.index)), flush=True)
        if self._window_df.empty:
            print("**Dataframe empty**", flush=True)
            return
        if self._stat_test == "gtest":
            self._run_gtest_preprocessing()
        elif self._stat_test == "deseq":
            self._run_deseq_preprocessing()
Example 19
Project: PEAKachu   Author: tbischler   File: window.py    ISC License 5 votes vote down vote up
def _single_g_test(self, counts):
        """Run one G-test on a row of per-library counts.

        :param counts: row indexed by library name (selected via the
            control / experiment library lists)
        :return: pd.Series with the G-test result fields
        """
        ctr_counts = counts[self._ctr_lib_list]
        ctr_counts = ctr_counts.reset_index(drop=True)
        exp_counts = counts[self._exp_lib_list]
        exp_counts = exp_counts.reset_index(drop=True)
        g_test = GTest(ctr_counts, exp_counts, self._pairwise_replicates)
        # More than one experiment library -> replicate-aware variant;
        # otherwise the single-library variant.
        if len(exp_counts) > 1:
            return pd.Series(g_test.run_with_repl())
        else:
            return pd.Series(g_test.run_without_repl())
Example 20
Project: PEAKachu   Author: tbischler   File: window.py    ISC License 5 votes vote down vote up
def _correct_p_values(self, p_values):
        """Benjamini-Hochberg FDR correction of *p_values*; element [1] of
        statsmodels' multipletests result is the array of adjusted p-values,
        returned here as a pd.Series."""
        return pd.Series(multipletests(p_values, method="fdr_bh")[1])
Example 21
Project: PEAKachu   Author: tbischler   File: window.py    ISC License 5 votes vote down vote up
def _check_significance_with_repl(self, p_and_padj_values):
        """Decide significance of one window when replicates are present.

        Returns True if either
        * the heterogenous G p-value is at/above its threshold and the
          pooled adjusted p-value is below the padj threshold, or
        * the total adjusted p-value is below the padj threshold and every
          per-replicate G p-value is below the replicate-pair threshold.
        Otherwise returns False.
        """
        # '/'-separated string of per-replicate p-values -> float Series
        replicate_G_p_values = pd.Series(p_and_padj_values[
            "replicate_G_p_values"].split('/')).astype('float')
        if (p_and_padj_values.loc["heterogenous_G_p_value"] >=
            self._het_p_val_threshold and
            p_and_padj_values.loc["pooled_G_padj_value"] <
                self._padj_threshold):
            return True
        if (p_and_padj_values.loc["total_G_padj_value"] <
            self._padj_threshold) and ((replicate_G_p_values <
                                        self._rep_pair_p_val_threshold).all()):
            return True
        return False
Example 22
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License 5 votes vote down vote up
def _call_cluster_peaks(self, cluster, min_cluster_expr_frac,
                            min_block_overlap, min_max_block_expr_frac):
        """Derive peak intervals for one cluster and store them per replicon.

        *cluster* is a dict with a tab-separated 'header' line and a list of
        tab-separated 'blocks' lines.  A single-block cluster becomes one
        peak directly; multi-block clusters are split recursively via
        ``_split_cluster_peaks``.  Resulting peaks are appended to the
        replicon's ``peak_df``; nothing is stored if no peak is called.
        """
        # header fields: [1] replicon, [4] strand, [5] total expression
        cluster_entries = cluster["header"].strip().split('\t')
        cluster_expr = float(cluster_entries[5])
        cluster_strand = cluster_entries[4]
        cluster_replicon = cluster_entries[1]
        peak_df = pd.DataFrame()

        if len(cluster["blocks"]) == 1:
            block_entries = cluster["blocks"][0].strip().split('\t')
            # +1 converts the block start to the coordinate convention used
            # for peaks (see the same +1 in _split_cluster_peaks)
            peak_start = int(block_entries[2]) + 1
            peak_end = int(block_entries[3])
            # NOTE(review): DataFrame.append was removed in pandas 2.0;
            # pd.concat is the modern replacement.
            peak_df = peak_df.append(pd.Series([peak_start, peak_end], index=[
                "peak_start", "peak_end"]), ignore_index=True)
        else:
            blocks = [block.strip().split('\t') for block in cluster["blocks"]]
            block_df = pd.DataFrame(
                blocks, columns=["blockNb", "blockChrom", "blockStart",
                                 "blockEnd", "blockStrand", "blockExpression",
                                 "readCount"])
            # split() yields strings; convert the numeric columns in one pass
            block_df[["blockNb", "blockStart", "blockEnd", "blockExpression",
                      "readCount"]] = block_df[
                    ["blockNb", "blockStart", "blockEnd", "blockExpression",
                     "readCount"]].apply(pd.to_numeric)
            peak_df = self._split_cluster_peaks(block_df, cluster_expr,
                                                peak_df, min_cluster_expr_frac,
                                                min_block_overlap,
                                                min_max_block_expr_frac)
        if peak_df.empty:
            return
        peak_df = peak_df.astype(np.int64)
        peak_df["peak_strand"] = cluster_strand
        self._replicon_dict[cluster_replicon]["peak_df"] = self._replicon_dict[
            cluster_replicon]["peak_df"].append(peak_df, ignore_index=True)
Example 23
Project: PEAKachu   Author: tbischler   File: adaptive.py    ISC License 5 votes vote down vote up
def _split_cluster_peaks(self, block_df, cluster_expr, peak_df,
                             min_cluster_expr_frac, min_block_overlap,
                             min_max_block_expr_frac):
        """Recursively carve peaks out of a cluster's blocks.

        Each round anchors on the most-expressed block; blocks overlapping
        it by at least *min_block_overlap* of its length and expressed at
        >= *min_max_block_expr_frac* of it are merged into one peak.  The
        recursion continues on blocks that do not touch that peak and stops
        when no blocks remain or the anchor's share of the cluster
        expression falls below *min_cluster_expr_frac*.

        :return: *peak_df* with one (peak_start, peak_end) row per peak
        """
        if block_df.empty:
            return peak_df
        max_block_ix = block_df["blockExpression"].idxmax()
        max_block_expr = block_df.loc[max_block_ix, "blockExpression"]
        if max_block_expr/cluster_expr < min_cluster_expr_frac:
            return peak_df
        # required overlap (in positions) with the anchor block
        min_overlap = round(
            (block_df.loc[max_block_ix, "blockEnd"] -
                block_df.loc[max_block_ix, "blockStart"]) * min_block_overlap)
        # per-block overlap with the anchor: min(ends) - max(starts), clipped at 0
        overlaps_with_max_block = (block_df.loc[:, "blockEnd"].apply(
            min, args=(block_df.loc[
                max_block_ix, "blockEnd"],)) - block_df.loc[
                    :, "blockStart"].apply(
                        max, args=(block_df.loc[
                            max_block_ix, "blockStart"],))).apply(
                                max, args=(0,))
        peak_blocks = block_df.loc[overlaps_with_max_block >= min_overlap, :]
        # drop blocks whose expression is too small relative to the anchor
        peak_blocks = peak_blocks.loc[
            (peak_blocks["blockExpression"] /
                max_block_expr) >= min_max_block_expr_frac, :]
        peak_start = peak_blocks["blockStart"].min()
        peak_end = peak_blocks["blockEnd"].max()
        # overlap of every block with the freshly called peak, clipped at 0
        overlaps_with_peak = (block_df.loc[:, "blockEnd"].apply(min, args=(
            peak_end,)) - block_df.loc[:, "blockStart"].apply(max, args=(
                peak_start,))).apply(max, args=(0,))
        # recurse only on blocks completely outside the peak
        next_block_df = block_df.loc[overlaps_with_peak == 0, :].reset_index(
            drop=True)
        # NOTE(review): DataFrame.append was removed in pandas 2.0;
        # pd.concat is the modern replacement.
        peak_df = peak_df.append(pd.Series([peak_start + 1, peak_end], index=[
            "peak_start", "peak_end"]), ignore_index=True)
        return self._split_cluster_peaks(next_block_df, cluster_expr, peak_df,
                                         min_cluster_expr_frac,
                                         min_block_overlap,
                                         min_max_block_expr_frac)
Example 24
Project: PEAKachu   Author: tbischler   File: library.py    ISC License 5 votes vote down vote up
def merge_reads(self):
        """Convert the library's BAM file to per-replicon reads.

        Stores a pd.Series of reads for each replicon under
        ``self.replicon_dict[replicon]["reads"]`` and returns the dict.
        """
        bam_to_bed = BamToBed(self.paired_end, self.max_insert_size)
        for replicon, reads in bam_to_bed.generate_bed_format(self.bam_file):
            self.replicon_dict[replicon]["reads"] = pd.Series(reads)
        return self.replicon_dict  # it seems that a copy is returned!
Example 25
Project: PEAKachu   Author: tbischler   File: deseq2.py    ISC License 5 votes vote down vote up
def run_deseq2(self, exp_lib_list, ctr_lib_list, size_factors,
                   pairwise_replicates):
        """Run a DESeq2 differential expression analysis via rpy2.

        :param exp_lib_list: experiment library names (columns of the
            internal count frame)
        :param ctr_lib_list: control library names
        :param size_factors: per-library size factors, or None to let
            DESeq2 estimate them
        :param pairwise_replicates: if True, model paired samples with the
            design '~ samples + conditions' instead of '~ conditions'
        :return: tuple ``(results_df, size_factors)`` -- the DESeq2 result
            table re-indexed like the count frame, and the size factors
            actually used
        """
        # DESeq2 requires integer counts
        self._count_df = np.round(self._count_df, decimals=0)
        self._count_df = self._count_df.astype(int)
        conds = ["exp"] * len(exp_lib_list) + ["ctr"] * len(ctr_lib_list)
        if pairwise_replicates:
            # pair replicate k of 'exp' with replicate k of 'ctr'
            samples = [str(sample) for sample in (
                       list(range(1, len(exp_lib_list) + 1)) +
                       list(range(1, len(ctr_lib_list) + 1)))]
            colData = robjects.DataFrame({
                    "conditions": robjects.StrVector(conds),
                    "samples": robjects.StrVector(samples)})
            design = Formula('~ samples + conditions')
        else:
            colData = robjects.DataFrame(
                    {"conditions": robjects.StrVector(conds)})
            design = Formula('~ conditions')
        r_count_df = robjects.DataFrame(self._count_df)
        # clear the R-side column names before handing the matrix to DESeq2
        r_count_df.colnames = robjects.rinterface.NULL
        dds = r.DESeqDataSetFromMatrix(countData=r_count_df,
                                       colData=colData, design=design)
        if size_factors is None:
            dds = r.estimateSizeFactors(dds)
        else:
            # inject the externally supplied size factors via R's
            # `sizeFactors<-` replacement function
            assign_sf = r["sizeFactors<-"]
            dds = assign_sf(object=dds, value=robjects.FloatVector(
                size_factors))
        dds = r.estimateDispersions(dds, quiet=True)
        dds = r.nbinomWaldTest(dds, quiet=True)
        size_factors = pd.Series(r.sizeFactors(dds),
                                 index=self._count_df.columns)
        # one-sided contrast: higher expression in 'exp' than in 'ctr'
        results = r.results(dds, contrast=robjects.StrVector(
            ("conditions", "exp", "ctr")), altHypothesis="greater")
        with localconverter(robjects.default_converter + pandas2ri.converter):
            results_df = robjects.conversion.rpy2py(
                r['as.data.frame'](results))
        results_df.index = self._count_df.index
        return(results_df, size_factors)
Example 26
Project: PEAKachu   Author: tbischler   File: deseq2.py    ISC License 5 votes vote down vote up
def calc_size_factors(self):
        """Estimate per-library size factors with DESeq2's
        ``estimateSizeFactorsForMatrix`` and return them as a pd.Series
        indexed by the count frame's column (library) names."""
        # DESeq2 requires integer counts
        self._count_df = np.round(self._count_df, decimals=0)
        self._count_df = self._count_df.astype(int)
        r_count_df = robjects.DataFrame(self._count_df)
        # clear the R-side column names before handing the matrix to DESeq2
        r_count_df.colnames = robjects.rinterface.NULL
        r_size_factors = r.estimateSizeFactorsForMatrix(r_count_df)
        return pd.Series(r_size_factors, index=self._count_df.columns)
Example 27
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def moving_average(df, n):
    """Calculate the moving average for the given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    rolling_mean = df['Close'].rolling(n, min_periods=n).mean()
    return df.join(pd.Series(rolling_mean, name='MA_' + str(n)))
Example 28
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def exponential_moving_average(df, n):
    """Join an exponential moving average of the close onto the frame.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    smoothed = df['Close'].ewm(span=n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='EMA_' + str(n)))
Example 29
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def momentum(df, n):
    """Join the n-period close-price momentum (difference) onto the frame.

    :param df: pandas.DataFrame 
    :param n: 
    :return: pandas.DataFrame
    """
    n_period_change = df['Close'].diff(n)
    return df.join(pd.Series(n_period_change, name='Momentum_' + str(n)))
Example 30
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def rate_of_change(df, n):
    """Join the (n-1)-period rate of change of the close onto the frame.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    change = df['Close'].diff(n - 1)
    base = df['Close'].shift(n - 1)
    return df.join(pd.Series(change / base, name='ROC_' + str(n)))
Example 31
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def stochastic_oscillator_k(df):
    """Calculate stochastic oscillator %K for given data.

    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    # Position of the close inside the day's high-low range.
    day_range = df['High'] - df['Low']
    SOk = pd.Series((df['Close'] - df['Low']) / day_range, name='SO%k')
    return df.join(SOk)
Example 32
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def stochastic_oscillator_d(df, n):
    """Calculate stochastic oscillator %D for given data.
    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    # %D is an exponential smoothing of the raw %K series.
    raw_k = (df['Close'] - df['Low']) / (df['High'] - df['Low'])
    SOk = pd.Series(raw_k, name='SO%k')
    smoothed = SOk.ewm(span=n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='SO%d_' + str(n)))
Example 33
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def average_directional_movement_index(df, n, n_ADX):
    """Calculate the Average Directional Movement Index for given data.

    :param df: pandas.DataFrame
    :param n: 
    :param n_ADX: 
    :return: pandas.DataFrame
    """
    # Directional moves: an up (down) move only counts when it exceeds the
    # opposite move and is positive; otherwise it contributes 0.
    up_moves = []
    down_moves = []
    for idx in range(df.index[-1]):
        up = df.loc[idx + 1, 'High'] - df.loc[idx, 'High']
        down = df.loc[idx, 'Low'] - df.loc[idx + 1, 'Low']
        up_moves.append(up if (up > down and up > 0) else 0)
        down_moves.append(down if (down > up and down > 0) else 0)
    # True range, seeded with 0 for the first row.
    true_ranges = [0]
    for idx in range(df.index[-1]):
        prev_close = df.loc[idx, 'Close']
        tr = (max(df.loc[idx + 1, 'High'], prev_close)
              - min(df.loc[idx + 1, 'Low'], prev_close))
        true_ranges.append(tr)
    ATR = pd.Series(pd.Series(true_ranges).ewm(span=n, min_periods=n).mean())
    PosDI = pd.Series(pd.Series(up_moves).ewm(span=n, min_periods=n).mean() / ATR)
    NegDI = pd.Series(pd.Series(down_moves).ewm(span=n, min_periods=n).mean() / ATR)
    raw_adx = (abs(PosDI - NegDI) / (PosDI + NegDI)).ewm(
        span=n_ADX, min_periods=n_ADX).mean()
    ADX = pd.Series(raw_adx, name='ADX_' + str(n) + '_' + str(n_ADX))
    return df.join(ADX)
Example 34
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def mass_index(df):
    """Calculate the Mass Index for given data.

    :param df: pandas.DataFrame
    :return: pandas.DataFrame
    """
    # Ratio of single to double 9-period EMA of the high-low range,
    # summed over a 25-period window.
    hl_range = df['High'] - df['Low']
    single_smoothed = hl_range.ewm(span=9, min_periods=9).mean()
    double_smoothed = single_smoothed.ewm(span=9, min_periods=9).mean()
    MassI = pd.Series((single_smoothed / double_smoothed).rolling(25).sum(),
                      name='Mass Index')
    return df.join(MassI)
Example 35
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def kst_oscillator(df, r1, r2, r3, r4, n1, n2, n3, n4):
    """Calculate KST Oscillator for given data.

    :param df: pandas.DataFrame
    :param r1: 
    :param r2: 
    :param r3: 
    :param r4: 
    :param n1: 
    :param n2: 
    :param n3: 
    :param n4: 
    :return: pandas.DataFrame
    """
    close = df['Close']

    def _roc(lookback):
        # (lookback-1)-period rate of change, matching the original M / N
        return close.diff(lookback - 1) / close.shift(lookback - 1)

    # Rolling sums of the four ROC series, weighted 1..4 and added in the
    # same order as the original expression.
    weighted = sum(_roc(r).rolling(w).sum() * weight
                   for weight, (r, w) in enumerate(
                       [(r1, n1), (r2, n2), (r3, n3), (r4, n4)], start=1))
    KST = pd.Series(weighted,
                    name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) +
                         '_' + str(r4) + '_' + str(n1) + '_' + str(n2) +
                         '_' + str(n3) + '_' + str(n4))
    return df.join(KST)
Example 36
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def relative_strength_index(df, n):
    """Calculate Relative Strength Index(RSI) for given data.

    :param df: pandas.DataFrame
    :param n: 
    :return: pandas.DataFrame
    """
    # Directional moves, each seeded with 0; an up (down) move only counts
    # when it exceeds the opposite move and is positive.
    ups = [0]
    downs = [0]
    for idx in range(df.index[-1]):
        up_move = df.loc[idx + 1, 'High'] - df.loc[idx, 'High']
        down_move = df.loc[idx, 'Low'] - df.loc[idx + 1, 'Low']
        ups.append(up_move if (up_move > down_move and up_move > 0) else 0)
        downs.append(down_move if (down_move > up_move and down_move > 0) else 0)
    PosDI = pd.Series(pd.Series(ups).ewm(span=n, min_periods=n).mean())
    NegDI = pd.Series(pd.Series(downs).ewm(span=n, min_periods=n).mean())
    RSI = pd.Series(PosDI / (PosDI + NegDI), name='RSI_' + str(n))
    return df.join(RSI)
Example 37
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def chaikin_oscillator(df):
    """Calculate Chaikin Oscillator for given data.

    :param df: pandas.DataFrame with 'High', 'Low', 'Close' and 'Volume'
        columns (High must differ from Low to avoid division by zero)
    :return: pandas.DataFrame with an added 'Chaikin' column
    """
    # Accumulation/distribution line: close-location value scaled by volume.
    ad_line = (2 * df['Close'] - df['High'] - df['Low']) / (df['High'] - df['Low']) * df['Volume']
    fast = ad_line.ewm(span=3, min_periods=3).mean()
    slow = ad_line.ewm(span=10, min_periods=10).mean()
    # The oscillator is the spread between the fast and slow EMAs.
    df = df.join(pd.Series(fast - slow, name='Chaikin'))
    return df
Example 38
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def force_index(df, n):
    """Calculate Force Index for given data.

    :param df: pandas.DataFrame with 'Close' and 'Volume' columns
    :param n: period used for both differences
    :return: pandas.DataFrame with an added 'Force_<n>' column
    """
    price_change = df['Close'].diff(n)
    volume_change = df['Volume'].diff(n)
    force = pd.Series(price_change * volume_change, name='Force_' + str(n))
    return df.join(force)
Example 39
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def ease_of_movement(df, n):
    """Calculate Ease of Movement for given data.

    :param df: pandas.DataFrame with 'High', 'Low' and 'Volume' columns
    :param n: window size of the smoothing moving average
    :return: pandas.DataFrame with an added 'EoM_<n>' column
    """
    # Midpoint move scaled by the day's range, normalized by volume.
    midpoint_move = df['High'].diff(1) + df['Low'].diff(1)
    box_range = df['High'] - df['Low']
    raw_eom = midpoint_move * box_range / (2 * df['Volume'])
    smoothed = raw_eom.rolling(n, min_periods=n).mean()
    return df.join(pd.Series(smoothed, name='EoM_' + str(n)))
Example 40
Project: pandas-technical-indicators   Author: Crypto-toolbox   File: technical_indicators.py    MIT License 5 votes vote down vote up
def standard_deviation(df, n):
    """Calculate Standard Deviation for given data.

    :param df: pandas.DataFrame with a 'Close' column
    :param n: rolling window size (full window required per point)
    :return: pandas.DataFrame with an added 'STD_<n>' column
    """
    rolling_std = df['Close'].rolling(n, min_periods=n).std()
    std_col = pd.Series(rolling_std, name='STD_' + str(n))
    return df.join(std_col)
Example 41
Project: ieml   Author: IEMLdev   File: ieml_database.py    GNU General Public License v3.0 5 votes vote down vote up
def get_values(self, ieml, language, descriptor):
        """Return the descriptor values stored for one (ieml, language, descriptor) key.

        Missing keys yield an empty list instead of raising.

        :param ieml: IEML expression (normalized here without parsing)
        :param language: language code of the descriptor
        :param descriptor: descriptor name
        :return: list of values, or [] when the key is absent
        """
        # Canonicalize the key; parsing and partial matching are disabled.
        ieml, language, descriptor = _normalize_key(ieml, language, descriptor,
                                                    parse_ieml=False, partial=False)
        try:
            # NOTE(review): presumably self.df is indexed by a
            # (ieml, language, descriptor) MultiIndex with a 'value'
            # column -- confirm against the class definition.
            res = self.df.loc(axis=0)[(str(ieml), language, descriptor)]
            if isinstance(res, pandas.Series):
                return res.to_list()
            else:
                # Multiple matching rows come back as a DataFrame.
                return res.to_dict('list')['value']
        except KeyError:
            return []

    # @monitor_decorator('get_values_partial') 
Example 42
Project: DataComp   Author: Cojabi   File: utils.py    Apache License 2.0 5 votes vote down vote up
def _categorical_table(data):
    """Count the occurrences of each category in *data*.

    Used to build the observation table for a chi-square test.

    :param data: iterable of category labels (may contain NaNs)
    :return: pandas.Series mapping each non-NaN category to its count
    """
    counts = Counter(data)
    # Drop NaN keys so missing values never appear as a category.
    cleaned = {category: tally for category, tally in counts.items()
               if not pd.isnull(category)}
    return pd.Series(cleaned)
Example 43
Project: jiji-with-tensorflow-example   Author: unageanu   File: trade_results_loader.py    MIT License 5 votes vote down vote up
def __up_down(self, profit_or_loss):
        """Map each profit/loss value to indicator 'up'/'down' columns.

        :param profit_or_loss: pandas.Series of numeric trade results
        :return: pandas.DataFrame with 'up' (p > 0) and 'down' (p <= 0)
            0/1 indicator columns
        """
        def flags(p):
            up_flag = 1 if p > 0 else 0
            down_flag = 1 if p <= 0 else 0
            return pd.Series([up_flag, down_flag], index=['up', 'down'])

        return profit_or_loss.apply(flags)
Example 44
Project: GreenGuard   Author: D3-AI   File: data.py    MIT License 5 votes vote down vote up
def make_targets(target_times, window_size, target, new_targets=None):
    """Insert extra target rows into the gaps between existing cutoff times.

    For every pair of consecutive cutoff times more than two windows apart,
    one new row is created at a random time inside the gap (at least one
    window away from both neighbours), copying the earlier row's turbine_id
    and labelling it with ``target``.

    :param target_times: pandas.DataFrame with 'turbine_id', 'cutoff_time'
        and 'target' columns
    :param window_size: window length (anything ``pd.to_timedelta`` accepts)
    :param target: target value assigned to the generated rows
    :param new_targets: number of rows to add; defaults to the current size
    :return: pandas.DataFrame sorted by 'cutoff_time' including the new rows
    """
    target_times = target_times.sort_values('cutoff_time', ascending=True)
    cutoff_times = target_times.cutoff_time
    window_size = pd.to_timedelta(window_size)
    original_size = len(target_times)
    current_size = original_size
    new_targets = new_targets or current_size

    for index in range(len(cutoff_times) - 1):
        timestamp = cutoff_times.iloc[index]
        next_time = cutoff_times.iloc[index + 1]

        # Need room for a full window on each side of the new cutoff time.
        if timestamp + (window_size * 2) >= next_time:
            continue

        span_start = timestamp + window_size
        span_end = next_time - window_size
        span_length = (span_end - span_start).total_seconds()

        delay = pd.to_timedelta(np.random.randint(span_length), unit='s')
        cutoff_time = span_start + delay

        # DataFrame.append was removed in pandas 2.0; build the new row as a
        # one-row frame and concatenate instead (same result, version-safe).
        new_row = pd.DataFrame([{
            'turbine_id': target_times.iloc[index].turbine_id,
            'cutoff_time': cutoff_time,
            'target': target
        }])
        target_times = pd.concat([target_times, new_row], ignore_index=True)

        current_size = len(target_times)
        if current_size == original_size + new_targets:
            return target_times.sort_values('cutoff_time', ascending=True)

    if current_size == original_size:
        warnings.warn('There is no space left between to add more targets.')
        return target_times

    # Not enough gaps in this pass; recurse to place the remainder.
    new_targets = new_targets - (current_size - original_size)
    return make_targets(target_times, window_size, target, new_targets)
Example 45
Project: models   Author: kipoi   File: gather.py    MIT License 5 votes vote down vote up
def get_vep_scores(vcf_name, vep_vcf_key="CSQ", sel_vep_keys=["phyloP46way_placental", "phyloP46way_primate", "CADD_PHRED", "CADD_RAW"]):
    """Extract selected VEP annotation scores from a VCF into a DataFrame.

    :param vcf_name: path of the VCF file to read
    :param vep_vcf_key: INFO field that holds the VEP annotation
    :param sel_vep_keys: annotation sub-fields to extract as float columns
    :return: pandas.DataFrame indexed by "CHROM:POS:REF:ALT" variant ids,
        one float column per entry of ``sel_vep_keys``; duplicate variant
        ids are dropped (first occurrence kept)
    """
    vcf_fh = cyvcf2.VCF(vcf_name)
    # Parse the pipe-separated VEP sub-field names from the header line;
    # its Description looks like '... Format: a|b|c'.
    for hdr in vcf_fh.header_iter():
        hdr_info = hdr.info()
        if 'ID' in hdr_info:
            if hdr_info['ID'] == vep_vcf_key:
                vep_keys = hdr_info['Description'].split(": ")[-1].rstrip('"').split("|")
                break
    # NOTE(review): if no header entry matches vep_vcf_key, vep_keys is
    # unbound and the next line raises NameError -- confirm inputs always
    # carry this header.
    sel_vep_elms = [vep_keys.index(k) for k in sel_vep_keys]
    entries = []
    for rec in vcf_fh:
        info_dict = dict(rec.INFO)
        if vep_vcf_key in info_dict:
            # Only the first transcript annotation (before the first ',') is used.
            vep_entries = info_dict[vep_vcf_key].split(",")[0].split("|")
            variant_uid = ":".join([rec.CHROM, str(rec.POS), rec.REF, rec.ALT[0]])
            # Reuse the selected values instead of recomputing the list.
            vals = [vep_entries[i] for i in sel_vep_elms]
            entries.append(pd.Series(vals, name=variant_uid, index=sel_vep_keys))
    # Release the underlying file handle.
    vcf_fh.close()
    # Turn into a data frame; empty fields become NaN when cast to float.
    df = pd.DataFrame(entries)
    df = df.replace("", "nan").astype(float)
    # De-duplicate variant ids, keeping the first occurrence.
    df = df.loc[~pd.Series(df.index.values).duplicated().values, :]
    return df
Example 46
Project: models   Author: kipoi   File: gather.py    MIT License 5 votes vote down vote up
def get_vep_scores(vcf_name,
                   vep_vcf_key="CSQ",
                   sel_vep_keys=["phyloP46way_placental",
                                 "phyloP46way_primate",
                                 "CADD_phred",
                                 "CADD_raw"]):
    """Extract selected VEP annotation scores from a VCF into a DataFrame.

    :param vcf_name: path of the VCF file to read
    :param vep_vcf_key: INFO field that holds the VEP annotation
    :param sel_vep_keys: annotation sub-fields to extract as float columns
    :return: pandas.DataFrame indexed by "CHROM:POS:REF:ALT" variant ids,
        one float column per entry of ``sel_vep_keys``; duplicate variant
        ids are dropped (first occurrence kept)
    """
    vcf_fh = cyvcf2.VCF(vcf_name)
    # Parse the pipe-separated VEP sub-field names from the header line;
    # its Description looks like '... Format: a|b|c'.
    for hdr in vcf_fh.header_iter():
        hdr_info = hdr.info()
        if 'ID' in hdr_info:
            if hdr_info['ID'] == vep_vcf_key:
                vep_keys = hdr_info['Description'].split(": ")[-1].rstrip('"').split("|")
                break
    # NOTE(review): if no header entry matches vep_vcf_key, vep_keys is
    # unbound and the next line raises NameError -- confirm inputs always
    # carry this header.
    sel_vep_elms = [vep_keys.index(k) for k in sel_vep_keys]
    entries = []
    for rec in vcf_fh:
        info_dict = dict(rec.INFO)
        if vep_vcf_key in info_dict:
            # Only the first transcript annotation (before the first ',') is used.
            vep_entries = info_dict[vep_vcf_key].split(",")[0].split("|")
            variant_uid = ":".join([rec.CHROM, str(rec.POS), rec.REF, rec.ALT[0]])
            # Reuse the selected values instead of recomputing the list.
            vals = [vep_entries[i] for i in sel_vep_elms]
            entries.append(pd.Series(vals, name=variant_uid, index=sel_vep_keys))
    # Release the underlying file handle.
    vcf_fh.close()
    # Turn into a data frame; empty fields become NaN when cast to float.
    df = pd.DataFrame(entries)
    df = df.replace("", "nan").astype(float)
    # De-duplicate variant ids, keeping the first occurrence.
    df = df.loc[~pd.Series(df.index.values).duplicated().values, :]
    return df
Example 47
Project: AlitaNet   Author: iFe1er   File: utils.py    MIT License 5 votes vote down vote up
def fit(self, col):
        """Learn an integer encoding for the unique values of *col*.

        Stores ``self.encoding_dict`` (value -> code) and sets
        ``self.status`` to 'fitted'.

        :param col: pandas.Series or list of hashable values
        :raises Exception: when *col* is neither a Series nor a list
        """
        if isinstance(col, pd.Series):
            uniques = col.unique()
        elif isinstance(col, list):
            uniques = pd.Series(col).unique()
        else:
            raise Exception('Only Series and list supported')
        # Codes start at 1 (translated from the original Chinese comment).
        self.encoding_dict = dict(zip(uniques, range(1, len(uniques) + 1)))
        self.status = 'fitted'
Example 48
Project: pymapd-examples   Author: omnisci   File: OKR_oss_git.py    Apache License 2.0 5 votes vote down vote up
def get_views(r):
    """Collect per-day view statistics for one repository.

    :param r: repository object exposing ``get_views_traffic()`` and
        ``name`` -- presumably a PyGithub Repository; TODO confirm
    :return: pandas.DataFrame with columns
        ['repo', 'view_timestamp', 'view_count', 'view_unique'],
        or None when the frame is empty (only "no views" is printed)
    """
    # retrieve views information
    r_views = r.get_views_traffic()
    df = pd.DataFrame.from_dict(r_views)
    # iterate through individual view objects nested in the contents
    i = 0
    # Row 0 of each series is a throwaway placeholder label; it is dropped
    # again just before concatenation below.
    ts = pd.Series('ts', index=[i])
    cnt = pd.Series('cnt', index=[i])
    uni = pd.Series('uni', index=[i])
    repo = pd.Series('repo', index=[i])
    for view in df['views']: # this column contains a list of view objects
        i += 1
        repo[i] = r.name
        ts[i] = getattr(view, 'timestamp')
        # NOTE(review): timestamp is divided from nanoseconds to seconds
        # before formatting -- confirm against the API payload.
        ts[i] = ts[i]/1000000000
        ts[i] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(ts[i]))
        cnt[i] = getattr(view, 'count')
        uni[i] = getattr(view, 'uniques')

    # setup dataframe by concatenating the series together as columns
    list_of_series = [repo, ts, cnt, uni]
    # drop the placeholder row 0 (in place, so list_of_series sees it too)
    repo.drop([0], inplace = True)
    ts.drop([0], inplace = True)
    cnt.drop([0], inplace = True)
    uni.drop([0], inplace = True)
    df_views = pd.concat(list_of_series, axis=1, ignore_index=True)
    # rename the columns to useful labels
    columns = ['repo', 'view_timestamp', 'view_count', 'view_unique']
    print (df_views)
    df_views.columns = columns

    if df_views.empty:
        # Implicitly returns None in this branch.
        print ("no views")
    else:
        print (str(df_views['view_count'].sum()) + ' views for ' + r.name)
        return df_views
Example 49
Project: deep-learning-note   Author: wdxtub   File: 13_house_price.py    MIT License 5 votes vote down vote up
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    """Train the final model on the full training set and write predictions.

    Plots the training curve, prints the last rmse, attaches predictions to
    ``test_data`` as 'SalePrice' and writes the submission CSV.

    :param train_features: training feature tensor
    :param test_features: test feature tensor
    :param train_labels: training label tensor
    :param test_data: test DataFrame (must contain an 'Id' column)
    :param num_epochs: number of training epochs
    :param lr: learning rate
    :param weight_decay: L2 regularization strength
    :param batch_size: minibatch size
    """
    model = get_net(train_features.shape[1])
    # No validation split here: the whole training set is used.
    train_losses, _ = train(model, train_features, train_labels, None, None,
                            num_epochs, lr, weight_decay, batch_size)
    utils.semilogy(range(1, num_epochs + 1), train_losses, 'epochs', 'rmse')
    print('train rmse %f' % train_losses[-1])
    predictions = model(test_features).detach().numpy()
    test_data['SalePrice'] = pd.Series(predictions.reshape(1, -1)[0])
    submission = pd.concat([test_data['Id'], test_data['SalePrice']], axis=1)
    submission.to_csv('./data/HousePrice/submission.csv', index=False)
Example 50
Project: synthetic-data-tutorial   Author: theodi   File: SocialSecurityNumberAttribute.py    MIT License 5 votes vote down vote up
def pre_process(column: Series):
    """Normalize a social-security-number column to plain integers.

    :param column: pandas.Series of ints or of 'ddd-dd-dddd' strings
    :return: the column unchanged (empty or already int), or a Series of
        ints parsed from the dashed strings
    :raises Exception: when the first element is neither int nor str
    """
    if column.size == 0:
        return column
    first = column.iloc[0]
    # Dispatch on the exact type of the first element only.
    if type(first) is int:
        return column
    if type(first) is str:
        # Strip the dashes and parse, e.g. '123-45-6789' -> 123456789.
        return column.map(lambda ssn: int(ssn.replace('-', '')))
    raise Exception('Invalid SocialSecurityNumber.')