Python pandas.Series() Examples

The following code examples show how to use pandas.Series(). They are extracted from open-source Python projects.
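
For orientation, here is a minimal sketch (not taken from any of the projects below) of the constructor arguments these examples use most often: data, index, and name.

import pandas as pd

# from a list, with an explicit index and a name
s = pd.Series([10, 20, 30], index=['a', 'b', 'c'], name='scores')
# from a dict; the keys become the index
d = pd.Series({'x': 1.0, 'y': 2.0})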

Example 1
Project: saapy   Author: ashapochka   File: actor.py    (Apache License 2.0)
def connect_actors(actor_frame, connectivity_sets, connectivity_column):
    """
    :param actor_frame:
    :param connectivity_sets:
    :param connectivity_column:
    :return:

    Examples:

    same_actors = {
        'ccason': [3, 14, 15], 'clipka': [4, 5, 13],
        'wfpokorny': [11, 17], 'anshuarya': [0],
        'bentsm': [1], 'cbarton': [2], 'dbodor': [6],
        'jlecher': [7], 'jgrimbert': [8], 'nalvarez': [9],
        'selvik': [10], 'wverhelst': [12], 'gryken': [16],
        'github': [18]}
    actor_frame = connect_actors(actor_frame, same_actors, 'actor_id')
    """
    connectivity = {}
    for actor_id, connectivity_set in connectivity_sets.items():
        for actor in connectivity_set:
            connectivity[actor] = actor_id
    actor_frame[connectivity_column] = su.categorize(pd.Series(connectivity))
    return actor_frame 
Example 2
Project: QUANTAXIS   Author: yutiansut   File: QAIndicator_Series.py    (license)
def SMA(Series, N, M=1):

    ret = []
    i = 1
    length = len(Series)
    # skip the leading NaN values of X
    while i < length:
        if np.isnan(Series[i]):
            i += 1
        else:
            break
    preY = Series[i]  # Y'
    ret.append(preY)
    while i < length:
        Y = (M * Series[i] + (N - M) * preY) / float(N)
        ret.append(Y)
        preY = Y
        i += 1
    return pd.Series(ret) 
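
A hedged usage sketch for the SMA above (the price values are made up; it assumes numpy and pandas are imported as np and pd, as in the project): leading NaNs are skipped, then each value is smoothed recursively as Y = (M*X + (N-M)*Y') / N.

prices = pd.Series([np.nan, 10.0, 11.0, 12.0, 11.5])
smoothed = SMA(prices, N=5, M=1)  # returns a pd.Series of smoothed values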
Example 3
Project: py-hadoop-tutorial   Author: hougs   File: outliers.py    (Apache License 2.0)
def to_series(tuples):
    """Transforms a list of tuples of the form (date, count) in to a pandas
    series indexed by dt.
    """
    cleaned_time_val_tuples = [tuple for tuple in tuples if not (
        tuple[0] is pd.NaT or tuple[1] is None)]
    if len(cleaned_time_val_tuples) > 0:
        # change list of tuples ie [(a1, b1), (a2, b2), ...] into
        # tuple of lists ie ([a1, a2, ...], [b1, b2, ...])
        unzipped_cleaned_time_values = list(zip(*cleaned_time_val_tuples))
        # just being explicit about what these are
        counts = unzipped_cleaned_time_values[1]
        timestamps = unzipped_cleaned_time_values[0]
        # Create the series with a sorted index.
        ret_val = pd.Series(counts, index=timestamps).sort_index()
    else:
        ret_val = None
    return ret_val


Example 4
Project: xpandas   Author: alan-turing-institute   File: data_container.py    (license)
def __init__(self, *args, **kwargs):
        '''
        The same arguments as for pandas.Series
        https://pandas.pydata.org/pandas-docs/stable/generated/pandas.Series.html

        In order to create an XSeries of any data_type, the data argument must be a Python list.
        For example, to create an XSeries of pandas.Series objects, pass
        data = [s_1, s_2, ..., s_n] where each s_i is an instance of pandas.Series.
        '''
        super(XSeries, self).__init__(*args, **kwargs)

        data = kwargs.get('data')
        if data is None:
            data = args[0]

        check_result, data_type = _check_all_elements_have_the_same_property(data, type)
        if not check_result:
            raise ValueError('Not all elements the same type')

        if data_type is not None:
            self._data_type = data_type
        else:
            self._data_type = type(data._values[0]) 
Example 5
Project: xpandas   Author: alan-turing-institute   File: bag_of_features_transformer.py    (license)
def __init__(self, dictionary=None, **kwargs):
        '''
        :param dictionary: custom dictionary to count against. If None, the dictionary is calculated from the dataset
        '''
        self.dictionary = dictionary

        accepted_types = [
            pd.Series, list, np.array, tuple
        ]

        def bag_of_words_transform_function(corpus):
            counter = Counter(corpus)
            for el in self.dictionary:
                if counter.get(el) is None:
                    counter[el] = 0
            return counter

        super(BagOfWordsTransformer, self).__init__(data_types=accepted_types,
                                                    columns=None,
                                                    transform_function=bag_of_words_transform_function) 
Example 6
Project: xpandas   Author: alan-turing-institute   File: test_transformer.py    (license)
def test_mean_transformer():
    s1 = XSeries([
        pd.Series(np.random.normal(size=10)),
        pd.Series(np.random.normal(size=15))
    ])
    s2 = XSeries([
        pd.Series(np.random.normal(size=10)),
        pd.Series(np.random.normal(size=15)),
        pd.Series(np.random.normal(size=100))
    ])

    tr = MeanSeriesTransformer()
    tr = tr.fit(s1)

    transformed_s = tr.transform(s2)

    assert transformed_s.shape[0] == 3
    assert type(transformed_s) == XSeries 
Example 7
Project: xpandas   Author: alan-turing-institute   File: test_transformer.py    (license)
def test_mean_transformer_data_frame():
    s1 = XSeries([
        pd.Series(np.random.normal(size=10)),
        pd.Series(np.random.normal(size=15))
    ])
    s2 = XSeries([
        pd.Series(np.random.normal(size=10)),
        pd.Series(np.random.normal(size=15))
    ])

    df = XDataFrame({
        's1': s1,
        's2': s2
    })

    tr = MeanSeriesTransformer()
    try:
        tr = tr.fit(df)
        assert False
    except:
        assert True 
Example 8
Project: xpandas   Author: alan-turing-institute   File: test_data_type.py    (license)
def test_dataframe_data_types():
    s1 = XSeries([pd.Series([1, 2, 3], index=['a', 'b', 'c']),
                      pd.Series([4, 5, 6], index=['d', 'e', 'g'])])
    s2 = XSeries([1, 2, 3])
    s3 = XSeries([{"k1": "v1"}, {"k2": 'v2'}])
    s4 = XSeries(['f', 's', 't'])

    df = XDataFrame({
        'first_col': s1,
        'second_col': s2,
        'third_col': s3,
        'fourth_col': s4
    })

    assert df['first_col'].data_type == pd.Series
    assert df['second_col'].data_type == np.int64
    assert df['third_col'].data_type == dict
    assert df['fourth_col'].data_type == str

    assert type(df[['first_col']]) == XDataFrame
    assert type(df[['first_col', 'second_col']]) == XDataFrame 
Example 9
Project: xpandas   Author: alan-turing-institute   File: test_data_type.py    (license)
def test_dataframe_sub_frame_data_types():
    s1 = XSeries([pd.Series([1, 2, 3], index=['a', 'b', 'c']),
                      pd.Series([4, 5, 6], index=['d', 'e', 'g'])])
    s2 = XSeries([1, 2, 3])
    s3 = XSeries([{"k1": "v1"}, {"k2": 'v2'}])
    s4 = XSeries(['f', 's', 't'])

    df = XDataFrame({
        'first_col': s1,
        'second_col': s2,
        'third_col': s3,
        'fourth_col': s4
    })

    sub_df = df.loc[:2]

    assert type(sub_df) == XDataFrame
    assert sub_df['first_col'].data_type == pd.Series
    assert sub_df['second_col'].data_type == np.int64
    assert sub_df['third_col'].data_type == dict
    assert sub_df['fourth_col'].data_type == str

    assert type(sub_df[['first_col']]) == XDataFrame
    assert type(sub_df[['first_col', 'second_col']]) == XDataFrame 
Example 10
Project: xpandas   Author: alan-turing-institute   File: test_data_type.py    (license)
def test_series_replace_element():
    s = XSeries([
        pd.Series([1, 2, 3], index=['a', 'b', 'c']),
        pd.Series([4, 5, 6], index=['d', 'e', 'g'])
    ], name='MySuperSeries')

    try:
        s[0] = 111
        assert False
    except:
        assert True

    try:
        s[0] = pd.Series(np.random.normal(size=100))
        assert True
    except:
        assert False 
Example 11
Project: xpandas   Author: alan-turing-institute   File: test_dataframe_transformer.py    (license)
def test_naming():
    X = XSeries([
        pd.Series(np.random.normal(0, 1, 100), name='X')
    ])
    df = XDataFrame({
        'X': X
    })

    dataframe_transformer = XDataFrameTransformer({
        'X': [TimeSeriesTransformer()]
    })

    dataframe_transformer.fit(df)
    transformed_df = dataframe_transformer.transform(df)

    for col_name in transformed_df.columns:
        assert col_name.startswith('X_TimeSeriesTransformer') 
Example 12
Project: xpandas   Author: alan-turing-institute   File: test_dataframe_transformer.py    (license)
def test_multiple_transformers_for_one_column():
    X = XSeries([
        pd.Series(np.random.normal(0, 1, 100), name='X')
    ])
    df = XDataFrame({
        'X': X
    })

    dataframe_transformer = XDataFrameTransformer({
        'X': [TimeSeriesTransformer(), IdentityTransformer(), MeanSeriesTransformer()]
    })

    dataframe_transformer.fit(df)
    transformed_df = dataframe_transformer.transform(df)

    for col_name in transformed_df.columns:
        assert col_name.startswith('X_TimeSeriesTransformer') or \
               col_name.startswith('X_IdentityTransformer') or \
               col_name.startswith('X_MeanSeriesTransformer') 
Example 13
Project: xpandas   Author: alan-turing-institute   File: test_ts_fresh.py    (license)
def test_ts_fresh_chain():
    s1 = XSeries([
        pd.Series(np.random.normal(0, 1, 20))
        for _ in range(10)
    ], name='X')

    pipe = PipeLineChain([
        ('mean shift', TimeSeriesWindowTransformer()),
        ('ts fresh step', TsFreshSeriesTransformer())
    ])

    pipe.fit(s1)
    transformed_df = pipe.transform(s1)

    # print(transformed_df.head())

    assert type(transformed_df) == XDataFrame 
Example 14
Project: zipline-chinese   Author: zhanghan1990   File: history_container.py    (Apache License 2.0)
def aggregate_ohlcv_panel(self,
                              fields,
                              ohlcv_panel,
                              items=None,
                              minor_axis=None):
        """
        Convert an OHLCV Panel into a DataFrame by aggregating each field's
        frame into a Series.
        """
        vals = ohlcv_panel
        if isinstance(ohlcv_panel, pd.Panel):
            vals = ohlcv_panel.values
            items = ohlcv_panel.items
            minor_axis = ohlcv_panel.minor_axis

        data = [
            self.frame_to_series(
                field,
                vals[items.get_loc(field)],
                minor_axis
            )
            for field in fields
        ]
        return np.array(data) 
Example 15
Project: zipline-chinese   Author: zhanghan1990   File: test_munge.py    (Apache License 2.0)
def test_bfill(self):
        # test ndim=1
        N = 100
        s = pd.Series(np.random.randn(N))
        mask = random.sample(range(N), 10)
        s.iloc[mask] = np.nan

        correct = s.bfill().values
        test = bfill(s.values)
        assert_almost_equal(correct, test)

        # test ndim=2
        df = pd.DataFrame(np.random.randn(N, N))
        df.iloc[mask] = np.nan
        correct = df.bfill().values
        test = bfill(df.values)
        assert_almost_equal(correct, test) 
Example 16
Project: zipline-chinese   Author: zhanghan1990   File: test_munge.py    (Apache License 2.0)
def test_ffill(self):
        # test ndim=1
        N = 100
        s = pd.Series(np.random.randn(N))
        mask = random.sample(range(N), 10)
        s.iloc[mask] = np.nan

        correct = s.ffill().values
        test = ffill(s.values)
        assert_almost_equal(correct, test)

        # test ndim=2
        df = pd.DataFrame(np.random.randn(N, N))
        df.iloc[mask] = np.nan
        correct = df.ffill().values
        test = ffill(df.values)
        assert_almost_equal(correct, test) 
Example 17
Project: zipline-chinese   Author: zhanghan1990   File: test_events.py    (Apache License 2.0)
def test_conversion_to_df(self, df, infer_timestamps):

        events_by_sid = {0: df}
        loader = EventDataSetLoader(
            dtx,
            events_by_sid,
            infer_timestamps=infer_timestamps,
        )
        self.assertEqual(
            loader.events_by_sid.keys(),
            events_by_sid.keys(),
        )

        if infer_timestamps:
            expected = pd.Series(index=[dtx[0]] * 10, data=dtx,
                                 name=ANNOUNCEMENT_FIELD_NAME)
        else:
            expected = pd.Series(index=dtx, data=dtx,
                                 name=ANNOUNCEMENT_FIELD_NAME)
            expected.index.name = TS_FIELD_NAME
        # Check that index by first given date has been added
        assert_series_equal(
            loader.events_by_sid[0][ANNOUNCEMENT_FIELD_NAME],
            expected,
        ) 
Example 18
Project: scikit-dataaccess   Author: MITHaystack   File: data_fetcher.py    (MIT License)
def getAntennaLogs():
        '''
        Retrieve information about antenna changes

        @return dictionary of antenna changes
        '''
        store_location = data_util.getDataLocation('ngl_gps')
        store = pd.HDFStore(store_location, 'r')
        logs_df = store['ngl_steps']
        store.close()

        metadata = DataFetcher.getStationMetadata()

        logs_dict = OrderedDict()
        
        for station in metadata.index:
            offset_dates = logs_df[logs_df['Station']==station].index.unique()
            offset_dates = pd.Series(offset_dates)
            logs_dict[station] = offset_dates

        return logs_dict 
Example 19
Project: didi_competition   Author: Heipiao   File: operate_load_poi_data.py    (MIT License)
def remove_error_poi_each_line(line_data):
    ## skip the first element, which is the district hash
    ## iterate over a temporary copy because list.remove() mutates the
    ## list while it is being traversed

    standard_style = re.compile(r"\d+#\d+:\d+")

    line_data = list(line_data[0])
    temp_line_data = line_data.copy()
    for poi_in_line in temp_line_data:
        if len(poi_in_line) == 32: # this is the district hash
            continue
        if not re.match(standard_style, poi_in_line):
            #print(poi_in_line)
            line_data.remove(poi_in_line)
    return pd.Series([line_data])

# the input line_data is a Series
Example 20
Project: dask_gdf   Author: gpuopenanalytics   File: test_core.py    (Apache License 2.0)
def test_series_append():
    np.random.seed(0)

    n = 1000
    df = pd.DataFrame({'x': np.random.randint(0, 5, size=n),
                       'y': np.random.normal(size=n)})

    gdf = gd.DataFrame.from_pandas(df)
    frags = _fragmented_gdf(gdf, nsplit=13)

    frags = [df.x for df in frags]

    appending = dgd.from_pygdf(frags[0], npartitions=1)
    for frag in frags[1:]:
        appending = appending.append(frag)

    appended = appending.compute().to_pandas()
    assert isinstance(appended, pd.Series)
    np.testing.assert_array_equal(appended, df.x) 
Example 21
Project: dask_gdf   Author: gpuopenanalytics   File: test_core.py    (Apache License 2.0)
def test_take(nelem, nparts):
    np.random.seed(0)

    # Use a unique index range because the sort may not be stable
    x = np.random.randint(0, nelem, size=nelem)
    y = np.random.random(nelem)

    selected = np.random.randint(0, nelem - 1, size=nelem // 2)

    df = pd.DataFrame({'x': x, 'y': y})

    ddf = dd.from_pandas(df, npartitions=nparts)
    dgdf = dgd.from_dask_dataframe(ddf)
    out = dgdf.take(gd.Series(selected), npartitions=5)
    got = out.compute().to_pandas()

    expect = df.take(selected)
    assert 1 < out.npartitions <= 5
    np.testing.assert_array_equal(got.index, np.arange(len(got)))
    np.testing.assert_array_equal(got.x, expect.x)
    np.testing.assert_array_equal(got.y, expect.y) 
Example 22
Project: dask_gdf   Author: gpuopenanalytics   File: core.py    (Apache License 2.0)
def set_index(self, index, drop=True, sorted=False):
        """Set new index.

        Parameters
        ----------
        index : str or Series
            If a ``str`` is provided, it is used as the name of the
            column to be made into the index.
            If a ``Series`` is provided, it is used as the new index
        drop : bool
            Whether the first original index column is dropped.
        sorted : bool
            Whether the new index column is already sorted.
        """
        if not drop:
            raise NotImplementedError('drop=False not supported yet')

        if isinstance(index, str):
            return self._set_index_raw(index, drop=drop, sorted=sorted)
        elif isinstance(index, Series):
            indexname = '__dask_gdf.index'
            df = self.assign(**{indexname: index})
            return df._set_index_raw(indexname, drop=drop, sorted=sorted)
        else:
            raise TypeError('cannot set_index from {}'.format(type(index))) 
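
A hypothetical sketch of the two accepted index types (the names ddf and new_index are assumptions, not from the project):

ddf2 = ddf.set_index('x')         # str: column 'x' becomes the index
ddf3 = ddf.set_index(new_index)   # Series: used directly as the new index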
Example 23
Project: saapy   Author: ashapochka   File: git_authorship.py    (Apache License 2.0)
def _compute_author_similarity(self, paired_authors):
        def row_similarity(row):
            same_email = row.author_email == row.author_email_other
            name_similarity = fuzz.token_set_ratio(row.author_name,
                                                   row.author_name_other)
            email_name_similarity = fuzz.ratio(row.email_name,
                                               row.email_name_other)
            name_to_email_similarity = fuzz.token_set_ratio(row.author_name,
                                                            row.name_from_email_other)
            return pd.Series(
                [same_email, name_similarity, email_name_similarity,
                 name_to_email_similarity])

        newcols = paired_authors.apply(row_similarity, axis=1)
        newcols.columns = ['same_email', 'name_similarity',
                           'email_name_similarity', 'name_to_email_similarity']
        newdf = paired_authors.join(newcols)
        return newdf 
Example 24
Project: QUANTAXIS   Author: yutiansut   File: indicators.py    (license)
def QA_indicator_dpo(data, N=20, M=6):
    """
    DPO (Detrended Price Oscillator)

    DPO strips the longer-term trend out of a price series so that
    shorter-term cycles stand out: the price is compared against its
    rolling mean over N/2 + 1 periods, and MADPO is the M-period rolling
    mean of DPO itself. Values above zero mean the price sits above that
    average; values below zero mean the opposite.
    """
    _dpo = pd.Series(data) - pd.Series(data).rolling(N // 2 + 1).mean()  # window must be an integer
    _madpo = pd.Series(_dpo).rolling(M).mean()
    return _dpo, _madpo 
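
A hedged usage sketch for the indicator above (made-up prices; assumes pandas is imported as pd): with N=4 the detrending window is N // 2 + 1 = 3 periods, so the first two DPO values are NaN.

close = [10.0, 10.5, 10.2, 10.8, 11.0, 10.9, 11.2, 11.5, 11.3, 11.8]
dpo, madpo = QA_indicator_dpo(close, N=4, M=2)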
Example 25
Project: NeoAnalysis   Author: neoanalysis   File: popuanalysis.py    (license)
def stats_desc(self,store_key,cond):
        '''
        Args
            store_key (string):
                specifies which data in the workspace to analyze
            cond (string):
                sample observation
        Returns
            descriptive statistics
        '''
        datas = list()
        for ite_file in self.store.keys():
            datas.append(self.store[ite_file][store_key][str(cond)]['mean'].value)
        datas = pd.Series(datas)
        return datas.describe()

    # one way ANOVA
    # for scalar value usage only 
Example 26
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_add(self,column,added_info):
        '''
        Args
            column (string):
                the column name to operate on
            added_info (string, int, float or pandas.DataFrame):
                The information to be added to the selected column can be string, int, float, or 
                pandas.DataFrame
        Returns
            -
        '''
        if isinstance(added_info,str):
            self.data_df[column] = self.data_df[column] + self.data_df[added_info]
        elif isinstance(added_info,(int,float)):
            self.data_df[column] = self.data_df[column] + added_info
        elif isinstance(added_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] + added_info

    # This function performs minus to a given column 
Example 27
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_minus(self,column,minus_info):
        '''
        Args
            column (string):
                the column name to operate on
            minus_info (string, int, float or pandas.DataFrame):
                information to be subtracted from the selected column
        Returns
            -
        '''
        if isinstance(minus_info,str):
            self.data_df[column] = self.data_df[column] - self.data_df[minus_info]
        elif isinstance(minus_info,(int,float)):
            self.data_df[column] = self.data_df[column] - minus_info
        elif isinstance(minus_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] - minus_info

    # This function multiplys the selected column with certain factor 
Example 28
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_multiply(self,column,multiply_info):
        '''
        Args
            column (string):
                the column name to operate on
            multiply_info (string, int, float or pandas.DataFrame):
                information to be used for multiplying
        Returns
            -
        '''
        if isinstance(multiply_info,str):
            self.data_df[column] = self.data_df[column] * self.data_df[multiply_info]
        elif isinstance(multiply_info,(int,float)):
            self.data_df[column] = self.data_df[column] * multiply_info
        elif isinstance(multiply_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] * multiply_info

    # This function divides the selected column by certain factor 
Example 29
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_division(self,column,division_info):
        '''
        Args
            column (string):
                the column name to be played with
            division_info (string, int, float or pandas.DataFrame):
                information to be used for dividing
        Returns
            -
        '''
        if isinstance(division_info,str):
            self.data_df[column] = self.data_df[column] / self.data_df[division_info]
        elif isinstance(division_info,(int,float)):
            self.data_df[column] = self.data_df[column] / float(division_info)
        elif isinstance(division_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] / division_info

    # delete certain trials in the data table 
Example 30
Project: NeoAnalysis   Author: neoanalysis   File: popuanalysis.py    (license)
def stats_desc(self,store_key,cond):
        '''
        Args
            store_key (string):
                specifies which data in the workspace to analyze
            cond (string):
                sample observation
        Returns
            descriptive statistics
        '''
        datas = list()
        for ite_file in list(self.store.keys()):
            datas.append(self.store[ite_file][store_key][str(cond)]['mean'].value)
        datas = pd.Series(datas)
        return datas.describe()

    # one way ANOVA
    # for scalar value usage only 
Example 31
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_add(self,column,added_info):
        '''
        Args
            column (string):
                the column name to operate on
            added_info (string, int, float or pandas.DataFrame):
                The information to be added to the selected column can be string, int, float, or 
                pandas.DataFrame
        Returns
            -
        '''
        if isinstance(added_info,str):
            self.data_df[column] = self.data_df[column] + self.data_df[added_info]
        elif isinstance(added_info,(int,float)):
            self.data_df[column] = self.data_df[column] + added_info
        elif isinstance(added_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] + added_info

    # This function performs minus to a given column 
Example 32
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_minus(self,column,minus_info):
        '''
        Args
            column (string):
                the column name to operate on
            minus_info (string, int, float or pandas.DataFrame):
                information to be subtracted from the selected column
        Returns
            -
        '''
        if isinstance(minus_info,str):
            self.data_df[column] = self.data_df[column] - self.data_df[minus_info]
        elif isinstance(minus_info,(int,float)):
            self.data_df[column] = self.data_df[column] - minus_info
        elif isinstance(minus_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] - minus_info

    # This function multiplys the selected column with certain factor 
Example 33
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_multiply(self,column,multiply_info):
        '''
        Args
            column (string):
                the column name to operate on
            multiply_info (string, int, float or pandas.DataFrame):
                information to be used for multiplying
        Returns
            -
        '''
        if isinstance(multiply_info,str):
            self.data_df[column] = self.data_df[column] * self.data_df[multiply_info]
        elif isinstance(multiply_info,(int,float)):
            self.data_df[column] = self.data_df[column] * multiply_info
        elif isinstance(multiply_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] * multiply_info

    # This function divides the selected column by certain factor 
Example 34
Project: NeoAnalysis   Author: neoanalysis   File: graphics.py    (license)
def df_division(self,column,division_info):
        '''
        Args
            column (string):
                the column name to be played with
            division_info (string, int, float or pandas.DataFrame):
                information to be used for dividing
        Returns
            -
        '''
        if isinstance(division_info,str):
            self.data_df[column] = self.data_df[column] / self.data_df[division_info]
        elif isinstance(division_info,(int,float)):
            self.data_df[column] = self.data_df[column] / division_info
        elif isinstance(division_info,(pd.Series,pd.DataFrame)):
            self.data_df[column] = self.data_df[column] / division_info

    # delete certain trials in the data table 
Example 35
Project: tdx_formula   Author: woodylee1974   File: test.py    (license)
def test_ABS():
    text = """
        ABS(X);
    """
    param1 = {
        'X': pd.Series([-2, -1, -0.5, 9.8]),
        'RESULT': pd.Series([2, 1, 0.5, 9.8])
    }

    param2 = {
        'X': pd.Series([-2, -1, 0, 9]),
        'RESULT': pd.Series([2, 1, 0, 9])
    }

    params = [param1, param2]
    testfunc(text, params) 
Example 36
Project: tdx_formula   Author: woodylee1974   File: test.py    (license)
def test_SMA():
    text = """
       SMA(X, M, N);
    """

    param1 = {
        'X': pd.Series([10.2, 30.9, 30.48, 39.34, 43.3, 45.9, 30.48, 39.34, 45.9, 30.48, 39.34]),
        'M': 5,
        'N': 3,
        'RESULT': pd.Series(
            [10.2, 24.985714, 28.507692, 35.177833, 40.101552, 43.594930, 35.713058, 37.890650, 42.697520, 35.366239,
             37.750596])
    }

    params = [param1]
    testfunc(text, params, True, True) 
Example 37
Project: tdx_formula   Author: woodylee1974   File: tdx_formula.py    (license)
def CROSS(self, param):
        if not isinstance(param[0], pd.core.series.Series) and not isinstance(param[1], pd.core.series.Series):
            print('Invalid data type is detected.')
            return False

        if not isinstance(param[0], pd.core.series.Series):
            x1 = param[0]
            x2 = param[0]
            y1 = param[1].shift(1)
            y2 = param[1]
        
        if not isinstance(param[1], pd.core.series.Series):
            x1 = param[0].shift(1)
            x2 = param[0]
            y1 = param[1]
            y2 = param[1]
        
        if isinstance(param[0], pd.core.series.Series) and isinstance(param[1], pd.core.series.Series):
            x1 = param[0].shift(1)
            x2 = param[0]
            y1 = param[1].shift(1)
            y2 = param[1]
        
        return (x1 <= y1) & (x2 > y2) 
Example 38
Project: tdx_formula   Author: woodylee1974   File: tdx_formula.py    (license)
def MAX(self, param):
        if isinstance(param[0], pd.core.series.Series):
            df = pd.DataFrame(index = param[0].index)
        elif isinstance(param[1], pd.core.series.Series):
            df = pd.DataFrame(index = param[1].index)
        else:
            df = None
        
        if df is None:
            return np.max(param)

        df['A'] = param[0]
        df['B'] = param[1]
        def callback(row):
            if row['A'] >= row['B']:
                return row['A']
            else:
                return row['B']
        result = df.apply(callback, axis = 1, reduce = True)
        return result 
Example 39
Project: tdx_formula   Author: woodylee1974   File: tdx_formula.py    (license)
def MIN(self, param):
        if isinstance(param[0], pd.core.series.Series):
            df = pd.DataFrame(index = param[0].index)
        elif isinstance(param[1], pd.core.series.Series):
            df = pd.DataFrame(index = param[1].index)
        else:
            df = None
        
        if df is None:
            return np.min(param)
        df['A'] = param[0]
        df['B'] = param[1]
        def callback(row):
            if row['A'] <= row['B']:
                return row['A']
            else:
                return row['B']
        result = df.apply(callback, axis = 1, reduce = True)
        return result 
Example 40
Project: hidi   Author: VEVO   File: test_sparse_transform.py    (Apache License 2.0)
def setUp(self):
        scores = pd.Series(np.ones(8), dtype=np.float32)
        np_data = np.array([
            [1, 'a'],
            [2, 'b'],
            [4, 'a'],
            [3, 'c'],
            [3, 'b'],
            [5, 'c'],
            [4, 'c'],
            [1, 'b'],
        ])
        col_labels = ['item_id', 'link_id']
        self.input_df = pd.DataFrame(data=np_data, columns=col_labels)
        self.input_df['score'] = scores
        self.sparse = SparseTransform()
        self.out = self.sparse.transform(self.input_df) 
Example 41
Project: powerAI   Author: dreameng28   File: data_view.py    (license)
def rise_rate(df):
    date1_2 = df[record_date].map(lambda x: str2time(x)).max()
    date1_1 = datetime.datetime(date1_2.year, date1_2.month, 1).date()
    grouped1 = DataView(df).filter_by_record_date2(date1_1, date1_2)[[user_id, power_consumption]].groupby([user_id], as_index=False).mean()
    from dateutil.relativedelta import relativedelta
    date2_1 = date1_1 - relativedelta(months=+1)
    date2_2 = date1_2 - relativedelta(months=+1)
    grouped2 = DataView(df).filter_by_record_date2(date2_1, date2_2)[[user_id, power_consumption]].groupby([user_id], as_index=False).mean()
    print(date1_1,date1_2, date2_1, date2_2)
    print(grouped1)
    print(grouped2)
    user_rise_rate = pd.Series(map(lambda x, y: float(x - y) / y, grouped1[power_consumption], grouped2[power_consumption]))
    user_rise_rate.name = 'user_rise_rate'
    return grouped1[[user_id]].join(user_rise_rate)


Example 42
Project: GOS   Author: crcresearch   File: gos.py    (Apache License 2.0)
def create_agents(self, generator):
        """
        Given information on a set of countries and a generator function,
        generate the agents and assign the results to ``self.agents``.

        :type generator: DataFrame, str, int
        :param generator: A function which generates the agents.
        """
        self.generator = generator
        country_array = pd.concat([pd.Series([c] * k["Population"]) for c, k in self.df.iterrows()])
        country_array.index = range(len(country_array))
        # Garbage collect before creating new processes.
        gc.collect()
        self.agents = pd.concat(
            self.pool.imap(self._gen_agents,
                           np.array_split(country_array, self.processes * self.splits))
        )
        self.agents.index = range(len(self.agents)) 
Example 43
Project: GOS   Author: crcresearch   File: gos.py    (Apache License 2.0)
def create_agents(self, generator):
        """
        Given information on a set of countries and a generator function,
        generate the agents and assign the results to ``self.agents``.

        :type generator: DataFrame, str, int
        :param generator: A function which generates the agents.
        """
        self.generator = generator
        country_array = pd.concat([pd.Series([c] * k["Population"]) for c, k in self.df.iterrows()])
        country_array.index = range(len(country_array))
        # Garbage collect before creating new processes.
        gc.collect()
        self.agents = pd.concat(
            self.pool.imap(self._gen_agents,
                           np.array_split(country_array, self.processes * self.splits))
        )
        self.agents.index = range(len(self.agents)) 
Example 44
Project: PyGPS   Author: gregstarr   File: gps.py    (license)
def minScalErr(stec,el,z,thisBias):
    """
    This determines the slope of the vTEC vs. elevation line, which
    should be minimized in the minimum scalloping technique for
    receiver bias removal
    inputs:
        stec - time indexed Series of slant TEC values
        el - corresponding elevation values, also Series
        z - mapping function values to convert to vTEC from entire file, may
            contain nans, Series
        thisBias - the bias to be tested and minimized
    """

    intel=np.asarray(el[stec.index],int) # bin the elevation values into int
    sTEC=np.asarray(stec,float)
    zmap = z[stec.index]
    c=np.array([(i,np.average((sTEC[intel==i]-thisBias)
                              /zmap[intel==i])) for i in np.unique(intel) if i>30])

    return np.polyfit(c[:,0],c[:,1],1)[0] 
Example 45
Project: scheduled-bots   Author: SuLab   File: bot_log_parser.py    (license)
def generate_summary(df):
    level_counts = df.Level.value_counts().to_dict()
    zlist = list(zip(*[('<a href="#info">Items Processed Successfully</a>', level_counts.get('INFO', 0)),
                       ('<a href="#warning">Items Skipped Due to a Warning</a>', level_counts.get('WARNING', 0)),
                       ('<a href="#error">Items Skipped Due to an Error</a>', level_counts.get('ERROR', 0))]))
    level_counts = pd.Series(zlist[1], index=zlist[0])
    level_counts.name = "Count"

    info_counts = df.query("Level == 'INFO'").Message.value_counts().to_dict()
    zlist = list(zip(*[('No Action', info_counts.get('SKIP', 0)),
                       ('Update', info_counts.get('UPDATE', 0)),
                       ('Create', info_counts.get('CREATE', 0))]))
    info_counts = pd.Series(zlist[1], index=zlist[0])
    info_counts.name = "Count"

    warning_counts = df.query("Level == 'WARNING'")['Msg Type'].value_counts()
    warning_counts.name = "Count"
    error_counts = df.query("Level == 'ERROR'")['Msg Type'].value_counts()
    error_counts.name = "Count"
    return level_counts, info_counts, warning_counts, error_counts 
Example 46
Project: micom   Author: resendislab   File: util.py    (Apache License 2.0)
def _format_min_growth(min_growth, species):
    """Format min_growth into a pandas series.

    Arguments
    ---------
    min_growth : positive float or array-like object.
        The minimum growth rate for each individual in the community. Either
        a single value applied to all individuals or one value for each.
    species : array-like
        The ID for each individual model in the community.

    Returns
    -------
    pandas.Series
        A pandas Series mapping each individual to its minimum growth rate.

    """
    try:
        min_growth = float(min_growth)
    except (TypeError, ValueError):
        if len(min_growth) != len(species):
            raise ValueError(
                "min_growth must be single value or an array-like "
                "object with an entry for each species in the model.")
    return pd.Series(min_growth, species) 
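
For intuition, a small illustration with made-up values (not from the micom test suite): a scalar is broadcast to every species, while an array-like is matched element-wise.

_format_min_growth(0.1, ['a', 'b'])         # Series: a -> 0.1, b -> 0.1
_format_min_growth([0.1, 0.2], ['a', 'b'])  # Series: a -> 0.1, b -> 0.2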
Example 47
Project: PortfolioTimeSeriesAnalysis   Author: MizioAnd   File: two_sigma_financial_modelling.py    (MIT License)
def clean_data(self, df, is_with_MICE=0):
        df = df.copy()
        if df.isnull().sum().sum() > 0:
            if is_with_MICE:
                # Imputation using MICE
                numerical_features_names = self.extract_numerical_features(df)
                df.loc[:, tuple(numerical_features_names)] = self.estimate_by_mice(df[numerical_features_names])
            else:
                if any(tuple(df.columns == 'y')):
                    df = df.dropna()
                else:
                    df = df.dropna(axis=1)
                    TwoSigmaFinModTools._feature_names_num = pd.Series(data=np.intersect1d(
                        TwoSigmaFinModTools._feature_names_num.values, df.columns), dtype=object)
        TwoSigmaFinModTools._numerical_feature_names = TwoSigmaFinModTools.extract_numerical_features(df)
        return df 
Example 48
Project: linkedin_recommend   Author: duggalr2   File: nb_classification.py    (license)
def predict_job(job_list):
    """Assign a classification to a url"""
    # TODO: Add case where len is 1 or 0....
    job_list = [job for j in job_list for job in j]
    new_job_list = [regex.tokenize_and_stem(i) for i in job_list]
    new_job_list = [' '.join(job) for job in new_job_list]
    vect = CountVectorizer()
    x_series = pd.Series(X)
    X_train_dtm = vect.fit_transform(x_series)
    y_train = pd.Series(y)
    job_list_series = pd.Series(new_job_list)
    job_list_dtm = vect.transform(job_list_series)
    nb = MultinomialNB()
    nb.fit(X_train_dtm, y_train)
    y_pred = nb.predict(job_list_dtm)
    # for i in range(len(job_list)):
    #     print(job_list[i], y_pred[i])
    return y_pred

# print(predict_job([('Founder',), ('Founder',), ('Architect & Full-stack developer',), ('Senior Engineer',), ('Technical Consultant',)])) 
Example 49
Project: inqbus.rainflow   Author: Inqbus   File: helpers.py    (BSD 3-Clause "New" or "Revised" License)
def count_pairs(data):
    df = pd.DataFrame(data)

    start, target = df.columns.tolist()

    # first we create groups for each pair and take size of each group as count.
    # counts is a pandas.Series with the pairs as index
    counts = df.groupby([start, target]).size()

    # then we remove duplicate pairs from the original dataframe,
    # so lengths and counts are equal in size
    df = df.drop_duplicates()

    # reset index to values of pairs to fit index of counts
    df.set_index([0, 1], inplace=True, drop=False)

    # now we append the counts as column to the original data
    df[2] = pd.Series(counts.values, index=counts.index)

    # just cast pandas-dataframe back to numpy 2d-array usable for following
    # steps
    array = df.values
    return array 
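
For intuition, a hypothetical call with a small 2-column array (the pair (1, 2) occurs twice, so its count is 2):

data = np.array([[1, 2], [1, 2], [3, 4]])
count_pairs(data)  # -> array([[1, 2, 2],
                   #           [3, 4, 1]])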
Example 50
Project: bambi   Author: bambinos   File: results.py    (MIT License)
def _hpd_interval(self, x, width):
        """
        Code adapted from pymc3.stats.calc_min_interval:
        https://github.com/pymc-devs/pymc3/blob/master/pymc3/stats.py
        """
        x = np.sort(x)
        n = len(x)

        interval_idx_inc = int(np.floor(width * n))
        n_intervals = n - interval_idx_inc
        interval_width = x[interval_idx_inc:] - x[:n_intervals]

        if len(interval_width) == 0:
            raise ValueError('Too few elements for interval calculation')

        min_idx = np.argmin(interval_width)
        hdi_min = x[min_idx]
        hdi_max = x[min_idx + interval_idx_inc]

        index = ['hpd{}_{}'.format(width, x) for x in ['lower', 'upper']]
        return pd.Series([hdi_min, hdi_max], index=index)