Python pandas.wide_to_long() Examples

The following are 30 code examples of pandas.wide_to_long(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas, or try the search function.
Example #1
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #2
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected) 
Example #3
Source File: test_melt.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected) 
Example #4
Source File: test_melt.py    From twitter-stock-recommendation with MIT License 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #5
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected) 
Example #6
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #7
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected) 
Example #8
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected) 
Example #9
Source File: test_melt.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected) 
Example #10
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #11
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected) 
Example #12
Source File: test_melt.py    From recruit with Apache License 2.0 6 votes vote down vote up
def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected) 
Example #13
Source File: test_melt.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #14
Source File: test_melt.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #15
Source File: test_melt.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected) 
Example #16
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected) 
Example #17
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #18
Source File: test_melt.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected) 
Example #19
Source File: test_subclass.py    From vnpy_crypto with MIT License 6 votes vote down vote up
def test_subclassed_wide_to_long(self):
        # GH 9762

        np.random.seed(123)
        x = np.random.randn(3)
        df = tm.SubclassedDataFrame({
            "A1970": {0: "a", 1: "b", 2: "c"},
            "A1980": {0: "d", 1: "e", 2: "f"},
            "B1970": {0: 2.5, 1: 1.2, 2: .7},
            "B1980": {0: 3.2, 1: 1.3, 2: .1},
            "X": dict(zip(range(3), x))})

        df["id"] = df.index
        exp_data = {"X": x.tolist() + x.tolist(),
                    "A": ['a', 'b', 'c', 'd', 'e', 'f'],
                    "B": [2.5, 1.2, 0.7, 3.2, 1.3, 0.1],
                    "year": [1970, 1970, 1970, 1980, 1980, 1980],
                    "id": [0, 1, 2, 0, 1, 2]}
        expected = tm.SubclassedDataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        long_frame = pd.wide_to_long(df, ["A", "B"], i="id", j="year")

        tm.assert_frame_equal(long_frame, expected) 
Example #20
Source File: test_melt.py    From coffeegrindsize with MIT License 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected) 
Example #21
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': [2010, 2011, 2010, 2011]}
        expected = pd.DataFrame(exp_data)
        expected = expected.set_index(['id', 'year'])[["X", "A", "B"]]
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result, expected) 
Example #22
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})
        expected = expected.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #23
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        expected = pd.DataFrame(exp_data).astype({'year': 'int'})

        expected = expected.set_index(['id', 'year'])
        expected.index.set_levels([0, 1], level=0, inplace=True)
        result = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(result.sort_index(axis=1),
                              expected.sort_index(axis=1)) 
Example #24
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        expected = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': [1, 2, 1, 2, 1, 2, 1, 2, 1,
                    2, 1, 2, 1, 2, 1, 2, 1, 2]
        })
        expected = expected.set_index(['famid', 'birth', 'age'])[['ht']]
        result = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(result, expected) 
Example #25
Source File: test_reshape.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_multiple_id_columns(self):
        # Taken from http://www.ats.ucla.edu/stat/stata/modules/reshapel.htm
        df = pd.DataFrame({
            'famid': [1, 1, 1, 2, 2, 2, 3, 3, 3],
            'birth': [1, 2, 3, 1, 2, 3, 1, 2, 3],
            'ht1': [2.8, 2.9, 2.2, 2, 1.8, 1.9, 2.2, 2.3, 2.1],
            'ht2': [3.4, 3.8, 2.9, 3.2, 2.8, 2.4, 3.3, 3.4, 2.9]
        })
        exp_frame = pd.DataFrame({
            'ht': [2.8, 3.4, 2.9, 3.8, 2.2, 2.9, 2.0, 3.2, 1.8,
                   2.8, 1.9, 2.4, 2.2, 3.3, 2.3, 3.4, 2.1, 2.9],
            'famid': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3],
            'birth': [1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3, 1, 1, 2, 2, 3, 3],
            'age': ['1', '2', '1', '2', '1', '2', '1', '2', '1',
                    '2', '1', '2', '1', '2', '1', '2', '1', '2']
        })
        exp_frame = exp_frame.set_index(['famid', 'birth', 'age'])[['ht']]
        long_frame = wide_to_long(df, 'ht', i=['famid', 'birth'], j='age')
        tm.assert_frame_equal(long_frame, exp_frame) 
Example #26
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_float_suffix(self):
        df = pd.DataFrame({
            'treatment_1.1': [1.0, 2.0],
            'treatment_2.1': [3.0, 4.0],
            'result_1.2': [5.0, 6.0],
            'result_1': [0, 9],
            'A': ['X1', 'X2']})
        expected = pd.DataFrame({
            'A': ['X1', 'X1', 'X1', 'X1', 'X2', 'X2', 'X2', 'X2'],
            'colname': [1, 1.1, 1.2, 2.1, 1, 1.1, 1.2, 2.1],
            'result': [0.0, np.nan, 5.0, np.nan, 9.0, np.nan, 6.0, np.nan],
            'treatment': [np.nan, 1.0, np.nan, 3.0, np.nan, 2.0, np.nan, 4.0]})
        expected = expected.set_index(['A', 'colname'])
        result = wide_to_long(df, ['result', 'treatment'],
                              i='A', j='colname', suffix='[0-9.]+', sep='_')
        tm.assert_frame_equal(result, expected) 
Example #27
Source File: test_melt.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_col_substring_of_stubname(self):
        # GH22468
        # Don't raise ValueError when a column name is a substring
        # of a stubname that's been passed as a string
        wide_data = {'node_id': {0: 0, 1: 1, 2: 2, 3: 3, 4: 4},
                     'A': {0: 0.80, 1: 0.0, 2: 0.25, 3: 1.0, 4: 0.81},
                     'PA0': {0: 0.74, 1: 0.56, 2: 0.56, 3: 0.98, 4: 0.6},
                     'PA1': {0: 0.77, 1: 0.64, 2: 0.52, 3: 0.98, 4: 0.67},
                     'PA3': {0: 0.34, 1: 0.70, 2: 0.52, 3: 0.98, 4: 0.67}
                     }
        wide_df = pd.DataFrame.from_dict(wide_data)
        expected = pd.wide_to_long(wide_df,
                                   stubnames=['PA'],
                                   i=['node_id', 'A'],
                                   j='time')
        result = pd.wide_to_long(wide_df,
                                 stubnames='PA',
                                 i=['node_id', 'A'],
                                 j='time')
        tm.assert_frame_equal(result, expected) 
Example #28
Source File: test_reshape.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_invalid_suffixtype(self):
        # If all stubs names end with a string, but a numeric suffix is
        # assumed,  an empty data frame is returned
        df = pd.DataFrame({'Aone': [1.0, 2.0],
                           'Atwo': [3.0, 4.0],
                           'Bone': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'Aone': [],
                    'Atwo': [],
                    'Bone': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        exp_frame = pd.DataFrame(exp_data)
        exp_frame = exp_frame.set_index(['id', 'year'])[[
            'X', 'Aone', 'Atwo', 'Bone', 'A', 'B']]
        exp_frame.index.set_levels([[0, 1], []], inplace=True)
        long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(long_frame.sort_index(axis=1),
                              exp_frame.sort_index(axis=1)) 
Example #29
Source File: test_reshape.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_invalid_separator(self):
        # if an invalid separator is supplied a empty data frame is returned
        sep = 'nope!'
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': '',
                    'A2010': [],
                    'A2011': [],
                    'B2010': [],
                    'id': [],
                    'year': [],
                    'A': [],
                    'B': []}
        exp_frame = pd.DataFrame(exp_data)
        exp_frame = exp_frame.set_index(['id', 'year'])[[
            'X', 'A2010', 'A2011', 'B2010', 'A', 'B']]
        exp_frame.index.set_levels([[0, 1], []], inplace=True)
        long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year', sep=sep)
        tm.assert_frame_equal(long_frame.sort_index(axis=1),
                              exp_frame.sort_index(axis=1)) 
Example #30
Source File: test_reshape.py    From elasticintel with GNU General Public License v3.0 6 votes vote down vote up
def test_unbalanced(self):
        # test that we can have a varying amount of time variables
        df = pd.DataFrame({'A2010': [1.0, 2.0],
                           'A2011': [3.0, 4.0],
                           'B2010': [5.0, 6.0],
                           'X': ['X1', 'X2']})
        df['id'] = df.index
        exp_data = {'X': ['X1', 'X1', 'X2', 'X2'],
                    'A': [1.0, 3.0, 2.0, 4.0],
                    'B': [5.0, np.nan, 6.0, np.nan],
                    'id': [0, 0, 1, 1],
                    'year': ['2010', '2011', '2010', '2011']}
        exp_frame = pd.DataFrame(exp_data)
        exp_frame = exp_frame.set_index(['id', 'year'])[["X", "A", "B"]]
        long_frame = wide_to_long(df, ['A', 'B'], i='id', j='year')
        tm.assert_frame_equal(long_frame, exp_frame)