Python pandas.core.frame.DataFrame.from_records() Examples

The following are 27 code examples of pandas.core.frame.DataFrame.from_records(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.frame.DataFrame, or try the search function.
Example #1
Source File: test_stata.py    From Computable with MIT License 6 votes vote down vote up
def test_read_dta4(self):
    """Check the frames read from ``self.dta4`` and ``self.dta4_13``.

    Both files are read through ``self.read_dta`` and each result is
    compared with the same hand-written expected frame.
    """
    frame_default = self.read_dta(self.dta4)
    frame_v13 = self.read_dta(self.dta4_13)

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    tm.assert_frame_equal(frame_default, expected)
    tm.assert_frame_equal(frame_v13, expected)
Example #2
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_read_dta12(self):
    """Compare the frame read from ``self.dta21_117`` with literal rows."""
    actual = self.read_dta(self.dta21_117)

    column_names = ['x', 'y', 'z']
    rows = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # dtypes are excluded from the comparison (check_dtype=False)
    tm.assert_frame_equal(actual, expected, check_dtype=False)
Example #3
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` on categoricals with over-long and mixed labels.

    Two cases are exercised on a single categorical column:

    * labels of 10000 characters each must make ``to_stata`` raise
      ``ValueError``;
    * labels mixing strings with an integer must emit exactly one warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        pytest.raises(ValueError, original.to_stata, path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # Use a fresh ensure_clean() context for this write. Reusing ``path``
    # from the context above would write to a path whose context has
    # already exited, so nothing would clean the file up afterwards.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #4
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_writing(self, version):
    """Round-trip categorical columns through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta``, re-indexed on ``'index'`` and compared with the
    expected frame.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # every column of the frame to be written is made categorical
    original = pd.concat(
        [original[name].astype('category') for name in original], axis=1)

    # in the expected frame these two columns are stringified first
    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = pd.concat(
        [expected[name].astype('category') for name in expected], axis=1)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # warnings raised while writing/reading are recorded, not shown
        with warnings.catch_warnings(record=True):
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            res = round_tripped.set_index('index')
            tm.assert_frame_equal(res, expected, check_categorical=False)
Example #5
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_read_dta18(self):
    """Check data and metadata read from ``self.dta22_118``.

    The parsed columns are compared one by one with a hand-written frame,
    then the variable labels and data label exposed by ``StataReader``
    are compared with literal values.
    """
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }
    with StataReader(self.dta22_118) as rdr:
        tm.assert_dict_equal(rdr.variable_labels(), expected_labels)

        assert rdr.data_label == u'This is a  Ünicode data label'
Example #6
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_read_dta12(self):
    """Compare the frame read from ``self.dta21_117`` with literal rows."""
    actual = self.read_dta(self.dta21_117)

    column_names = ['x', 'y', 'z']
    rows = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # dtypes are excluded from the comparison (check_dtype=False)
    tm.assert_frame_equal(actual, expected, check_dtype=False)
Example #7
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_read_dta4(self, file):
    """Check the frame read from the fixture attribute named by ``file``.

    ``file`` is the name of an attribute on ``self``; that attribute's
    value is passed to ``self.read_dta`` and the result is compared with
    an all-categorical expected frame.
    """
    parsed = self.read_dta(getattr(self, file))

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    plain = DataFrame.from_records(rows, columns=column_names)

    # every expected column is converted to a categorical
    expected = pd.concat(
        [plain[name].astype('category') for name in plain], axis=1)

    # stata doesn't save .category metadata, so it is left out of the check
    tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls 
Example #8
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` on categoricals with over-long and mixed labels.

    Two cases are exercised on a single categorical column:

    * labels of 10000 characters each must make ``to_stata`` raise
      ``ValueError``;
    * labels mixing strings with an integer must emit exactly one warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        pytest.raises(ValueError, original.to_stata, path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # Use a fresh ensure_clean() context for this write. Reusing ``path``
    # from the context above would write to a path whose context has
    # already exited, so nothing would clean the file up afterwards.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #9
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_categorical_writing(self):
    """Round-trip categorical columns through ``to_stata``.

    The frame is written, read back with ``self.read_dta``, re-indexed on
    ``'index'`` and compared with the expected frame.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # every column of the frame to be written is made categorical
    original = pd.concat(
        [original[name].astype('category') for name in original], axis=1)

    # in the expected frame these two columns are stringified first
    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = pd.concat(
        [expected[name].astype('category') for name in expected], axis=1)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # warnings raised while writing/reading are recorded, not shown
        with warnings.catch_warnings(record=True):
            original.to_stata(path)
            round_tripped = self.read_dta(path)
            res = round_tripped.set_index('index')
            tm.assert_frame_equal(res, expected, check_categorical=False)
Example #10
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_read_dta18(self):
    """Check data and metadata read from ``self.dta22_118``.

    The parsed columns are compared one by one with a hand-written frame,
    then the variable labels and data label exposed by ``StataReader``
    are compared with literal values.
    """
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }
    with StataReader(self.dta22_118) as rdr:
        tm.assert_dict_equal(rdr.variable_labels(), expected_labels)

        assert rdr.data_label == u'This is a  Ünicode data label'
Example #11
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_read_dta12(self):
    """Compare the frame read from ``self.dta21_117`` with literal rows."""
    actual = self.read_dta(self.dta21_117)

    column_names = ['x', 'y', 'z']
    rows = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # dtypes are excluded from the comparison (check_dtype=False)
    tm.assert_frame_equal(actual, expected, check_dtype=False)
Example #12
Source File: test_stata.py    From elasticintel with GNU General Public License v3.0 5 votes vote down vote up
def test_read_dta4(self, file):
    """Check the frame read from the fixture attribute named by ``file``.

    ``file`` is the name of an attribute on ``self``; that attribute's
    value is passed to ``self.read_dta`` and the result is compared with
    an all-categorical expected frame.
    """
    parsed = self.read_dta(getattr(self, file))

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    plain = DataFrame.from_records(rows, columns=column_names)

    # every expected column is converted to a categorical
    expected = pd.concat(
        [plain[name].astype('category') for name in plain], axis=1)

    # stata doesn't save .category metadata, so it is left out of the check
    tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls 
Example #13
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` on categoricals with over-long and mixed labels.

    Two cases are exercised on a single categorical column:

    * labels of 10000 characters each must make ``to_stata`` raise
      ``ValueError`` with the combined-length message;
    * labels mixing strings with an integer must produce a
      ``pd.io.stata.ValueLabelTypeMismatch`` warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        msg = ("Stata value labels for a single variable must have"
               r" a combined length less than 32,000 characters\.")
        with pytest.raises(ValueError, match=msg):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # Use a fresh ensure_clean() context for this write. Reusing ``path``
    # from the context above would write to a path whose context has
    # already exited, so nothing would clean the file up afterwards.
    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch):
            # should get a warning for mixed content
            original.to_stata(path)
Example #14
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_categorical_writing(self, version):
    """Round-trip categorical columns through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta``, re-indexed on ``'index'`` and compared with the
    expected frame.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # every column of the frame to be written is made categorical
    original = pd.concat(
        [original[name].astype('category') for name in original], axis=1)

    # in the expected frame these two columns are stringified first
    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = pd.concat(
        [expected[name].astype('category') for name in expected], axis=1)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, version=version)
        round_tripped = self.read_dta(path)
        res = round_tripped.set_index('index')
        tm.assert_frame_equal(res, expected, check_categorical=False)
Example #15
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_read_dta18(self):
    """Check data and metadata read from ``self.dta22_118``.

    The parsed columns are compared one by one with a hand-written frame,
    then the variable labels and data label exposed by ``StataReader``
    are compared with literal values.
    """
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }
    with StataReader(self.dta22_118) as rdr:
        tm.assert_dict_equal(rdr.variable_labels(), expected_labels)

        assert rdr.data_label == u'This is a  Ünicode data label'
Example #16
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_read_dta4(self, file):
    """Check the frame read from the fixture attribute named by ``file``.

    ``file`` is the name of an attribute on ``self``; that attribute's
    value is passed to ``self.read_dta`` and the result is compared with
    an all-categorical expected frame.
    """
    parsed = self.read_dta(getattr(self, file))

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    plain = DataFrame.from_records(rows, columns=column_names)

    # every expected column is converted to a categorical
    expected = pd.concat(
        [plain[name].astype('category') for name in plain], axis=1)

    # stata doesn't save .category metadata, so it is left out of the check
    tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls 
Example #17
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_read_dta4(self, file):
    """Check the frame read from the fixture attribute named by ``file``.

    ``file`` is the name of an attribute on ``self``; that attribute's
    value is passed to ``self.read_dta`` and the result is compared with
    an all-categorical expected frame.
    """
    parsed = self.read_dta(getattr(self, file))

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    plain = DataFrame.from_records(rows, columns=column_names)

    # every expected column is converted to a categorical
    expected = pd.concat(
        [plain[name].astype('category') for name in plain], axis=1)

    # stata doesn't save .category metadata, so it is left out of the check
    tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls 
Example #18
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` on categoricals with over-long and mixed labels.

    Two cases are exercised on a single categorical column:

    * labels of 10000 characters each must make ``to_stata`` raise
      ``ValueError``;
    * labels mixing strings with an integer must emit exactly one warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        pytest.raises(ValueError, original.to_stata, path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # Use a fresh ensure_clean() context for this write. Reusing ``path``
    # from the context above would write to a path whose context has
    # already exited, so nothing would clean the file up afterwards.
    with tm.ensure_clean() as path:
        with warnings.catch_warnings(record=True) as w:
            original.to_stata(path)
            # should get a warning for mixed content
            assert len(w) == 1
Example #19
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_categorical_writing(self, version):
    """Round-trip categorical columns through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta``, re-indexed on ``'index'`` and compared with the
    expected frame.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # every column of the frame to be written is made categorical
    original = pd.concat(
        [original[name].astype('category') for name in original], axis=1)

    # in the expected frame these two columns are stringified first
    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = pd.concat(
        [expected[name].astype('category') for name in expected], axis=1)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        # warnings raised while writing/reading are recorded, not shown
        with warnings.catch_warnings(record=True):
            original.to_stata(path, version=version)
            round_tripped = self.read_dta(path)
            res = round_tripped.set_index('index')
            tm.assert_frame_equal(res, expected, check_categorical=False)
Example #20
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_read_dta18(self):
    """Check data and metadata read from ``self.dta22_118``.

    The parsed columns are compared one by one with a hand-written frame,
    then the variable labels and data label exposed by ``StataReader``
    are compared with literal values.
    """
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }
    with StataReader(self.dta22_118) as rdr:
        tm.assert_dict_equal(rdr.variable_labels(), expected_labels)

        assert rdr.data_label == u'This is a  Ünicode data label'
Example #21
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_read_dta12(self):
    """Compare the frame read from ``self.dta21_117`` with literal rows."""
    actual = self.read_dta(self.dta21_117)

    column_names = ['x', 'y', 'z']
    rows = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # dtypes are excluded from the comparison (check_dtype=False)
    tm.assert_frame_equal(actual, expected, check_dtype=False)
Example #22
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_read_dta4(self, file):
    """Check the frame read from the fixture attribute named by ``file``.

    ``file`` is the name of an attribute on ``self``; that attribute's
    value is passed to ``self.read_dta`` and the result is compared with
    an all-categorical expected frame.
    """
    parsed = self.read_dta(getattr(self, file))

    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled']
    rows = [
        ["one", "ten", "one", "one", "one"],
        ["two", "nine", "two", "two", "two"],
        ["three", "eight", "three", "three", "three"],
        ["four", "seven", 4, "four", "four"],
        ["five", "six", 5, np.nan, "five"],
        ["six", "five", 6, np.nan, "six"],
        ["seven", "four", 7, np.nan, "seven"],
        ["eight", "three", 8, np.nan, "eight"],
        ["nine", "two", 9, np.nan, "nine"],
        ["ten", "one", "ten", np.nan, "ten"],
    ]
    plain = DataFrame.from_records(rows, columns=column_names)

    # every expected column is converted to a categorical
    expected = pd.concat(
        [plain[name].astype('category') for name in plain], axis=1)

    # stata doesn't save .category metadata, so it is left out of the check
    tm.assert_frame_equal(parsed, expected, check_categorical=False)

    # File containing strls 
Example #23
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categorical_warnings_and_errors(self):
    """Check ``to_stata`` on categoricals with over-long and mixed labels.

    Two cases are exercised on a single categorical column:

    * labels of 10000 characters each must make ``to_stata`` raise
      ``ValueError`` with the combined-length message;
    * labels mixing strings with an integer must produce a
      ``pd.io.stata.ValueLabelTypeMismatch`` warning.
    """
    # Error for labels too long
    original = pd.DataFrame.from_records(
        [['a' * 10000],
         ['b' * 10000],
         ['c' * 10000],
         ['d' * 10000]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)
    with tm.ensure_clean() as path:
        msg = ("Stata value labels for a single variable must have"
               r" a combined length less than 32,000 characters\.")
        with pytest.raises(ValueError, match=msg):
            original.to_stata(path)

    # Warning for non-string labels
    original = pd.DataFrame.from_records(
        [['a'],
         ['b'],
         ['c'],
         ['d'],
         [1]],
        columns=['Too_long'])
    original = pd.concat([original[col].astype('category')
                          for col in original], axis=1)

    # Use a fresh ensure_clean() context for this write. Reusing ``path``
    # from the context above would write to a path whose context has
    # already exited, so nothing would clean the file up afterwards.
    with tm.ensure_clean() as path:
        with tm.assert_produces_warning(pd.io.stata.ValueLabelTypeMismatch):
            # should get a warning for mixed content
            original.to_stata(path)
Example #24
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categorical_writing(self, version):
    """Round-trip categorical columns through ``to_stata``.

    The frame is written with the given ``version``, read back with
    ``self.read_dta``, re-indexed on ``'index'`` and compared with the
    expected frame.
    """
    column_names = ['fully_labeled', 'fully_labeled2',
                    'incompletely_labeled', 'labeled_with_missings',
                    'float_labelled', 'unlabeled']
    rows = [
        ["one", "ten", "one", "one", "one", 1],
        ["two", "nine", "two", "two", "two", 2],
        ["three", "eight", "three", "three", "three", 3],
        ["four", "seven", 4, "four", "four", 4],
        ["five", "six", 5, np.nan, "five", 5],
        ["six", "five", 6, np.nan, "six", 6],
        ["seven", "four", 7, np.nan, "seven", 7],
        ["eight", "three", 8, np.nan, "eight", 8],
        ["nine", "two", 9, np.nan, "nine", 9],
        ["ten", "one", "ten", np.nan, "ten", 10],
    ]
    original = DataFrame.from_records(rows, columns=column_names)
    expected = original.copy()

    # every column of the frame to be written is made categorical
    original = pd.concat(
        [original[name].astype('category') for name in original], axis=1)

    # in the expected frame these two columns are stringified first
    for name in ('incompletely_labeled', 'unlabeled'):
        expected[name] = expected[name].apply(str)
    expected = pd.concat(
        [expected[name].astype('category') for name in expected], axis=1)
    expected.index.name = 'index'

    with tm.ensure_clean() as path:
        original.to_stata(path, version=version)
        round_tripped = self.read_dta(path)
        res = round_tripped.set_index('index')
        tm.assert_frame_equal(res, expected, check_categorical=False)
Example #25
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_read_dta18(self):
    """Check data and metadata read from ``self.dta22_118``.

    The parsed columns are compared one by one with a hand-written frame,
    then the variable labels and data label exposed by ``StataReader``
    are compared with literal values.
    """
    parsed_118 = self.read_dta(self.dta22_118)
    parsed_118["Bytes"] = parsed_118["Bytes"].astype('O')

    column_names = ['Things', 'Cities', 'Unicode_Cities_Strl',
                    'Ints', 'Floats', 'Bytes', 'Longs']
    rows = [
        ['Cat', 'Bogota', u'Bogotá', 1, 1.0, u'option b Ünicode', 1.0],
        ['Dog', 'Boston', u'Uzunköprü', np.nan, np.nan, np.nan, np.nan],
        ['Plane', 'Rome', u'Tromsø', 0, 0.0, 'option a', 0.0],
        ['Potato', 'Tokyo', u'Elâzığ', -4, 4.0, 4, 4],
        ['', '', '', 0, 0.3332999, 'option a', 1 / 3.],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)
    expected["Floats"] = expected["Floats"].astype(np.float32)

    for name in parsed_118.columns:
        tm.assert_almost_equal(parsed_118[name], expected[name])

    expected_labels = {
        u'Unicode_Cities_Strl': u'Here are some strls with Ünicode chars',
        u'Longs': u'long data',
        u'Things': u'Here are some things',
        u'Bytes': u'byte data',
        u'Ints': u'int data',
        u'Cities': u'Here are some cities',
        u'Floats': u'float data',
    }
    with StataReader(self.dta22_118) as rdr:
        tm.assert_dict_equal(rdr.variable_labels(), expected_labels)

        assert rdr.data_label == u'This is a  Ünicode data label'
Example #26
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_read_dta12(self):
    """Compare the frame read from ``self.dta21_117`` with literal rows."""
    actual = self.read_dta(self.dta21_117)

    column_names = ['x', 'y', 'z']
    rows = [
        [1, "abc", "abcdefghi"],
        [3, "cba", "qwertywertyqwerty"],
        [93, "", "strl"],
    ]
    expected = DataFrame.from_records(rows, columns=column_names)

    # dtypes are excluded from the comparison (check_dtype=False)
    tm.assert_frame_equal(actual, expected, check_dtype=False)
Example #27
Source File: test_stata.py    From Computable with MIT License 4 votes vote down vote up
def test_read_dta2(self):
    """Read ``self.dta2`` and ``self.dta2_13`` and expect one warning.

    The test is skipped when ``sys.version`` compares below ``'2.7'``.
    An expected frame of datetimes is built, but the frame comparisons
    are currently disabled (see the note at the end), so only the number
    of recorded warnings is asserted.
    """
    if LooseVersion(sys.version) < '2.7':
        raise nose.SkipTest('datetime interp under 2.6 is faulty')

    column_names = ['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
                    'monthly_date', 'quarterly_date', 'half_yearly_date',
                    'yearly_date']
    first_row = (
        datetime(2006, 11, 19, 23, 13, 20),
        1479596223000,
        datetime(2010, 1, 20),
        datetime(2010, 1, 8),
        datetime(2010, 1, 1),
        datetime(1974, 7, 1),
        datetime(2010, 1, 1),
        datetime(2010, 1, 1),
    )
    second_row = (
        datetime(1959, 12, 31, 20, 3, 20),
        -1479590,
        datetime(1953, 10, 2),
        datetime(1948, 6, 10),
        datetime(1955, 1, 1),
        datetime(1955, 7, 1),
        datetime(1955, 1, 1),
        datetime(2, 1, 1),
    )
    # one NaT per column
    missing_row = (pd.NaT,) * 8
    expected = DataFrame.from_records(
        [first_row, second_row, missing_row], columns=column_names)

    with warnings.catch_warnings(record=True) as caught:
        parsed = self.read_dta(self.dta2)
        parsed_13 = self.read_dta(self.dta2_13)
        # should get a warning for that format.
        np.testing.assert_equal(len(caught), 1)

    # buggy test because of the NaT comparison on certain platforms
    #
    #tm.assert_frame_equal(parsed, expected)
    #tm.assert_frame_equal(parsed_13, expected)