Python pandas.core.frame.DataFrame.from_dict() Examples

The following are 12 code examples of pandas.core.frame.DataFrame.from_dict(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas.core.frame.DataFrame, or try the search function.
Example #1
Source File: test_stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def test_categorical_order(self, file):
        """Compare a frame read by ``read_stata`` against a hand-built one.

        The expected frame is assembled column by column from explicit
        ``(is_cat, col_name, labels, codes)`` specifications.  ``file`` is
        the name of an attribute on the test instance; its value is what
        gets handed to ``read_stata``.
        """
        # Each entry: is_cat, col_name, labels (in order), underlying data
        specs = [
            (True, 'ordered',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
            (True, 'reverse',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]),
            (True, 'noorder',
             ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])),
            (True, 'floating',
             ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
            (True, 'float_missing',
             ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
            (False, 'nolabel',
             [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
            (True, 'int32_mixed',
             ['d', 2, 'e', 'b', 'a'], np.arange(5)),
        ]

        # Build the expected columns in specification order.
        built = OrderedDict()
        for categorical, name, labels, codes in specs:
            if not categorical:
                built[name] = pd.Series(labels, dtype=np.float32)
                continue
            built[name] = pd.Categorical.from_codes(codes, labels)
        expected = DataFrame.from_dict(built)

        # Read the fixture and compare whole frames first; categorical
        # details are checked separately below.
        parsed = read_stata(getattr(self, file))
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Codes and categories must match exactly for categorical columns.
        for name in expected:
            if not is_categorical_dtype(expected[name]):
                continue
            want = expected[name].cat
            got = parsed[name].cat
            tm.assert_series_equal(want.codes, got.codes)
            tm.assert_index_equal(want.categories, got.categories)
Example #2
Source File: stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _do_convert_categoricals(self, data, value_label_dict, lbllist,
                                 order_categoricals):
        """
        Converts categorical columns to Categorical type.
        """
        value_labels = list(compat.iterkeys(value_label_dict))
        cat_converted_data = []
        for col, label in zip(data, lbllist):
            if label in value_labels:
                # Explicit call with ordered=True
                cat_data = Categorical(data[col], ordered=order_categoricals)
                categories = []
                for category in cat_data.categories:
                    if category in value_label_dict[label]:
                        categories.append(value_label_dict[label][category])
                    else:
                        categories.append(category)  # Partially labeled
                try:
                    cat_data.categories = categories
                except ValueError:
                    vc = Series(categories).value_counts()
                    repeats = list(vc.index[vc > 1])
                    repeats = '\n' + '-' * 80 + '\n'.join(repeats)
                    raise ValueError('Value labels for column {col} are not '
                                     'unique. The repeated labels are:\n'
                                     '{repeats}'
                                     .format(col=col, repeats=repeats))
                # TODO: is the next line needed above in the data(...) method?
                cat_data = Series(cat_data, index=data.index)
                cat_converted_data.append((col, cat_data))
            else:
                cat_converted_data.append((col, data[col]))
        data = DataFrame.from_dict(OrderedDict(cat_converted_data))
        return data 
Example #3
Source File: stata.py    From recruit with Apache License 2.0 5 votes vote down vote up
def _prepare_categoricals(self, data):
        """Replace categorical columns by their integer codes.

        Side effects: stores the per-column categorical flags on
        ``self._is_col_cat`` and one ``StataValueLabel`` per categorical
        column on ``self._value_labels``.

        Categorical columns are swapped for a copy of their codes, upcast
        when the largest code reaches the value returned by
        ``StataMissingValue.get_base_missing_value`` for the code dtype,
        and with code ``-1`` replaced by that value.  Other columns are
        kept as they are.  When no column is categorical, ``data`` itself
        is returned.

        Raises ``ValueError`` when a categorical column has int64 codes.
        """
        cat_flags = [is_categorical_dtype(data[name]) for name in data]
        self._is_col_cat = cat_flags
        self._value_labels = []
        if not any(cat_flags):
            return data

        missing_for = StataMissingValue.get_base_missing_value
        converted = OrderedDict()
        for name, flagged in zip(data, cat_flags):
            column = data[name]
            if not flagged:
                converted[name] = column
                continue

            self._value_labels.append(StataValueLabel(column))
            codes = column.cat.codes
            code_dtype = codes.dtype
            if code_dtype == np.int64:
                raise ValueError('It is not possible to export '
                                 'int64-based categorical data to Stata.')
            raw = codes.values.copy()

            # Widen the storage type when the largest code collides with
            # the missing-value marker of the current type.
            if raw.max() >= missing_for(code_dtype):
                if code_dtype == np.int8:
                    code_dtype = np.int16
                elif code_dtype == np.int16:
                    code_dtype = np.int32
                else:
                    code_dtype = np.float64
                raw = np.array(raw, dtype=code_dtype)

            # Code -1 becomes the missing value for the (possibly widened)
            # type.
            raw[raw == -1] = missing_for(code_dtype)
            converted[name] = raw
        return DataFrame.from_dict(converted)
Example #4
Source File: test_stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def test_categorical_order(self, file):
        """Compare a frame read by ``read_stata`` against a hand-built one.

        The expected frame is assembled column by column from explicit
        ``(is_cat, col_name, labels, codes)`` specifications.  ``file`` is
        the name of an attribute on the test instance; its value is what
        gets handed to ``read_stata``.
        """
        # Each entry: is_cat, col_name, labels (in order), underlying data
        specs = [
            (True, 'ordered',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
            (True, 'reverse',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]),
            (True, 'noorder',
             ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])),
            (True, 'floating',
             ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
            (True, 'float_missing',
             ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
            (False, 'nolabel',
             [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
            (True, 'int32_mixed',
             ['d', 2, 'e', 'b', 'a'], np.arange(5)),
        ]

        # Build the expected columns in specification order.
        built = OrderedDict()
        for categorical, name, labels, codes in specs:
            if not categorical:
                built[name] = pd.Series(labels, dtype=np.float32)
                continue
            built[name] = pd.Categorical.from_codes(codes, labels)
        expected = DataFrame.from_dict(built)

        # Read the fixture and compare whole frames first; categorical
        # details are checked separately below.
        parsed = read_stata(getattr(self, file))
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Codes and categories must match exactly for categorical columns.
        for name in expected:
            if not is_categorical_dtype(expected[name]):
                continue
            want = expected[name].cat
            got = parsed[name].cat
            tm.assert_series_equal(want.codes, got.codes)
            tm.assert_index_equal(want.categories, got.categories)
Example #5
Source File: stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _do_convert_categoricals(self, data, value_label_dict, lbllist,
                                 order_categoricals):
        """
        Converts categorical columns to Categorical type.
        """
        value_labels = list(compat.iterkeys(value_label_dict))
        cat_converted_data = []
        for col, label in zip(data, lbllist):
            if label in value_labels:
                # Explicit call with ordered=True
                cat_data = Categorical(data[col], ordered=order_categoricals)
                categories = []
                for category in cat_data.categories:
                    if category in value_label_dict[label]:
                        categories.append(value_label_dict[label][category])
                    else:
                        categories.append(category)  # Partially labeled
                try:
                    cat_data.categories = categories
                except ValueError:
                    vc = Series(categories).value_counts()
                    repeats = list(vc.index[vc > 1])
                    repeats = '\n' + '-' * 80 + '\n'.join(repeats)
                    msg = 'Value labels for column {0} are not unique. The ' \
                          'repeated labels are:\n{1}'.format(col, repeats)
                    raise ValueError(msg)
                # TODO: is the next line needed above in the data(...) method?
                cat_data = Series(cat_data, index=data.index)
                cat_converted_data.append((col, cat_data))
            else:
                cat_converted_data.append((col, data[col]))
        data = DataFrame.from_dict(OrderedDict(cat_converted_data))
        return data 
Example #6
Source File: stata.py    From vnpy_crypto with MIT License 5 votes vote down vote up
def _prepare_categoricals(self, data):
        """Replace categorical columns by their integer codes.

        Side effects: stores the per-column categorical flags on
        ``self._is_col_cat`` and one ``StataValueLabel`` per categorical
        column on ``self._value_labels``.

        Categorical columns are swapped for a copy of their codes, upcast
        when the largest code reaches the value returned by
        ``StataMissingValue.get_base_missing_value`` for the code dtype,
        and with code ``-1`` replaced by that value.  Other columns are
        kept as they are.  When no column is categorical, ``data`` itself
        is returned.

        Raises ``ValueError`` when a categorical column has int64 codes.
        """
        cat_flags = [is_categorical_dtype(data[name]) for name in data]
        self._is_col_cat = cat_flags
        self._value_labels = []
        if not any(cat_flags):
            return data

        missing_for = StataMissingValue.get_base_missing_value
        converted = OrderedDict()
        for name, flagged in zip(data, cat_flags):
            column = data[name]
            if not flagged:
                converted[name] = column
                continue

            self._value_labels.append(StataValueLabel(column))
            codes = column.cat.codes
            code_dtype = codes.dtype
            if code_dtype == np.int64:
                raise ValueError('It is not possible to export '
                                 'int64-based categorical data to Stata.')
            raw = codes.values.copy()

            # Widen the storage type when the largest code collides with
            # the missing-value marker of the current type.
            if raw.max() >= missing_for(code_dtype):
                if code_dtype == np.int8:
                    code_dtype = np.int16
                elif code_dtype == np.int16:
                    code_dtype = np.int32
                else:
                    code_dtype = np.float64
                raw = np.array(raw, dtype=code_dtype)

            # Code -1 becomes the missing value for the (possibly widened)
            # type.
            raw[raw == -1] = missing_for(code_dtype)
            converted[name] = raw
        return DataFrame.from_dict(converted)
Example #7
Source File: categorical.py    From Computable with MIT License 5 votes vote down vote up
def describe(self):
        """
        Returns a dataframe with frequency and counts by level.

        The result is indexed by ``self.levels`` and has two columns:
        ``counts`` (occurrences of each level's code in ``self.labels``)
        and ``freqs`` (``counts`` divided by their total).

        Every level gets a row, including levels that never occur (count
        0).  Negative codes are skipped so they cannot shift or lengthen
        the counts relative to ``self.levels``.
        NOTE(review): assumes ``self.labels`` holds integer positions into
        ``self.levels`` and that negative codes mark missing values --
        confirm against the class constructor.
        """
        # Hack?
        import numpy as np
        from pandas.core.frame import DataFrame
        codes = np.asarray(self.labels, dtype=np.intp)
        # bincount with minlength keeps one slot per level, so the counts
        # always line up with self.levels even for unobserved levels.
        counts = np.bincount(codes[codes >= 0], minlength=len(self.levels))
        # With no valid observations the total is 0 and the frequencies
        # come out as NaN.
        freqs = counts / float(counts.sum())
        return DataFrame.from_dict({
            'counts': counts,
            'freqs': freqs,
            'levels': self.levels
        }).set_index('levels')
Example #8
Source File: test_stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def test_categorical_order(self, file):
        """Compare a frame read by ``read_stata`` against a hand-built one.

        The expected frame is assembled column by column from explicit
        ``(is_cat, col_name, labels, codes)`` specifications.  ``file`` is
        the name of an attribute on the test instance; its value is what
        gets handed to ``read_stata``.
        """
        # Each entry: is_cat, col_name, labels (in order), underlying data
        specs = [
            (True, 'ordered',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
            (True, 'reverse',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]),
            (True, 'noorder',
             ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])),
            (True, 'floating',
             ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
            (True, 'float_missing',
             ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
            (False, 'nolabel',
             [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
            (True, 'int32_mixed',
             ['d', 2, 'e', 'b', 'a'], np.arange(5)),
        ]

        # Build the expected columns in specification order.
        built = OrderedDict()
        for categorical, name, labels, codes in specs:
            if not categorical:
                built[name] = pd.Series(labels, dtype=np.float32)
                continue
            built[name] = pd.Categorical.from_codes(codes, labels)
        expected = DataFrame.from_dict(built)

        # Read the fixture and compare whole frames first; categorical
        # details are checked separately below.
        parsed = read_stata(getattr(self, file))
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Codes and categories must match exactly for categorical columns.
        for name in expected:
            if not is_categorical_dtype(expected[name]):
                continue
            want = expected[name].cat
            got = parsed[name].cat
            tm.assert_series_equal(want.codes, got.codes)
            tm.assert_index_equal(want.categories, got.categories)
Example #9
Source File: stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _do_convert_categoricals(self, data, value_label_dict, lbllist,
                                 order_categoricals):
        """
        Converts categorical columns to Categorical type.
        """
        value_labels = list(compat.iterkeys(value_label_dict))
        cat_converted_data = []
        for col, label in zip(data, lbllist):
            if label in value_labels:
                # Explicit call with ordered=True
                cat_data = Categorical(data[col], ordered=order_categoricals)
                categories = []
                for category in cat_data.categories:
                    if category in value_label_dict[label]:
                        categories.append(value_label_dict[label][category])
                    else:
                        categories.append(category)  # Partially labeled
                try:
                    cat_data.categories = categories
                except ValueError:
                    vc = Series(categories).value_counts()
                    repeats = list(vc.index[vc > 1])
                    repeats = '\n' + '-' * 80 + '\n'.join(repeats)
                    raise ValueError('Value labels for column {col} are not '
                                     'unique. The repeated labels are:\n'
                                     '{repeats}'
                                     .format(col=col, repeats=repeats))
                # TODO: is the next line needed above in the data(...) method?
                cat_data = Series(cat_data, index=data.index)
                cat_converted_data.append((col, cat_data))
            else:
                cat_converted_data.append((col, data[col]))
        data = DataFrame.from_dict(OrderedDict(cat_converted_data))
        return data 
Example #10
Source File: stata.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 5 votes vote down vote up
def _prepare_categoricals(self, data):
        """Replace categorical columns by their integer codes.

        Side effects: stores the per-column categorical flags on
        ``self._is_col_cat`` and one ``StataValueLabel`` per categorical
        column on ``self._value_labels``.

        Categorical columns are swapped for a copy of their codes, upcast
        when the largest code reaches the value returned by
        ``StataMissingValue.get_base_missing_value`` for the code dtype,
        and with code ``-1`` replaced by that value.  Other columns are
        kept as they are.  When no column is categorical, ``data`` itself
        is returned.

        Raises ``ValueError`` when a categorical column has int64 codes.
        """
        cat_flags = [is_categorical_dtype(data[name]) for name in data]
        self._is_col_cat = cat_flags
        self._value_labels = []
        if not any(cat_flags):
            return data

        missing_for = StataMissingValue.get_base_missing_value
        converted = OrderedDict()
        for name, flagged in zip(data, cat_flags):
            column = data[name]
            if not flagged:
                converted[name] = column
                continue

            self._value_labels.append(StataValueLabel(column))
            codes = column.cat.codes
            code_dtype = codes.dtype
            if code_dtype == np.int64:
                raise ValueError('It is not possible to export '
                                 'int64-based categorical data to Stata.')
            raw = codes.values.copy()

            # Widen the storage type when the largest code collides with
            # the missing-value marker of the current type.
            if raw.max() >= missing_for(code_dtype):
                if code_dtype == np.int8:
                    code_dtype = np.int16
                elif code_dtype == np.int16:
                    code_dtype = np.int32
                else:
                    code_dtype = np.float64
                raw = np.array(raw, dtype=code_dtype)

            # Code -1 becomes the missing value for the (possibly widened)
            # type.
            raw[raw == -1] = missing_for(code_dtype)
            converted[name] = raw
        return DataFrame.from_dict(converted)
Example #11
Source File: pandas2ri.py    From rpy2 with GNU General Public License v2.0 5 votes vote down vote up
def rpy2py_dataframe(obj):
    """Build a pandas data frame from the items of ``obj``.

    Each value that is a ``Sexp`` instance is passed through ``rpy2py``;
    any other value is used as is.  Item order is preserved, and the
    resulting frame's index is set to ``obj.rownames``.
    """
    converted = OrderedDict()
    for name, value in obj.items():
        if isinstance(value, Sexp):
            value = rpy2py(value)
        converted[name] = value
    frame = PandasDataFrame.from_dict(converted)
    frame.index = obj.rownames
    return frame
Example #12
Source File: test_stata.py    From twitter-stock-recommendation with MIT License 5 votes vote down vote up
def test_categorical_order(self, file):
        """Compare a frame read by ``read_stata`` against a hand-built one.

        The expected frame is assembled column by column from explicit
        ``(is_cat, col_name, labels, codes)`` specifications.  ``file`` is
        the name of an attribute on the test instance; its value is what
        gets handed to ``read_stata``.
        """
        # Each entry: is_cat, col_name, labels (in order), underlying data
        specs = [
            (True, 'ordered',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)),
            (True, 'reverse',
             ['a', 'b', 'c', 'd', 'e'], np.arange(5)[::-1]),
            (True, 'noorder',
             ['a', 'b', 'c', 'd', 'e'], np.array([2, 1, 4, 0, 3])),
            (True, 'floating',
             ['a', 'b', 'c', 'd', 'e'], np.arange(0, 5)),
            (True, 'float_missing',
             ['a', 'd', 'e'], np.array([0, 1, 2, -1, -1])),
            (False, 'nolabel',
             [1.0, 2.0, 3.0, 4.0, 5.0], np.arange(5)),
            (True, 'int32_mixed',
             ['d', 2, 'e', 'b', 'a'], np.arange(5)),
        ]

        # Build the expected columns in specification order.
        built = OrderedDict()
        for categorical, name, labels, codes in specs:
            if not categorical:
                built[name] = pd.Series(labels, dtype=np.float32)
                continue
            built[name] = pd.Categorical.from_codes(codes, labels)
        expected = DataFrame.from_dict(built)

        # Read the fixture and compare whole frames first; categorical
        # details are checked separately below.
        parsed = read_stata(getattr(self, file))
        tm.assert_frame_equal(expected, parsed, check_categorical=False)

        # Codes and categories must match exactly for categorical columns.
        for name in expected:
            if not is_categorical_dtype(expected[name]):
                continue
            want = expected[name].cat
            got = parsed[name].cat
            tm.assert_series_equal(want.codes, got.codes)
            tm.assert_index_equal(want.categories, got.categories)