Python pandas.DataFrame.from_records() Examples

The following are code examples of pandas.DataFrame.from_records(), collected from open-source projects. The source file, project, and license are noted above each example. For the full signature and parameters, see the pandas documentation for DataFrame.from_records.
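
As a quick orientation, here is a minimal sketch (with made-up values) of the two call styles that recur in the examples below: a sequence of record tuples with explicit column names, and a NumPy structured array whose field names become the columns.

import numpy as np
from pandas import DataFrame

# records as tuples; column names are given explicitly and 'A' is promoted to the index
df1 = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                             columns=['A', 'B', 'C'], index=['A'])

# records as a structured array; the dtype's field names supply the columns
arr = np.array([(1, 2.5), (2, 3.5)], dtype=[('id', 'i8'), ('val', 'f8')])
df2 = DataFrame.from_records(arr)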
Example #1
Source File: test_foreign.py    From vnpy_crypto with MIT License
def test_datetime_roundtrip():
    dta = np.array([(1, datetime(2010, 1, 1), 2),
                    (2, datetime(2010, 2, 1), 3),
                    (4, datetime(2010, 3, 1), 5)],
                    dtype=[('var1', float), ('var2', object), ('var3', float)])
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2" : "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    assert_equal(dta, dta2)

    dta = DataFrame.from_records(dta)
    buf = BytesIO()
    writer = StataWriter(buf, dta, {"var2" : "tm"})
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf, pandas=True)
    ptesting.assert_frame_equal(dta, dta2.drop('index', axis=1)) 
Example #2
Source File: test.py    From realtime_talib with MIT License
def getCryptoHistoricalData(self, symbol, endTime, histPeriod, vwap=False):
        endTimeUNIX = dateToUNIX(endTime)
        startDate = getCurrentDateStr()
        priorDate = datetimeDiff(startDate, histPeriod)
        gdaxTicker = GDAX_TO_POLONIEX[symbol]

        stDateUNIX = dateToUNIX(priorDate)
        eDateUNIX = dateToUNIX(startDate)
        poloniexJsonURL = self.POLO_HIST_DATA.format(gdaxTicker, stDateUNIX, eDateUNIX, self.interval)

        import json
        import requests
        poloniexJson = requests.get(poloniexJsonURL).json()

        from pandas import DataFrame
        histDataframe = DataFrame.from_records(poloniexJson)
        histDataframe.drop('quoteVolume', axis=1, inplace=True)
        histDataframe.drop('weightedAverage', axis=1, inplace=True)
        histDataframe['date'] = histDataframe['date'].astype(float)

        return histDataframe[["date", "open", "high", "low", "close", "volume"]]
Example #3
Source File: Xueqiu.py    From dHydra with Apache License 2.0
def get_stocks(
        self,
        stockTypeList=['sha', 'shb', 'sza', 'szb'],
        columns=CONST_XUEQIU_QUOTE_ORDER_COLUMN
    ):
        for stockType in stockTypeList:
            print("正在从雪球获取:{}".format(EX_NAME[stockType]))
            page = 1
            while True:
                response = self.session.get(
                    URL_XUEQIU_QUOTE_ORDER(page, columns, stockType),
                    headers=HEADERS_XUEQIU
                ).json()
                df = DataFrame.from_records(
                    response["data"], columns=response["column"])
                if 'stocks' not in locals().keys():
                    stocks = df
                else:
                    stocks = stocks.append(df)
                if df.size == 0:
                    break
                page += 1
        return stocks 
Example #4
Source File: Xueqiu.py    From dHydra with Apache License 2.0
def get_quotation(self, symbol=None, symbolSet=None, dataframe=True, threadNum=3):
        if 'quotation' in self.__dict__.keys():
            del(self.quotation)
        # Cut symbolList
        symbolList = list(symbolSet)
        threads = []
        symbolListSlice = util.slice_list(num=threadNum, data_list=symbolList)
        for symbolList in symbolListSlice:
            loop = asyncio.new_event_loop()
            symbolsList = util.slice_list(step=50, data_list=symbolList)
            tasks = [self.get_quotation_task(
                symbols=symbols) for symbols in symbolsList]
            t = threading.Thread(target=util.thread_loop, args=(loop, tasks))
            threads.append(t)
        for t in threads:
            t.start()
        for t in threads:
            t.join()

        if dataframe:
            self.quotation = DataFrame.from_records(self.quotation).T
        return(self.quotation) 
Example #5
Source File: test_foreign.py    From vnpy_crypto with MIT License
def test_stata_writer_pandas():
    buf = BytesIO()
    dta = macrodata.load().data
    dtype = dta.dtype
    #as of 0.9.0 pandas only supports i8 and f8
    dta = dta.astype(np.dtype([('year', 'i8'),
                               ('quarter', 'i8')] + dtype.descr[2:]))
    dta4 = dta.astype(np.dtype([('year', 'i4'),
                                ('quarter', 'i4')] + dtype.descr[2:]))
    dta = DataFrame.from_records(dta)
    dta4 = DataFrame.from_records(dta4)
    # dta is int64 'i8'  given to Stata writer
    writer = StataWriter(buf, dta)
    writer.write_file()
    buf.seek(0)
    dta2 = genfromdta(buf)
    dta5 = DataFrame.from_records(dta2)
    # dta2 is int32 'i4'  returned from Stata reader

    if dta5.dtypes[1] == np.dtype('int64'):
        ptesting.assert_frame_equal(dta.reset_index(), dta5)
    else:
        # don't check index because it has different size, int32 versus int64
        ptesting.assert_frame_equal(dta4, dta5[dta5.columns[1:]]) 
Example #6
Source File: database.py    From cgat-core with MIT License
def fetch_DataFrame(query,
                    dbhandle=None,
                    attach=False):
    '''Fetch query results and return them as a pandas DataFrame.'''

    dbhandle = connect(dbhandle, attach=attach)

    cc = dbhandle.cursor()
    sqlresult = cc.execute(query).fetchall()
    # read the column names before the cursor is closed
    field_names = [d[0] for d in cc.description]
    cc.close()

    # see http://pandas.pydata.org/pandas-docs/dev/generated/
    # pandas.DataFrame.from_records.html#pandas.DataFrame.from_records
    # this method is designed to handle sql_records with proper type
    # conversion

    pandas_DataFrame = DataFrame.from_records(
        sqlresult,
        columns=field_names)
    return pandas_DataFrame 
Example #7
Source File: python_parser_only.py    From twitter-stock-recommendation with MIT License
def test_read_table_buglet_4x_multiindex(self):
        # see gh-6607
        text = """                      A       B       C       D        E
one two three   four
a   b   10.0032 5    -0.5109 -2.3358 -0.4645  0.05076  0.3640
a   q   20      4     0.4473  1.4152  0.2834  1.00661  0.1744
x   q   30      3    -0.6662 -0.5243 -0.3580  0.89145  2.5838"""

        df = self.read_table(StringIO(text), sep=r'\s+')
        assert df.index.names == ('one', 'two', 'three', 'four')

        # see gh-6893
        data = '      A B C\na b c\n1 3 7 0 3 6\n3 1 4 1 5 9'
        expected = DataFrame.from_records(
            [(1, 3, 7, 0, 3, 6), (3, 1, 4, 1, 5, 9)],
            columns=list('abcABC'), index=list('abc'))
        actual = self.read_table(StringIO(data), sep=r'\s+')
        tm.assert_frame_equal(actual, expected) 
Example #8
Source File: test_testing.py    From elasticintel with GNU General Public License v3.0
def test_index_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True) 
Example #9
Source File: test_testing.py    From elasticintel with GNU General Public License v3.0
def test_multiindex_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True) 
Example #10
Source File: test_testing.py    From elasticintel with GNU General Public License v3.0
def test_index_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1, df2, check_index_type=True) 
Example #11
Source File: util.py    From PyAthenaJDBC with MIT License
def as_pandas(cursor, coerce_float=False):
    from pandas import DataFrame

    names = [metadata[0] for metadata in cursor.description]
    return DataFrame.from_records(
        cursor.fetchall(), columns=names, coerce_float=coerce_float
    ) 
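
A hedged usage sketch for the helper above, with an in-memory sqlite3 cursor (the table and values are illustrative; any DB-API cursor that exposes description and fetchall() should behave the same way):

import sqlite3

conn = sqlite3.connect(':memory:')
cursor = conn.cursor()
cursor.execute('CREATE TABLE prices (symbol TEXT, close REAL)')
cursor.executemany('INSERT INTO prices VALUES (?, ?)',
                   [('AAA', 10.5), ('BBB', 20.25)])
cursor.execute('SELECT symbol, close FROM prices')
df = as_pandas(cursor)  # DataFrame with columns ['symbol', 'close']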
Example #12
Source File: test_testing.py    From elasticintel with GNU General Public License v3.0
def test_multiindex_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1, df2, check_index_type=True) 
Example #13
Source File: common.py    From elasticintel with GNU General Public License v3.0
def test_empty_with_nrows_chunksize(self):
        # see gh-9535
        expected = DataFrame([], columns=['foo', 'bar'])
        result = self.read_csv(StringIO('foo,bar\n'), nrows=10)
        tm.assert_frame_equal(result, expected)

        result = next(iter(self.read_csv(
            StringIO('foo,bar\n'), chunksize=10)))
        tm.assert_frame_equal(result, expected)

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = self.read_csv(StringIO('foo,bar\n'),
                                   nrows=10, as_recarray=True)
            result = DataFrame(result[2], columns=result[1],
                               index=result[0])
            tm.assert_frame_equal(DataFrame.from_records(
                result), expected, check_index_type=False)

        with tm.assert_produces_warning(
                FutureWarning, check_stacklevel=False):
            result = next(iter(self.read_csv(StringIO('foo,bar\n'),
                                             chunksize=10, as_recarray=True)))
            result = DataFrame(result[2], columns=result[1], index=result[0])
            tm.assert_frame_equal(DataFrame.from_records(result), expected,
                                  check_index_type=False) 
Example #14
Source File: test_sql.py    From elasticintel with GNU General Public License v3.0
def _to_sql_save_index(self):
        df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                    columns=['A', 'B', 'C'], index=['A'])
        self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
        ix_cols = self._get_index_columns('test_to_sql_saves_index')
        assert ix_cols == [['A', ], ] 
Example #15
Source File: test_combine_concat.py    From elasticintel with GNU General Public License v3.0
def test_concat_datetime_datetime64_frame(self):
        # #2624
        rows = []
        rows.append([datetime(2010, 1, 1), 1])
        rows.append([datetime(2010, 1, 2), 'hi'])

        df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])

        ind = date_range(start="2000/1/1", freq="D", periods=10)
        df1 = DataFrame({'date': ind, 'test': lrange(10)})

        # it works!
        pd.concat([df1, df2_obj]) 
Example #16
Source File: test_sql.py    From elasticintel with GNU General Public License v3.0
def test_multiindex_roundtrip(self):
        df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                    columns=['A', 'B', 'C'], index=['A', 'B'])

        df.to_sql('test_multiindex_roundtrip', self.conn)
        result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
                                    self.conn, index_col=['A', 'B'])
        tm.assert_frame_equal(df, result, check_index_type=True) 
Example #17
Source File: test_taar_locale.py    From python_mozetl with MIT License
def addon_counts():
    return DataFrame.from_records(
        SAMPLE_ADDON_COUNTS, columns=["locale", "addon", "count"]
    ) 
Example #18
Source File: numpy.py    From Splunking-Crime with GNU Affero General Public License v3.0
def recarray_select(recarray, fields):
    """"
    Work-around for changes in NumPy 1.13 that return views for recarray
    multiple column selection
    """
    from pandas import DataFrame
    fields = [fields] if not isinstance(fields, (tuple, list)) else fields
    if len(fields) == len(recarray.dtype):
        return recarray
    recarray = DataFrame.from_records(recarray)
    return recarray[fields].to_records(index=False) 
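
A small usage sketch for recarray_select, with illustrative field names: selecting a strict subset of fields round-trips through DataFrame.from_records and back to a record array, which sidesteps the NumPy 1.13 view behaviour mentioned in the docstring.

import numpy as np

rec = np.rec.array([(1, 2.0, 'a'), (3, 4.0, 'b')],
                   dtype=[('x', 'i8'), ('y', 'f8'), ('z', 'U1')])
sub = recarray_select(rec, ['x', 'y'])
# sub is a plain record array containing only the 'x' and 'y' fields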
Example #19
Source File: Sina.py    From dHydra with Apache License 2.0
def get_history_data(self, code, year, season):
        """
        新浪历史复权数据接口
        """
        res = self.session.get(url=URL_HISTORY_DATA(code, year, season))
        if res.status_code == 200:
            pattern_data = r'<div align="center">([\d\.]+)</div>'
            data = re.findall(pattern_data, res.text)
            records = util.slice_list(step=7, data_list=data)
            print(records)
            df = DataFrame.from_records(
                records,
                columns=[
                    'open',
                    'high',
                    'close',
                    'low',
                    'volume',
                    'amount',
                    'restoration_factor'
                ]
            )
            pattern_date = r'date=([\d]{4}-[\d]{2}-[\d]{2})'
            date = re.findall(pattern_date, res.text)
            df["date"] = date
            return df
        else:
            self.logger.debug("Status Code: {}".format(res.status_code))
            return False 
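
The scrape-then-from_records pattern above is easy to reproduce in isolation; here is a minimal sketch with stand-in values (slice_list imitates dHydra's util.slice_list, which chunks a flat list into fixed-size rows):

from pandas import DataFrame

def slice_list(step, data_list):
    # chunk a flat list into consecutive rows of `step` items
    return [data_list[i:i + step] for i in range(0, len(data_list), step)]

scraped = ['10.00', '10.50', '10.20', '9.90', '12000', '125000.0', '1.0',
           '10.20', '10.80', '10.60', '10.10', '15000', '158000.0', '1.0']
records = slice_list(step=7, data_list=scraped)
df = DataFrame.from_records(
    records,
    columns=['open', 'high', 'close', 'low',
             'volume', 'amount', 'restoration_factor'])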
Example #20
Source File: Xueqiu.py    From dHydra with Apache License 2.0
def fetch_quotation_coroutine(self, symbols=None):
        loop = asyncio.get_event_loop()
        if symbols is not None:
            async_req = loop.run_in_executor(
                None,
                functools.partial(
                    self.session.get,
                    URL_XUEQIU_QUOTE(symbols),
                    headers=HEADERS_XUEQIU
                )
            )
            try:
                quotation = yield from async_req
            except Exception as e:
                print(e)
                async_req = loop.run_in_executor(
                    None,
                    functools.partial(
                        self.session.get,
                        URL_XUEQIU_QUOTE(symbols),
                        headers=HEADERS_XUEQIU
                    )
                )
                quotation = yield from async_req
            quotation = quotation.json()
        return(quotation)

    # """
    # Fetch Xueqiu fundamentals for a single symbol.
    # The default return format is a dict; if the dataframe argument is True, a DataFrame is returned.
    # """
    # def fetch_quotation(self, symbols = None, dataframe = False):
    # 	symbols = util.symbols_to_string(symbols)
    # 	if symbols is not None:
    # 		quotation = self.session.get(
    # 			URL_XUEQIU_QUOTE(symbols)
    # 		,	headers = HEADERS_XUEQIU
    # 		).json()
    # 	if dataframe:
    # 		quotation = DataFrame.from_records( quotation ).T
    # 	return(quotation) 
Example #21
Source File: Xueqiu.py    From dHydra with Apache License 2.0
def get_today(self, symbol, period='1day', dataframe=True):
        quotation = self.session.get(
            URL_XUEQIU_CHART(symbol=symbol, period=period), headers=HEADERS_XUEQIU
        ).json()
        if quotation["success"] == "true":
            if dataframe:
                df = DataFrame.from_records(quotation["chartlist"])
                df["time"] = pandas.to_datetime(df["time"])
                df["time"] += timedelta(hours=8)
                df["symbol"] = symbol
                return df
            else:
                return quotation["chartlist"]
        else:
            return False 
Example #22
Source File: test_timezones.py    From twitter-stock-recommendation with MIT License
def test_frame_from_records_utc(self):
        rec = {'datum': 1.5,
               'begin_time': datetime(2006, 4, 27, tzinfo=pytz.utc)}

        # it works
        DataFrame.from_records([rec], index='begin_time') 
Example #23
Source File: test_combine_concat.py    From twitter-stock-recommendation with MIT License
def test_concat_datetime_datetime64_frame(self):
        # #2624
        rows = []
        rows.append([datetime(2010, 1, 1), 1])
        rows.append([datetime(2010, 1, 2), 'hi'])

        df2_obj = DataFrame.from_records(rows, columns=['date', 'test'])

        ind = date_range(start="2000/1/1", freq="D", periods=10)
        df1 = DataFrame({'date': ind, 'test': lrange(10)})

        # it works!
        pd.concat([df1, df2_obj]) 
Example #24
Source File: test_testing.py    From twitter-stock-recommendation with MIT License
def test_index_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True) 
Example #25
Source File: test_testing.py    From twitter-stock-recommendation with MIT License
def test_multiindex_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'b': [2.1, 1.5],
             'c': ['l1', 'l2']}, index=['a', 'b'])
        self._assert_not_equal(df1.c, df2.c, check_index_type=True) 
Example #26
Source File: test_testing.py    From twitter-stock-recommendation with MIT License
def test_index_dtype(self):
        df1 = DataFrame.from_records(
            {'a': [1, 2], 'c': ['l1', 'l2']}, index=['a'])
        df2 = DataFrame.from_records(
            {'a': [1.0, 2.0], 'c': ['l1', 'l2']}, index=['a'])
        self._assert_not_equal(df1, df2, check_index_type=True) 
Example #27
Source File: test_sql.py    From twitter-stock-recommendation with MIT License
def _to_sql_save_index(self):
        df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                    columns=['A', 'B', 'C'], index=['A'])
        self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
        ix_cols = self._get_index_columns('test_to_sql_saves_index')
        assert ix_cols == [['A', ], ] 
Example #28
Source File: test_sql.py    From twitter-stock-recommendation with MIT License
def test_multiindex_roundtrip(self):
        df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                    columns=['A', 'B', 'C'], index=['A', 'B'])

        df.to_sql('test_multiindex_roundtrip', self.conn)
        result = sql.read_sql_query('SELECT * FROM test_multiindex_roundtrip',
                                    self.conn, index_col=['A', 'B'])
        tm.assert_frame_equal(df, result, check_index_type=True) 
Example #29
Source File: test_sql.py    From vnpy_crypto with MIT License
def _to_sql_save_index(self):
        df = DataFrame.from_records([(1, 2.1, 'line1'), (2, 1.5, 'line2')],
                                    columns=['A', 'B', 'C'], index=['A'])
        self.pandasSQL.to_sql(df, 'test_to_sql_saves_index')
        ix_cols = self._get_index_columns('test_to_sql_saves_index')
        assert ix_cols == [['A', ], ]