Python pandas.ExcelFile() Examples

The following are 30 code examples of pandas.ExcelFile(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module pandas , or try the search function .
Example #1
Source File: test_excel.py    From Computable with MIT License 7 votes vote down vote up
def test_parse_cols_int(self):
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        suffix = ['xls', 'xlsx', 'xlsm']

        for s in suffix:
            pth = os.path.join(self.dirpath, 'test.%s' % s)
            xls = ExcelFile(pth)
            df = xls.parse('Sheet1', index_col=0, parse_dates=True,
                           parse_cols=3)
            df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            df2 = df2.reindex(columns=['A', 'B', 'C'])
            df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True, parse_cols=3)
            # TODO add index to xls file)
            tm.assert_frame_equal(df, df2, check_names=False)
            tm.assert_frame_equal(df3, df2, check_names=False) 
Example #2
Source File: simu_agent.py    From OpenData with Apache License 2.0 7 votes vote down vote up
def get_data(self, index):
        prog_cod = index_map[index]
        url = "https://www.barclayhedge.com/cgi-bin/barclay_stats/ghsndx.cgi"
        param = {
            'dump': 'excel',
            'prog_cod': prog_cod,
            }
        response = self.do_request(url, param=param, method='POST', type='binary')
        if response is not None:
            excel = pd.ExcelFile(io.BytesIO(response))
            df = excel.parse('Sheet1').dropna(how='all').copy().reset_index().drop(0)
            df.columns = ['year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec', 'YTD']
            df = df.set_index('year')
            return df, ''

        return None, "获取数据失败" 
Example #3
Source File: own_tech.py    From MultipleFactorRiskModel with MIT License 6 votes vote down vote up
def load_file(file_name, sheet_name_list):
    '''
    load xlsx file into a dictionary indexed by sheet names
    :param string file_name:name of file
    :param [string] sheet_name_list: name of selected sheets in the xlsx file
    :return: {string:DataFrame} raw_data: {name of sheet:pure data retrieved from xlsx
    with column and index 0,1,2,...}
    '''
    print 'loading file...'
    cut_head = 2
    file = pd.ExcelFile(file_name)
    raw_data = {}
    # iterate over every sheet and retrieve useful data into raw_data
    for i in range(len(sheet_name_list)):
        print 'parsing sheet', sheet_name_list[i]
        # parse a sheet from the whole file into a DataFrame with headers cut off
        temp = file.parse(sheet_name_list[i]).iloc[cut_head:, :]
        # now temp.dtype = object,because the data read in contains string.Here convert it to float
        temp = temp.astype(np.float)
        # reset index and column with 0,1,2,...,
        temp.columns = range(temp.shape[1])
        temp.index = range(temp.shape[0])
        temp.fillna(0, inplace=True)
        raw_data[sheet_name_list[i]] = temp
    return raw_data 
Example #4
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_parse_cols_list(self):
        _skip_if_no_openpyxl()
        _skip_if_no_xlrd()

        suffix = ['xls', 'xlsx', 'xlsm']

        for s in suffix:
            pth = os.path.join(self.dirpath, 'test.%s' % s)
            xls = ExcelFile(pth)
            df = xls.parse('Sheet1', index_col=0, parse_dates=True,
                           parse_cols=[0, 2, 3])
            df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
            df2 = df2.reindex(columns=['B', 'C'])
            df3 = xls.parse('Sheet2', skiprows=[1], index_col=0,
                            parse_dates=True,
                            parse_cols=[0, 2, 3])
            # TODO add index to xls file)
            tm.assert_frame_equal(df, df2, check_names=False)
            tm.assert_frame_equal(df3, df2, check_names=False) 
Example #5
Source File: xls2json.py    From indy-ssivc-tutorial with Apache License 2.0 6 votes vote down vote up
def xls2csv(inputfile):
    data_xls = pd.ExcelFile(inputfile)
    # print data_xls.sheet_names
    # print len(data_xls.sheet_names)
    csvs = []
    for tab in data_xls.sheet_names:
        if ".csv" in tab:
            # Get the names of the columns
            column_list = []
            df_column = pd.read_excel(data_xls, tab).columns
            for i in df_column:
                column_list.append(i)
            # Create a converter so column data are all strings
            converter = {col: str for col in column_list}
            # Read the excel tabs
            tab_xls =  pd.read_excel(data_xls, tab, index_col=None, converters=converter)
            tab_xls.to_csv(tab, encoding='utf-8', index=False)
            csvs.append(tab)
    return csvs 
Example #6
Source File: plexos.py    From GridCal with GNU General Public License v3.0 6 votes vote down vote up
def parse_excel(fname):
        """
        Parse excel export of the plexos file
        :param fname: complete path to the file
        """
        excel = pd.ExcelFile(fname)

        print('Reading objects...')
        objects = excel.parse(sheet_name='Objects')

        print('Reading Memberships...')
        memberships = excel.parse(sheet_name='Memberships')

        print('Reading Properties...')
        properties = excel.parse(sheet_name='Properties')
        properties.rename(columns={'filename': 'path'})
        excel.close()

        # file_dict = {row['child_object']: row['filename'] for i, row in properties.iterrows()}

        return objects, memberships, properties 
Example #7
Source File: xlsx_compare_test.py    From xgbfir with MIT License 6 votes vote down vote up
def _compare_xlsx(self, file1, file2, rtol=1e-02, atol=1e-03):
#        print("requested compare: {} and {}".format(file1, file2))
        xl1 = pd.ExcelFile(file1)
        xl2 = pd.ExcelFile(file2)
        self.assertEqual(xl1.sheet_names, xl2.sheet_names)
        
        for sheet in xl1.sheet_names:
#            print("Prrocessing sheet {}".format(sheet))
            df1 = xl1.parse(sheet)
            df2 = xl2.parse(sheet)
            columns1 = list(df1)
            columns2 = list(df2)
            self.assertEqual(len(columns1), len(columns2))
            arr1 = df1.values
            arr2 = df2.values

            self.assertEqual(arr1.shape, arr2.shape)
            for x, y in np.ndindex(arr1.shape):
                v1 = arr1[x, y]
                v2 = arr2[x, y]
#                print("{}: ({}, {}): {} vs {}".format(sheet, x, y, v1, v2))
                if isinstance(v1, six.string_types) or isinstance(v2, six.string_types):
                    self.assertEqual(v1, v2)
                else:
                    npt.assert_allclose(v1, v2, rtol=rtol, atol=atol) 
Example #8
Source File: parseundp.py    From Semantic-Search-for-Sustainable-Development with Apache License 2.0 6 votes vote down vote up
def extract_template_data(path = '.', exclude = []):
    data = []
    found = False
    for file in os.listdir(path):
        if file[-4:] == 'xlsx' and '~' not in file and file not in exclude:
            file_path = os.path.join(path, file)
            xls = pd.ExcelFile(file_path)
            for sheet in range(len(xls.sheet_names)):
                template = pd.read_excel(file_path, header = None, sheetname = sheet)
                template.fillna('', inplace = True)
                for col in range(len(template.columns)):
                    try:
                        if 'Identify closest' in str(template.iloc[0][col]) or 'Identify closest' in str(template.iloc[1][col]):
                            keep = col
                            found = True
                        elif 'National Development Plan' in str(template.iloc[1][col]) and not found:
                            keep = col
                            found = True
                    except:
                        continue
                if found:
                    data.append(template[[template.keys()[1], template.keys()[keep]]])
                found = False

    return data 
Example #9
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_excel_table(self):
        _skip_if_no_xlrd()

        pth = os.path.join(self.dirpath, 'test.xls')
        xls = ExcelFile(pth)
        df = xls.parse('Sheet1', index_col=0, parse_dates=True)
        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        df3 = xls.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)

        df4 = xls.parse('Sheet1', index_col=0, parse_dates=True,
                        skipfooter=1)
        df5 = xls.parse('Sheet1', index_col=0, parse_dates=True,
                        skip_footer=1)
        tm.assert_frame_equal(df4, df.ix[:-1])
        tm.assert_frame_equal(df4, df5) 
Example #10
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_xlsx_table(self):
        _skip_if_no_xlrd()
        _skip_if_no_openpyxl()

        pth = os.path.join(self.dirpath, 'test.xlsx')
        xlsx = ExcelFile(pth)
        df = xlsx.parse('Sheet1', index_col=0, parse_dates=True)
        df2 = self.read_csv(self.csv1, index_col=0, parse_dates=True)
        df3 = xlsx.parse('Sheet2', skiprows=[1], index_col=0, parse_dates=True)

        # TODO add index to xlsx file
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)

        df4 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                         skipfooter=1)
        df5 = xlsx.parse('Sheet1', index_col=0, parse_dates=True,
                         skip_footer=1)
        tm.assert_frame_equal(df4, df.ix[:-1])
        tm.assert_frame_equal(df4, df5) 
Example #11
Source File: eia861.py    From pudl with MIT License 6 votes vote down vote up
def get_xlsx_dict(self, years, file_name):
        """Read in Excel files to create Excel objects.

        Rather than reading in the same Excel files several times, we can just
        read them each in once (one per year) and use the ExcelFile object to
        refer back to the data in memory.

        Args:
            years (list): The years that we're trying to read data for.
            file_name (str): Name of the excel file.

        """
        for yr in years:
            try:
                self.xlsx_dict[yr]
                logger.info(f"we already have an xlsx file for {yr}")
            except KeyError:
                logger.info(
                    f"Extracting data from {self.dataset_name} {file_name} spreadsheet for {yr}.")
                self.xlsx_dict[yr] = pd.ExcelFile(
                    self.get_file(yr, file_name)
                ) 
Example #12
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_sheets(self):
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # Test writing to separate sheets
            writer = ExcelWriter(path)
            self.frame.to_excel(writer, 'test1')
            self.tsframe.to_excel(writer, 'test2')
            writer.save()
            reader = ExcelFile(path)
            recons = reader.parse('test1', index_col=0)
            tm.assert_frame_equal(self.frame, recons)
            recons = reader.parse('test2', index_col=0)
            tm.assert_frame_equal(self.tsframe, recons)
            np.testing.assert_equal(2, len(reader.sheet_names))
            np.testing.assert_equal('test1', reader.sheet_names[0])
            np.testing.assert_equal('test2', reader.sheet_names[1]) 
Example #13
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_colaliases(self):
        _skip_if_no_xlrd()

        with ensure_clean(self.ext) as path:
            self.frame['A'][:5] = nan

            self.frame.to_excel(path, 'test1')
            self.frame.to_excel(path, 'test1', cols=['A', 'B'])
            self.frame.to_excel(path, 'test1', header=False)
            self.frame.to_excel(path, 'test1', index=False)

            # column aliases
            col_aliases = Index(['AA', 'X', 'Y', 'Z'])
            self.frame2.to_excel(path, 'test1', header=col_aliases)
            reader = ExcelFile(path)
            rs = reader.parse('test1', index_col=0)
            xp = self.frame2.copy()
            xp.columns = col_aliases
            tm.assert_frame_equal(xp, rs) 
Example #14
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_excel_roundtrip_indexname(self):
        _skip_if_no_xlrd()

        df = DataFrame(np.random.randn(10, 4))
        df.index.name = 'foo'

        with ensure_clean(self.ext) as path:
            df.to_excel(path, merge_cells=self.merge_cells)

            xf = ExcelFile(path)
            result = xf.parse(xf.sheet_names[0],
                              index_col=0,
                              has_index_names=self.merge_cells)

            tm.assert_frame_equal(result, df)
            self.assertEqual(result.index.name, 'foo') 
Example #15
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_to_excel_multiindex(self):
        _skip_if_no_xlrd()

        frame = self.frame
        arrays = np.arange(len(frame.index) * 2).reshape(2, -1)
        new_index = MultiIndex.from_arrays(arrays,
                                           names=['first', 'second'])
        frame.index = new_index

        with ensure_clean(self.ext) as path:
            frame.to_excel(path, 'test1', header=False)
            frame.to_excel(path, 'test1', cols=['A', 'B'])

            # round trip
            frame.to_excel(path, 'test1', merge_cells=self.merge_cells)
            reader = ExcelFile(path)
            df = reader.parse('test1', index_col=[0, 1],
                              parse_dates=False,
                              has_index_names=self.merge_cells)
            tm.assert_frame_equal(frame, df)
            self.assertEqual(frame.index.names, df.index.names) 
Example #16
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_int_types(self):
        _skip_if_no_xlrd()

        for np_type in (np.int8, np.int16, np.int32, np.int64):

            with ensure_clean(self.ext) as path:
                # Test np.int values read come back as int (rather than float
                # which is Excel's format).
                frame = DataFrame(np.random.randint(-10, 10, size=(10, 2)),
                                  dtype=np_type)
                frame.to_excel(path, 'test1')
                reader = ExcelFile(path)
                recons = reader.parse('test1')
                int_frame = frame.astype(np.int64)
                tm.assert_frame_equal(int_frame, recons)
                recons2 = read_excel(path, 'test1')
                tm.assert_frame_equal(int_frame, recons2)

                # test with convert_float=False comes back as float
                float_frame = frame.astype(float)
                recons = read_excel(path, 'test1', convert_float=False)
                tm.assert_frame_equal(recons, float_frame) 
Example #17
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_to_excel_multiindex_dates(self):
        _skip_if_no_xlrd()

        # try multiindex with dates
        tsframe = self.tsframe.copy()
        new_index = [tsframe.index, np.arange(len(tsframe.index))]
        tsframe.index = MultiIndex.from_arrays(new_index)

        with ensure_clean(self.ext) as path:
            tsframe.index.names = ['time', 'foo']
            tsframe.to_excel(path, 'test1', merge_cells=self.merge_cells)
            reader = ExcelFile(path)
            recons = reader.parse('test1',
                                  index_col=[0, 1],
                                  has_index_names=self.merge_cells)

            tm.assert_frame_equal(tsframe, recons)
            self.assertEquals(recons.index.names, ('time', 'foo')) 
Example #18
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_to_excel_multiindex_no_write_index(self):
        _skip_if_no_xlrd()

        # Test writing and re-reading a MI witout the index. GH 5616.

        # Initial non-MI frame.
        frame1 = pd.DataFrame({'a': [10, 20], 'b': [30, 40], 'c': [50, 60]})

        # Add a MI.
        frame2 = frame1.copy()
        multi_index = pd.MultiIndex.from_tuples([(70, 80), (90, 100)])
        frame2.index = multi_index

        with ensure_clean(self.ext) as path:

            # Write out to Excel without the index.
            frame2.to_excel(path, 'test1', index=False)

            # Read it back in.
            reader = ExcelFile(path)
            frame3 = reader.parse('test1')

            # Test that it is the same as the initial frame.
            tm.assert_frame_equal(frame1, frame3) 
Example #19
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def check_excel_table_sheet_by_index(self, filename, csvfile):
        import xlrd

        pth = os.path.join(self.dirpath, filename)
        xls = ExcelFile(pth)
        df = xls.parse(0, index_col=0, parse_dates=True)
        df2 = self.read_csv(csvfile, index_col=0, parse_dates=True)
        df3 = xls.parse(1, skiprows=[1], index_col=0, parse_dates=True)
        tm.assert_frame_equal(df, df2, check_names=False)
        tm.assert_frame_equal(df3, df2, check_names=False)

        df4 = xls.parse(0, index_col=0, parse_dates=True, skipfooter=1)
        df5 = xls.parse(0, index_col=0, parse_dates=True, skip_footer=1)
        tm.assert_frame_equal(df4, df.ix[:-1])
        tm.assert_frame_equal(df4, df5)

        self.assertRaises(xlrd.XLRDError, xls.parse, 'asdf') 
Example #20
Source File: test_excel.py    From Computable with MIT License 6 votes vote down vote up
def test_to_excel_float_format(self):
        _skip_if_no_xlrd()

        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                        index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean(self.ext) as filename:
            df.to_excel(filename, 'test1', float_format='%.2f')

            reader = ExcelFile(filename)
            rs = reader.parse('test1', index_col=None)
            xp = DataFrame([[0.12, 0.23, 0.57],
                            [12.32, 123123.20, 321321.20]],
                            index=['A', 'B'], columns=['X', 'Y', 'Z'])
            tm.assert_frame_equal(rs, xp) 
Example #21
Source File: test_check_object.py    From pythonwhat with GNU Affero General Public License v3.0 5 votes vote down vote up
def test_manual_converter():
    res = helper.run(
        {
            "DC_CODE": "xl = pd.ExcelFile('battledeath2.xlsx')",
            "DC_SOLUTION": "xl = pd.ExcelFile('battledeath.xlsx')",
            "DC_PEC": "import pandas as pd; from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath.xlsx'); from urllib.request import urlretrieve; urlretrieve('https://s3.amazonaws.com/assets.datacamp.com/production/course_998/datasets/battledeath.xlsx', 'battledeath2.xlsx')",
            "DC_SCT": """
def my_converter(x): return(x.sheet_names)
set_converter(key = "pandas.io.excel.ExcelFile", fundef = my_converter)
Ex().check_object('xl').has_equal_value()
""",
        }
    )
    assert res["correct"] 
Example #22
Source File: excel.py    From pudl with MIT License 5 votes vote down vote up
def _load_excel_file(self, year, page):
        """Returns ExcelFile object corresponding to given (year, page).

        Additionally, loaded files are stored under self._file_cache for reuse.
        """
        full_path = self._get_file_path(year, page)
        if full_path not in self._file_cache:
            logger.info(
                f'{self._dataset_name}: Loading excel file {full_path}')
            self._file_cache[full_path] = pd.ExcelFile(full_path)
        return self._file_cache[full_path] 
Example #23
Source File: utils.py    From pyam with Apache License 2.0 5 votes vote down vote up
def read_pandas(path, default_sheet='data', *args, **kwargs):
    """Read a file and return a pandas.DataFrame"""
    if path.endswith('csv'):
        df = pd.read_csv(path, *args, **kwargs)
    else:
        xl = pd.ExcelFile(path)
        if len(xl.sheet_names) > 1 and 'sheet_name' not in kwargs:
            kwargs['sheet_name'] = default_sheet
        df = pd.read_excel(path, *args, **kwargs)
    return df 
Example #24
Source File: core.py    From pyam with Apache License 2.0 5 votes vote down vote up
def _init(self, data, **kwargs):
        """Process data and set attributes for new instance"""
        # import data from pd.DataFrame or read from source
        if isinstance(data, pd.DataFrame) or isinstance(data, pd.Series):
            _data = format_data(data.copy(), **kwargs)
        elif has_ix and isinstance(data, ixmp.TimeSeries):
            _data = read_ix(data, **kwargs)
        else:
            logger.info('Reading file `{}`'.format(data))
            _data = read_file(data, **kwargs)

        self.data, self.time_col, self.extra_cols = _data
        # cast time_col to desired format
        if self.time_col == 'year':
            self._format_year_col()
        elif self.time_col == 'time':
            self._format_datetime_col()

        self._LONG_IDX = IAMC_IDX + [self.time_col] + self.extra_cols

        # define `meta` dataframe for categorization & quantitative indicators
        self.meta = self.data[META_IDX].drop_duplicates().set_index(META_IDX)
        self.reset_exclude()

        # if initializing from xlsx, try to load `meta` table from file
        meta_sheet = kwargs.get('meta_sheet_name', 'meta')
        if isstr(data) and data.endswith('.xlsx') and meta_sheet is not False\
                and meta_sheet in pd.ExcelFile(data).sheet_names:
            self.load_meta(data, sheet_name=meta_sheet)

        # execute user-defined code
        if 'exec' in run_control():
            self._execute_run_control() 
Example #25
Source File: base.py    From openscm with GNU Affero General Public License v3.0 5 votes vote down vote up
def _read_pandas(fname: str, *args: Any, **kwargs: Any) -> pd.DataFrame:
    """
    Read a file and return a :class:`pd.DataFrame`.

    Parameters
    ----------
    fname
        Path from which to read data
    *args
        Passed to :func:`pd.read_csv` if :obj:`fname` ends with '.csv', otherwise passed
        to :func:`pd.read_excel`.
    **kwargs
        Passed to :func:`pd.read_csv` if :obj:`fname` ends with '.csv', otherwise passed
        to :func:`pd.read_excel`.

    Returns
    -------
    :obj:`pd.DataFrame`
        Read data

    Raises
    ------
    OSError
        Path specified by :obj:`fname` does not exist
    """
    if not os.path.exists(fname):
        raise OSError("no data file `{}` found!".format(fname))
    if fname.endswith("csv"):
        df = pd.read_csv(fname, *args, **kwargs)
    else:
        xl = pd.ExcelFile(fname)
        if len(xl.sheet_names) > 1 and "sheet_name" not in kwargs:
            kwargs["sheet_name"] = "data"
        df = pd.read_excel(fname, *args, **kwargs)

    return df


# pylint doesn't recognise return statements if they include ',' 
Example #26
Source File: databases.py    From CityEnergyAnalyst with MIT License 5 votes vote down vote up
def database_to_dict(db_path):
    out = OrderedDict()
    xls = pd.ExcelFile(db_path)
    for sheet in xls.sheet_names:
        df = xls.parse(sheet, keep_default_na=False)
        out[sheet] = df.to_dict(orient='records', into=OrderedDict)
    return out 
Example #27
Source File: DyStockDataGateway.py    From DevilYuan with MIT License 5 votes vote down vote up
def _getTickDataFrom163(code=None, date=None, retry_count=3, pause=0.001):
        """
            从网易获取分笔数据
            网易的分笔数据只有最近5日的
            接口和返回的DF,保持跟tushare一致
        Parameters
        ------
            code:string
                        股票代码 e.g. 600848
            date:string
                        日期 format:YYYY-MM-DD
            retry_count : int, 默认 3
                        如遇网络等问题重复执行的次数
            pause : int, 默认 0
                        重复请求数据过程中暂停的秒数,防止请求间隔时间太短出现的问题
            return
            -------
            DataFrame 当日所有股票交易数据(DataFrame)
                    属性:成交时间、成交价格、价格变动,成交手、成交金额(元),买卖类型
        """
        if code is None or len(code)!=6 or date is None:
            return None
        symbol = DyStockDataTicksGateway._codeTo163Symbol(code)
        yyyy, mm, dd = date.split('-')
        for _ in range(retry_count):
            sleep(pause)
            try:
                url = 'http://quotes.money.163.com/cjmx/{0}/{1}/{2}.xls'.format(yyyy, yyyy+mm+dd, symbol)
                socket = urlopen(url)
                xd = pd.ExcelFile(socket)
                df = xd.parse(xd.sheet_names[0], names=['time', 'price', 'change', 'volume', 'amount', 'type'])
                df['amount'] = df['amount'].astype('int64') # keep same as tushare
            except Exception as e:
                print(e)
                ex = e
            else:
                return df
        raise ex 
Example #28
Source File: Options Straddle backtest.py    From quant-trading with Apache License 2.0 5 votes vote down vote up
def main():
    
    data=pd.ExcelFile('stoxx50.xlsx')
    
    aug=data.parse('aug')
    aug.set_index('Dates',inplace=True)
    aug.index=pd.to_datetime(aug.index)
    
    spot=data.parse('spot')
    spot.set_index('Dates',inplace=True)
    spot.index=pd.to_datetime(spot.index)
    
    target=find_strike_price(aug)
    
    #we iterate through all the available option pairs
    #to find the optimal strike price to maximize our profit
    
    for strikeprice in target:
      
        df=straddle(aug,spot,contractsize,strikeprice)
        
        signal=signal_generation(df,threshold)
        
        plot(signal,strikeprice,contractsize)


# In[8]: 
Example #29
Source File: generate_xhale.py    From sharpy with BSD 3-Clause "New" or "Revised" License 5 votes vote down vote up
def read_aero_data(filename='inputs/aero_properties.xlsx'):
        import pandas as pd

        xl = pd.ExcelFile(filename)
        sheets = {sheet_name: xl.parse(sheet_name, header=0, index_col=0) for sheet_name in xl.sheet_names}

        aero_data = dict()
        for sheet, val in sheets.items():
            aero_data[sheet] = dict()
            for item in val['value'].items():
                aero_data[sheet][item[0]] = item[1]

        return aero_data 
Example #30
Source File: eia861.py    From pudl with MIT License 5 votes vote down vote up
def get_path_name(self, yr, file_name):
        """Get the ExcelFile file path name."""
        return self.get_meta('file_name_map', None).loc[yr, file_name]