Python openpyxl.load_workbook() Examples

The following are 30 code examples of openpyxl.load_workbook(). You can vote up the ones you like or vote down the ones you don't like, and go to the original project or source file by following the links above each example. You may also want to check out all available functions/classes of the module openpyxl , or try the search function .
Example #1
Source File: file_utils.py    From seqr with GNU Affero General Public License v3.0 8 votes vote down vote up
def parse_file(filename, stream):
    if filename.endswith('.tsv') or filename.endswith('.fam') or filename.endswith('.ped'):
        return [[s.strip().strip('"') for s in line.rstrip('\n').split('\t')] for line in stream]

    elif filename.endswith('.csv'):
        return [row for row in csv.reader(stream)]

    elif filename.endswith('.xls') or filename.endswith('.xlsx'):
        wb = xl.load_workbook(stream, read_only=True)
        ws = wb[wb.sheetnames[0]]
        rows = [[_parse_excel_string_cell(cell) for cell in row] for row in ws.iter_rows()]
        # trim trailing empty rows
        last_row_index = max(i for i, row in enumerate(rows) if any(val for val in row))
        rows = rows[:last_row_index+1]
        # all rows should have same column count
        last_col_index = max(max(i for i, val in enumerate(row) if val) for row in rows)
        padding = [''] * last_col_index
        rows = [(row + padding)[:last_col_index+1] for row in rows]

        return rows

    elif filename.endswith('.json'):
        return json.loads(stream.read())

    raise ValueError("Unexpected file type: {}".format(filename)) 
Example #2
Source File: test_export_builder.py    From kobo-predict with BSD 2-Clause "Simplified" License 7 votes vote down vote up
def test_to_xls_export_respects_custom_field_delimiter(self):
        survey = self._create_childrens_survey()
        export_builder = ExportBuilder()
        export_builder.GROUP_DELIMITER = ExportBuilder.GROUP_DELIMITER_DOT
        export_builder.set_survey(survey)
        xls_file = NamedTemporaryFile(suffix='.xls')
        filename = xls_file.name
        export_builder.to_xls_export(filename, self.data)
        xls_file.seek(0)
        wb = load_workbook(filename)

        # check header columns
        main_sheet = wb.get_sheet_by_name('childrens_survey')
        expected_column_headers = [
            u'name', u'age', u'geo.geolocation', u'geo._geolocation_latitude',
            u'geo._geolocation_longitude', u'geo._geolocation_altitude',
            u'geo._geolocation_precision', u'tel.tel.office',
            u'tel.tel.mobile', u'_id', u'meta.instanceID', u'_uuid',
            u'_submission_time', u'_index', u'_parent_index',
            u'_parent_table_name', u'_tags', '_notes']
        column_headers = [c[0].value for c in main_sheet.columns]
        self.assertEqual(sorted(column_headers),
                         sorted(expected_column_headers))
        xls_file.close() 
Example #3
Source File: kifield.py    From KiField with MIT License 7 votes vote down vote up
def insert_part_fields_into_xlsx(part_fields_dict, filename, recurse, group_components, backup):
    '''Insert the fields in the extracted part dictionary into an XLSX spreadsheet.'''

    logger.log(
        DEBUG_OVERVIEW,
        'Inserting extracted fields into XLSX file {}.'.format(filename))

    if backup:
        create_backup(filename)

    # Either insert fields into an existing workbook, or use an empty one.
    try:
        wb = pyxl.load_workbook(filename, data_only=True)
    except IOError:
        wb = None

    wb = insert_part_fields_into_wb(part_fields_dict, wb)

    if group_components:
        wb = group_wb(wb)

    wb.save(filename) 
Example #4
Source File: cellInverter.py    From automate-the-boring-stuff-projects with MIT License 7 votes vote down vote up
def invertCells(filename):
    """inverts all cells in a workbook
    Args:
        filename (str): excel file to invert cells in
    Returns:
        None
    """
    wb = openpyxl.load_workbook(filename)
    sheet = wb.active
    newSheet = wb.create_sheet(index=0, title='inverted')

    for rowObj in sheet.rows:
        for cellObj in rowObj:
            colIndex = cellObj.column
            rowIndex = cellObj.row

            newSheet.cell(row=colIndex, column=rowIndex).value = cellObj.value

    wb.save('result_'+filename) 
Example #5
Source File: test_excel.py    From recruit with Apache License 2.0 7 votes vote down vote up
def test_write_append_mode(self, merge_cells, ext, engine, mode, expected):
        import openpyxl
        df = DataFrame([1], columns=['baz'])

        with ensure_clean(ext) as f:
            wb = openpyxl.Workbook()
            wb.worksheets[0].title = 'foo'
            wb.worksheets[0]['A1'].value = 'foo'
            wb.create_sheet('bar')
            wb.worksheets[1]['A1'].value = 'bar'
            wb.save(f)

            writer = ExcelWriter(f, engine=engine, mode=mode)
            df.to_excel(writer, sheet_name='baz', index=False)
            writer.save()

            wb2 = openpyxl.load_workbook(f)
            result = [sheet.title for sheet in wb2.worksheets]
            assert result == expected

            for index, cell_value in enumerate(expected):
                assert wb2.worksheets[index]['A1'].value == cell_value 
Example #6
Source File: templated_workbook.py    From openpyxl-templates with MIT License 6 votes vote down vote up
def __init__(self, file=None, template_styles=None, timestamp=None, templated_sheets=None, keep_vba=False,
                  data_only=False, keep_links=True):
        super(TemplatedWorkbook, self).__init__()

        self.workbook = load_workbook(
            filename=file,
            data_only=data_only,
            keep_vba=keep_vba,
            keep_links=keep_links
        ) if file else Workbook()

        self.template_styles = template_styles or DefaultStyleSet()
        self.timestamp = timestamp

        self.templated_sheets = []
        for sheetname, templated_sheet in self._items.items():
            self.add_templated_sheet(templated_sheet, sheetname=sheetname, add_to_self=False)

        for templated_sheet in templated_sheets or []:
            self.add_templated_sheet(sheet=templated_sheet, sheetname=templated_sheet.sheetname, add_to_self=True)

        self._validate() 
Example #7
Source File: test_export_builder.py    From kobo-predict with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_to_xls_export_generates_valid_sheet_names(self):
        survey = create_survey_from_xls(_logger_fixture_path(
            'childrens_survey_with_a_very_long_name.xls'))
        export_builder = ExportBuilder()
        export_builder.set_survey(survey)
        xls_file = NamedTemporaryFile(suffix='.xls')
        filename = xls_file.name
        export_builder.to_xls_export(filename, self.data)
        xls_file.seek(0)
        wb = load_workbook(filename)
        # check that we have childrens_survey, children, children_cartoons
        # and children_cartoons_characters sheets
        expected_sheet_names = ['childrens_survey_with_a_very_lo',
                                'childrens_survey_with_a_very_l1',
                                'childrens_survey_with_a_very_l2',
                                'childrens_survey_with_a_very_l3']
        self.assertEqual(wb.get_sheet_names(), expected_sheet_names)
        xls_file.close() 
Example #8
Source File: test_export_builder.py    From kobo-predict with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_child_record_parent_table_is_updated_when_sheet_is_renamed(self):
        survey = create_survey_from_xls(_logger_fixture_path(
            'childrens_survey_with_a_very_long_name.xls'))
        export_builder = ExportBuilder()
        export_builder.set_survey(survey)
        xls_file = NamedTemporaryFile(suffix='.xlsx')
        filename = xls_file.name
        export_builder.to_xls_export(filename, self.long_survey_data)
        xls_file.seek(0)
        wb = load_workbook(filename)

        # get the children's sheet
        ws1 = wb.get_sheet_by_name('childrens_survey_with_a_very_l1')

        # parent_table is in cell K2
        parent_table_name = ws1.cell('K2').value
        expected_parent_table_name = 'childrens_survey_with_a_very_lo'
        self.assertEqual(parent_table_name, expected_parent_table_name)

        # get cartoons sheet
        ws2 = wb.get_sheet_by_name('childrens_survey_with_a_very_l2')
        parent_table_name = ws2.cell('G2').value
        expected_parent_table_name = 'childrens_survey_with_a_very_l1'
        self.assertEqual(parent_table_name, expected_parent_table_name)
        xls_file.close() 
Example #9
Source File: script_meta_xlsx_import.py    From TorCMS with MIT License 6 votes vote down vote up
def import_meta():
    inws = pathlib.Path('./database/datacn_datasets')
    print(inws)
    # for y in os.listdir(inws):
    #     print(y)
    for cat_path in inws.iterdir():
        print('x' * 40)
        print(cat_path.name)
        if cat_path.name.lower().endswith('.xlsx'):
            print('////')
            pass
        else:
            continue
        wb = load_workbook(str(cat_path))
        sheets = wb.sheetnames
        for sheet in sheets:
            catid = sheet.split('_')[0]
            ws = wb[sheet]
            rows = ws.max_row
            cols= ws.max_column
            for i in range(1,rows):
                sig = ws.cell(row=i, column=1).value
                print(sig)
                if sig:
                    get_meta(catid, sig) 
Example #10
Source File: xls.py    From rekall with GNU General Public License v2.0 6 votes vote down vote up
def __init__(self, output=None, **kwargs):
        super(XLSRenderer, self).__init__(**kwargs)

        # Make a single delegate text renderer for reuse. Most of the time we
        # will just replicate the output from the TextRenderer inside the
        # spreadsheet cell.
        self.delegate_text_renderer = text.TextRenderer(session=self.session)

        self.output = output or self.session.GetParameter("output")

        # If no output filename was give, just make a name based on the time
        # stamp.
        if self.output == None:
            self.output = "%s.xls" % time.ctime()

        try:
            self.wb = openpyxl.load_workbook(self.output)
            self.current_ws = self.wb.create_sheet()
        except IOError:
            self.wb = openpyxl.Workbook()
            self.current_ws = self.wb.active 
Example #11
Source File: test_export_builder.py    From kobo-predict with BSD 2-Clause "Simplified" License 6 votes vote down vote up
def test_xls_export_works_with_unicode(self):
        survey = create_survey_from_xls(_logger_fixture_path(
            'childrens_survey_unicode.xls'))
        export_builder = ExportBuilder()
        export_builder.set_survey(survey)
        temp_xls_file = NamedTemporaryFile(suffix='.xlsx')
        export_builder.to_xls_export(temp_xls_file.name, self.data_utf8)
        temp_xls_file.seek(0)
        # check that values for red\u2019s and blue\u2019s are set to true
        wb = load_workbook(temp_xls_file.name)
        children_sheet = wb.get_sheet_by_name("children.info")
        data = dict([(r[0].value, r[1].value) for r in children_sheet.columns])
        self.assertTrue(data[u'children.info/fav_colors/red\u2019s'])
        self.assertTrue(data[u'children.info/fav_colors/blue\u2019s'])
        self.assertFalse(data[u'children.info/fav_colors/pink\u2019s'])
        temp_xls_file.close() 
Example #12
Source File: Acunetix11-Scan-Agent.py    From fuzzdb-collect with GNU General Public License v3.0 6 votes vote down vote up
def write_xlsx(vulnerabilities):
    print(define.GREEN+"[*]内容正在写入 vu_name:%s"%vulnerabilities['vt_name'])
    wb = ws.load_workbook(define.filename)
    sheet1 = wb['Sheet']
    num = sheet1.max_row
    sheet1.cell(row = num+1,column = 1,value = vulnerabilities['host'])
    sheet1.cell(row = num+1,column = 2,value = vulnerabilities['vt_name'])
    sheet1.cell(row = num+1,column = 3,value = vulnerabilities['severity'])
    sheet1.cell(row = num+1,column = 4,value = vulnerabilities['affects_detail'])
    sheet1.cell(row = num+1,column = 5,value = vulnerabilities['affects_url'])
    sheet1.cell(row = num+1,column = 6,value = vulnerabilities['request'])
    sheet1.cell(row = num+1,column = 7,value = vulnerabilities['recommendation'])
    sheet1.cell(row = num+1,column = 8,value = vulnerabilities['description'])
    sheet1.cell(row = num+1,column = 9,value = vulnerabilities['details'])
    wb.save(define.filename)

#文件创建 
Example #13
Source File: host_import.py    From KubeOperator with Apache License 2.0 6 votes vote down vote up
def _parse_excel_to_hosts(self):
        wb = load_workbook(self.path)
        sheet_names = wb.sheetnames
        for s_name in sheet_names:
            sh = wb[s_name]
            rows = list(sh.rows)
            for row in rows:
                if row[0].row == 1:
                    continue
                else:
                    defaults = {
                        "name": row[0].value,
                        "ip": row[1].value,
                        "port": int(row[2].value),
                        "credential": row[3].value
                    }
                    self.__hosts.append(defaults) 
Example #14
Source File: script_posts_import.py    From TorCMS with MIT License 6 votes vote down vote up
def chli_xlsx(cat_path):
    print('操作中,请等待')
    result_xlsx = 'xx_{}'.format(os.path.split(str(cat_path))[-1])

    wb = load_workbook(cat_path)

    # uid_reg = re.compile('\_kind-\w{1}')

    # kind_pat = re.search(uid_reg, str(cat_path))
    # if kind_pat:
    kind_sig = '9'

    sheets = wb.sheetnames
    for sheet in sheets:

        catid = sheet.split('_')[0]
        ws = wb[sheet]
        rows = ws.max_row
        cols = ws.max_column
        for i in range(1, rows + 1):
            sig = ws.cell(row=i, column=1).value

            if sig:
                atitle = get_meta(catid, sig, kind_sig=kind_sig) 
Example #15
Source File: levelhelper.py    From python-script with Apache License 2.0 6 votes vote down vote up
def excel_to_db(from_excel, to_db):
    '''Transfer Excel file to leveldb, return total count.'''
    _wb = load_workbook(from_excel, read_only=True)
    _ws = _wb.active
    _db = leveldb.LevelDB(to_db, create_if_missing=True) if isinstance(to_db, str) else to_db
    total = 0
    for _row in _ws.iter_rows(min_row=2, min_col=1, max_col=1):
        if _row and _row[0] and _row[1]:
            _key, _value = '', ''
            if _row[0].data_type == cell.Cell.TYPE_STRING:
                _key = _row[0].value.encode('utf-8')
                _key = ''.join(_key.split())
            if _row[1].data_type == cell.Cell.TYPE_STRING:
                _value = _row[0].value.encode('utf-8')
                _value = ''.join(_value.split())
            _db.Put(_key, _value)
            total += 1

    _wb.close()
    return total 
Example #16
Source File: excel_combine.py    From python-script with Apache License 2.0 6 votes vote down vote up
def load_excel(excel_file):
    '''读取Excel文件内容,返回Excel的标题数组和数据有序字典'''
    _wb = load_workbook(excel_file, read_only=True)
    _ws = _wb.active
    _title = []
    _items = collections.OrderedDict()
    for _r in _ws.rows:
        if not _title:
            for _i in _r:
                _title.append(_i.value)
        else:
            _item = []
            for _i in _r:
                _item.append(_i.value)
            _items[_item[0]] = _item

    _wb.close()
    return _title, _items 
Example #17
Source File: excel.py    From recruit with Apache License 2.0 6 votes vote down vote up
def __init__(self, path, engine=None, mode='w', **engine_kwargs):
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super(_OpenpyxlWriter, self).__init__(path, mode=mode, **engine_kwargs)

        if self.mode == 'a':  # Load from existing workbook
            from openpyxl import load_workbook
            book = load_workbook(self.path)
            self.book = book
        else:
            # Create workbook object with default optimized_write=True.
            self.book = Workbook()

            if self.book.worksheets:
                try:
                    self.book.remove(self.book.worksheets[0])
                except AttributeError:

                    # compat - for openpyxl <= 2.4
                    self.book.remove_sheet(self.book.worksheets[0]) 
Example #18
Source File: test_excel.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def test_write_append_mode(self, merge_cells, ext, engine, mode, expected):
        import openpyxl
        df = DataFrame([1], columns=['baz'])

        with ensure_clean(ext) as f:
            wb = openpyxl.Workbook()
            wb.worksheets[0].title = 'foo'
            wb.worksheets[0]['A1'].value = 'foo'
            wb.create_sheet('bar')
            wb.worksheets[1]['A1'].value = 'bar'
            wb.save(f)

            writer = ExcelWriter(f, engine=engine, mode=mode)
            df.to_excel(writer, sheet_name='baz', index=False)
            writer.save()

            wb2 = openpyxl.load_workbook(f)
            result = [sheet.title for sheet in wb2.worksheets]
            assert result == expected

            for index, cell_value in enumerate(expected):
                assert wb2.worksheets[index]['A1'].value == cell_value 
Example #19
Source File: excel.py    From predictive-maintenance-using-machine-learning with Apache License 2.0 6 votes vote down vote up
def __init__(self, path, engine=None, mode='w', **engine_kwargs):
        # Use the openpyxl module as the Excel writer.
        from openpyxl.workbook import Workbook

        super(_OpenpyxlWriter, self).__init__(path, mode=mode, **engine_kwargs)

        if self.mode == 'a':  # Load from existing workbook
            from openpyxl import load_workbook
            book = load_workbook(self.path)
            self.book = book
        else:
            # Create workbook object with default optimized_write=True.
            self.book = Workbook()

            if self.book.worksheets:
                try:
                    self.book.remove(self.book.worksheets[0])
                except AttributeError:

                    # compat - for openpyxl <= 2.4
                    self.book.remove_sheet(self.book.worksheets[0]) 
Example #20
Source File: test_export.py    From cadasta-platform with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_make_download_empty(self):
        ensure_dirs()
        original_es_dump_path = os.path.join(
            os.path.dirname(settings.BASE_DIR),
            'search/tests/files/test_es_dump_empty.esjson'
        )
        es_dump_path = os.path.join(test_dir, 'test-xls2.esjson')
        shutil.copy(original_es_dump_path, es_dump_path)

        exporter = XLSExporter(self.project)
        xls_path, mime_type = exporter.make_download(es_dump_path)

        assert xls_path == os.path.join(test_dir, 'test-xls2.xlsx')
        assert mime_type == ('application/vnd.openxmlformats-officedocument.'
                             'spreadsheetml.sheet')

        wb = load_workbook(xls_path)
        assert wb.get_sheet_names() == ['Sheet']
        assert wb['Sheet']['A1'].value is None 
Example #21
Source File: test_export.py    From cadasta-platform with GNU Affero General Public License v3.0 6 votes vote down vote up
def test_make_download_empty(self):
        ensure_dirs()
        original_es_dump_path = os.path.join(
            os.path.dirname(settings.BASE_DIR),
            'search/tests/files/test_es_dump_empty.esjson'
        )
        es_dump_path = os.path.join(test_dir, 'test-all2.esjson')
        shutil.copy(original_es_dump_path, es_dump_path)

        exporter = AllExporter(self.project)
        zip_path, mime_type = exporter.make_download(es_dump_path)

        assert zip_path == os.path.join(test_dir, 'test-all2-res.zip')
        assert mime_type == ('application/zip')

        with ZipFile(zip_path) as myzip:
            assert myzip.namelist() == ['data.xlsx']
            myzip.extract('data.xlsx', test_dir)
            wb = load_workbook(os.path.join(test_dir, 'data.xlsx'))
            sheetnames = wb.get_sheet_names()
            assert sheetnames == ['Sheet']
            assert wb['Sheet']['A1'].value is None 
Example #22
Source File: input.py    From flatten-tool with MIT License 6 votes vote down vote up
def read_sheets(self):
        try:
            self.workbook = openpyxl.load_workbook(self.input_name, data_only=True)
        except BadZipFile as e:  # noqa
            # TODO when we have python3 only add 'from e' to show exception chain
            raise BadXLSXZipFile(
                "The supplied file has extension .xlsx but isn't an XLSX file."
            )

        self.sheet_names_map = OrderedDict(
            (sheet_name, sheet_name) for sheet_name in self.workbook.sheetnames
        )
        if self.include_sheets:
            for sheet in list(self.sheet_names_map):
                if sheet not in self.include_sheets:
                    self.sheet_names_map.pop(sheet)
        for sheet in self.exclude_sheets or []:
            self.sheet_names_map.pop(sheet, None)

        sheet_names = list(sheet for sheet in self.sheet_names_map.keys())
        self.sub_sheet_names = sheet_names
        self.configure_sheets() 
Example #23
Source File: test_reports_views.py    From wagtail with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_xlsx_export(self):

        self.page = Page.objects.first()
        self.page.locked = True
        self.page.locked_by = self.user
        self.page.locked_at = '2013-02-01T12:00:00.000Z'
        self.page.latest_revision_created_at = '2013-01-01T12:00:00.000Z'
        self.page.save()

        response = self.get(params={'export': 'xlsx'})

        # Check response - the locked page info should be in it
        self.assertEqual(response.status_code, 200)
        workbook_data = response.getvalue()
        worksheet = load_workbook(filename=BytesIO(workbook_data))['Sheet1']
        cell_array = [[cell.value for cell in row] for row in worksheet.rows]
        self.assertEqual(cell_array[0], ['Title', 'Updated', 'Status', 'Type', 'Locked At', 'Locked By'])
        self.assertEqual(cell_array[1], ['Root', datetime.datetime(2013, 1, 1, 12, 0), 'live', 'Page', datetime.datetime(2013, 2, 1, 12, 0), 'test@email.com'])
        self.assertEqual(len(cell_array), 2) 
Example #24
Source File: base_formats.py    From wagtail with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def create_dataset(self, in_stream):
        """
        Create dataset from first sheet.
        """
        from io import BytesIO
        import openpyxl
        xlsx_book = openpyxl.load_workbook(BytesIO(in_stream), read_only=True)

        dataset = tablib.Dataset()
        sheet = xlsx_book.active

        # obtain generator
        rows = sheet.rows
        dataset.headers = [cell.value for cell in next(rows)]

        for row in rows:
            row_values = [cell.value for cell in row]
            dataset.append(row_values)
        return dataset


#: These are the default formats for import and export. Whether they can be
#: used or not is depending on their implementation in the tablib library. 
Example #25
Source File: test_simple_modeladmin.py    From wagtail with BSD 3-Clause "New" or "Revised" License 6 votes vote down vote up
def test_xlsx_export(self):
        # Export the whole queryset
        response = self.get(export='xlsx')

        self.assertEqual(response.status_code, 200)
        # Check attachment is present and named correctly using the modeladmin export_filename
        self.assertEqual(response.get('content-disposition'), 'attachment; filename="books-export.xlsx"')

        # Check response - all books should be in it
        workbook_data = response.getvalue()
        worksheet = load_workbook(filename=BytesIO(workbook_data))['Sheet1']
        cell_array = [[cell.value for cell in row] for row in worksheet.rows]
        self.assertEqual(cell_array[0], ['Title', 'Author', 'Author Date Of Birth'])
        self.assertEqual(cell_array[1], ['Charlie and the Chocolate Factory', 'Roald Dahl', '1916-09-13'])
        self.assertEqual(cell_array[2], ['The Chronicles of Narnia', 'Roald Dahl', '1898-11-29'])
        self.assertEqual(cell_array[3], ['The Hobbit', 'J. R. R. Tolkien', '1892-01-03'])
        self.assertEqual(cell_array[4], ['The Lord of the Rings', 'J. R. R. Tolkien', '1892-01-03'])
        self.assertEqual(len(cell_array), 5) 
Example #26
Source File: test_excel.py    From followthemoney with MIT License 6 votes vote down vote up
def test_excel_export(self):
        entity = model.get_proxy(ENTITY)
        exporter = ExcelExporter(self.temp, extra=['source'])
        exporter.write(entity, extra=['test'])
        exporter.finalize()
        workbook = load_workbook(self.temp)
        self.assertListEqual(workbook.sheetnames, ['People'])
        sheet = workbook["People"]
        rows = list(sheet)
        props = exporter.exportable_properties(entity.schema)
        self.assertListEqual(
            [cell.value for cell in rows[0]],
            ['ID', 'source'] +
            [prop.label for prop in props]
        )
        self.assertListEqual(
            [cell.value for cell in rows[1][:3]],
            ['person', 'test', 'Ralph Tester']
        ) 
Example #27
Source File: sheetToTextFile.py    From automate-the-boring-stuff-projects with MIT License 6 votes vote down vote up
def toTextFiles(filename):
    """writes column data in worksheet into text files
    Args:
        filename (str): name of worksheet to read from
    Returns:
        None
    """
    wb = openpyxl.load_workbook(filename)
    sheet = wb.active
    count = 1

    for colObj in sheet.columns:
        
        with open('text-'+str(count)+'.txt', 'w') as file:
            for cellObj in colObj:
                file.write(cellObj.value)

        count += 1 
Example #28
Source File: avatar_downloader.py    From JiaYuan with Apache License 2.0 6 votes vote down vote up
def read_excel(excel_path):
    candidate_list = list()
    wb = load_workbook(excel_path)
    ws = wb.active
    for i in range(2, ws.max_row - 1):  # -1 means that the last row is null
        candidate = Candidate(uid=ws.cell(row=i, column=1).value, nickname=ws.cell(row=i, column=2).value,
                              age=ws.cell(row=i, column=3).value, height=ws.cell(row=i, column=4).value,
                              image=ws.cell(row=i, column=5).value, marriage=ws.cell(row=i, column=6).value,
                              education=ws.cell(row=i, column=7).value, work_location=ws.cell(row=i, column=8).value,
                              work_sublocation=ws.cell(row=i, column=9).value,
                              shortnote=ws.cell(row=i, column=10).value,
                              matchCondition=ws.cell(row=i, column=11).value,
                              randListTag=ws.cell(row=i, column=12).value,
                              province=ws.cell(row=i, column=13).value, gender=ws.cell(row=i, column=14).value)
        candidate_list.append(candidate)
        # print(candidate)

    return candidate_list 
Example #29
Source File: export.py    From pymongo-schema with GNU Lesser General Public License v3.0 6 votes vote down vote up
def write_data(self, file_descr):
        """
        Use dataframe to_excel to write into file_descr (filename) - open first if file exists.
        """
        if os.path.isfile(file_descr):
            print(file_descr, 'exists')
            # Solution to keep existing data
            book = load_workbook(file_descr)
            writer = pd.ExcelWriter(file_descr, engine='openpyxl')
            writer.book = book
            writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
            self.data_df.to_excel(writer, sheet_name='Mongo_Schema', index=True,
                                  float_format='%.2f')
            writer.save()
        else:
            self.data_df.to_excel(file_descr, sheet_name='Mongo_Schema', index=True,
                                  float_format='%.2f') 
Example #30
Source File: test_functional.py    From pymongo-schema with GNU Lesser General Public License v3.0 6 votes vote down vote up
def test02_transform():
    base_output = "output_fctl_data_dict"
    outputs = {}
    extensions = ['html', 'xlsx', 'tsv', 'md']
    for ext in extensions:
        outputs[ext] = "{}.{}".format(base_output, ext)

    exp = os.path.join(TEST_DIR, 'resources', 'expected', 'data_dict')
    argv = ['transform', SCHEMA_FILE, '--output', base_output, '--columns',
            'Field_compact_name', 'Field_name', 'Full_name', 'Description', 'Count', 'Percentage',
            'Types_count',
            '--formats'] + extensions
    main(argv)

    assert filecmp.cmp(outputs['tsv'], "{}.tsv".format(exp))
    assert filecmp.cmp(outputs['md'], "{}.md".format(exp))
    with open(outputs['html']) as out_fd, \
            open("{}.html".format(exp)) as exp_fd:
        assert out_fd.read().replace(' ', '') == exp_fd.read().replace(' ', '')
    res = [cell.value for row in load_workbook(outputs['xlsx']).active for cell in row]
    exp = [cell.value for row in load_workbook("{}.xlsx".format(exp)).active for cell in row]
    assert res == exp
    for output in outputs.values():
        os.remove(output)