Python Library: openpyxl

A Python library to read/write Excel 2010 xlsx/xlsm files

26 Jul 2022

Library resources
PyPI https://pypi.org/project/openpyxl/
Source https://foss.heptapod.net/openpyxl/openpyxl
Documentation https://openpyxl.readthedocs.io/en/stable/

One of the first libraries I started to use extensively, back in 2017.

pip3 install openpyxl

my Excel boilerplate

from datetime import datetime
import os
print(f"{datetime.now().strftime('%H:%M:%S')} starting {os.path.basename(__file__)}")
import time
start_time = time.time()

from openpyxl import load_workbook
import pprint
from openpyxl.utils import column_index_from_string

pp = pprint.PrettyPrinter(indent=4)

SAVE = 'NO'  # LIVE, TEST, NO

file = 'path_to_file_xlsx'

file_test = f'{file[:-5]}_TEST.xlsx'

wb = load_workbook(file)
ws = wb.active

count_row = 0

####################
# Loop

for row in ws:
    count_row += 1
    print(count_row)
    first = row[0].value


    # rest of code logic...


####################
# Save

if SAVE == 'LIVE':
    print(f'\nSaving FINAL file: {file}...')
    time.sleep(1)
    wb.save(file)
elif SAVE == 'TEST':
    print(f'\nSaving TEST file: {file_test}')
    time.sleep(1)
    wb.save(file_test)
elif SAVE == 'NO':
    print('\nNOT Saved.')


########################################################################################################

if __name__ == '__main__':
    print()
    print('-------------------------------')
    print(f"count_row = {count_row}")
    print('-------------------------------')
    run_time = round((time.time() - start_time)/60, 1)
    print(f'{os.path.basename(__file__)} finished in {run_time} minutes.')

Snippets

open Workbook

from openpyxl import load_workbook

wb = load_workbook(moneeFile)
ws = wb['Expenses']

count = 0

airportRef = {}

for row in ws.iter_rows(min_row=2, max_col=22, max_row=10000):
    if row[7].value not in (None, ''):
        airportID = row[0].value
        airportICAO = row[7].value

create Workbook

from openpyxl import Workbook

wb = Workbook()
ws = wb.active

wb.save(file_save)

converting between column letters and numbers

openpyxl.cell.column_index_from_string()
openpyxl.cell.get_column_letter()


>>> import openpyxl
>>> from openpyxl.utils import column_index_from_string

>>> get_column_letter(1)
'A'
>>> get_column_letter(2)
'B'
>>> get_column_letter(27)
'AA'
>>> get_column_letter(900)
'AHP'
>>> wb = openpyxl.load_workbook('example.xlsx')
>>> sheet = wb.get_sheet_by_name('Sheet1')
>>> get_column_letter(sheet.max_column)
'C'
>>> column_index_from_string('A')
1
>>> column_index_from_string('AA')


from openpyxl.utils.cell import coordinate_from_string, column_index_from_string

def colInteger(colIndex):
    xy = coordinate_from_string(f"{colIndex}1")  # returns ('A',4)
    col = column_index_from_string(xy[0]) - 1  # returns 1
    return col

auto-size columns

from openpyxl.utils import get_column_letter

for col in ws.columns:
    print(f"Type: {type(col)} / Variable: {col}")
    max_length = 0
    column = col[0].column # Get the column name
    column = get_column_letter(column)
    print(f"Type: {type(column)} / Variable: {column}")
    for cell in col:
        try: # Necessary to avoid error on empty cells
            if len(str(cell.value)) > max_length:
                max_length = len(cell.value)
        except:
            pass
        adjusted_width = max_length + 2 # can add buffer, e.g. +2
        ws.column_dimensions[column].width = adjusted_width

get column letter

from openpyxl.cell import get_column_letter

select a column by its name

# You can do this by first building a dictionary where the keys are the column names and the values are the column number. Then, use the dictionary to translate from the name to the number.

import openpyxl

workbook = openpyxl.load_workbook('test01.xlsx')
worksheet = workbook['Sheet1']

## Create a dictionary of column names
ColNames = {}
Current  = 0
for COL in worksheet.iter_cols(1, worksheet.max_column):
    ColNames[COL[0].value] = Current
    Current += 1

## Now you can access by column name
## (My data has a column named 'Dogs')
for row_cells in worksheet.iter_rows(min_row=1, max_row=4):
    print(row_cells[ColNames['Dogs']].value)

formatting

expensesSheet.column_dimensions['A'].width = 12
expensesSheet.column_dimensions['B'].width = 9
expensesSheet.column_dimensions['C'].width = 11
expensesSheet.column_dimensions['D'].width = 115
expensesSheet.column_dimensions['E'].width = 15
expensesSheet.column_dimensions['F'].width = 5
expensesSheet.column_dimensions['G'].width = 130

from openpyxl.styles import Font, Fill

for row in expensesSheet:
    row[0].number_format = 'DD/MM/YY'
    cellAmount = row[2]
    cellAmount.number_format = '#,##0.00'
    # cellGBP = row[5]
    # cellGBP.number_format = '#,##0.00'
    for cell in row:
        cell.font = Font(color='505050')
row[4].number_format = 'DD/MM/YYYY'
row[15].number_format = '"$"#,##0'
row[20].number_format = 'YYYY-MM-DD hh:mm:ss'
row[8].alignment = Alignment(horizontal='right')

_cell = ws.cell('C1')

# Font properties
_cell.style.font.color.index = Color.GREEN
_cell.style.font.name = 'Arial'
_cell.style.font.size = 8
_cell.style.font.bold = True
_cell.style.alignment.wrap_text = True

# Cell background color
_cell.style.fill.fill_type = Fill.FILL_SOLID
_cell.style.fill.start_color.index = Color.DARKRED

# You should only modify column dimensions after you have written a cell in 
#     the column. Perfect world: write column dimensions once per column
# 
ws.column_dimensions["C"].width = 60.0

from openpyxl.styles import PatternFill, Border, Side

cell.fill = PatternFill(start_color=colour, end_color=colour, fill_type='solid')

delete rows

ws.delete_rows(index, 1)

"""
where: 'ws' is the worksheet, 'index' is the row number, and '1' is the number of rows to delete.
"""

insert today's date

from datetime import date

date = date.today()

ws.cell(row[0].row, 18, date)

add filter

ws.auto_filter.ref = f"A1:{get_column_letter(ws.max_column)}{ws.max_row}"

write dict to xlsx

for y, z in CFA_dict.items():
    uID = re.match(r'^(.*)-(.*)-(.*)', y)
    operator = uID[1]
    airport = uID[2]
    aircraftType = uID[3]
    row = [operator, airport, aircraftType, z]
    ws.append(row)

skip 1st row in a for loop

# Either

for row in range(2, wsFlighteeHN.max_row + 1):

    icao = wsFlighteeHN.cell(row, 1).value

# row is now an int of row number that we need to use in cell coordinates

# OR if not skipping, use:  

for row in wsFlighteeHN:
    icao = row[0].value

# row is now a tuple of cell objects for which we can extract the first one

cell object attributes

source: https://openpyxl.readthedocs.io/en/latest/api/openpyxl.cell.cell.html

cell.col_idx # 1-based

cell.coordinate # A1, B1, etc.. 

use .hyperlink.target method.

print(ws.cell(row=2, column=1).hyperlink.target)

links

social