26 Jul 2022
Library resources | |
---|---|
PyPI | https://pypi.org/project/openpyxl/ |
Source | https://foss.heptapod.net/openpyxl/openpyxl |
Documentation | https://openpyxl.readthedocs.io/en/stable/ |
One of the first libraries I started to use extensively, back in 2017.
pip3 install openpyxl
my Excel boilerplate
from datetime import datetime
import os
print(f"{datetime.now().strftime('%H:%M:%S')} starting {os.path.basename(__file__)}")
import time
start_time = time.time()
from openpyxl import load_workbook
import pprint
from openpyxl.utils import column_index_from_string
pp = pprint.PrettyPrinter(indent=4)
SAVE = 'NO' # LIVE, TEST, NO
file = 'path_to_file_xlsx'
file_test = f'{file[:-5]}_TEST.xlsx'
wb = load_workbook(file)
ws = wb.active
count_row = 0
####################
# Loop
for row in ws:
count_row += 1
print(count_row)
first = row[0].value
# rest of code logic...
####################
# Save
if SAVE == 'LIVE':
print(f'\nSaving FINAL file: {file}...')
time.sleep(1)
wb.save(file)
elif SAVE == 'TEST':
print(f'\nSaving TEST file: {file_test}')
time.sleep(1)
wb.save(file_test)
elif SAVE == 'NO':
print('\nNOT Saved.')
########################################################################################################
if __name__ == '__main__':
print()
print('-------------------------------')
print(f"count_row = {count_row}")
print('-------------------------------')
run_time = round((time.time() - start_time)/60, 1)
print(f'{os.path.basename(__file__)} finished in {run_time} minutes.')
Snippets
open Workbook
from openpyxl import load_workbook
wb = load_workbook(moneeFile)
ws = wb['Expenses']
count = 0
airportRef = {}
for row in ws.iter_rows(min_row=2, max_col=22, max_row=10000):
if row[7].value not in (None, ''):
airportID = row[0].value
airportICAO = row[7].value
create Workbook
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
wb.save(file_save)
converting between column letters and numbers
openpyxl.cell.column_index_from_string()
openpyxl.cell.get_column_letter()
>>> import openpyxl
>>> from openpyxl.utils import column_index_from_string
>>> get_column_letter(1)
'A'
>>> get_column_letter(2)
'B'
>>> get_column_letter(27)
'AA'
>>> get_column_letter(900)
'AHP'
>>> wb = openpyxl.load_workbook('example.xlsx')
>>> sheet = wb.get_sheet_by_name('Sheet1')
>>> get_column_letter(sheet.max_column)
'C'
>>> column_index_from_string('A')
1
>>> column_index_from_string('AA')
from openpyxl.utils.cell import coordinate_from_string, column_index_from_string
def colInteger(colIndex):
xy = coordinate_from_string(f"{colIndex}1") # returns ('A',4)
col = column_index_from_string(xy[0]) - 1 # returns 1
return col
auto-size columns
from openpyxl.utils import get_column_letter
for col in ws.columns:
print(f"Type: {type(col)} / Variable: {col}")
max_length = 0
column = col[0].column # Get the column name
column = get_column_letter(column)
print(f"Type: {type(column)} / Variable: {column}")
for cell in col:
try: # Necessary to avoid error on empty cells
if len(str(cell.value)) > max_length:
max_length = len(cell.value)
except:
pass
adjusted_width = max_length + 2 # can add buffer, e.g. +2
ws.column_dimensions[column].width = adjusted_width
get column letter
from openpyxl.cell import get_column_letter
select a column by its name
# You can do this by first building a dictionary where the keys are the column names and the values are the column number. Then, use the dictionary to translate from the name to the number.
import openpyxl
workbook = openpyxl.load_workbook('test01.xlsx')
worksheet = workbook['Sheet1']
## Create a dictionary of column names
ColNames = {}
Current = 0
for COL in worksheet.iter_cols(1, worksheet.max_column):
ColNames[COL[0].value] = Current
Current += 1
## Now you can access by column name
## (My data has a column named 'Dogs')
for row_cells in worksheet.iter_rows(min_row=1, max_row=4):
print(row_cells[ColNames['Dogs']].value)
formatting
expensesSheet.column_dimensions['A'].width = 12
expensesSheet.column_dimensions['B'].width = 9
expensesSheet.column_dimensions['C'].width = 11
expensesSheet.column_dimensions['D'].width = 115
expensesSheet.column_dimensions['E'].width = 15
expensesSheet.column_dimensions['F'].width = 5
expensesSheet.column_dimensions['G'].width = 130
from openpyxl.styles import Font, Fill
for row in expensesSheet:
row[0].number_format = 'DD/MM/YY'
cellAmount = row[2]
cellAmount.number_format = '#,##0.00'
# cellGBP = row[5]
# cellGBP.number_format = '#,##0.00'
for cell in row:
cell.font = Font(color='505050')
row[4].number_format = 'DD/MM/YYYY'
row[15].number_format = '"$"#,##0'
row[20].number_format = 'YYYY-MM-DD hh:mm:ss'
row[8].alignment = Alignment(horizontal='right')
_cell = ws.cell('C1')
# Font properties
_cell.style.font.color.index = Color.GREEN
_cell.style.font.name = 'Arial'
_cell.style.font.size = 8
_cell.style.font.bold = True
_cell.style.alignment.wrap_text = True
# Cell background color
_cell.style.fill.fill_type = Fill.FILL_SOLID
_cell.style.fill.start_color.index = Color.DARKRED
# You should only modify column dimensions after you have written a cell in
# the column. Perfect world: write column dimensions once per column
#
ws.column_dimensions["C"].width = 60.0
from openpyxl.styles import PatternFill, Border, Side
cell.fill = PatternFill(start_color=colour, end_color=colour, fill_type='solid')
delete rows
ws.delete_rows(index, 1)
"""
where: 'ws' is the worksheet, 'index' is the row number, and '1' is the number of rows to delete.
"""
insert today's date
from datetime import date
date = date.today()
ws.cell(row[0].row, 18, date)
add filter
ws.auto_filter.ref = f"A1:{get_column_letter(ws.max_column)}{ws.max_row}"
write dict to xlsx
for y, z in CFA_dict.items():
uID = re.match(r'^(.*)-(.*)-(.*)', y)
operator = uID[1]
airport = uID[2]
aircraftType = uID[3]
row = [operator, airport, aircraftType, z]
ws.append(row)
skip 1st row in a for loop
# Either
for row in range(2, wsFlighteeHN.max_row + 1):
icao = wsFlighteeHN.cell(row, 1).value
# row is now an int of row number that we need to use in cell coordinates
# OR if not skipping, use:
for row in wsFlighteeHN:
icao = row[0].value
# row is now a tuple of cell objects for which we can extract the first one
cell object attributes
source: https://openpyxl.readthedocs.io/en/latest/api/openpyxl.cell.cell.html
cell.col_idx # 1-based
cell.coordinate # A1, B1, etc..
extract hyperlinks
use .hyperlink.target
method.
print(ws.cell(row=2, column=1).hyperlink.target)