Need to access Edge browser history with Python.
This will help automate website listings, just by browsing.
Edge stores its history as a SQLite database file in /Users/USER/Library/Application Support/Microsoft Edge/Default/History
.
File without extension.
Steps:
- copy file to dedicated folder & rename to .db
- open with sqlite3
module
import sqlite3
# FUNCTIONS
def select_all_records(db_file_path: str, table_name: str) -> List[DB_Record]:
"""
Fetches all records from the specified SQLite database table and returns them as a list of objects.
Arguments:
db_file_path: The path to the SQLite database file.
table_name: The name of the table from which to fetch the records.
Returns:
A list of objects representing the fetched records.
"""
db_lock = threading.Lock()
with db_lock:
conn = sqlite3.connect(db_file_path)
cur = conn.cursor()
# Execute the PRAGMA statement to get the column names
cur.execute(f"PRAGMA table_info({table_name})")
columns = cur.fetchall()
# Create a list of column names, including 'rowid'
column_names = ['rowid'] + [column[1] for column in columns]
# Define the class dynamically with attributes based on the column names
record_class = type('DB_Record', (DB_Record,), {column_name: None for column_name in column_names})
# Execute the SELECT query to fetch all records, including 'rowid'
cur.execute(f"""
SELECT rowid, *
FROM {table_name}
""")
rows = cur.fetchall()
records = []
for row in rows:
# Create an instance of the class and set its attributes
record = record_class()
for column_name, value in zip(column_names, row):
setattr(record, column_name, value)
# Update the class dynamically to add any new attributes
for column_name in column_names:
if not hasattr(record_class, column_name):
setattr(record_class, column_name, None)
records.append(record)
conn.close()
return records
def copy_and_rename_file(source_path, destination_path, new_name):
shutil.copy2(source_path, destination_path)
new_path = destination_path + '/' + new_name
shutil.move(destination_path + '/' + source_path.split('/')[-1], new_path)
# MAIN
# Copy and rename the SQLite file to a local folder
source_history_file = '/Users/USER/Library/Application Support/Microsoft Edge/Default/History'
destination_path = '/path/to/folder/'
# Rename to `.db`
new_name = f"{datetime.now().strftime('%Y-%m-%d-%H-%M')}-History.db"
copy_and_rename_file(source_history_file, destination_path, new_name)
target_history_file = destination_path + '/' + new_name
# Open the SQLite file just copied and read all records from the 'urls' table
records = select_all_records(target_history_file, 'urls')
blacklist = [
'google.com',
'youtube.com',
'twitter.com',
'github.io',
'bing.com',
]
for record in records:
url = record.url
if not any(ele in url for ele in blacklist):
# Do something with the URLs