26 Jul 2022
| Library resources | |
|---|---|
| PyPI | --- |
| Github | --- |
| Documentation | https://docs.python.org/3/library/sqlite3.html |
Started using a local database earlier this year to manage large datasets (e.g. for Twitter and Building a static website: BtoBSales.EU) and to overcome Grist's limits on size and latency.
Grist | The Evolution of Spreadsheets
Tutorials


Snippets
SELECT
####################
# Connect to database
import sqlite3
db = sqlite3.connect('/path/to/local/database.db')
from collections import namedtuple
def namedtuple_factory(cursor, row):
    """Returns sqlite rows as named tuples."""
    fields = [col[0] for col in cursor.description]
    x = namedtuple("x", fields)
    return x(*row)
db.row_factory = namedtuple_factory
c = db.cursor()
####################
c.execute("""
SELECT domain
FROM companies
WHERE name IS NULL;
""")
items = [x.domain for x in c.fetchall()]
print()
print(items)
print()
print('items', type(items), len(items))
####################
# Commit command
db.commit()
# Close connection
db.close()
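A built-in alternative to the custom factory is sqlite3.Row, which gives access to columns by name without defining anything; a minimal sketch of the same query:
import sqlite3
db = sqlite3.connect('/path/to/local/database.db')
db.row_factory = sqlite3.Row  # built-in row factory, indexable by column name
c = db.cursor()
c.execute("SELECT domain FROM companies WHERE name IS NULL;")
items = [row['domain'] for row in c.fetchall()]
db.close()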
SELECT with variable string
c.execute("""
SELECT rowid, domain, url
FROM twitter_webinars
WHERE url LIKE '%jdsupra%' -- string literals take single quotes in SQL
""")
#######
db = sqlite3.connect('/path/to/local/database.db')
c = db.cursor()
c.execute("""
SELECT first
FROM first
WHERE first LIKE ?;
""",
(f"{email_prefix.lower()}%",)
)
potential_first = [x[0] for x in c.fetchall()]
initial test
import sqlite3
# Connect to database
conn = sqlite3.connect('/path/to/local/database.db')
# Create a cursor
c = conn.cursor()
# Query The Database
c.execute("SELECT * FROM Companies")
items = c.fetchall()
for item in items:
    print(item)
# Commit our command
conn.commit()
# Close our connection
conn.close()
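The same query can also be sketched with the connection as a context manager, which commits on success and rolls back on exceptions:
import sqlite3
conn = sqlite3.connect('/path/to/local/database.db')
with conn:  # commits on success, rolls back on exception
    c = conn.cursor()
    c.execute("SELECT * FROM Companies")
    for item in c.fetchall():
        print(item)
conn.close()  # the context manager does not close the connection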
SQL headers
conn = sqlite3.connect('/path/to/local/database.db')
# Create a cursor
c = conn.cursor()
# Query The Database
c.execute("SELECT * FROM Companies")
with conn:
    headers = [i[0] for i in c.description]
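If only the column names are needed, a PRAGMA avoids fetching any rows; a minimal sketch:
c.execute("PRAGMA table_info(Companies)")
headers = [row[1] for row in c.fetchall()]  # second field of table_info is the column name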
SQL results as namedtuples
Mimics Grist record objects (Grist | The Evolution of Spreadsheets).
from collections import namedtuple

def namedtuple_factory(cursor, row):
    """Returns sqlite rows as named tuples."""
    fields = [col[0] for col in cursor.description]
    Tweet = namedtuple("Tweet", fields)
    return Tweet(*row)
conn.row_factory = namedtuple_factory
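Once the factory is set, results can be read by attribute, like Grist records; a sketch assuming the twitter_webinars table created in the CSV-import snippet below:
c = conn.cursor()
c.execute("SELECT * FROM twitter_webinars")
for tweet in c.fetchall():
    print(tweet.tweet_id, tweet.domain)  # attribute access instead of indexing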
import CSV to SQLite
# EASIEST
import sqlite3
import pandas as pd
db = sqlite3.connect('/path/to/local/database.db')
c = db.cursor()
df = pd.read_csv('/path/to/local/Master.csv')
df.to_sql('companies', db)
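Note: by default to_sql raises if the table already exists and writes the DataFrame index as an extra column; both can be overridden:
df.to_sql('companies', db, if_exists='replace', index=False)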
####
import csv
import sqlite3
with open('/path/to/local/Master_copy.csv', 'r') as f:
    d = csv.DictReader(f)
    existing = []
    content = []
    for i in d:
        if i['tweet_id'] not in existing:
            existing.append(i['tweet_id'])
            content.append(
                (
                    i['tweet_id'],
                    i['tweeted'],
                    i['tweet_url'],
                    i['text'],
                    i['author_name'],
                    i['author_website'],
                    i['domain'],
                    i['known'],
                    i['url'],
                    i['provider'],
                    i['process'],
                    i['src'],
                    i['created'],
                    i['updated'],
                )
            )
######
print(f"CSV loaded with {len(content)} records.")
# Connect to SQLite
sqliteConnection = sqlite3.connect('/path/to/local/database.db')
cursor = sqliteConnection.cursor()
cursor.execute("DROP TABLE IF EXISTS twitter_webinars")
# Create table
table = """
CREATE TABLE twitter_webinars(
tweet_id VARCHAR(255) PRIMARY KEY,
tweeted INT,
tweet_url CHAR(255),
text VARCHAR(255),
author_name VARCHAR(255),
author_website VARCHAR(255),
domain VARCHAR(255),
known VARCHAR(255),
url VARCHAR(255),
provider VARCHAR(255),
process VARCHAR(255),
src VARCHAR(25),
created FLOAT,
updated FLOAT
);
"""
cursor.execute(table)
cursor.executemany(
    """INSERT INTO twitter_webinars
    (tweet_id, tweeted, tweet_url, text, author_name, author_website,
     domain, known, url, provider, process, src, created, updated)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""",
    content,
)
cursor.execute('SELECT tweet_id FROM twitter_webinars;')
# View result
result = cursor.fetchall()
existing = []
for x in result:
    if x in existing:
        print(f"ISSUE: {x}")
    else:
        existing.append(x)
# Commit work and close connection
sqliteConnection.commit()
cursor.close()
sqliteConnection.close()
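Since tweet_id is the PRIMARY KEY, the de-duplication could also be left to SQLite with INSERT OR IGNORE, which silently skips rows that would violate the key; a sketch:
cursor.executemany(
    """INSERT OR IGNORE INTO twitter_webinars
    (tweet_id, tweeted, tweet_url, text, author_name, author_website,
     domain, known, url, provider, process, src, created, updated)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);""",
    content,  # duplicates on tweet_id are dropped by SQLite
)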
test in-memory (faster)
test = True
if test:
    source = sqlite3.connect('/path/to/local/database.db')
    db = sqlite3.connect(':memory:')
    source.backup(db)
    source.close()
else:
    db = sqlite3.connect('/path/to/local/database.db')
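Changes made in the in-memory copy vanish on close; to persist them, run the backup in the other direction before closing:
dest = sqlite3.connect('/path/to/local/database.db')
db.backup(dest)  # write the in-memory database back to disk
dest.close()
db.close()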
Speed
30 Jul 2023
Rather obvious, but running the filter as a SQL query in sqlite3 is significantly faster than fetching every record and filtering with a list comprehension in Python.
Ex.:
existing_emails = [x.email for x in select_all_records(DB_BTOB, "people") if x.email is not None]  # select_all_records: helper fetching all rows, defined elsewhere; 2.075 seconds to run script
vs.
import sqlite3
conn = sqlite3.connect(DB_BTOB)
cursor = conn.cursor()
cursor.execute('SELECT email FROM people WHERE email IS NOT NULL')
existing_emails = [row[0] for row in cursor.fetchall()]
cursor.close()
conn.close()
# 0.046 seconds to run script
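A minimal way to reproduce such a timing (using time.perf_counter; table and column names as above):
import sqlite3
import time

conn = sqlite3.connect(DB_BTOB)
cursor = conn.cursor()
start = time.perf_counter()
cursor.execute('SELECT email FROM people WHERE email IS NOT NULL')
existing_emails = [row[0] for row in cursor.fetchall()]
print(f"SQL filter took {time.perf_counter() - start:.3f}s")
cursor.close()
conn.close()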