Started using a local database earlier this year to manage large datasets (e.g. for Twittee and Building a static website: BtoBSales.EU) and to overcome the limitations of Grist: size & latency.
#####################
# Connect to database
import sqlite3

db = sqlite3.connect('/path/to/local/database.db')

from collections import namedtuple

def namedtuple_factory(cursor, row):
    """Returns sqlite rows as named tuples."""
    fields = [col[0] for col in cursor.description]
    x = namedtuple("x", fields)
    return x(*row)

db.row_factory = namedtuple_factory
c = db.cursor()

####################
c.execute("""
    SELECT domain
    FROM companies
    WHERE name IS NULL;
    """)
items = [x.domain for x in c.fetchall()]

print()
print(items)
print()
print('items', type(items), len(items))

#####################
# Commit command
db.commit()

# Close connection
db.close()
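A small follow-up sketch, assuming the same hypothetical /path/to/local/database.db and the namedtuple_factory above: an sqlite3 connection can be used as a context manager, which commits on success and rolls back on an exception (it does not close the connection, so close() is still needed).

import sqlite3

# Connection as context manager: auto-commit on success, rollback on error
with sqlite3.connect('/path/to/local/database.db') as db:
    db.row_factory = namedtuple_factory
    c = db.cursor()
    c.execute("SELECT domain FROM companies WHERE name IS NULL;")
    items = [x.domain for x in c.fetchall()]

# The context manager does not close the connection
db.close()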
SELECT with variable string
c.execute(""" SELECT rowid, domain, url FROM twitter_webinars WHERE url LIKE "%jdsupra%" """)#######db=sqlite3.connect('/path/to/local/database.db')c=db.cursor()c.execute(""" SELECT first FROM first WHERE first LIKE ?; """,(f"{email_prefix.lower()}%",))potential_first=[x[0]forxinc.fetchall()]
initial test
import sqlite3

# Connect to database
conn = sqlite3.connect('/path/to/local/database.db')

# Create a cursor
c = conn.cursor()

# Query The Database
c.execute("SELECT * FROM Companies")
items = c.fetchall()

for item in items:
    print(item)

# Commit our command
conn.commit()

# Close our connection
conn.close()
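For large tables, a variant worth noting: the cursor is iterable, so rows can be streamed one by one instead of loading everything with fetchall(). A sketch against the same Companies table and hypothetical database path:

import sqlite3

conn = sqlite3.connect('/path/to/local/database.db')
c = conn.cursor()

# Iterate over the cursor directly instead of calling fetchall()
for item in c.execute("SELECT * FROM Companies"):
    print(item)

conn.close()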
SQL headers
conn = sqlite3.connect('/path/to/local/database.db')

# Create a cursor
c = conn.cursor()

# Query The Database
c.execute("SELECT * FROM Companies")

with conn:
    headers = [i[0] for i in c.description]
def namedtuple_factory(cursor, row):
    """Returns sqlite rows as named tuples."""
    fields = [col[0] for col in cursor.description]
    Tweet = namedtuple("Tweet", fields)
    return Tweet(*row)

conn.row_factory = namedtuple_factory
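The standard library also ships sqlite3.Row as a ready-made row factory: rows then support access by column name and expose keys(), without writing a factory by hand. A small sketch, assuming the Companies table above has a domain column:

import sqlite3

conn = sqlite3.connect('/path/to/local/database.db')
conn.row_factory = sqlite3.Row   # built-in alternative to namedtuple_factory
c = conn.cursor()
c.execute("SELECT * FROM Companies")

row = c.fetchone()
print(row.keys())       # column names
print(row['domain'])    # access by column name

conn.close()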
import CSV to SQLite
# EASIEST
import pandas as pd

db = sqlite3.connect('/path/to/local/database.db')
c = db.cursor()

df = pd.read_csv('/path/to/local/Master.csv')
df.to_sql('companies', db)

####
import csv
import sqlite3

with open('/path/to/local/Master_copy.csv', 'r') as f:
    d = csv.DictReader(f)
    existing = []
    content = []
    for i in d:
        if i['tweet_id'] not in existing:
            existing.append(i['tweet_id'])
            content.append((
                i['tweet_id'],
                i['tweeted'],
                i['tweet_url'],
                i['text'],
                i['author_name'],
                i['author_website'],
                i['domain'],
                i['known'],
                i['url'],
                i['provider'],
                i['process'],
                i['src'],
                i['created'],
                i['updated'],
            ))

######
print(f"CSV loaded with {len(content)} records.")

# Connect to SQLite
sqliteConnection = sqlite3.connect('/path/to/local/database.db')
cursor = sqliteConnection.cursor()

cursor.execute("DROP TABLE IF EXISTS twitter_webinars")

# Create table
table = """
    CREATE TABLE twitter_webinars(
        tweet_id VARCHAR(255) PRIMARY KEY,
        tweeted INT,
        tweet_url CHAR(255),
        text VARCHAR(255),
        author_name VARCHAR(255),
        author_website VARCHAR(255),
        domain VARCHAR(255),
        known VARCHAR(255),
        url VARCHAR(255),
        provider VARCHAR(255),
        process VARCHAR(255),
        src VARCHAR(25),
        created FLOAT,
        updated FLOAT
    );
    """
cursor.execute(table)

cursor.executemany(
    "INSERT INTO twitter_webinars (tweet_id, tweeted, tweet_url, text, author_name, author_website, domain, known, url, provider, process, src, created, updated) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?);",
    content)

cursor.execute('SELECT tweet_id FROM twitter_webinars;')

# View result
result = cursor.fetchall()
existing = []
for x in result:
    if x in existing:
        print(f"ISSUE: {x}")
    else:
        existing.append(x)

# Commit work and close connection
sqliteConnection.commit()
cursor.close()
sqliteConnection.close()
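One caveat on the pandas route: to_sql() fails by default if the table already exists, so re-running the import needs the if_exists argument ('replace' or 'append'). A sketch with the same hypothetical paths:

import sqlite3
import pandas as pd

db = sqlite3.connect('/path/to/local/database.db')
df = pd.read_csv('/path/to/local/Master.csv')

# Overwrite the existing table; use 'append' to add rows instead.
# index=False stops pandas from writing its own index column.
df.to_sql('companies', db, if_exists='replace', index=False)

db.close()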
Rather obvious, but it is significantly faster to run a SQL query with sqlite3 than to fetch all records and filter them with a list comprehension in Python.
Ex.:
existing_emails = [x.email for x in select_all_records(DB_BTOB, "people") if x.email != None]
# 2.075 seconds to run script
vs.
import sqlite3

conn = sqlite3.connect(DB_BTOB)
cursor = conn.cursor()
cursor.execute('SELECT email FROM people WHERE email IS NOT NULL')
existing_emails = [row[0] for row in cursor.fetchall()]
cursor.close()
conn.close()
# 0.046 seconds to run script
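The timings above come from the author's data; as a sketch, such a comparison can be measured with time.perf_counter around each variant:

import sqlite3
import time

start = time.perf_counter()

conn = sqlite3.connect(DB_BTOB)
cursor = conn.cursor()
cursor.execute('SELECT email FROM people WHERE email IS NOT NULL')
existing_emails = [row[0] for row in cursor.fetchall()]
cursor.close()
conn.close()

print(f"SQL query: {time.perf_counter() - start:.3f} seconds")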