Import Notion data to Grist

Python script

29 Dec 2022

I use Notion to clip URLs for reference but use Grist Grist | The Evolution of Spreadsheets to manage my data.

Here is a script to import one of my "Notion Queues" to Grist.

Future improvement would be to build a Chrome extension that adds straight to Grist instead.

from datetime import datetime
import os
print("----------")
ts_file = f"{datetime.now().strftime('%y%m%d-%H%M')}"
ts_db = f"{datetime.now().strftime('%Y-%m-%d %H:%M')}"
ts_time = f"{datetime.now().strftime('%H:%M:%S')}"
print(f"{ts_time} starting {os.path.basename(__file__)}")
import time
start_time = time.time()

from dotenv import load_dotenv
load_dotenv()
USER = os.getenv("USER")

import sys
sys.path.append(f"/Users/{USER}/Python/indeXee")

import my_utils
import grist_PE

# get script name
import sys
loc = f"{sys.argv[0][18:-3]}"

# get line numbers
from inspect import currentframe
def ln():
    """
    print line numbers with f"{ln()}"
    """
    cf = currentframe()
    return cf.f_back.f_lineno

import pprint
pp = pprint.PrettyPrinter(indent=4)

count = 0
count_row = 0

test = True
v = True # verbose mode

print(f"{os.path.basename(__file__)} boilerplate loaded -----------\n")
####################
# import Clipper Queue from Notion to Grist

existing_domains = [x.domain for x in grist_PE.Clipper.fetch_table('Master')]

lu = []

from dotenv import load_dotenv
load_dotenv()

import json
import requests

# To pretty print results list
import pprint
pp = pprint.PrettyPrinter(indent=4)

## Define Published Date as yesterday so library updates do not show up as Featured article
from datetime import date
from datetime import timedelta
today = date.today()
yesterday = today - timedelta(days = 1)
publish_date = f"{yesterday.strftime('%Y-%m-%d')}"

# Notion access
token = os.getenv("NOTION_TOKEN") # generate at https://www.notion.so/my-integrations + ensure all access is granted
database_id = os.getenv("NOTION_DATABASE_ID_CLIPPER") # first variable in database URL, ie before ?

headers = {
    "Authorization": "Bearer " + token,
    "Content-Type": "application/json",
    "Notion-Version": "2021-05-13"
}

url = f'https://api.notion.com/v1/databases/{database_id}/query'

r = requests.post(url, headers={
    "Authorization": f"Bearer {token}",
    "Notion-Version": "2021-08-16"
    })

result_dict = r.json()
if v:
    print(f"\n#{ln()} {result_dict=}\n")
results = result_dict['results']

# for analysis of results structure
if v:
    print(f"results: \n")
    pp.pprint(results) 
    print()

from datetime import datetime
today = datetime.now()
date_today = today.strftime('%Y-%m-%d')

output_rows = ''

count_records = 0

list_missing = []

name_summary_split = [
    '|',
    ':',
    '-',
    'ยท',
    ]

for record in results:
    if v:
        print(f"\n{ln()} record:\n")
        pp.pprint(record)
    count_records += 1

    timestamp = record['created_time'][:10] # only 10 first chars for YYYY-MM-DD format

    # below: 2nd field in 'properties' field will depend on column headers in database
    url = record['properties']['URL']['url']

    domain = my_utils.domain_from_url(url)
    if domain not in existing_domains:

        # name

        title = ''
        name = record['properties']['Name']['title'][0]['plain_text'] # some need deep navigation / nested fields
        for split_char in name_summary_split:
            if split_char in name:
                parts = name.split(split_char)
                name = parts[1].strip()
                title = parts[0].strip()

        slug = my_utils.slug_from_url(url)

        lu.append(
                    {   
                        'url': my_utils.clean_url(url),
                        'raw_url': url,
                        'domain': domain,
                        'slug': slug,
                        'title': title,
                        'name': name,
                        }
        )

        existing_domains.append(domain)

        print(f"timestamp: {timestamp}")
        print(f"URL: {url}") 
        print(f"domain: {domain}") 
        print(f"Name: {name}") 
        print(f"Title: {title}") 
    print()

if not test:
    # Bulk update Grist
    grist_PE.Clipper.add_records('Master', lu)
    # Check new domains
    new_existing_domains = [x.domain for x in grist_PE.Clipper.fetch_table('Master')]
    # Delete from Notion
    for record in results:
        url = record['properties']['URL']['url']
        domain = my_utils.domain_from_url(url)
        if domain in new_existing_domains:
            # Delete from Notion
            record_id = record['id']
            print(f"\nDELETING {record_id} from Notion.")

            url_delete = f'https://api.notion.com/v1/blocks/{record_id}'
            request_delete = requests.delete(url_delete, headers={
                "Authorization": f"Bearer {token}",
                "Notion-Version": "2021-08-16"
                })
else:
    print(f"\nlu:\n")
    pp.pprint(lu)
    print(f"\nTEST RUN\nNothing added to Grist")


########################################################################################################

if __name__ == '__main__':
    print()
    print()
    print('-------------------------------')
    print(f"{os.path.basename(__file__)}")
    print()
    print(f"{count=}")
    print()
    print('-------------------------------')
    run_time = round((time.time() - start_time), 1)
    if run_time > 60:
        print(f'{os.path.basename(__file__)} finished in {run_time/60} minutes.')
    else:
        print(f'{os.path.basename(__file__)} finished in {run_time}s.')
    print()

links

social