29 Dec 2022
I use Notion to clip URLs for reference but use Grist Grist | The Evolution of Spreadsheets to manage my data.
Here is a script to import one of my "Notion Queues" to Grist.
Future improvement would be to build a Chrome extension that adds straight to Grist instead.
from datetime import datetime
import os
print("----------")
ts_file = f"{datetime.now().strftime('%y%m%d-%H%M')}"
ts_db = f"{datetime.now().strftime('%Y-%m-%d %H:%M')}"
ts_time = f"{datetime.now().strftime('%H:%M:%S')}"
print(f"{ts_time} starting {os.path.basename(__file__)}")
import time
start_time = time.time()
from dotenv import load_dotenv
load_dotenv()
USER = os.getenv("USER")
import sys
sys.path.append(f"/Users/{USER}/Python/indeXee")
import my_utils
import grist_PE
# get script name
import sys
loc = f"{sys.argv[0][18:-3]}"
# get line numbers
from inspect import currentframe
def ln():
"""
print line numbers with f"{ln()}"
"""
cf = currentframe()
return cf.f_back.f_lineno
import pprint
pp = pprint.PrettyPrinter(indent=4)
count = 0
count_row = 0
test = True
v = True # verbose mode
print(f"{os.path.basename(__file__)} boilerplate loaded -----------\n")
####################
# import Clipper Queue from Notion to Grist
existing_domains = [x.domain for x in grist_PE.Clipper.fetch_table('Master')]
lu = []
from dotenv import load_dotenv
load_dotenv()
import json
import requests
# To pretty print results list
import pprint
pp = pprint.PrettyPrinter(indent=4)
## Define Published Date as yesterday so library updates do not show up as Featured article
from datetime import date
from datetime import timedelta
today = date.today()
yesterday = today - timedelta(days = 1)
publish_date = f"{yesterday.strftime('%Y-%m-%d')}"
# Notion access
token = os.getenv("NOTION_TOKEN") # generate at https://www.notion.so/my-integrations + ensure all access is granted
database_id = os.getenv("NOTION_DATABASE_ID_CLIPPER") # first variable in database URL, ie before ?
headers = {
"Authorization": "Bearer " + token,
"Content-Type": "application/json",
"Notion-Version": "2021-05-13"
}
url = f'https://api.notion.com/v1/databases/{database_id}/query'
r = requests.post(url, headers={
"Authorization": f"Bearer {token}",
"Notion-Version": "2021-08-16"
})
result_dict = r.json()
if v:
print(f"\n#{ln()} {result_dict=}\n")
results = result_dict['results']
# for analysis of results structure
if v:
print(f"results: \n")
pp.pprint(results)
print()
from datetime import datetime
today = datetime.now()
date_today = today.strftime('%Y-%m-%d')
output_rows = ''
count_records = 0
list_missing = []
name_summary_split = [
'|',
':',
'-',
'ยท',
]
for record in results:
if v:
print(f"\n{ln()} record:\n")
pp.pprint(record)
count_records += 1
timestamp = record['created_time'][:10] # only 10 first chars for YYYY-MM-DD format
# below: 2nd field in 'properties' field will depend on column headers in database
url = record['properties']['URL']['url']
domain = my_utils.domain_from_url(url)
if domain not in existing_domains:
# name
title = ''
name = record['properties']['Name']['title'][0]['plain_text'] # some need deep navigation / nested fields
for split_char in name_summary_split:
if split_char in name:
parts = name.split(split_char)
name = parts[1].strip()
title = parts[0].strip()
slug = my_utils.slug_from_url(url)
lu.append(
{
'url': my_utils.clean_url(url),
'raw_url': url,
'domain': domain,
'slug': slug,
'title': title,
'name': name,
}
)
existing_domains.append(domain)
print(f"timestamp: {timestamp}")
print(f"URL: {url}")
print(f"domain: {domain}")
print(f"Name: {name}")
print(f"Title: {title}")
print()
if not test:
# Bulk update Grist
grist_PE.Clipper.add_records('Master', lu)
# Check new domains
new_existing_domains = [x.domain for x in grist_PE.Clipper.fetch_table('Master')]
# Delete from Notion
for record in results:
url = record['properties']['URL']['url']
domain = my_utils.domain_from_url(url)
if domain in new_existing_domains:
# Delete from Notion
record_id = record['id']
print(f"\nDELETING {record_id} from Notion.")
url_delete = f'https://api.notion.com/v1/blocks/{record_id}'
request_delete = requests.delete(url_delete, headers={
"Authorization": f"Bearer {token}",
"Notion-Version": "2021-08-16"
})
else:
print(f"\nlu:\n")
pp.pprint(lu)
print(f"\nTEST RUN\nNothing added to Grist")
########################################################################################################
if __name__ == '__main__':
print()
print()
print('-------------------------------')
print(f"{os.path.basename(__file__)}")
print()
print(f"{count=}")
print()
print('-------------------------------')
run_time = round((time.time() - start_time), 1)
if run_time > 60:
print(f'{os.path.basename(__file__)} finished in {run_time/60} minutes.')
else:
print(f'{os.path.basename(__file__)} finished in {run_time}s.')
print()