15 Sep 2022
from selenium.webdriver import Chrome, ChromeOptions
from bs4 import BeautifulSoup
# Module-level Chrome options read by metadata_from_url when it starts the
# browser; the '--headless' argument is passed through to Chrome.
chrome_options = ChromeOptions()
chrome_options.add_argument('--headless')
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def metadata_from_url(url, driver_path='/Users/xxxx/path/to/local/driver'):
    """Return the ``<meta property=...>`` tags of the page rendered at *url*.

    The page is loaded in Chrome through Selenium, configured by the
    module-level ``chrome_options``, and the resulting page source is
    parsed with BeautifulSoup's ``html.parser``.

    Args:
        url: Address of the page to load.
        driver_path: Filesystem path of the local ChromeDriver executable
            handed to Selenium's ``Service``. Defaults to the placeholder
            path this function originally hard-coded.

    Returns:
        A list with one single-entry dict per ``<meta>`` tag that has a
        non-empty ``property`` attribute, mapping that ``property`` value
        to whatever ``tag.get('content')`` returns for the tag.
    """
    # A driver binary has to be downloaded before this can run.
    # For Chrome, see: https://sites.google.com/chromium.org/driver/downloads
    service = Service(driver_path)
    web = Chrome(service=service, options=chrome_options)
    try:
        web.get(url)
        page_html = web.page_source
    finally:
        # Always shut the browser down, even when loading the page fails;
        # otherwise the browser session would be left running.
        web.quit()

    soup = BeautifulSoup(page_html, features='html.parser')
    return [
        {tag.get('property'): tag.get('content')}
        for tag in soup.find_all('meta')
        if tag.get('property')
    ]
# Example run: fetch the metadata for a placeholder URL. The call returns a
# list of dicts.
result = metadata_from_url('https://url-to-get-metadata-from.com')

# First dump the whole list in one go (name and value, padded by blank lines)...
print(f"\n{result=}\n")

# ...then print each dict on its own line.
for res in result:
    print(res)
ex. with