04 Sep 2022
Requires downloading the Chrome driver (ChromeDriver) from the
"ChromeDriver - WebDriver for Chrome - Downloads" page (google.com).
Important: ensure that the driver version matches the version of Chrome installed locally.
from selenium.webdriver import Chrome, ChromeOptions
from bs4 import BeautifulSoup
# Module-level Chrome options passed to every driver created by
# metadata_from_url; the '--headless' argument is forwarded to Chrome.
chrome_options = ChromeOptions()
chrome_options.add_argument('--headless')
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
def metadata_from_url(url, driver_path='/path/to/chromedriver'):
    """Return the ``<meta property=...>`` entries of the page at *url*.

    The page is loaded in a Chrome instance configured with the module-level
    ``chrome_options``, and the resulting page source is parsed with
    BeautifulSoup.

    Args:
        url: Address of the page to load.
        driver_path: Filesystem path of the ChromeDriver executable. Defaults
            to the placeholder path used previously; point it at the locally
            installed driver.

    Returns:
        A list of single-entry dicts, one per ``<meta>`` tag that has a
        non-empty ``property`` attribute, each mapping that ``property``
        value to the tag's ``content`` attribute (``None`` if the tag has
        no ``content``).

    Raises:
        Any exception raised by Selenium while starting the browser or
        loading the page is propagated unchanged.
    """
    service = Service(driver_path)
    web = Chrome(service=service, options=chrome_options)
    try:
        web.get(url)
        html = web.page_source
    finally:
        # Always shut the browser down, even when loading the page fails;
        # otherwise the Chrome/chromedriver processes are left running.
        web.quit()

    soup = BeautifulSoup(html, features='html.parser')
    return [
        {tag.get('property'): tag.get('content')}
        for tag in soup.find_all('meta')
        if tag.get('property')
    ]
# Example usage: each element of the returned list is a one-entry dict
# mapping a <meta> tag's `property` value to its `content` value.
result = metadata_from_url('https://www.website/url/') # list of dicts