blackadad/paper-scraper

patent number to PDF

Opened this issue · 0 comments

import re
import requests

ID = "WO2010019397A2"
url = f"https://patents.google.com/patent/{ID}/"

# Seconds to wait for the server before giving up. Without an explicit
# timeout, requests will wait indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30


def find_pdf_url(html, patent_id):
    """Return the PDF link for *patent_id* found in *html*, or None.

    Searches *html* for the first URL on patentimages.storage.googleapis.com
    whose final path component is ``<patent_id>.pdf``.

    Args:
        html: Text of the page to search.
        patent_id: Patent number as it appears in the PDF file name,
            e.g. ``"WO2010019397A2"``. It is regex-escaped before use.

    Returns:
        The full matched URL as a string, or ``None`` when no link matches.
    """
    pattern = (
        r"https://patentimages\.storage\.googleapis\.com/([\w/]+)/"
        + re.escape(patent_id)
        + r"\.pdf"
    )
    match = re.search(pattern, html)
    return match.group(0) if match else None


def main():
    """Fetch the patent page at ``url`` and print the PDF link for ``ID``.

    Prints the extracted URL on success, "No match found" when the page
    contains no matching link, and a failure message when the page cannot
    be fetched (network error or non-200 status code).
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Connection errors and timeouts raise instead of returning a
        # response; report them rather than crashing with a traceback.
        print(f"Failed to fetch the web page: {exc}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch the web page. Status code: {response.status_code}")
        return

    extracted_url = find_pdf_url(response.text, ID)
    if extracted_url is None:
        print("No match found")
    else:
        print(extracted_url)


if __name__ == "__main__":
    main()