A basic web crawler that retrieves and extracts links from web pages, starting with a specified URL.
- Python 3.x
- requests library
- beautifulsoup4 library
To install the required libraries, run:
pip install requests beautifulsoup4
from web_crawler import web_crawler
start_url = "https://example.com"
crawled_pages = web_crawler(start_url, max_pages=10)
from web_crawler import web_crawler
start_url = "https://as.com"
crawled_pages = web_crawler(start_url)
print("Crawled pages:")
for page in crawled_pages:
    print(page)