SIMPLE WEB CRAWLER - Following links
#!/usr/bin/env python3
"""SIMPLE WEB CRAWLER - Following links"""
import urllib.request
import urllib.parse
import re
print("Simple Web Crawler:")
def get_links(url):
    """Download a page and return every href value found in its HTML."""
    try:
        with urllib.request.urlopen(url, timeout=5) as response:
            html = response.read().decode('utf-8', errors='ignore')
        # Naive link extraction: capture the value of each href="..." attribute.
        links = re.findall(r'href=["\']([^"\']+)["\']', html)
        return links
    except Exception:
        # Treat network errors, timeouts, and bad responses as "no links".
        return []
# httpbin's /links/{n} endpoint serves a small test page of links to crawl.
start_url = "http://httpbin.org/links/5"
print(f" Starting at: {start_url}")
links = get_links(start_url)
print(f" Found {len(links)} links:")
# Show at most the first ten links found on the page.
for i, link in enumerate(links[:10], 1):
    print(f" {i}. {link}")