Skip to content

COMPLETE WEB SCRAPER - Full scraping example

Python
#!/usr/bin/env python3
"""COMPLETE WEB SCRAPER - Full scraping example"""
from bs4 import BeautifulSoup
# Sample document fed to ArticleScraper further down in this script: a single
# element with class "article" that contains a title, an author, a date, a
# content block and two "tag" spans.
html = """
<html><body>
<div class="article">
    <h2 class="title">Article Title</h2>
    <span class="author">John Doe</span>
    <span class="date">2024-01-15</span>
    <div class="content">Article content goes here.</div>
    <div class="tags">
        <span class="tag">python</span>
        <span class="tag">web-scraping</span>
    </div>
</div>
</body></html>
"""
class ArticleScraper:
    """Extract the fields of one article from an HTML document.

    The markup is parsed once in the constructor. ``extract_article`` then
    reads the first element carrying the ``article`` CSS class and pulls the
    title, author, date, content and tags out of it.
    """

    # CSS class names of the single-valued text fields, in output order.
    _TEXT_FIELDS = ('title', 'author', 'date', 'content')

    def __init__(self, html):
        """Parse *html* with the ``html.parser`` backend.

        Args:
            html: Markup to parse; passed unchanged to ``BeautifulSoup``.
        """
        self.soup = BeautifulSoup(html, 'html.parser')

    @staticmethod
    def _text_of(node):
        """Return the text held by *node*, or ``None`` if there is none.

        Args:
            node: An element returned by ``find``/``find_all``, or ``None``
                when the lookup matched nothing.

        Returns:
            ``node.string`` when it is available; otherwise the concatenated
            text of all descendants, or ``None`` if that text is empty or
            *node* itself is ``None``.
        """
        if node is None:
            # find() matched nothing; report the field as absent instead of
            # failing with AttributeError on ``None.string``.
            return None
        text = node.string
        if text is None:
            # .string is None when the element has several children (for
            # example inline markup inside the content); get_text() still
            # recovers the text. An empty result is reported as None.
            text = node.get_text() or None
        return text

    def extract_article(self):
        """Collect the article's fields into a dictionary.

        Returns:
            A dict with the keys ``title``, ``author``, ``date`` and
            ``content`` (each a string, or ``None`` when the element is
            missing or empty) and ``tags`` (a list with one entry per
            element of class ``tag``). If the document has no element of
            class ``article``, every text field is ``None`` and ``tags`` is
            an empty list.
        """
        article = self.soup.find(class_='article')
        if article is None:
            record = dict.fromkeys(self._TEXT_FIELDS)
            record['tags'] = []
            return record

        record = {
            field: self._text_of(article.find(class_=field))
            for field in self._TEXT_FIELDS
        }
        record['tags'] = [
            self._text_of(tag) for tag in article.find_all(class_='tag')
        ]
        return record
# Demo: scrape the sample document and print each extracted field on its own
# indented line, with the field name capitalized.
scraper = ArticleScraper(html)
article = scraper.extract_article()

print("Article Scraper:")
for key in article:
    value = article[key]
    print(f"  {key.capitalize()}: {value}")