Skip to content

BASIC BEAUTIFULSOUP USAGE - Creating and parsing HTML

Python
#!/usr/bin/env python3
"""
BASIC BEAUTIFULSOUP USAGE - Creating and parsing HTML
Demonstrates basic BeautifulSoup initialization and parsing
"""

from bs4 import BeautifulSoup

# Opening banner: the demo title framed by two 60-character rules.
print("=" * 60, "BASIC BEAUTIFULSOUP USAGE", "=" * 60, sep="\n")

# Sample HTML: a small document (title, one heading, two paragraphs) used as
# parser input. Written as adjacent literals, one per line of markup, so the
# leading and trailing newlines are explicit.
html = (
    "\n"
    "<html>\n"
    "<head><title>Sample Page</title></head>\n"
    "<body>\n"
    "    <h1>Welcome</h1>\n"
    '    <p class="intro">This is a paragraph.</p>\n'
    '    <p id="main">Another paragraph with <b>bold</b> text.</p>\n'
    "</body>\n"
    "</html>\n"
)

# Example 1: Create BeautifulSoup object
# The section header is printed first, then the sample markup is parsed with
# the 'html.parser' parser and the resulting object's type is shown.
print("\n1. Creating BeautifulSoup Object", "-" * 40, sep="\n")
soup = BeautifulSoup(html, "html.parser")
print("  Type:", type(soup))
print("  Parser: html.parser")

# Example 2: Get title
# Show the <title> tag itself, then the string it contains.
print("\n2. Accessing Title", "-" * 40, sep="\n")
print("  Title tag:", soup.title)
print("  Title text:", soup.title.string)

# Example 3: Get first tag
# Attribute access on the soup (soup.h1, soup.p) is used to fetch a tag by name.
print("\n3. Getting First Tag", "-" * 40, sep="\n")
print("  First h1:", soup.h1)
print("  First p:", soup.p)

# Example 4: Tag name and attributes
# Inspect the tag returned by soup.p: its name, its attribute mapping, and
# the value looked up under the 'class' key.
print("\n4. Tag Properties", "-" * 40, sep="\n")
first_p = soup.p
print("  Tag name:", first_p.name)
print("  Attributes:", first_p.attrs)
print("  Class:", first_p.get("class"))

# Example 5: Get text content
print("\n5. Extracting Text")
print("-" * 40)
# Join the stripped text nodes with a single space. With strip=True and the
# default empty separator, neighbouring strings are fused together (for the
# sample markup: "...paragraph withboldtext."), which misrepresents the text.
body_text = soup.body.get_text(separator=" ", strip=True)
# Show at most 50 characters, and only add the ellipsis when the text was
# actually cut so a short body is not displayed as if it were truncated.
preview = body_text if len(body_text) <= 50 else body_text[:50] + "..."
print(f"  Body text: {preview}")

# Closing summary: a blank line, a rule, the key takeaways, and a final rule,
# emitted as one newline-separated print call.
print(
    "\n" + "=" * 60,
    "Key Points:",
    "  - BeautifulSoup(html, 'html.parser')",
    "  - Access tags directly: soup.title",
    "  - get_text() extracts all text",
    "  - attrs contains tag attributes",
    "=" * 60,
    sep="\n",
)