BASIC BEAUTIFULSOUP USAGE - Creating and parsing HTML
Python
#!/usr/bin/env python3
"""
BASIC BEAUTIFULSOUP USAGE - Creating and parsing HTML
Demonstrates basic BeautifulSoup initialization and parsing
"""
from bs4 import BeautifulSoup
# Demo script body: parses a small in-memory HTML document with BeautifulSoup
# and prints the result of a handful of basic lookups (title, first tags,
# tag properties, extracted text), followed by a short summary.
print("=" * 60)
print("BASIC BEAUTIFULSOUP USAGE")
print("=" * 60)

# Sample HTML shared by every example below.
html = """
<html>
<head><title>Sample Page</title></head>
<body>
<h1>Welcome</h1>
<p class="intro">This is a paragraph.</p>
<p id="main">Another paragraph with <b>bold</b> text.</p>
</body>
</html>
"""

# Example 1: Create BeautifulSoup object
# The parser name is kept in one place so the value passed to BeautifulSoup
# and the value reported to the user cannot drift apart.
parser_name = 'html.parser'
print("\n1. Creating BeautifulSoup Object")
print("-" * 40)
soup = BeautifulSoup(html, parser_name)
print(f" Type: {type(soup)}")
print(f" Parser: {parser_name}")

# Example 2: Get title
print("\n2. Accessing Title")
print("-" * 40)
print(f" Title tag: {soup.title}")
print(f" Title text: {soup.title.string}")

# Example 3: Get first tag
# Attribute-style access (soup.h1, soup.p) yields the first matching tag only.
print("\n3. Getting First Tag")
print("-" * 40)
print(f" First h1: {soup.h1}")
print(f" First p: {soup.p}")

# Example 4: Tag name and attributes
print("\n4. Tag Properties")
print("-" * 40)
first_p = soup.p
print(f" Tag name: {first_p.name}")
print(f" Attributes: {first_p.attrs}")
print(f" Class: {first_p.get('class')}")

# Example 5: Get text content
# A separator is required together with strip=True: without it the stripped
# text nodes are concatenated directly ("Welcome" + "This is ..." and
# "with" + "bold" + "text." run together into unreadable output).
print("\n5. Extracting Text")
print("-" * 40)
body_text = soup.body.get_text(separator=" ", strip=True)
# Only mark the preview as truncated when something was actually cut off.
preview_limit = 50
if len(body_text) > preview_limit:
    preview = body_text[:preview_limit] + "..."
else:
    preview = body_text
print(f" Body text: {preview}")

# Summary of the techniques shown above.
print("\n" + "=" * 60)
print("Key Points:")
print(" - BeautifulSoup(html, 'html.parser')")
print(" - Access tags directly: soup.title")
print(" - get_text() extracts all text")
print(" - attrs contains tag attributes")
print("=" * 60)