Skip to content

EXTRACTING TEXT - get_text() and string methods

Python
#!/usr/bin/env python3
"""EXTRACTING TEXT - get_text() and string methods

Parse a small inline HTML document with Beautiful Soup and print the text
of a few elements, once via ``get_text()`` with different arguments and
once via the ``.string`` attribute.
"""
from bs4 import BeautifulSoup

# Sample document: a <div> holding a heading and a paragraph that contains
# nested inline tags (<b>, <i>).
html = """
<html><body>
<div>
    <h1>Title</h1>
    <p>Paragraph with <b>bold</b> and <i>italic</i> text.</p>
</div>
</body></html>
"""
soup = BeautifulSoup(html, 'html.parser')

print("Extracting Text:")
div = soup.find('div')

# Default call: the text is returned as-is, including the newlines and
# indentation that sit between the child tags in the markup above.
print(f"  get_text(): {div.get_text()}")

# Same element with strip=True.
print(f"  get_text(strip=True): {div.get_text(strip=True)}")

# The label spells out BOTH arguments so the printed line matches the call
# that actually produced it (the call passes strip=True as well).
print(
    "  get_text(separator=' ', strip=True): "
    f"{div.get_text(separator=' ', strip=True)}"
)

# <h1> has a single text child, so .string is used to read it directly.
h1 = soup.find('h1')
print(f"  h1.string: {h1.string}")

# <p> mixes plain text with <b>/<i> children, so get_text() is used here.
p = soup.find('p')
print(f"  p text: {p.get_text()}")