Skip to content

SEARCHING WITH REGEX - Pattern matching

Python
#!/usr/bin/env python3
"""SEARCHING WITH REGEX - Pattern matching"""
from bs4 import BeautifulSoup
import re
# Sample document: two relative ".html" links, one external link, and two
# paragraphs whose CSS classes share the "text-" prefix.
html = """
<html><body>
<a href="page1.html">Page 1</a>
<a href="page2.html">Page 2</a>
<a href="https://external.com">External</a>
<p class="text-primary">Text 1</p>
<p class="text-secondary">Text 2</p>
</body></html>
"""
soup = BeautifulSoup(html, 'html.parser')

print("Regex Searching:")

# A compiled pattern used as an attribute filter keeps only the tags whose
# attribute value the pattern matches. The "$" anchor restricts this one to
# hrefs ending in ".html", which leaves out the external link.
html_suffix = re.compile(r'\.html$')
html_links = soup.find_all('a', href=html_suffix)
link_targets = [anchor['href'] for anchor in html_links]
print(f"  .html links: {link_targets}")

# No tag name is given, so any element qualifies as long as its class
# attribute matches the "^text-" prefix pattern.
text_prefix = re.compile(r'^text-')
text_classes = soup.find_all(class_=text_prefix)
# Tag.get('class') is printed as-is for each matched tag.
class_values = [tag.get('class') for tag in text_classes]
print(f"  text-* classes: {class_values}")