# RECURSIVE DIRECTORY TRAVERSAL - Using os.walk()
# Python
#!/usr/bin/env python3
"""
RECURSIVE DIRECTORY TRAVERSAL - Using os.walk()
Demonstrates recursively walking directory trees
"""
import os
import tempfile
import shutil
print("=" * 60)
print("RECURSIVE DIRECTORY TRAVERSAL - os.walk()")
print("=" * 60)
temp_dir = tempfile.gettempdir()
# Use a freshly created, uniquely named directory instead of a fixed
# "walk_demo" path: a fixed name inside the shared temp directory can collide
# with a concurrent run or with leftovers of an earlier crashed run (which
# would skew every count below), and the final shutil.rmtree() would then
# delete a directory this script did not create.
test_dir = tempfile.mkdtemp(prefix="walk_demo_", dir=temp_dir)
# Blueprint of the demo tree consumed by create_structure():
#   - a dict value is a directory that contains further directories
#   - a list value is a directory that contains the listed files
#     (an empty list therefore describes an empty directory)
structure = {
    "project": {
        "src": ["main.py", "utils.py", "config.py"],
        "tests": ["test_main.py", "test_utils.py"],
        "docs": ["README.md", "API.md"],
        "data": {
            "input": ["data1.csv", "data2.csv"],
            "output": [],
        },
    },
}
def create_structure(base_path, struct):
    """Recursively create a directory tree on disk from a nested mapping.

    Each key of *struct* names a directory that is created beneath
    *base_path* (existing directories are reused).  The value decides what
    goes inside it:

    * ``dict`` - nested sub-directories, processed recursively;
    * ``list`` - names of files to create in that directory; an empty list
      yields an empty directory.

    Values of any other type are skipped and no directory is created for
    them.  Every file is (over)written with the single line
    ``Content of <filename>``.
    """
    for name, content in struct.items():
        if not isinstance(content, (dict, list)):
            continue
        path = os.path.join(base_path, name)
        os.makedirs(path, exist_ok=True)
        if isinstance(content, dict):
            create_structure(path, content)
            continue
        for filename in content:
            filepath = os.path.join(path, filename)
            # The file name is echoed into the file body, so pin the encoding:
            # with the locale default a non-ASCII name can raise
            # UnicodeEncodeError or produce platform-dependent bytes.
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(f"Content of {filename}\n")
# Materialise the demo tree on disk; every example below walks test_dir.
# The root is created explicitly first, then populated from `structure`.
os.makedirs(test_dir, exist_ok=True)
create_structure(test_dir, structure)
print(f"Created test directory structure: {test_dir}\n")
# Example 1: Basic os.walk()
# Print the tree as an indented outline, showing at most three files per
# directory followed by a "... and N more" line for the remainder.
print("1. Basic os.walk() - Iterate All Directories")
print("-" * 40)
for current, _subdirs, names in os.walk(test_dir):
    # Depth = number of path separators left once the walk root is removed.
    depth = current.replace(test_dir, '').count(os.sep)
    print(f"{' ' * 2 * depth}{os.path.basename(current)}/")
    child_pad = ' ' * 2 * (depth + 1)
    for name in names[:3]:  # Limit output
        print(f"{child_pad}{name}")
    hidden = len(names) - 3
    if hidden > 0:
        print(f"{child_pad}... and {hidden} more")
# Example 2: Count all files
# Each walk step reports its immediate sub-directories and files, so summing
# the per-step lengths counts everything below test_dir (the root directory
# itself is not included in the directory total).
print("\n2. Count All Files Recursively")
print("-" * 40)
per_step = [(len(subdirs), len(names)) for _, subdirs, names in os.walk(test_dir)]
total_dirs = sum(n_dirs for n_dirs, _ in per_step)
total_files = sum(n_files for _, n_files in per_step)
print(f" Total directories: {total_dirs}")
print(f" Total files: {total_files}")
# Example 3: Find files by extension
# Collect every *.py file, expressed relative to test_dir, in walk order.
print("\n3. Find All .py Files")
print("-" * 40)
py_files = [
    os.path.relpath(os.path.join(folder, name), test_dir)
    for folder, _subdirs, names in os.walk(test_dir)
    for name in names
    if name.endswith('.py')
]
print(f" Found {len(py_files)} Python files:")
for match in py_files:
    print(f" {match}")
# Example 4: Calculate total size
# Add up the size in bytes of every file found below test_dir.
print("\n4. Calculate Total Directory Size")
print("-" * 40)
total_size = sum(
    os.path.getsize(os.path.join(folder, name))
    for folder, _subdirs, names in os.walk(test_dir)
    for name in names
)
print(f" Total size: {total_size} bytes ({total_size / 1024:.2f} KB)")
# Example 5: Find empty directories
# A directory is empty when the walk reports neither sub-directories nor files.
print("\n5. Find Empty Directories")
print("-" * 40)
empty_dirs = [
    os.path.relpath(folder, test_dir)
    for folder, subdirs, names in os.walk(test_dir)
    if not (subdirs or names)
]
if not empty_dirs:
    print(" No empty directories found")
else:
    print(f" Found {len(empty_dirs)} empty directories:")
    for lonely in empty_dirs:
        print(f" {lonely}/")
# Example 6: Top-down vs bottom-up
# Run the same walk in both orders and show the first three directories each
# order visits; the walk is still consumed to the end in both cases.
print("\n6. Top-Down (default) vs Bottom-Up Traversal")
print("-" * 40)
for heading, parents_first in (
    (" Top-down (topdown=True):", True),
    ("\n Bottom-up (topdown=False):", False),
):
    print(heading)
    count = 0
    for visited, _subdirs, _names in os.walk(test_dir, topdown=parents_first):
        count += 1
        if count > 3:
            continue
        print(f" {os.path.relpath(visited, test_dir) or '.'}")
# Example 7: Filter directories during walk
print("\n7. Skip Directories During Walk")
print("-" * 40)
print(" Walking, but skipping 'tests' directory:")
for current, subdirs, _names in os.walk(test_dir):
    # Prune in place: removing the entry from the yielded list is what keeps
    # the (top-down) walk from descending into 'tests'.
    try:
        subdirs.remove('tests')
    except ValueError:
        pass  # no 'tests' directory at this level
    rel_path = os.path.relpath(current, test_dir)
    print(f" {rel_path or '.'}/")
# Example 8: Find largest files
# Gather (relative path, size) pairs for every file, then rank by size.
print("\n8. Find Largest Files")
print("-" * 40)
all_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(test_dir)
    for name in names
]
file_sizes = sorted(
    ((os.path.relpath(full, test_dir), os.path.getsize(full)) for full in all_paths),
    key=lambda entry: entry[1],
    reverse=True,
)
print(" Top 5 largest files:")
for rel, nbytes in file_sizes[:5]:
    print(f" {rel:30s} {nbytes:>6} bytes")
# Example 9: Generate file list
# Write a plain-text report of the tree into test_dir itself.  The report
# file already exists while the walk runs, so it is skipped by name.
print("\n9. Generate Complete File Listing")
print("-" * 40)
listing_file = os.path.join(test_dir, "file_listing.txt")
with open(listing_file, 'w') as report:
    report.write("DIRECTORY LISTING\n" + "=" * 60 + "\n\n")
    for folder, _subdirs, names in os.walk(test_dir):
        rel_folder = os.path.relpath(folder, test_dir)
        if rel_folder != '.':
            # Section header for every directory except the walk root.
            report.write(f"\n{rel_folder}/\n" + "-" * 40 + "\n")
        for name in names:
            if name == "file_listing.txt":
                continue
            nbytes = os.path.getsize(os.path.join(folder, name))
            report.write(f" {name:30s} {nbytes:>8} bytes\n")
print(" Created listing: file_listing.txt")
# Example 10: Build directory tree structure
# Heading and separator rule for the example, emitted with a single call.
print("\n10. Build Directory Tree Dictionary", "-" * 40, sep="\n")
def build_tree(path):
    """Build a nested dictionary describing the directory at *path*.

    Returns a node ``{'name': str, 'type': 'directory', 'children': list}``.
    Each child is either another directory node or a file node
    ``{'name': str, 'type': 'file', 'size': int | None}``.

    * ``name`` of the root is the last component of *path*; a trailing path
      separator is ignored.
    * Children are listed in sorted name order so the result is
      deterministic (``os.listdir`` order is arbitrary).
    * A directory that cannot be listed (``PermissionError``) is returned
      with an empty ``children`` list.
    * Symbolic links to directories are recorded as directory nodes with no
      children and are never followed, so a link cycle cannot cause
      unbounded recursion.
    * ``size`` is ``None`` when the size cannot be read, e.g. for a broken
      symbolic link.
    """
    # normpath drops a trailing separator; basename('dir/') would be ''.
    root_name = os.path.basename(os.path.normpath(path))
    tree = {'name': root_name, 'type': 'directory', 'children': []}
    try:
        items = sorted(os.listdir(path))
    except PermissionError:
        return tree
    for item in items:
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path):
            if os.path.islink(item_path):
                # isdir() follows links; descending here could loop forever.
                child = {'name': item, 'type': 'directory', 'children': []}
            else:
                child = build_tree(item_path)
        else:
            try:
                size = os.path.getsize(item_path)
            except OSError:
                # Broken symlink or entry removed since listdir().
                size = None
            child = {'name': item, 'type': 'file', 'size': size}
        tree['children'].append(child)
    return tree
# Build the tree for the 'project' sub-directory and summarise its top level.
project_root = os.path.join(test_dir, "project")
tree = build_tree(project_root)
child_count = len(tree['children'])
print(" Tree structure built for 'project' directory")
print(f" Root: {tree['name']}")
print(f" Children: {child_count}")
# Cleanup
# Remove the whole demo tree, including the listing file written in Example 9.
shutil.rmtree(test_dir)

# Closing summary, framed by the same rule as the opening banner.
banner = "=" * 60
print("\n" + banner)
print("Key Points:")
for point in (
    " - os.walk() yields (root, dirs, files) tuples",
    " - Recursively traverses directory tree",
    " - Can modify dirs list to skip directories",
    " - topdown=True (default) processes parents first",
    " - Perfect for finding files, calculating sizes",
):
    print(point)
print(banner)