Skip to content

RECURSIVE DIRECTORY TRAVERSAL - Using os.walk()

Python
#!/usr/bin/env python3
"""
RECURSIVE DIRECTORY TRAVERSAL - Using os.walk()
Demonstrates recursively walking directory trees
"""

import os
import tempfile
import shutil

print("=" * 60)
print("RECURSIVE DIRECTORY TRAVERSAL - os.walk()")
print("=" * 60)

# Work in a uniquely named scratch directory rather than a fixed
# "<tmp>/walk_demo" path. The script removes test_dir with shutil.rmtree()
# when it finishes, so a fixed name could pick up stale files from an earlier
# interrupted run (skewing the counts printed below) or delete an unrelated
# directory that happens to share the name.
temp_dir = tempfile.gettempdir()
test_dir = tempfile.mkdtemp(prefix="walk_demo_", dir=temp_dir)

# Layout to create under test_dir: a dict value is a directory holding nested
# entries; a list value is a directory containing the named files.
structure = {
    "project": {
        "src": ["main.py", "utils.py", "config.py"],
        "tests": ["test_main.py", "test_utils.py"],
        "docs": ["README.md", "API.md"],
        "data": {
            "input": ["data1.csv", "data2.csv"],
            "output": [],
        },
    },
}

def create_structure(base_path, struct):
    """Recursively create the directories and files described by *struct*.

    Args:
        base_path: Directory under which the entries are created.
        struct: Mapping of directory name to its content. A ``dict`` value is
            a nested mapping of the same shape; a ``list`` value holds the
            names of files to create inside that directory. Values of any
            other type are skipped and nothing is created for them.

    Each file is written as UTF-8 text containing ``Content of <filename>``
    followed by a newline. Existing directories are reused and existing files
    are overwritten.
    """
    for name, content in struct.items():
        if not isinstance(content, (dict, list)):
            continue

        # Both supported content kinds describe a directory, so create it once.
        path = os.path.join(base_path, name)
        os.makedirs(path, exist_ok=True)

        if isinstance(content, dict):
            create_structure(path, content)
            continue

        for filename in content:
            filepath = os.path.join(path, filename)
            # Explicit encoding: open()'s default depends on the locale, which
            # can fail or vary when a file name contains non-ASCII characters.
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(f"Content of {filename}\n")

os.makedirs(test_dir, exist_ok=True)
create_structure(test_dir, structure)

print(f"Created test directory structure: {test_dir}\n")

# Example 1: Basic os.walk()
# Prints an indented outline of the tree, showing at most three files per
# directory.
print("1. Basic os.walk() - Iterate All Directories")
print("-" * 40)
for root, dirs, files in os.walk(test_dir):
    # Derive the depth from the path relative to test_dir. Stripping the
    # prefix with str.replace() would remove every occurrence of the test_dir
    # text, not only the leading one; relpath() is also what the other
    # examples in this file use.
    rel_root = os.path.relpath(root, test_dir)
    level = 0 if rel_root == os.curdir else rel_root.count(os.sep) + 1
    indent = ' ' * 2 * level
    print(f"{indent}{os.path.basename(root)}/")
    sub_indent = ' ' * 2 * (level + 1)
    for filename in files[:3]:  # Limit output
        print(f"{sub_indent}{filename}")
    if len(files) > 3:
        print(f"{sub_indent}... and {len(files) - 3} more")

# Example 2: Tally every file and subdirectory below test_dir
print("\n2. Count All Files Recursively")
print("-" * 40)

# Materialise the walk once, then total the per-directory counts.
walk_entries = list(os.walk(test_dir))
total_files = sum(len(names) for _, _, names in walk_entries)
total_dirs = sum(len(subdirs) for _, subdirs, _ in walk_entries)

print(f"  Total directories: {total_dirs}")
print(f"  Total files: {total_files}")

# Example 3: Collect every *.py file, as a path relative to test_dir
print("\n3. Find All .py Files")
print("-" * 40)

py_files = [
    os.path.relpath(os.path.join(folder, name), test_dir)
    for folder, _subdirs, names in os.walk(test_dir)
    for name in names
    if name.endswith('.py')
]

print(f"  Found {len(py_files)} Python files:")
for relative_name in py_files:
    print(f"    {relative_name}")

# Example 4: Add up the size of every file in the tree
print("\n4. Calculate Total Directory Size")
print("-" * 40)

total_size = sum(
    os.path.getsize(os.path.join(folder, name))
    for folder, _subdirs, names in os.walk(test_dir)
    for name in names
)
size_in_kb = total_size / 1024

print(f"  Total size: {total_size} bytes ({size_in_kb:.2f} KB)")

# Example 5: List directories containing neither files nor subdirectories
print("\n5. Find Empty Directories")
print("-" * 40)

empty_dirs = [
    os.path.relpath(folder, test_dir)
    for folder, subdirs, names in os.walk(test_dir)
    if not (subdirs or names)
]

if not empty_dirs:
    print("  No empty directories found")
else:
    print(f"  Found {len(empty_dirs)} empty directories:")
    for vacant in empty_dirs:
        print(f"    {vacant}/")

# Example 6: Top-down vs bottom-up
# Shows the first few directories yielded by each traversal order.
print("\n6. Top-Down (default) vs Bottom-Up Traversal")
print("-" * 40)

# Only a short preview is printed, so stop each walk once it has been shown
# instead of traversing the rest of the tree for nothing.
preview_limit = 3

print("  Top-down (topdown=True):")
for count, (root, dirs, files) in enumerate(os.walk(test_dir, topdown=True), start=1):
    if count > preview_limit:
        break
    # relpath() returns '.' for test_dir itself, so no empty-string fallback
    # is needed.
    print(f"    {os.path.relpath(root, test_dir)}")

print("\n  Bottom-up (topdown=False):")
for count, (root, dirs, files) in enumerate(os.walk(test_dir, topdown=False), start=1):
    if count > preview_limit:
        break
    print(f"    {os.path.relpath(root, test_dir)}")

# Example 7: Prune the traversal by editing the directory list in place
print("\n7. Skip Directories During Walk")
print("-" * 40)

print("  Walking, but skipping 'tests' directory:")
for current, subdirs, _names in os.walk(test_dir):
    # Slice-assign so the very list os.walk() handed out is changed; in a
    # top-down walk the entries removed here are not descended into.
    subdirs[:] = [entry for entry in subdirs if entry != 'tests']

    shown = os.path.relpath(current, test_dir) or '.'
    print(f"    {shown}/")

# Example 8: Rank files by size and show the biggest ones
print("\n8. Find Largest Files")
print("-" * 40)

file_sizes = [
    (os.path.relpath(candidate, test_dir), os.path.getsize(candidate))
    for folder, _subdirs, names in os.walk(test_dir)
    for candidate in (os.path.join(folder, name) for name in names)
]

# Largest first; files of equal size keep the order in which they were found.
file_sizes = sorted(file_sizes, key=lambda entry: entry[1], reverse=True)

print("  Top 5 largest files:")
for relative_name, byte_count in file_sizes[:5]:
    print(f"    {relative_name:30s} {byte_count:>6} bytes")

# Example 9: Generate file list
# Writes a text report of every file under test_dir (name and size), grouped
# by directory. The report itself lives in test_dir and is left out of its
# own listing.
print("\n9. Generate Complete File Listing")
print("-" * 40)
listing_file = os.path.join(test_dir, "file_listing.txt")

# Explicit encoding so file names with non-ASCII characters are written the
# same way regardless of the locale's default encoding.
with open(listing_file, 'w', encoding='utf-8') as f:
    f.write("DIRECTORY LISTING\n")
    f.write("=" * 60 + "\n\n")

    for root, dirs, files in os.walk(test_dir):
        rel_root = os.path.relpath(root, test_dir)
        # Files directly in test_dir are listed without a section header.
        if rel_root != '.':
            f.write(f"\n{rel_root}/\n")
            f.write("-" * 40 + "\n")

        for filename in files:
            filepath = os.path.join(root, filename)
            # Skip only the report being written. Comparing the full path
            # (not just the name) keeps a same-named file in a subdirectory
            # in the listing.
            if filepath == listing_file:
                continue
            size = os.path.getsize(filepath)
            f.write(f"  {filename:30s} {size:>8} bytes\n")

print(f"  Created listing: {os.path.basename(listing_file)}")

# Example 10: Build directory tree structure
print("\n10. Build Directory Tree Dictionary")
print("-" * 40)

def build_tree(path):
    """Build a nested dictionary describing the directory at *path*.

    Args:
        path: Directory to describe.

    Returns:
        A dict ``{'name', 'type': 'directory', 'children'}`` where
        ``children`` is a list, sorted by entry name, of nested directory
        dicts of the same shape and of leaf dicts
        ``{'name', 'type': 'file', 'size'}``.

    Notes:
        - A directory that cannot be listed (``PermissionError``) is returned
          with an empty ``children`` list.
        - Symbolic links to directories are recorded as leaf entries instead
          of being followed, matching ``os.walk()``'s default and avoiding
          runaway recursion on link cycles.
        - A leaf whose size cannot be read (for example a dangling symlink)
          is reported with size 0.
    """
    # basename() is '' when path ends with a separator; fall back to the
    # last real component in that case.
    name = os.path.basename(path) or os.path.basename(os.path.dirname(path))
    tree = {'name': name, 'type': 'directory', 'children': []}

    try:
        items = os.listdir(path)
    except PermissionError:
        return tree

    # os.listdir() returns entries in arbitrary order; sort for stable output.
    for item in sorted(items):
        item_path = os.path.join(path, item)
        if os.path.isdir(item_path) and not os.path.islink(item_path):
            tree['children'].append(build_tree(item_path))
            continue

        try:
            size = os.path.getsize(item_path)
        except OSError:
            # Best effort, like the PermissionError case above: keep the
            # entry even though its size is unavailable.
            size = 0
        tree['children'].append({
            'name': item,
            'type': 'file',
            'size': size,
        })

    return tree

# Describe the 'project' subtree and report its top-level shape.
project_root = os.path.join(test_dir, "project")
tree = build_tree(project_root)
for summary_line in (
    "  Tree structure built for 'project' directory",
    f"  Root: {tree['name']}",
    f"  Children: {len(tree['children'])}",
):
    print(summary_line)

# Cleanup: remove the whole scratch tree created for the demo.
shutil.rmtree(test_dir)

print("\n" + "=" * 60)
print("Key Points:")
for key_point in (
    "  - os.walk() yields (root, dirs, files) tuples",
    "  - Recursively traverses directory tree",
    "  - Can modify dirs list to skip directories",
    "  - topdown=True (default) processes parents first",
    "  - Perfect for finding files, calculating sizes",
):
    print(key_point)
print("=" * 60)