# Skip to content
#
# LINE-BY-LINE READING - Efficient processing of large files
#
# Python
#!/usr/bin/env python3
"""
LINE-BY-LINE READING - Efficient processing of large files
Demonstrates iterating through file lines without loading entire file
"""

import atexit
import os
import tempfile

# Banner announcing the demo.
print("=" * 60)
print("LINE-BY-LINE READING - Efficient File Processing")
print("=" * 60)

# Work inside a private, uniquely named scratch directory rather than writing
# fixed file names directly into the shared system temp directory. With fixed
# names the script could overwrite (and later delete) an unrelated
# "server_log.txt" or "summary.txt" that already lived there, and two runs
# started at the same time would trample each other's files.
_scratch = tempfile.TemporaryDirectory(prefix="line_reading_")
# Remove the scratch directory (and anything left inside it) at exit.
atexit.register(_scratch.cleanup)

temp_dir = _scratch.name
data_file = os.path.join(temp_dir, "server_log.txt")

# Write a small sample log so the examples below have something to read
print("\n1. Creating Sample Log File")
print("-" * 40)

# One entry per log line; the trailing newline is added when writing.
sample_entries = (
    "INFO: Server started on port 8080",
    "DEBUG: Connection pool initialized",
    "ERROR: Failed to connect to database",
    "INFO: Retrying connection...",
    "INFO: Database connected successfully",
    "WARNING: High memory usage detected",
    "ERROR: Request timeout on /api/users",
    "INFO: Request completed in 245ms",
)

with open(data_file, 'w') as log_out:
    log_out.writelines(f"{entry}\n" for entry in sample_entries)

print(f"Created {data_file}")

# Example 1: Loop over the file object itself (most Pythonic)
print("\n2. Iterating Directly Over File Object")
print("-" * 40)
line_count = 0
with open(data_file, 'r') as log_file:
    # The file object yields one line at a time, so the whole file is
    # never held in memory at once.
    for raw_line in log_file:
        line_count = line_count + 1
        text = raw_line.strip()
        print(f"Line {line_count}: {text}")

# Example 2: Keep only the lines that contain a keyword
print("\n3. Filtering Lines - Show Only Errors")
print("-" * 40)
with open(data_file, 'r') as log_file:
    # Lazy generator: lines are still pulled from the file one at a time.
    error_lines = (entry for entry in log_file if "ERROR" in entry)
    for entry in error_lines:
        print(f"  {entry.strip()}")

# Example 3: Process and collect data
print("\n4. Counting Log Levels")
print("-" * 40)
log_levels = {"INFO": 0, "DEBUG": 0, "ERROR": 0, "WARNING": 0}

with open(data_file, 'r') as f:
    for line in f:
        # The level is the text before the first colon. Matching on that
        # prefix (rather than searching the whole line for each level name)
        # keeps a message such as "ERROR: INFO lookup failed" from being
        # counted under INFO just because the word appears in its text.
        level = line.partition(":")[0].strip()
        if level in log_levels:
            log_levels[level] += 1

for level, count in log_levels.items():
    print(f"  {level}: {count}")

# Example 4: Let enumerate() supply 1-based line numbers
print("\n5. Using Enumerate for Line Numbers")
print("-" * 40)
with open(data_file, 'r') as log_file:
    for number, entry in enumerate(log_file, start=1):
        # Skip anything that mentions neither alert keyword.
        if not any(tag in entry for tag in ("ERROR", "WARNING")):
            continue
        print(f"  Line {number}: {entry.strip()}")

# Example 5: Clean each line and split it into level and message
print("\n6. Processing with String Methods")
print("-" * 40)
with open(data_file, 'r') as log_file:
    for raw_line in log_file:
        # Split at the first colon only, so colons inside the message survive.
        head, separator, tail = raw_line.strip().partition(":")
        if not separator:
            # Blank lines and lines without a colon are skipped.
            continue
        print(f"  [{head}] {tail.strip()}")

# Example 6: Stop after the first N lines
print("\n7. Reading First 3 Lines Only")
print("-" * 40)
with open(data_file, 'r') as log_file:
    # zip() stops as soon as range(3) runs out, so at most three lines
    # are printed and the rest of the file is left unread.
    for _, entry in zip(range(3), log_file):
        print(f"  {entry.strip()}")

# Example 7: Combine several conditions in one filter
print("\n8. Multi-Condition Filtering")
print("-" * 40)
with open(data_file, 'r') as log_file:
    for raw_line in log_file:
        entry = raw_line.strip()
        # Length is measured on the stripped text.
        if len(entry) <= 30:
            continue
        if "ERROR" in entry or "WARNING" in entry:
            print(f"  Long alert: {entry}")

# Example 8: Building from file
# Tally the log levels in one pass, write the totals to a summary file,
# then read that file back and print it.
print("\n9. Creating Summary Report")
print("-" * 40)
summary_file = os.path.join(temp_dir, "summary.txt")

error_count = 0
warning_count = 0
info_count = 0

with open(data_file, 'r') as f:
    for line in f:
        # Classify by the level prefix (text before the first colon) instead
        # of searching the whole line: otherwise "INFO: no ERROR seen" would
        # be tallied as an error and "WARNING: ERROR rate rising" would be
        # tallied as an error rather than a warning.
        level = line.partition(":")[0].strip()
        if level == "ERROR":
            error_count += 1
        elif level == "WARNING":
            warning_count += 1
        elif level == "INFO":
            info_count += 1

with open(summary_file, 'w') as f:
    f.write("LOG SUMMARY\n")
    f.write("=" * 40 + "\n")
    f.write(f"Total Errors: {error_count}\n")
    f.write(f"Total Warnings: {warning_count}\n")
    f.write(f"Total Info: {info_count}\n")

# Read the report back to show what was written.
with open(summary_file, 'r') as f:
    print(f.read())

# Cleanup: remove the files this script created.
for path in (data_file, summary_file):
    # Attempt the removal directly instead of checking os.path.exists()
    # first: the file could disappear between the check and the remove.
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone, so there is nothing left to clean up for this path.
        pass

# Closing recap of the techniques shown above
closing_rule = "=" * 60
key_points = (
    "  - Iterate directly: for line in file",
    "  - Memory efficient for large files",
    "  - Can filter, count, or process each line",
    "  - Use enumerate() for line numbers",
    "  - Use strip() to remove newlines",
)

print("\n" + closing_rule)
print("Key Points:")
for point in key_points:
    print(point)
print(closing_rule)