LINE-BY-LINE READING - Efficient processing of large files
Python
#!/usr/bin/env python3
"""
LINE-BY-LINE READING - Efficient processing of large files
Demonstrates iterating through file lines without loading entire file
"""
import os
import tempfile
print("=" * 60)
print("LINE-BY-LINE READING - Efficient File Processing")
print("=" * 60)
temp_dir = tempfile.gettempdir()
data_file = os.path.join(temp_dir, "server_log.txt")
# Create sample log file
print("\n1. Creating Sample Log File")
print("-" * 40)
with open(data_file, 'w') as f:
f.write("INFO: Server started on port 8080\n")
f.write("DEBUG: Connection pool initialized\n")
f.write("ERROR: Failed to connect to database\n")
f.write("INFO: Retrying connection...\n")
f.write("INFO: Database connected successfully\n")
f.write("WARNING: High memory usage detected\n")
f.write("ERROR: Request timeout on /api/users\n")
f.write("INFO: Request completed in 245ms\n")
print(f"Created {data_file}")
# Example 1: Direct iteration (most Pythonic)
print("\n2. Iterating Directly Over File Object")
print("-" * 40)
line_count = 0
with open(data_file, 'r') as f:
    for line in f:
        line_count += 1
        print(f"Line {line_count}: {line.strip()}")
# Example 2: Filter lines by keyword
print("\n3. Filtering Lines - Show Only Errors")
print("-" * 40)
with open(data_file, 'r') as f:
    for line in f:
        if "ERROR" in line:
            print(f" {line.strip()}")
# Example 3: Process and collect data
print("\n4. Counting Log Levels")
print("-" * 40)
log_levels = {"INFO": 0, "DEBUG": 0, "ERROR": 0, "WARNING": 0}
with open(data_file, 'r') as f:
    for line in f:
        for level in log_levels:
            if level in line:
                log_levels[level] += 1
                break
for level, count in log_levels.items():
print(f" {level}: {count}")
# Example 4: Enumerate for line numbers
print("\n5. Using Enumerate for Line Numbers")
print("-" * 40)
with open(data_file, 'r') as f:
    for line_num, line in enumerate(f, 1):
        if "ERROR" in line or "WARNING" in line:
            print(f" Line {line_num}: {line.strip()}")
# Example 5: Strip and clean lines
print("\n6. Processing with String Methods")
print("-" * 40)
with open(data_file, 'r') as f:
    for line in f:
        clean_line = line.strip()
        if clean_line:  # Skip empty lines
            parts = clean_line.split(":", 1)
            if len(parts) == 2:
                level = parts[0]
                message = parts[1].strip()
                print(f" [{level}] {message}")
# Example 6: First N lines
print("\n7. Reading First 3 Lines Only")
print("-" * 40)
with open(data_file, 'r') as f:
    for i, line in enumerate(f):
        if i >= 3:
            break
        print(f" {line.strip()}")
# Example 7: Processing with conditions
print("\n8. Multi-Condition Filtering")
print("-" * 40)
with open(data_file, 'r') as f:
    for line in f:
        line = line.strip()
        if ("ERROR" in line or "WARNING" in line) and len(line) > 30:
            print(f" Long alert: {line}")
# Example 8: Building from file
print("\n9. Creating Summary Report")
print("-" * 40)
summary_file = os.path.join(temp_dir, "summary.txt")
error_count = 0
warning_count = 0
info_count = 0
with open(data_file, 'r') as f:
    for line in f:
        if "ERROR" in line:
            error_count += 1
        elif "WARNING" in line:
            warning_count += 1
        elif "INFO" in line:
            info_count += 1
with open(summary_file, 'w') as f:
f.write("LOG SUMMARY\n")
f.write("=" * 40 + "\n")
f.write(f"Total Errors: {error_count}\n")
f.write(f"Total Warnings: {warning_count}\n")
f.write(f"Total Info: {info_count}\n")
with open(summary_file, 'r') as f:
    print(f.read())
# Cleanup
for path in [data_file, summary_file]:
    if os.path.exists(path):
        os.remove(path)
print("\n" + "=" * 60)
print("Key Points:")
print(" - Iterate directly: for line in file")
print(" - Memory efficient for large files")
print(" - Can filter, count, or process each line")
print(" - Use enumerate() for line numbers")
print(" - Use strip() to remove newlines")
print("=" * 60)