# LOG FILE ANALYZER - Parsing and analyzing log files
# Python
#!/usr/bin/env python3
"""
LOG FILE ANALYZER - Parsing and analyzing log files
Demonstrates extracting information from structured logs
"""
import os
import tempfile
from datetime import datetime
# ---------------------------------------------------------------------------
# Demo script: writes a small sample log into the system temp directory, runs
# eight independent analyses over it, then removes every file it generated.
# Each log line has the shape "<date> <time> <LEVEL> <message>".
# ---------------------------------------------------------------------------
print("=" * 60)
print("LOG FILE ANALYZER - Parsing Structured Logs")
print("=" * 60)

temp_dir = tempfile.gettempdir()

# All generated paths are declared up front so the cleanup in ``finally`` can
# refer to them even when an example fails part-way through.
log_file = os.path.join(temp_dir, "application.log")
error_file = os.path.join(temp_dir, "errors.log")
summary_file = os.path.join(temp_dir, "log_summary.txt")

sample_lines = [
    "2024-01-15 08:00:00 INFO Server started on port 8080\n",
    "2024-01-15 08:00:05 INFO Database connection established\n",
    "2024-01-15 08:05:12 DEBUG User authentication attempt: user123\n",
    "2024-01-15 08:05:13 INFO User user123 logged in successfully\n",
    "2024-01-15 08:10:22 WARNING High memory usage: 85%\n",
    "2024-01-15 08:15:45 ERROR Failed to connect to external API\n",
    "2024-01-15 08:15:46 INFO Retrying API connection...\n",
    "2024-01-15 08:15:50 INFO API connection successful\n",
    "2024-01-15 08:20:33 ERROR Database query timeout\n",
    "2024-01-15 08:25:11 WARNING Disk space low: 10% remaining\n",
    "2024-01-15 08:30:00 INFO Backup completed successfully\n",
    "2024-01-15 08:35:22 DEBUG Cache cleared: 1500 entries\n",
    "2024-01-15 08:40:15 ERROR Connection refused: service_xyz\n",
    "2024-01-15 08:45:30 INFO User user456 logged in successfully\n",
    "2024-01-15 08:50:00 INFO Daily report generated\n",
]

try:
    # Create sample log file.
    # UTF-8 is requested explicitly everywhere: the summary report contains a
    # non-ASCII warning sign that locale encodings such as cp1252 cannot encode.
    with open(log_file, 'w', encoding='utf-8') as f:
        f.writelines(sample_lines)
    print(f"Created sample log: {log_file}\n")

    # Example 1: Count log levels
    print("1. Count Messages by Log Level")
    print("-" * 40)
    levels = {"INFO": 0, "DEBUG": 0, "WARNING": 0, "ERROR": 0}
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(' ', 3)
            # Compare the level *field* only; a substring test on the whole
            # line would miscount messages that merely mention a level name.
            if len(parts) >= 3 and parts[2] in levels:
                levels[parts[2]] += 1
    for level, count in levels.items():
        print(f" {level:8s}: {count}")

    # Example 2: Extract all errors
    print("\n2. Extract All Error Messages")
    print("-" * 40)
    error_count = 0
    with open(log_file, 'r', encoding='utf-8') as src, \
            open(error_file, 'w', encoding='utf-8') as dst:
        for line in src:
            parts = line.strip().split(' ', 3)
            # Field comparison again, so a non-error entry whose message
            # contains the word "ERROR" is not extracted.
            if len(parts) >= 3 and parts[2] == "ERROR":
                dst.write(line)
                error_count += 1
    print(f"Extracted {error_count} errors to {error_file}")
    with open(error_file, 'r', encoding='utf-8') as f:
        for line in f:
            print(f" {line.rstrip()}")

    # Example 3: Parse log structure
    print("\n3. Parse Log Line Structure")
    print("-" * 40)
    parsed_entries = []
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            # maxsplit=3 keeps the free-text message (which contains spaces)
            # together as the fourth field.
            parts = line.strip().split(' ', 3)
            if len(parts) >= 4:
                entry = {
                    'date': parts[0],
                    'time': parts[1],
                    'level': parts[2],
                    'message': parts[3]
                }
                parsed_entries.append(entry)
    print(f"Parsed {len(parsed_entries)} entries")
    print("First 3 entries:")
    for entry in parsed_entries[:3]:
        print(f" [{entry['level']}] {entry['time']}: {entry['message']}")

    # Example 4: Filter by time range
    print("\n4. Filter Logs by Time Range")
    print("-" * 40)
    start_time = "08:15:00"
    end_time = "08:30:00"
    filtered = []
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split(' ', 3)
            if len(parts) >= 2:
                entry_time = parts[1]
                # Zero-padded HH:MM:SS strings order the same way
                # lexicographically as chronologically, so plain string
                # comparison is enough. Both bounds are inclusive.
                if start_time <= entry_time <= end_time:
                    filtered.append(line.rstrip())
    print(f"Logs between {start_time} and {end_time}:")
    for line in filtered:
        print(f" {line}")

    # Example 5: Search for specific keywords
    print("\n5. Search for Keyword: 'connection'")
    print("-" * 40)
    keyword = "connection"
    needle = keyword.lower()  # hoisted: invariant across the loop
    matches = []
    with open(log_file, 'r', encoding='utf-8') as f:
        # Line numbers are 1-based to match what an editor would show.
        for line_num, line in enumerate(f, 1):
            if needle in line.lower():
                matches.append((line_num, line.rstrip()))
    print(f"Found {len(matches)} matches:")
    for line_num, line in matches:
        print(f" Line {line_num}: {line}")

    # Example 6: Generate summary report
    print("\n6. Generate Summary Report")
    print("-" * 40)
    # Collect statistics
    total_lines = 0
    level_counts = {"INFO": 0, "DEBUG": 0, "WARNING": 0, "ERROR": 0}
    first_timestamp = None
    last_timestamp = None
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            # Every physical line counts towards the total, parsed or not.
            total_lines += 1
            parts = line.strip().split(' ', 3)
            if len(parts) >= 3:
                timestamp = f"{parts[0]} {parts[1]}"
                if first_timestamp is None:
                    first_timestamp = timestamp
                last_timestamp = timestamp
                level = parts[2]
                if level in level_counts:
                    level_counts[level] += 1
    # Write summary
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("LOG FILE SUMMARY REPORT\n")
        f.write("=" * 60 + "\n\n")
        f.write(f"File: {log_file}\n")
        f.write(f"Total Entries: {total_lines}\n")
        f.write(f"First Entry: {first_timestamp}\n")
        f.write(f"Last Entry: {last_timestamp}\n\n")
        f.write("Breakdown by Level:\n")
        f.write("-" * 40 + "\n")
        for level, count in level_counts.items():
            # Guard against division by zero for an empty log file.
            percentage = (count / total_lines * 100) if total_lines > 0 else 0
            f.write(f" {level:8s}: {count:3d} ({percentage:5.1f}%)\n")
        f.write("\n")
        if level_counts['ERROR'] > 0:
            f.write(f"⚠ ALERT: {level_counts['ERROR']} errors detected\n")
        if level_counts['WARNING'] > 0:
            f.write(f"⚠ WARNING: {level_counts['WARNING']} warnings detected\n")
    with open(summary_file, 'r', encoding='utf-8') as f:
        print(f.read())

    # Example 7: Extract user activity
    print("\n7. Extract User Activity")
    print("-" * 40)
    user_activity = []
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            line_lower = line.lower()
            if "user" in line_lower and "logged in" in line_lower:
                parts = line.strip().split(' ', 3)
                if len(parts) >= 4:
                    user_activity.append((parts[1], parts[3]))
    print(f"Found {len(user_activity)} user login events:")
    for event_time, message in user_activity:
        print(f" {event_time}: {message}")

    # Example 8: Categorize messages
    print("\n8. Categorize Messages")
    print("-" * 40)
    categories = {
        "Authentication": ["login", "logout", "authentication"],
        "Database": ["database", "query"],
        "Network": ["connection", "API", "service"],
        "System": ["memory", "disk", "backup"]
    }
    # Lines are matched in lower case, so the keywords must be lower-cased
    # too; otherwise a mixed-case keyword such as "API" can never match.
    lowered_keywords = {
        category: [word.lower() for word in words]
        for category, words in categories.items()
    }
    categorized = {cat: [] for cat in categories}
    with open(log_file, 'r', encoding='utf-8') as f:
        for line in f:
            line_lower = line.lower()
            for category, words in lowered_keywords.items():
                if any(word in line_lower for word in words):
                    categorized[category].append(line.rstrip())
                    # First matching category wins (dict insertion order).
                    break
    for category, lines in categorized.items():
        if lines:
            print(f"\n{category}:")
            for line in lines:
                print(f" {line}")
finally:
    # Cleanup runs even when an example above raised, so no temp files are
    # left behind. Files that were never created are simply skipped.
    for path in (log_file, error_file, summary_file):
        if os.path.exists(path):
            os.remove(path)

print("\n" + "=" * 60)
print("Key Points:")
print(" - Parse structured log format")
print(" - Filter by level, time, or keywords")
print(" - Count and categorize messages")
print(" - Generate summary reports")
print(" - Extract specific event types")
print("=" * 60)