# Skip to content

# LOG FILE ANALYZER - Parsing and analyzing log files

# Python
#!/usr/bin/env python3
"""
LOG FILE ANALYZER - Parsing and analyzing log files
Demonstrates extracting information from structured logs
"""

import os
import tempfile
from datetime import datetime

# Every file in this demo is read and written as UTF-8. The summary report
# contains a non-ASCII warning sign, which a platform-default codec such as
# cp1252 cannot encode.
ENCODING = "utf-8"


def _split_fields(line):
    """Split a log line into at most four fields: date, time, level, message.

    The split stops after the third space, so the message keeps its own
    spaces. Blank or truncated lines simply yield fewer than four fields.
    """
    return line.strip().split(' ', 3)


def _level_of(line):
    """Return the level field (third token) of a log line, or None if absent."""
    fields = _split_fields(line)
    return fields[2] if len(fields) >= 3 else None


print("=" * 60)
print("LOG FILE ANALYZER - Parsing Structured Logs")
print("=" * 60)

# Work inside a private scratch directory instead of writing fixed file names
# straight into the shared temp dir: that would overwrite (and later delete)
# any unrelated file that happens to be called application.log there.
# The directory is removed by the explicit cleanup() at the end of the script,
# and also when the TemporaryDirectory object is finalized if the script
# aborts early.
_workspace = tempfile.TemporaryDirectory(prefix="log_analyzer_")
temp_dir = _workspace.name

# Create sample log file
log_file = os.path.join(temp_dir, "application.log")
sample_lines = (
    "2024-01-15 08:00:00 INFO Server started on port 8080\n",
    "2024-01-15 08:00:05 INFO Database connection established\n",
    "2024-01-15 08:05:12 DEBUG User authentication attempt: user123\n",
    "2024-01-15 08:05:13 INFO User user123 logged in successfully\n",
    "2024-01-15 08:10:22 WARNING High memory usage: 85%\n",
    "2024-01-15 08:15:45 ERROR Failed to connect to external API\n",
    "2024-01-15 08:15:46 INFO Retrying API connection...\n",
    "2024-01-15 08:15:50 INFO API connection successful\n",
    "2024-01-15 08:20:33 ERROR Database query timeout\n",
    "2024-01-15 08:25:11 WARNING Disk space low: 10% remaining\n",
    "2024-01-15 08:30:00 INFO Backup completed successfully\n",
    "2024-01-15 08:35:22 DEBUG Cache cleared: 1500 entries\n",
    "2024-01-15 08:40:15 ERROR Connection refused: service_xyz\n",
    "2024-01-15 08:45:30 INFO User user456 logged in successfully\n",
    "2024-01-15 08:50:00 INFO Daily report generated\n",
)
with open(log_file, 'w', encoding=ENCODING) as f:
    f.writelines(sample_lines)

print(f"Created sample log: {log_file}\n")

# Example 1: Count log levels
print("1. Count Messages by Log Level")
print("-" * 40)
levels = {"INFO": 0, "DEBUG": 0, "WARNING": 0, "ERROR": 0}

with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        # Use the parsed level field rather than a substring search over the
        # whole line, so a message that merely mentions "INFO" or "ERROR"
        # cannot be counted under the wrong level.
        level = _level_of(line)
        if level in levels:
            levels[level] += 1

for level, count in levels.items():
    print(f"  {level:8s}: {count}")

# Example 2: Extract all errors
print("\n2. Extract All Error Messages")
print("-" * 40)
error_file = os.path.join(temp_dir, "errors.log")

error_count = 0
with open(log_file, 'r', encoding=ENCODING) as src, \
        open(error_file, 'w', encoding=ENCODING) as dst:
    for line in src:
        # Same reasoning as above: match on the level field only.
        if _level_of(line) == "ERROR":
            dst.write(line)
            error_count += 1

print(f"Extracted {error_count} errors to {error_file}")
with open(error_file, 'r', encoding=ENCODING) as f:
    for line in f:
        print(f"  {line.rstrip()}")

# Example 3: Parse log structure
print("\n3. Parse Log Line Structure")
print("-" * 40)
parsed_entries = []

with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        parts = _split_fields(line)
        # Only complete lines (all four fields present) become entries.
        if len(parts) >= 4:
            parsed_entries.append({
                'date': parts[0],
                'time': parts[1],
                'level': parts[2],
                'message': parts[3],
            })

print(f"Parsed {len(parsed_entries)} entries")
print("First 3 entries:")
for entry in parsed_entries[:3]:
    print(f"  [{entry['level']}] {entry['time']}: {entry['message']}")

# Example 4: Filter by time range
print("\n4. Filter Logs by Time Range")
print("-" * 40)
start_time = "08:15:00"
end_time = "08:30:00"

filtered = []
with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        parts = _split_fields(line)
        if len(parts) >= 2:
            # Zero-padded HH:MM:SS strings sort in chronological order, so a
            # plain string comparison gives an inclusive time-range check.
            if start_time <= parts[1] <= end_time:
                filtered.append(line.rstrip())

print(f"Logs between {start_time} and {end_time}:")
for line in filtered:
    print(f"  {line}")

# Example 5: Search for specific keywords
print("\n5. Search for Keyword: 'connection'")
print("-" * 40)
keyword = "connection"
needle = keyword.lower()  # lower-case once instead of once per line
matches = []

with open(log_file, 'r', encoding=ENCODING) as f:
    for line_num, line in enumerate(f, 1):
        if needle in line.lower():
            matches.append((line_num, line.rstrip()))

print(f"Found {len(matches)} matches:")
for line_num, line in matches:
    print(f"  Line {line_num}: {line}")

# Example 6: Generate summary report
print("\n6. Generate Summary Report")
print("-" * 40)
summary_file = os.path.join(temp_dir, "log_summary.txt")

# Collect statistics
total_lines = 0
level_counts = {"INFO": 0, "DEBUG": 0, "WARNING": 0, "ERROR": 0}
first_timestamp = None
last_timestamp = None

with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        total_lines += 1
        parts = _split_fields(line)

        if len(parts) >= 3:
            timestamp = f"{parts[0]} {parts[1]}"
            if first_timestamp is None:
                first_timestamp = timestamp
            last_timestamp = timestamp

            level = parts[2]
            if level in level_counts:
                level_counts[level] += 1

# Write summary (explicit UTF-8: the alert lines contain U+26A0)
with open(summary_file, 'w', encoding=ENCODING) as f:
    f.write("LOG FILE SUMMARY REPORT\n")
    f.write("=" * 60 + "\n\n")
    f.write(f"File: {log_file}\n")
    f.write(f"Total Entries: {total_lines}\n")
    f.write(f"First Entry: {first_timestamp}\n")
    f.write(f"Last Entry: {last_timestamp}\n\n")

    f.write("Breakdown by Level:\n")
    f.write("-" * 40 + "\n")
    for level, count in level_counts.items():
        # Guard against an empty log so the percentage never divides by zero.
        percentage = (count / total_lines * 100) if total_lines > 0 else 0
        f.write(f"  {level:8s}: {count:3d} ({percentage:5.1f}%)\n")

    f.write("\n")
    if level_counts['ERROR'] > 0:
        f.write(f"⚠ ALERT: {level_counts['ERROR']} errors detected\n")
    if level_counts['WARNING'] > 0:
        f.write(f"⚠ WARNING: {level_counts['WARNING']} warnings detected\n")

with open(summary_file, 'r', encoding=ENCODING) as f:
    print(f.read())

# Example 7: Extract user activity
print("\n7. Extract User Activity")
print("-" * 40)
user_activity = []

with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        line_lower = line.lower()
        if "user" in line_lower and "logged in" in line_lower:
            parts = _split_fields(line)
            if len(parts) >= 4:
                user_activity.append((parts[1], parts[3]))

print(f"Found {len(user_activity)} user login events:")
for event_time, message in user_activity:
    print(f"  {event_time}: {message}")

# Example 8: Categorize messages
print("\n8. Categorize Messages")
print("-" * 40)
categories = {
    "Authentication": ["login", "logout", "authentication"],
    "Database": ["database", "query"],
    "Network": ["connection", "API", "service"],
    "System": ["memory", "disk", "backup"]
}

# Lines are compared in lower case, so the keywords must be lower-cased too;
# otherwise a mixed-case keyword such as "API" can never match.
category_keywords = {
    category: [word.lower() for word in words]
    for category, words in categories.items()
}

categorized = {cat: [] for cat in categories}

with open(log_file, 'r', encoding=ENCODING) as f:
    for line in f:
        line_lower = line.lower()
        for category, words in category_keywords.items():
            if any(word in line_lower for word in words):
                categorized[category].append(line.rstrip())
                # A line belongs to the first matching category only.
                break

for category, lines in categorized.items():
    if lines:
        print(f"\n{category}:")
        for line in lines:
            print(f"  {line}")

# Cleanup: removes the scratch directory together with all three files.
_workspace.cleanup()

print("\n" + "=" * 60)
print("Key Points:")
print("  - Parse structured log format")
print("  - Filter by level, time, or keywords")
print("  - Count and categorize messages")
print("  - Generate summary reports")
print("  - Extract specific event types")
print("=" * 60)