Skip to content

FILE COMPARISON - Comparing contents of two files

Python
#!/usr/bin/env python3
"""
FILE COMPARISON - Comparing contents of two files
Demonstrates finding differences between files
"""

import atexit
import os
import tempfile
from itertools import zip_longest

print("=" * 60)
print("FILE COMPARISON - Finding Differences")
print("=" * 60)

# Work inside a private, uniquely named directory instead of writing fixed
# file names straight into the shared system temp directory: that could
# overwrite (and later delete) an unrelated file with the same name, and two
# concurrent runs would trample each other.
_workspace = tempfile.TemporaryDirectory(prefix="file_compare_")
# Remove the directory and anything left in it when the interpreter exits,
# including after an uncaught exception in one of the examples below.
atexit.register(_workspace.cleanup)
temp_dir = _workspace.name

# Create sample files
file1 = os.path.join(temp_dir, "version1.txt")
file2 = os.path.join(temp_dir, "version2.txt")

# The two versions share lines 1, 3 and 5, differ on line 2, and each has
# one line the other lacks.
with open(file1, 'w') as f:
    f.writelines([
        "Line 1: Same in both\n",
        "Line 2: Different in file 1\n",
        "Line 3: Same in both\n",
        "Line 4: Only in file 1\n",
        "Line 5: Same in both\n",
    ])

with open(file2, 'w') as f:
    f.writelines([
        "Line 1: Same in both\n",
        "Line 2: Different in file 2\n",
        "Line 3: Same in both\n",
        "Line 5: Same in both\n",
        "Line 6: Only in file 2\n",
    ])

print("Created two files for comparison\n")

# Example 1: whole-content equality check
print("1. Exact File Comparison")
print("-" * 40)
with open(file1, 'r') as f1:
    content1 = f1.read()
with open(file2, 'r') as f2:
    content2 = f2.read()

# Report the mismatch case first; sizes are only shown when the texts differ.
if content1 != content2:
    print("  Files are different")
    print(f"  File 1 size: {len(content1)} chars")
    print(f"  File 2 size: {len(content2)} chars")
else:
    print("  Files are identical")

# Example 2: Line-by-line comparison
# Walk both files in lockstep and print every position where the lines are
# not equal, including positions that exist in only one file.
print("\n2. Line-by-Line Comparison")
print("-" * 40)
with open(file1, 'r') as f1, open(file2, 'r') as f2:
    lines1 = f1.readlines()
    lines2 = f2.readlines()

print(f"File 1: {len(lines1)} lines")
print(f"File 2: {len(lines2)} lines")

differences = 0

# zip_longest pads the shorter file with None, so None (and only None) means
# "this file has no line at this position".
line_pairs = zip_longest(lines1, lines2)
for line_number, (line1, line2) in enumerate(line_pairs, start=1):
    if line1 == line2:
        continue

    differences += 1
    print(f"\nDifference at line {line_number}:")
    for label, line in (("File1", line1), ("File2", line2)):
        # Test the sentinel explicitly instead of relying on truthiness.
        shown = "(missing)" if line is None else line.rstrip()
        print(f"  {label}: {shown}")

print(f"\nTotal differences: {differences}")

# Example 3: lines that occur in exactly one of the two files
print("\n3. Find Unique Lines in Each File")
print("-" * 40)
with open(file1, 'r') as f1:
    lines1 = set(f1.readlines())
with open(file2, 'r') as f2:
    lines2 = set(f2.readlines())

# Set difference ignores line order and duplicate lines.
only_in_file1 = lines1.difference(lines2)
only_in_file2 = lines2.difference(lines1)

# Print each group under its heading, in sorted order.
for heading, unique_lines in (
    ("Only in File 1:", only_in_file1),
    ("\nOnly in File 2:", only_in_file2),
):
    print(heading)
    for line in sorted(unique_lines):
        print(f"  {line.rstrip()}")

# Example 4: lines present in both files, regardless of position
print("\n4. Find Common Lines")
print("-" * 40)
with open(file1, 'r') as f1:
    lines1 = set(f1.readlines())
with open(file2, 'r') as f2:
    lines2 = set(f2.readlines())

# Set intersection keeps one copy of every line the two files share.
common = lines1.intersection(lines2)
print(f"Found {len(common)} common lines:")
for line in sorted(common):
    print(f"  {line.rstrip()}")

# Example 5: locate the first character at which the two texts diverge
print("\n5. Character-by-Character Comparison")
print("-" * 40)
with open(file1, 'r') as f1:
    content1 = f1.read()
with open(file2, 'r') as f2:
    content2 = f2.read()

min_len = min(len(content1), len(content2))

# zip stops at the shorter text, i.e. after min_len characters; -1 means the
# shared prefix contains no mismatch.
first_diff = next(
    (
        position
        for position, (char1, char2) in enumerate(zip(content1, content2))
        if char1 != char2
    ),
    -1,
)

if first_diff < 0:
    if len(content1) == len(content2):
        print("  Files are identical")
    else:
        # One text is a strict prefix of the other.
        print(f"  Files identical up to position {min_len}")
        print(f"  But different lengths: {len(content1)} vs {len(content2)}")
else:
    print(f"  First difference at position {first_diff}")
    print(f"  File1: '{content1[first_diff]}'")
    print(f"  File2: '{content2[first_diff]}'")

# Example 6: Generate diff report
# Write a plain-text report listing, for every line position, whether the two
# files agree, then print the report back.
print("\n6. Generate Difference Report")
print("-" * 40)
diff_report = os.path.join(temp_dir, "diff_report.txt")

with open(file1, 'r') as f1, open(file2, 'r') as f2:
    lines1 = f1.readlines()
    lines2 = f2.readlines()

# The report embeds the file paths, so write and read it with an explicit
# encoding rather than the platform default.
with open(diff_report, 'w', encoding='utf-8') as report:
    report.write("FILE COMPARISON REPORT\n")
    report.write("=" * 60 + "\n\n")
    report.write(f"File 1: {file1}\n")
    report.write(f"File 2: {file2}\n\n")

    report.write(f"Lines in File 1: {len(lines1)}\n")
    report.write(f"Lines in File 2: {len(lines2)}\n\n")

    report.write("Line-by-Line Comparison:\n")
    report.write("-" * 60 + "\n")

    line_pairs = zip_longest(lines1, lines2)
    for line_number, (raw1, raw2) in enumerate(line_pairs, start=1):
        # None marks a position past the end of that file. Keeping the
        # sentinel separate from the "(missing)" display text means a real
        # line that reads "(missing)" is never mistaken for an absent one.
        # Trailing whitespace (including the newline) is stripped before
        # comparing, so the report ignores it.
        text1 = None if raw1 is None else raw1.rstrip()
        text2 = None if raw2 is None else raw2.rstrip()

        same = text1 == text2
        status = "SAME" if same else "DIFF"
        report.write(f"Line {line_number:3d} [{status}]\n")
        if not same:
            shown1 = "(missing)" if text1 is None else text1
            shown2 = "(missing)" if text2 is None else text2
            report.write(f"  < {shown1}\n")
            report.write(f"  > {shown2}\n")

print(f"Generated report: {diff_report}")

# Show report
with open(diff_report, 'r', encoding='utf-8') as f:
    print("\nReport content:")
    print(f.read())

# Example 7: share of line positions at which both files hold the same line
print("\n7. Calculate Similarity Percentage")
print("-" * 40)
with open(file1, 'r') as f1:
    lines1 = f1.readlines()
with open(file2, 'r') as f2:
    lines2 = f2.readlines()

max_lines = max(len(lines1), len(lines2))

# zip pairs lines only up to the end of the shorter file, so positions that
# exist in just one file never count as matches.
matching_lines = sum(1 for left, right in zip(lines1, lines2) if left == right)

# Two empty files are treated as fully similar.
if max_lines > 0:
    similarity = matching_lines / max_lines * 100
else:
    similarity = 100

print(f"  Matching lines: {matching_lines}/{max_lines}")
print(f"  Similarity: {similarity:.1f}%")

# Example 8: Binary file comparison
# Create two small binary files that differ in one byte, then compare their
# raw bytes and report where they first diverge.
print("\n8. Binary File Comparison")
print("-" * 40)
bin1 = os.path.join(temp_dir, "file1.bin")
bin2 = os.path.join(temp_dir, "file2.bin")

with open(bin1, 'wb') as f:
    f.write(bytes([1, 2, 3, 4, 5]))

with open(bin2, 'wb') as f:
    f.write(bytes([1, 2, 9, 4, 5]))

with open(bin1, 'rb') as f1, open(bin2, 'rb') as f2:
    data1 = f1.read()
    data2 = f2.read()

print(f"  Binary 1: {list(data1)}")
print(f"  Binary 2: {list(data2)}")

if data1 == data2:
    print("  Binary files are identical")
else:
    print("  Binary files differ")
    # Iterating bytes yields ints; zip stops at the end of the shorter file.
    for offset, (byte1, byte2) in enumerate(zip(data1, data2)):
        if byte1 != byte2:
            print(f"    First difference at byte {offset}: {byte1} vs {byte2}")
            break
    else:
        # No mismatch in the shared prefix, yet the data differs: one file
        # is a strict prefix of the other, so report the length mismatch.
        shared = min(len(data1), len(data2))
        print(f"    Identical up to byte {shared}, "
              f"but different lengths: {len(data1)} vs {len(data2)}")

# Cleanup
# Remove every file the examples created. Attempt the removal and tolerate a
# file that is already gone, rather than checking for existence first: the
# file could disappear between the check and the removal.
for path in (file1, file2, diff_report, bin1, bin2):
    try:
        os.remove(path)
    except FileNotFoundError:
        pass  # nothing left to delete for this path

# Closing banner: recap what the examples above demonstrated.
key_points = (
    "  - Compare entire content or line-by-line",
    "  - Find unique and common lines",
    "  - Calculate similarity percentage",
    "  - Generate diff reports",
    "  - Works for text and binary files",
)

print("\n" + "=" * 60)
print("Key Points:")
for point in key_points:
    print(point)
print("=" * 60)