# FILE COMPARISON - Comparing contents of two files
# Python
#!/usr/bin/env python3
"""
FILE COMPARISON - Comparing contents of two files
Demonstrates finding differences between files
"""
import itertools
import os
import tempfile
# Banner announcing the demo.
print("=" * 60)
print("FILE COMPARISON - Finding Differences")
print("=" * 60)

# All sample files are placed in the system temporary directory.
temp_dir = tempfile.gettempdir()

# Create sample files
# Relative to version 1, version 2 changes line 2, drops line 4 and appends
# a line 6, so the later examples have changed, missing and extra lines to find.
file1 = os.path.join(temp_dir, "version1.txt")
file2 = os.path.join(temp_dir, "version2.txt")

# The encoding is pinned so the bytes on disk do not depend on the platform's
# locale default, and each file is written with one batched call.
with open(file1, 'w', encoding="utf-8") as f:
    f.writelines([
        "Line 1: Same in both\n",
        "Line 2: Different in file 1\n",
        "Line 3: Same in both\n",
        "Line 4: Only in file 1\n",
        "Line 5: Same in both\n",
    ])

with open(file2, 'w', encoding="utf-8") as f:
    f.writelines([
        "Line 1: Same in both\n",
        "Line 2: Different in file 2\n",
        "Line 3: Same in both\n",
        "Line 5: Same in both\n",
        "Line 6: Only in file 2\n",
    ])

print("Created two files for comparison\n")
# Example 1: Exact comparison
# Read both files completely and compare them as single strings.
print("1. Exact File Comparison")
print("-" * 40)

# Decode explicitly as UTF-8 so the character counts reported below do not
# vary with the platform's locale default encoding.
with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    content1 = f1.read()
    content2 = f2.read()

if content1 == content2:
    print(" Files are identical")
else:
    # Sizes are only interesting when the contents disagree.
    print(" Files are different")
    print(f" File 1 size: {len(content1)} chars")
    print(f" File 2 size: {len(content2)} chars")
# Example 2: Line-by-line comparison
# Walk both files in lockstep and report every position where they disagree.
print("\n2. Line-by-Line Comparison")
print("-" * 40)

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    lines1 = f1.readlines()
    lines2 = f2.readlines()

print(f"File 1: {len(lines1)} lines")
print(f"File 2: {len(lines2)} lines")

differences = 0
# zip_longest pads the shorter file with None, so None means "this file has
# no line at this position".
for line_no, (line1, line2) in enumerate(
        itertools.zip_longest(lines1, lines2), start=1):
    if line1 == line2:
        continue
    differences += 1
    print(f"\nDifference at line {line_no}:")
    # Test for the None padding explicitly instead of relying on truthiness:
    # only the padding value means the line is absent.
    shown1 = "(missing)" if line1 is None else line1.rstrip()
    shown2 = "(missing)" if line2 is None else line2.rstrip()
    print(f" File1: {shown1}")
    print(f" File2: {shown2}")

print(f"\nTotal differences: {differences}")
# Example 3: Find unique lines
# Set difference on line text: position and repetition of a line are ignored.
print("\n3. Find Unique Lines in Each File")
print("-" * 40)

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    # Drop the line terminator before building the sets so that a final line
    # lacking a trailing newline still matches the same text elsewhere.
    lines1 = {line.rstrip("\n") for line in f1}
    lines2 = {line.rstrip("\n") for line in f2}

only_in_file1 = lines1 - lines2
only_in_file2 = lines2 - lines1

# Sets are unordered; sort for stable, readable output.
print("Only in File 1:")
for line in sorted(only_in_file1):
    print(f" {line.rstrip()}")

print("\nOnly in File 2:")
for line in sorted(only_in_file2):
    print(f" {line.rstrip()}")
# Example 4: Find common lines
# Set intersection on line text: a line counts as common if it appears
# anywhere in both files, regardless of position.
print("\n4. Find Common Lines")
print("-" * 40)

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    # Drop the line terminator before building the sets so that a final line
    # lacking a trailing newline still matches the same text elsewhere.
    lines1 = {line.rstrip("\n") for line in f1}
    lines2 = {line.rstrip("\n") for line in f2}

common = lines1 & lines2

print(f"Found {len(common)} common lines:")
# Sets are unordered; sort for stable, readable output.
for line in sorted(common):
    print(f" {line.rstrip()}")
# Example 5: Character-by-character comparison
# Locate the first index at which the two texts disagree, or report that one
# text is a prefix of the other.
print("\n5. Character-by-Character Comparison")
print("-" * 40)

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    content1 = f1.read()
    content2 = f2.read()

# Only the first min_len positions exist in both texts.
min_len = min(len(content1), len(content2))

# zip() stops at the shorter text, so the scan covers exactly those min_len
# positions; -1 signals "no mismatch within the shared prefix".
first_diff = next(
    (
        pos
        for pos, (char1, char2) in enumerate(zip(content1, content2))
        if char1 != char2
    ),
    -1,
)

if first_diff >= 0:
    print(f" First difference at position {first_diff}")
    # !r keeps whitespace and control characters (such as a newline) visible
    # as escapes instead of emitting them raw into the output.
    print(f" File1: {content1[first_diff]!r}")
    print(f" File2: {content2[first_diff]!r}")
elif len(content1) != len(content2):
    print(f" Files identical up to position {min_len}")
    print(f" But different lengths: {len(content1)} vs {len(content2)}")
else:
    print(" Files are identical")
# Example 6: Generate diff report
# Write a positional line-by-line report to a file, then echo it to stdout.
print("\n6. Generate Difference Report")
print("-" * 40)
diff_report = os.path.join(temp_dir, "diff_report.txt")

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    lines1 = f1.readlines()
    lines2 = f2.readlines()

with open(diff_report, 'w', encoding="utf-8") as report:
    # Header: which files were compared and how long each one is.
    report.write("FILE COMPARISON REPORT\n")
    report.write("=" * 60 + "\n\n")
    report.write(f"File 1: {file1}\n")
    report.write(f"File 2: {file2}\n\n")
    report.write(f"Lines in File 1: {len(lines1)}\n")
    report.write(f"Lines in File 2: {len(lines2)}\n\n")
    report.write("Line-by-Line Comparison:\n")
    report.write("-" * 60 + "\n")

    # zip_longest pads the shorter file with None.  Absence is tracked with
    # that out-of-band None rather than the display text "(missing)", so a
    # real line whose text is "(missing)" is never mistaken for an absent one.
    for line_no, (raw1, raw2) in enumerate(
            itertools.zip_longest(lines1, lines2), start=1):
        # Trailing whitespace (including the newline) is ignored when
        # deciding whether two lines match.
        text1 = None if raw1 is None else raw1.rstrip()
        text2 = None if raw2 is None else raw2.rstrip()
        is_same = text1 == text2
        report.write(f"Line {line_no:3d} [{'SAME' if is_same else 'DIFF'}]\n")
        if not is_same:
            shown1 = "(missing)" if text1 is None else text1
            shown2 = "(missing)" if text2 is None else text2
            report.write(f" < {shown1}\n")
            report.write(f" > {shown2}\n")

print(f"Generated report: {diff_report}")

# Show report
with open(diff_report, 'r', encoding="utf-8") as f:
    print("\nReport content:")
    print(f.read())
# Example 7: Similarity percentage
# Share of line positions at which both files hold exactly the same line,
# measured against the length of the longer file.
print("\n7. Calculate Similarity Percentage")
print("-" * 40)

with open(file1, 'r', encoding="utf-8") as f1, \
        open(file2, 'r', encoding="utf-8") as f2:
    lines1 = f1.readlines()
    lines2 = f2.readlines()

max_lines = max(len(lines1), len(lines2))

# zip() pairs lines only up to the shorter file; positions beyond it cannot
# match and are accounted for by dividing by max_lines.
matching_lines = sum(line1 == line2 for line1, line2 in zip(lines1, lines2))

# Two empty files are treated as fully similar (and division by zero avoided).
similarity = (matching_lines / max_lines * 100) if max_lines > 0 else 100.0

print(f" Matching lines: {matching_lines}/{max_lines}")
print(f" Similarity: {similarity:.1f}%")
# Example 8: Binary file comparison
# Compare two files byte-for-byte and report where they first diverge.
print("\n8. Binary File Comparison")
print("-" * 40)
bin1 = os.path.join(temp_dir, "file1.bin")
bin2 = os.path.join(temp_dir, "file2.bin")

# Two five-byte samples that differ only at index 2.
with open(bin1, 'wb') as f:
    f.write(bytes([1, 2, 3, 4, 5]))
with open(bin2, 'wb') as f:
    f.write(bytes([1, 2, 9, 4, 5]))

with open(bin1, 'rb') as f1, open(bin2, 'rb') as f2:
    data1 = f1.read()
    data2 = f2.read()

print(f" Binary 1: {list(data1)}")
print(f" Binary 2: {list(data2)}")

if data1 == data2:
    print(" Binary files are identical")
else:
    print(" Binary files differ")
    # Iterating bytes yields ints; zip() stops at the shorter buffer.
    diff_index = next(
        (
            index
            for index, (byte1, byte2) in enumerate(zip(data1, data2))
            if byte1 != byte2
        ),
        None,
    )
    if diff_index is not None:
        print(f" First difference at byte {diff_index}: "
              f"{data1[diff_index]} vs {data2[diff_index]}")
    else:
        # No mismatch in the shared prefix, yet the buffers are unequal:
        # one file is a strict prefix of the other.
        shared_len = min(len(data1), len(data2))
        print(f" Files identical up to byte {shared_len}")
        print(f" But different lengths: {len(data1)} vs {len(data2)}")
# Cleanup
# Remove every file the demo created.  Attempt the removal directly and
# tolerate a missing file, rather than checking for existence first: the file
# could disappear between the check and the removal.
for path in [file1, file2, diff_report, bin1, bin2]:
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone - nothing left to clean up for this path.
        pass
# Closing summary: a ruled banner listing the techniques demonstrated above.
summary_lines = (
    "\n" + "=" * 60,
    "Key Points:",
    " - Compare entire content or line-by-line",
    " - Find unique and common lines",
    " - Calculate similarity percentage",
    " - Generate diff reports",
    " - Works for text and binary files",
    "=" * 60,
)
for summary_line in summary_lines:
    print(summary_line)