Finding duplicate files (Python)
#!/usr/bin/env python3
"""Finding duplicate files.

Creates a small scratch directory with three files (two of which share
the same content), groups the files by content hash, prints the groups
that contain more than one file, and removes the scratch directory.
"""
import hashlib
import os
import shutil
import tempfile


def file_hash(filepath):
    """Return the hex MD5 digest of the file at *filepath*.

    The file is read in fixed-size chunks so arbitrarily large files do
    not have to fit in memory.  MD5 is used only as a fast content
    fingerprint for duplicate detection, not for security.
    """
    digest = hashlib.md5()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()


# --- build a throwaway test directory -----------------------------------
temp = tempfile.gettempdir()
test_dir = os.path.join(temp, "dup_test")
# exist_ok=True: re-running the script must not crash on a leftover dir
# (the original os.makedirs() raised FileExistsError on the second run).
os.makedirs(test_dir, exist_ok=True)

# file1 and file2 share content and form the expected duplicate pair.
for _name, _content in [("file1.txt", "content1"),
                        ("file2.txt", "content1"),
                        ("file3.txt", "content2")]:
    # Context manager closes each file promptly; the original leaked the
    # file objects returned by bare open(...).write(...).
    with open(os.path.join(test_dir, _name), 'w') as _f:
        _f.write(_content)

try:
    # Group filenames by content hash: hex digest -> [filenames...].
    hashes = {}
    for file in os.listdir(test_dir):
        filepath = os.path.join(test_dir, file)
        h = file_hash(filepath)
        hashes.setdefault(h, []).append(file)

    print("Duplicate files:")
    for h, files in hashes.items():
        if len(files) > 1:
            print(f" {files}")
finally:
    # Always remove the scratch directory, even if hashing failed.
    shutil.rmtree(test_dir)