"""Merge the files of every sub-folder of a chosen directory into one folder.

The user picks a base directory in a dialog.  Every file found below one of
its direct sub-folders is moved into ``<base>/000_merged``, keeping the
directory structure *below* that sub-folder.  Files whose content already
exists at the destination are left where they are; files that collide by name
but differ in content are stored under a unique name and recorded in
``<base>/different_files.log``.
"""
import hashlib
import os
import time
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog

# Name of the folder (directly inside the base directory) that receives the
# merged files.
OUTPUT_FOLDER_NAME = "000_merged"
# Name of the log file (directly inside the base directory) that lists every
# file stored under an alternative name because of a content conflict.
LOG_FILE_NAME = "different_files.log"


def md5(fpath):
    """Return the hexadecimal MD5 digest of the file at *fpath*.

    The file is read in 4096-byte chunks so that large files are never held
    in memory as a whole.
    """
    hash_md5 = hashlib.md5()
    with open(fpath, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()


def _target_dir(subdir, base_folder, output_folder):
    """Map a walked directory to its destination inside *output_folder*.

    The first path component below *base_folder* (the name of the top-level
    sub-folder being merged) is dropped; everything below it is kept.
    Returns ``None`` for *base_folder* itself, whose files are not merged.
    """
    rel_dir = os.path.relpath(subdir, base_folder)
    if rel_dir == os.curdir:
        return None
    # Working on path components instead of str.replace() means a sub-folder
    # whose name is a prefix of another one ("a" / "ab") cannot corrupt the
    # destination path.
    nested = rel_dir.split(os.sep)[1:]
    return os.path.join(output_folder, *nested)


def _unique_variant(target_fpath):
    """Return a not-yet-existing sibling path ``<stem>_<md5 hex><ext>``.

    The suffix is the MD5 of the current time plus an attempt counter.  The
    loop guarantees that two conflicts handled within the same clock tick
    never end up with the same name (which would overwrite a file on POSIX
    and raise on Windows).
    """
    folder, name = os.path.split(target_fpath)
    stem, ext = os.path.splitext(name)
    attempt = 0
    while True:
        seed = (str(time.time()) + "-" + str(attempt)).encode("utf-8")
        candidate = os.path.join(
            folder, stem + "_" + hashlib.md5(seed).hexdigest() + ext
        )
        if not os.path.lexists(candidate):
            return candidate
        attempt += 1


def merge_subfolders(base_folder, output_folder_name=OUTPUT_FOLDER_NAME):
    """Move all files below the sub-folders of *base_folder* into one folder.

    Args:
        base_folder: Directory whose direct sub-folders are merged.
        output_folder_name: Name of the destination folder inside
            *base_folder*; it is created if missing and never merged itself.

    Returns:
        A ``(moved, skipped)`` tuple: the number of files actually moved and
        the number of files left in place (identical content already at the
        destination, or located directly in *base_folder*).
    """
    output_folder = os.path.join(base_folder, output_folder_name)
    log_path = os.path.join(base_folder, LOG_FILE_NAME)
    os.makedirs(output_folder, exist_ok=True)

    moved = 0
    skipped = 0
    for subdir, dirs, files in os.walk(base_folder):
        if subdir == base_folder:
            # Prune the output folder in place so os.walk never descends
            # into it.
            dirs[:] = [d for d in dirs if d != output_folder_name]

        target_dir = _target_dir(subdir, base_folder, output_folder)
        for filename in files:
            fpath = os.path.join(subdir, filename)

            if target_dir is None:
                # Files directly in the base folder belong to no sub-folder;
                # they stay where they are and are reported as skipped.
                skipped += 1
                print("SKIPPING: " + fpath)
                continue

            target_fpath = os.path.join(target_dir, filename)
            renamed = False
            if os.path.isfile(target_fpath):
                # The source may have vanished (or be a dangling link).
                if not os.path.isfile(fpath):
                    continue
                if md5(fpath) == md5(target_fpath):
                    skipped += 1
                    print("SKIPPING: " + fpath)
                    continue
                # Same name, different content: keep both.
                target_fpath = _unique_variant(target_fpath)
                renamed = True

            # Create the folder structure in the output folder.
            os.makedirs(target_dir, exist_ok=True)
            try:
                os.rename(fpath, target_fpath)
            except FileNotFoundError:
                print("ERROR: " + target_fpath)
                continue

            # Count and log only moves that really happened.
            moved += 1
            print("MOVING: " + target_fpath)
            if renamed:
                with open(log_path, "a", encoding="utf-8") as logfile:
                    logfile.write(target_fpath + "\n")

    return moved, skipped


def main():
    """Ask for the base directory, merge its sub-folders, print a summary."""
    root = tk.Tk()
    root.withdraw()
    base_folder = filedialog.askdirectory(
        parent=root, initialdir="/", title='Please select the base-directory'
    )
    root.destroy()

    # askdirectory() returns an empty value (not None) when the dialog is
    # cancelled.
    if not base_folder:
        print("No directory selected - nothing to do.")
        return

    moved, skipped = merge_subfolders(base_folder)
    print()
    print("UNIQ FILES: " + str(moved))
    print("SKIPPED FILES: " + str(skipped))


if __name__ == "__main__":
    main()