Merging TimeMachine backup-snapshots

08.01.2022, 22:02 - Autor: Mark B.
Data-recovery programs have a really hard time handling TimeMachine backups. Usually you are able to repair the filesystem on a Mac and mount the drive so that you can recover the latest version of the backup. Sometimes the filesystem is in such bad shape that the Mac alone will not do that reliably. It is also often better to restore older versions as well, in case the latest version is corrupted.

If you open the TimeMachine volume and, within it, the folder called Backups.backupdb, you will see a folder structure like this:

/Volumes/TimeMachine/Backups.backupdb/SomeonesMacPro/2013-11-09-133025
/Volumes/TimeMachine/Backups.backupdb/SomeonesMacPro/2014-01-06-124948
...
/Volumes/TimeMachine/Backups.backupdb/SomeonesMacPro/2018-01-13-134648

... If you try to copy the latest backup folder and get errors such as the folder "causing a circle" (a loop in the folder structure), this script will piece the backups together.

The script merges the snapshot folders from the newest to the oldest backup and checks whether the MD5 sums of files with the same path match. If they do not, it restores the older file as well, under a renamed filename. That is how it merges all snapshots into a single folder. All errors along the way are logged but otherwise ignored, in order to retrieve the maximum amount of data possible.

Source:

import os, time, hashlib, shutil
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog

def md5(fpath, *, chunk_size=4096 * 1024):
    """Return the hexadecimal MD5 digest of the file at ``fpath``.

    The file is read in blocks of ``chunk_size`` bytes so that large files
    never have to fit into memory at once.

    Pressing Ctrl+C while a file is being read opens a prompt: entering
    ``Q`` terminates the program, any other input skips the current file.

    Args:
        fpath: Path of the file to hash.
        chunk_size: Number of bytes read per block (default 4 MiB).

    Returns:
        The digest as a hex string, or ``None`` if the user chose to skip
        the file after interrupting.

    Raises:
        OSError: If the file cannot be opened or read.
        SystemExit: If the user answers ``Q`` at the interrupt prompt.
    """
    hash_md5 = hashlib.md5()
    with open(fpath, "rb") as f:
        try:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                hash_md5.update(chunk)
        except KeyboardInterrupt:
            answ = input("Enter Q to quit, any other input to skip actual file: ").strip()
            if answ.lower() == "q":
                # quit() is a helper the site module injects for interactive
                # sessions and may be missing (python -S, frozen builds);
                # raising SystemExit ends the program in every setup.
                raise SystemExit
            return None
    return hash_md5.hexdigest()


# Name of the folder created inside the chosen output directory; it receives
# the merged content of all snapshots.
output_folder_name = "000_merged"


def _snapshot_folders(base_folder):
    """Return the snapshot directories inside ``base_folder``, newest first.

    Entries whose name contains the merge output folder name or "Latest"
    are left out. The list is sorted in reverse lexicographic order, which
    for timestamp-named folders (e.g. 2018-01-13-134648) is newest first.
    """
    folders = []
    for entry in os.listdir(base_folder):
        # Match on the entry name only, so that a parent directory whose
        # path happens to contain "Latest" does not hide every snapshot.
        if output_folder_name in entry or "Latest" in entry:
            continue
        entry_path = os.path.join(base_folder, entry)
        if os.path.isdir(entry_path):
            folders.append(entry_path)
    folders.sort(reverse=True)
    return folders


def _renamed_path(target_fpath):
    """Return a sibling of ``target_fpath`` with a unique suffix on its stem.

    The suffix is the MD5 hex digest of the current timestamp and is placed
    before the file extension, e.g. ``a.txt`` -> ``a_<digest>.txt``.
    """
    stem, ext = os.path.splitext(os.path.basename(target_fpath))
    time_stamp = str(time.time()).encode("utf-8")
    new_filename = stem + "_" + hashlib.md5(time_stamp).hexdigest() + ext
    return os.path.join(os.path.dirname(target_fpath), new_filename)


def merge_snapshots(base_folder, outp_folder):
    """Merge all snapshot folders below ``base_folder`` into one folder.

    Snapshots are processed from newest to oldest. A file whose relative
    path is not yet present in the merged folder is copied. If it is
    already present, the MD5 sums are compared: identical files are
    skipped, differing files are copied next to the existing one under a
    renamed filename, and that new path is appended to
    ``different_files.log`` in ``outp_folder``.

    Errors while hashing or copying a single file are printed and the file
    is passed over, so that as much data as possible is retrieved.

    Args:
        base_folder: Directory that contains the snapshot folders.
        outp_folder: Directory in which the merged folder is created.

    Returns:
        A tuple ``(copied, skipped)`` with the number of files copied
        successfully and the number of files skipped.
    """
    output_folder = os.path.join(outp_folder, output_folder_name)
    log_path = os.path.join(outp_folder, "different_files.log")
    copied = 0
    skipped = 0

    # Create the output folder if it does not exist yet
    os.makedirs(output_folder, exist_ok=True)

    # Iterate over the snapshots from newest to oldest
    for base_subfolder in _snapshot_folders(base_folder):
        for subdir, _dirs, files in os.walk(base_subfolder):
            for filename in files:
                fpath = os.path.join(subdir, filename)
                # Same relative location, but below the merged folder
                target_fpath = os.path.join(
                    output_folder, os.path.relpath(fpath, base_subfolder)
                )

                if os.path.isfile(target_fpath):
                    # Source vanished or is not a regular file (e.g. a
                    # broken link): nothing to compare or copy.
                    if not os.path.isfile(fpath):
                        continue

                    try:
                        md5_source = md5(fpath)
                        md5_target = md5(target_fpath)
                    except OSError:
                        # Unreadable file: treat it like a skipped one
                        md5_source = md5_target = None

                    # None means the hash is unavailable (read error or the
                    # user skipped the file); equal sums mean equal content.
                    if (md5_source is None or md5_target is None
                            or md5_source == md5_target):
                        skipped += 1
                        print("SKIPPING:  " + fpath)
                        continue

                    # Content differs: keep both versions and log the
                    # path of the additional one.
                    target_fpath = _renamed_path(target_fpath)
                    with open(log_path, "a", encoding="utf-8") as logfile:
                        logfile.write(target_fpath + "\n")

                try:
                    # Create the folder structure in the output folder; kept
                    # inside the try so one bad directory cannot abort the run.
                    os.makedirs(os.path.dirname(target_fpath), exist_ok=True)
                    shutil.copy(fpath, target_fpath)
                except FileNotFoundError:
                    print("FNF-ERROR: " + target_fpath)
                except PermissionError:
                    # Must come before OSError, which is its base class
                    print("PERM-ERROR:" + target_fpath)
                except OSError:
                    print("OS-ERROR:  " + target_fpath)
                else:
                    copied += 1
                    print("COPYING:   " + target_fpath)

    return copied, skipped


def main():
    """Ask for the backup and output directories, merge, and print totals."""
    root = tk.Tk()
    root.withdraw()

    base_folder = filedialog.askdirectory(parent=root, initialdir="/", title='Please select the backups-directory')
    outp_folder = filedialog.askdirectory(parent=root, initialdir="/", title='Please select the output-directory')
    root.destroy()

    mctr = 0
    sctr = 0
    # A cancelled dialog yields an empty value rather than None, so test
    # for truthiness before touching the filesystem.
    if base_folder and outp_folder:
        mctr, sctr = merge_snapshots(base_folder, outp_folder)

    print()
    print("UNIQ FILES:    " + str(mctr))
    print("SKIPPED FILES: " + str(sctr))


if __name__ == "__main__":
    main()

Download

System requirements:

Mac OS X