Merging multiple RAW recoveries
12.10.2020, 18:23 - Autor: Mark B.
If you run multiple RAW-recovery attempts to maximize the number of usable files, you end up with a few files that differ from the ones recovered by another tool, but most files will be the same as in the other attempts!
So I created a tool which can merge files from multiple recovery attempts based on the MD5 checksum of the files. To do that you should create a folder structure like this:
D:\DE\case12345\a
D:\DE\case12345\b
D:\DE\case12345\c
... each RAW-recovery with each tool goes into its own folder, and those folders are collected in one case folder.
The folder-names (
a,
b and
c in that example) can be named as you like. They will be processed alphabetically, so if you prefer to start with a specific attempt you should make sure that it is the first processed folder!
When you start the tool you get a prompt to select the base folder. In the example above, that would be
D:\DE\case12345.
The tool will create a log-file with all MD5-checksums and a folder named
000_merged which will be skipped when processing. So in case you have to stop the tool and rerun it later your sorting will be fine and the tool will skip all previously processed files.
The tool also checks JPG files for EXIF data and separates them, based on the presence of EXIF data, into
jpg_camera and
jpg in the
000_merged folder.
Source:
import os, time, hashlib, PIL
import tkinter as tk
from tkinter import filedialog, messagebox, simpledialog
from PIL import Image
def md5(fpath, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *fpath*.

    The file is read in blocks of *chunk_size* bytes so that large recovered
    files never have to fit into memory at once.

    Args:
        fpath: Path of the file to hash.
        chunk_size: Number of bytes to read per block; must be positive.

    Returns:
        The MD5 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If *chunk_size* is smaller than 1.
        OSError: If the file cannot be opened or read.
    """
    # read(0) would end the loop immediately and read(-1) would load the
    # whole file, so reject anything that is not a real block size.
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive number of bytes")

    hash_md5 = hashlib.md5()
    with open(fpath, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def _has_camera_exif(fpath):
    """Return True if the image at *fpath* carries non-empty EXIF tags 271 and 272.

    This is a best-effort probe: files that Pillow cannot open or parse are
    reported as having no camera EXIF data instead of aborting the run.
    """
    try:
        # The context manager closes the file handle again, so the file can
        # be moved afterwards.
        with Image.open(fpath) as img:
            # Not every image class Pillow may return provides _getexif();
            # a mis-named file must not crash the run.
            read_exif = getattr(img, "_getexif", None)
            exif = read_exif() if read_exif is not None else None
    except (OSError, SyntaxError, ValueError, Image.DecompressionBombError):
        # OSError also covers PIL.UnidentifiedImageError.
        return False
    if not exif:
        return False
    return str(exif.get(271, "")) != "" and str(exif.get(272, "")) != ""


def _type_folder_name(fpath, filename):
    """Return the name of the output sub-folder for the file *filename*.

    The extension is taken from the file name only, so dots in directory
    names cannot be mistaken for an extension.
    """
    ext = os.path.splitext(filename)[1][1:].lower()
    # File without extension
    if not ext:
        return "unknown"
    if ext == "jpeg":
        ext = "jpg"
    # JPGs with EXIF data are collected separately
    if ext == "jpg" and _has_camera_exif(fpath):
        return "jpg_camera"
    return ext


def _unique_target(folder, filename, first_suffix):
    """Return a path for *filename* inside *folder* that does not exist yet.

    If the plain name is taken, ``_<number>`` is inserted in front of the
    extension, starting with *first_suffix* and counting up until the name is
    free.
    """
    stem, ext = os.path.splitext(filename)
    candidate = os.path.join(folder, filename)
    suffix = first_suffix
    while os.path.exists(candidate):
        candidate = os.path.join(folder, f"{stem}_{suffix}{ext}")
        suffix += 1
    return candidate


root = tk.Tk()
root.withdraw()

# Message with requested parameters
messagebox.showinfo(title="Ready to start", message="Pay attention to the following things: \n1) The RAW-recovery from r-Studio should be in the first parsed folder. \n2) Within the images-filder the \"JPEGs from camera\" folder should be before the folder with the normal JPEGs!")

# Select base folder
base_folder = filedialog.askdirectory(parent=root, initialdir="/", title='Please select the base-directory')

output_folder_name = "000_merged"
logfile_name = "000_merged.log"
found_types = set()  # output sub-folders already created in this run
md5_list = set()     # checksums of all files merged so far (set: O(1) lookup)
mctr = 0             # number of files moved into the output folder
sctr = 0             # number of duplicates that were skipped

# askdirectory() returns an empty value (not None) when the dialog is
# cancelled, so test for truthiness.
if base_folder:
    # Check if output-folder exists and create it if not
    output_folder = os.path.join(base_folder, output_folder_name)
    os.makedirs(output_folder, exist_ok=True)

    # Read md5 values from previous run
    logfile_path = os.path.join(base_folder, logfile_name)
    if os.path.isfile(logfile_path):
        with open(logfile_path) as f:
            md5_list.update(line.strip() for line in f if line.strip())

    # Write logfile
    with open(logfile_path, "a") as f:
        # Iterate over folders
        for subdir, dirs, files in os.walk(base_folder):
            # Editing dirs in place steers os.walk: the output folder is
            # never entered and the remaining folders are visited in
            # alphabetical order.
            dirs[:] = sorted(d for d in dirs if d != output_folder_name)

            for filename in sorted(files):
                fpath = os.path.join(subdir, filename)
                # The logfile itself is not a recovered file
                if fpath == logfile_path:
                    continue

                # Get md5 value of file
                file_md5 = md5(fpath)

                # Prepare output
                out = fpath[0:100] + " "
                out += "." * (102 - len(out))

                # If the file was previously sorted just skip it
                if file_md5 in md5_list:
                    print(out + ".. SKIPPED")
                    sctr += 1
                    continue

                # Assemble folder-path for the filetype and create it once
                ftype_folder = _type_folder_name(fpath, filename)
                type_folder = os.path.join(output_folder, ftype_folder)
                if ftype_folder not in found_types:
                    os.makedirs(type_folder, exist_ok=True)
                    found_types.add(ftype_folder)

                # Never overwrite a file that is already in the output folder
                tpath = _unique_target(type_folder, filename, mctr + 1)
                if os.path.basename(tpath) != filename:
                    print(out + ".. RENAMED")

                # Move file
                try:
                    os.rename(fpath, tpath)
                except FileNotFoundError:
                    print(tpath + ".. NOT FOUND")
                    continue

                # Record the checksum only after the move succeeded, and
                # flush so an interrupted run keeps its log.
                md5_list.add(file_md5)
                f.write(file_md5 + "\n")
                f.flush()
                mctr += 1
                print(out + ".. ADDED")

print("UNIQ FILES: " + str(mctr))
print("SKIPPED FILES: " + str(sctr))
Download
System requirements:
Windows, OSX or Linux with
- Python 3.6 or newer
- Pillow-Module 7.0 or newer
(you can install it under OSX and Linux with pip3 install pillow and under Windows with py.exe -3 -m pip install pillow)