Add compare command

This commit is contained in:
Cédric Leporcq 2021-07-26 20:50:51 +02:00
parent 1de9e963aa
commit b4a8cc88cb
4 changed files with 284 additions and 0 deletions

View File

@ -21,6 +21,7 @@ from elodie import log
from elodie.compatability import _decode from elodie.compatability import _decode
from elodie.config import load_config from elodie.config import load_config
from elodie.filesystem import FileSystem from elodie.filesystem import FileSystem
from elodie.gui import CompareImageApp
from elodie.localstorage import Db from elodie.localstorage import Db
from elodie.media.media import Media, get_all_subclasses from elodie.media.media import Media, get_all_subclasses
from elodie.media.audio import Audio from elodie.media.audio import Audio
@ -474,11 +475,60 @@ def _update(album, location, time, title, paths, debug):
sys.exit(1) sys.exit(1)
# NOTE(review): --find-duplicates, --output-dir, --remove-duplicates and
# --similar-to are accepted here but the command body below does not use them
# yet. --output-dir is declared as a boolean flag although its name suggests a
# path; confirm the intended type before wiring it up.
@click.command('compare')
@click.option('--debug', default=False, is_flag=True,
              help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
              help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True,
              help='output dir')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True,
              help='Revert compare')
@click.option('--similar-to', '-s', default=False,
              help='Similar to given image')
@click.option('--similarity', '-S', default=80,
              help='Similarity level for images')
@click.option('--verbose', '-v', default=False, is_flag=True,
              help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
             revert_compare, similar_to, similarity, verbose, path):
    '''Compare files in directories'''
    # The docstring above is what click prints as the command help, so the
    # implementation notes live in comments instead.

    # --debug wins over --verbose; without either only errors are reported.
    logger = logging.getLogger('elodie')
    if debug:
        logger.setLevel(logging.DEBUG)
    elif verbose:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)

    # Initialize Db
    db = Db(path)

    # dry_run is handed to the FileSystem instance once; the methods below
    # read it from there and do not take it as an argument.
    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)

    if revert_compare:
        summary, has_errors = filesystem.revert_compare(path, db)
    else:
        summary, has_errors = filesystem.sort_similar_images(path, db,
                                                             similarity)

    if verbose or debug:
        summary.write()

    # Non-zero exit status so that scripts can detect a failed run.
    if has_errors:
        sys.exit(1)
@click.group()
def main():
    # Root command group; the sub-commands are attached below. A comment is
    # used instead of a docstring because click would print a docstring as
    # the group's help text.
    pass


main.add_command(_compare)
main.add_command(_import)
main.add_command(_sort)
main.add_command(_update)

View File

@ -21,6 +21,7 @@ from elodie import constants
from elodie.localstorage import Db from elodie.localstorage import Db
from elodie.media.media import get_media_class, get_all_subclasses from elodie.media.media import get_media_class, get_all_subclasses
from elodie.media.photo import CompareImages
from elodie.plugins.plugins import Plugins from elodie.plugins.plugins import Plugins
from elodie.summary import Summary from elodie.summary import Summary
@ -893,6 +894,149 @@ class FileSystem(object):
return self.summary, has_errors return self.summary, has_errors
def check_path(self, path):
    """
    Normalize ``path`` and make sure it exists.

    :param str path: path as given by the user, ``~`` is expanded.
    :returns: the absolute version of ``path``.

    The process exits with status 1 after logging an error when the
    resulting path does not exist.
    """
    resolved = os.path.abspath(os.path.expanduser(path))
    if os.path.exists(resolved):
        return resolved

    # some error checking
    self.logger.error(f'Directory {resolved} does not exist')
    sys.exit(1)
def set_hash(self, result, src_path, dest_path, checksum, db):
    """
    Record the outcome of a file operation in the summary and the hash db.

    :param result: truthy when the preceding file operation succeeded.
    :param src_path: original location of the file.
    :param dest_path: location the file was written to.
    :param checksum: checksum of the source file, verified against
        ``dest_path`` through ``self.checkcomp``.
    :param db: database object receiving the hash of the destination.
    :returns: ``has_errors`` -- ``True`` when the operation failed or the
        destination does not match the checksum, ``False`` otherwise.
    """
    # The operation itself already failed: nothing to verify.
    if not result:
        self.summary.append((src_path, False))
        return True

    # Check if file remain the same
    if not self.checkcomp(dest_path, checksum):
        self.logger.error(f'Files {src_path} and {dest_path} are not identical')
        self.summary.append((src_path, False))
        return True

    # The hash db is only touched on a real run.
    if not self.dry_run:
        db.add_hash(checksum, dest_path)
        db.update_hash_db()

    if dest_path:
        self.logger.info(f'{src_path} -> {dest_path}')

    self.summary.append((src_path, dest_path))
    return False
def move_file(self, img_path, dest_path, checksum, db):
    """
    Move ``img_path`` to ``dest_path`` and record the result.

    Nothing is moved when ``self.dry_run`` is set; the outcome is still
    passed to ``set_hash``.

    :param img_path: file to move.
    :param dest_path: full destination path.
    :param checksum: checksum of ``img_path``, forwarded to ``set_hash``.
    :param db: database object forwarded to ``set_hash``.
    :returns: the value of ``set_hash``, i.e. ``has_errors``: ``True`` when
        something went wrong, ``False`` on success.
    """
    if not self.dry_run:
        try:
            shutil.move(img_path, dest_path)
        except OSError as error:
            self.logger.error(error)
            # The move did not happen: record the failure instead of
            # announcing the move and verifying a file that is not there.
            return self.set_hash(False, img_path, dest_path, checksum, db)

    self.logger.info(f'move: {img_path} -> {dest_path}')
    return self.set_hash(True, img_path, dest_path, checksum, db)
def sort_similar_images(self, path, db, similarity=80):
    """
    Move similar images of each directory into ``similar_to_<name>`` folders.

    Every directory yielded by ``self.walklevel`` is processed on its own:
    for each image, the images ``CompareImages.find_similar`` reports are
    moved into a ``similar_to_<image name>`` sub-directory, followed by the
    reference image itself.

    :param str path: root directory to process.
    :param db: database object used to compute checksums and store hashes.
    :param int similarity: minimum similarity, in percent, for two images
        to be grouped.
    :returns: tuple ``(self.summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, _dirnames, filenames, _level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is compared with a joined path above, so the prefix test
        # has to look at the folder name rather than at the whole path.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        file_paths = {os.path.join(dirname, filename)
                      for filename in filenames}
        ci = CompareImages(file_paths, logger=self.logger)

        # Snapshot of the images: ci.file_paths shrinks while we iterate.
        images = set(ci.get_images())
        for image in images:
            # Already moved as a match of a previous image.
            if not os.path.isfile(image):
                continue
            checksum1 = db.checksum(image)
            # TODO compare metadata
            directory_name = ('similar_to_'
                              + os.path.splitext(os.path.basename(image))[0])
            similar = False
            moved_imgs = set()
            for img_path in ci.find_similar(image, similarity):
                similar = True
                checksum2 = db.checksum(img_path)
                # move image into directory
                dest_directory = os.path.join(os.path.dirname(img_path),
                                              directory_name)
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(img_path))

                # Move the similar file into the destination directory
                if self.create_directory(dest_directory):
                    # move_file returns has_errors, not a success flag.
                    if self.move_file(img_path, dest_path, checksum2, db):
                        has_errors = True
                    moved_imgs.add(img_path)
                else:
                    has_errors = True

            if similar:
                # The reference image joins its matches.
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(image))
                if self.move_file(image, dest_path, checksum1, db):
                    has_errors = True
                moved_imgs.add(image)

            for moved_img in moved_imgs:
                # discard: on a dry run the files stay in place, so an image
                # dropped earlier can be handled (and dropped) a second time.
                ci.file_paths.discard(moved_img)

    return self.summary, has_errors
def revert_compare(self, path, db):
    """
    Undo ``sort_similar_images``.

    Files found in every ``similar_to*`` sub-directory are moved back to
    the parent directory, then the sub-directory is removed.

    :param str path: root directory to process.
    :param db: database object used to compute checksums and store hashes.
    :returns: tuple ``(self.summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, _filenames, _level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is compared with a joined path above, so the prefix test
        # has to look at the folder name rather than at the whole path.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        for subdir in dirnames:
            if not subdir.startswith('similar_to'):
                continue

            similar_dir = os.path.join(dirname, subdir)
            for file_name in os.listdir(os.path.abspath(similar_dir)):
                # move file to initial folder
                img_path = os.path.join(similar_dir, file_name)
                if os.path.isdir(img_path):
                    continue
                checksum = db.checksum(img_path)
                dest_path = os.path.join(dirname, os.path.basename(img_path))
                # move_file returns has_errors, not a success flag.
                if self.move_file(img_path, dest_path, checksum, db):
                    has_errors = True

            # On a dry run nothing was moved out, so the directory is left
            # alone instead of logging a "not empty" error.
            if self.dry_run:
                continue

            # remove directory
            try:
                os.rmdir(similar_dir)
            except OSError as error:
                self.logger.error(error)

    return self.summary, has_errors
def process_file(self, _file, destination, db, media, album_from_folder, def process_file(self, _file, destination, db, media, album_from_folder,
mode, **kwargs): mode, **kwargs):
allow_duplicate = False allow_duplicate = False

View File

@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com> .. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
""" """
import imagehash
import imghdr import imghdr
import logging
import numpy as np
import os import os
from PIL import Image
import time import time
from .media import Media from .media import Media
@ -81,3 +85,88 @@ class Photo(Media):
return False return False
return extension in self.extensions return extension in self.extensions
class CompareImages:
    """
    Find duplicate and similar images among a collection of file paths.

    Images are compared through ``imagehash.average_hash``; a hash holds
    ``hash_size ** 2`` bits.
    """

    # NOTE(review): imghdr is deprecated since Python 3.11 and removed from
    # the standard library in 3.13; a replacement will be needed there.

    def __init__(self, file_paths, hash_size=8, logger=logging.getLogger()):
        """
        :param file_paths: collection of candidate paths. It is kept by
            reference, callers may remove entries from it between calls.
        :param int hash_size: side length of the average hash.
        :param logger: logger used for messages. Its level is left
            untouched so that the caller's verbosity settings apply.
        """
        self.file_paths = file_paths
        self.hash_size = hash_size
        self.logger = logger
        # (path, hash_size) -> ImageHash, so each file is opened and hashed
        # once even though find_similar is called for every image.
        self._hash_cache = {}

    def _average_hash(self, img_path):
        '''
        :returns: the average hash of ``img_path``, computed once per path.
        '''
        key = (img_path, self.hash_size)
        if key not in self._hash_cache:
            with Image.open(img_path) as img:
                self._hash_cache[key] = imagehash.average_hash(
                    img, self.hash_size)
        return self._hash_cache[key]

    def get_images(self):
        '''
        :returns: img_path generator
        '''
        # Iterate over a snapshot: callers remove paths from file_paths
        # while working with the generators of this class.
        for img_path in tuple(self.file_paths):
            if imghdr.what(img_path) is not None:
                yield img_path

    def find_duplicates(self):
        """
        Find duplicates

        :returns: list of paths whose hash equals the hash of an image met
            earlier; the first image of each group is not part of the list.
        """
        hashes = {}
        duplicates = []
        # Searching for duplicates.
        for img_path in self.get_images():
            temp_hash = self._average_hash(img_path)
            if temp_hash in hashes:
                self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
                duplicates.append(img_path)
            else:
                hashes[temp_hash] = img_path

        return duplicates

    def remove_duplicates(self, duplicates):
        """
        Delete every file of ``duplicates``; failures are logged, not raised.
        """
        for duplicate in duplicates:
            try:
                os.remove(duplicate)
            except OSError as error:
                self.logger.error(error)

    def remove_duplicates_interactive(self, duplicates):
        """
        Ask for confirmation on stdin, then delete ``duplicates``.

        Only an answer of ``y`` (case insensitive) triggers the deletion.
        """
        if not duplicates:
            self.logger.info("No duplicates found")
            return

        answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
        if answer.strip().lower() != 'y':
            return

        self.remove_duplicates(duplicates)
        for duplicate in duplicates:
            # remove_duplicates only logs failures, so report the files
            # that are really gone.
            if not os.path.exists(duplicate):
                self.logger.info(f'{duplicate} deleted successfully!')

    def find_similar(self, image, similarity=80):
        '''
        Find similar images

        :param image: path of the reference image.
        :param similarity: minimum similarity in percent.
        :returns: img_path generator; empty when ``image`` is not
            recognized as an image.
        '''
        # Number of hash bits allowed to differ for the given similarity.
        threshold = 1 - similarity/100
        diff_limit = int(threshold*(self.hash_size**2))

        # Without a reference hash there is nothing meaningful to compare.
        if imghdr.what(image) is None:
            return
        hash1 = self._average_hash(image).hash

        self.logger.info(f'Finding similar images to {image}')
        for img_path in self.get_images():
            if img_path == image:
                continue
            hash2 = self._average_hash(img_path).hash

            diff_images = np.count_nonzero(hash1 != hash2)
            if diff_images <= diff_limit:
                threshold_img = diff_images / (self.hash_size**2)
                similarity_img = round((1 - threshold_img) * 100)
                self.logger.info(f'{img_path} image found {similarity_img}% similar to {image}')
                yield img_path

View File

@ -1,4 +1,5 @@
click==6.6 click==6.6
imagehash==4.2.1
requests==2.20.0 requests==2.20.0
Send2Trash==1.3.0 Send2Trash==1.3.0
configparser==3.5.0 configparser==3.5.0