Add compare command

This commit is contained in:
Cédric Leporcq 2021-07-26 20:50:51 +02:00
parent 1de9e963aa
commit b4a8cc88cb
4 changed files with 284 additions and 0 deletions

View File

@ -21,6 +21,7 @@ from elodie import log
from elodie.compatability import _decode
from elodie.config import load_config
from elodie.filesystem import FileSystem
from elodie.gui import CompareImageApp
from elodie.localstorage import Db
from elodie.media.media import Media, get_all_subclasses
from elodie.media.audio import Audio
@ -474,11 +475,60 @@ def _update(album, location, time, title, paths, debug):
sys.exit(1)
@click.command('compare')
@click.option('--debug', default=False, is_flag=True,
              help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
              help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True,
              help='output dir')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True,
              help='Revert compare')
@click.option('--similar-to', '-s', default=False,
              help='Similar to given image')
@click.option('--similarity', '-S', default=80,
              help='Similarity level for images')
@click.option('--verbose', '-v', default=False, is_flag=True,
              help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
             revert_compare, similar_to, similarity, verbose, path):
    '''Compare files in directories'''
    # NOTE: the docstring above is what click prints as the command help, so
    # implementation notes live in comments instead.
    #
    # Either groups similar images found under `path` into `similar_to_*`
    # directories, or (with --revert-compare) moves them back.  Exits with
    # status 1 when the filesystem layer reports errors.
    #
    # NOTE(review): find_duplicates, output_dir, remove_duplicates and
    # similar_to are accepted but not used by this command yet.

    # Map the CLI flags onto the 'elodie' logger level; --debug wins.
    logger = logging.getLogger('elodie')
    if debug:
        logger.setLevel(logging.DEBUG)
    elif verbose:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)

    # Initialize Db
    db = Db(path)

    # dry_run is handed to FileSystem once here; revert_compare() and
    # sort_similar_images() read it from the instance and do not accept it as
    # an argument, so passing it again would raise TypeError.
    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)

    if revert_compare:
        summary, has_errors = filesystem.revert_compare(path, db)
    else:
        summary, has_errors = filesystem.sort_similar_images(path, db,
                                                             similarity)

    if verbose or debug:
        summary.write()

    if has_errors:
        sys.exit(1)
@click.group()
def main():
    # Root click group; it does nothing itself and only dispatches to the
    # sub-commands registered below.  (Kept docstring-free so the generated
    # --help text is unchanged.)
    pass


# Attach every sub-command to the root group, in the original order.
for _subcommand in (_compare, _import, _sort, _update):
    main.add_command(_subcommand)

View File

@ -21,6 +21,7 @@ from elodie import constants
from elodie.localstorage import Db
from elodie.media.media import get_media_class, get_all_subclasses
from elodie.media.photo import CompareImages
from elodie.plugins.plugins import Plugins
from elodie.summary import Summary
@ -893,6 +894,149 @@ class FileSystem(object):
return self.summary, has_errors
def check_path(self, path):
    """Return *path* as an absolute, user-expanded path.

    :param str path: Path to normalise; a leading ``~`` is expanded.
    :returns: str, the absolute path.
    Logs an error and exits the process with status 1 when the resulting
    path does not exist.
    """
    expanded = os.path.expanduser(path)
    path = os.path.abspath(expanded)

    # Nothing sensible can be done with a missing path: report and abort.
    if not os.path.exists(path):
        self.logger.error(f'Directory {path} does not exist')
        sys.exit(1)

    return path
def set_hash(self, result, src_path, dest_path, checksum, db):
    """Record the outcome of a file operation in the summary and hash db.

    :param result: Truthy when the preceding file operation succeeded.
    :param str src_path: Original location of the file.
    :param str dest_path: Location the file was moved/copied to.
    :param str checksum: Checksum expected for the file at *dest_path*.
    :param db: Hash database exposing ``add_hash`` and ``update_hash_db``.
    :returns: bool, ``True`` when an error was recorded, ``False`` otherwise.
    """
    # The operation itself already failed: just record the failure.
    if not result:
        self.summary.append((src_path, False))
        return True

    # Check if file remain the same
    if not self.checkcomp(dest_path, checksum):
        self.logger.error(f'Files {src_path} and {dest_path} are not identical')
        self.summary.append((src_path, False))
        return True

    # Only persist the new hash when really touching the filesystem.
    if not self.dry_run:
        db.add_hash(checksum, dest_path)
        db.update_hash_db()

    if dest_path:
        self.logger.info(f'{src_path} -> {dest_path}')

    self.summary.append((src_path, dest_path))
    return False
def move_file(self, img_path, dest_path, checksum, db):
    """Move *img_path* to *dest_path* and record the outcome.

    In dry-run mode nothing is moved, but the move is still logged and
    handed to ``set_hash`` as a success.

    :param str img_path: File to move.
    :param str dest_path: Destination path of the file.
    :param str checksum: Checksum of the file, forwarded to ``set_hash``.
    :param db: Hash database, forwarded to ``set_hash``.
    :returns: the value returned by ``set_hash`` (truthy when an error
        was recorded).
    """
    moved = True
    if not self.dry_run:
        try:
            shutil.move(img_path, dest_path)
        except OSError as error:
            self.logger.error(error)
            # Remember the failure so it is not reported as a success below.
            moved = False

    if moved:
        self.logger.info(f'move: {img_path} -> {dest_path}')

    return self.set_hash(moved, img_path, dest_path, checksum, db)
def sort_similar_images(self, path, db, similarity=80):
    """Group similar images into ``similar_to_<name>`` sub-directories.

    Walks *path*; for every image that has at least one similar image in the
    same directory, the similar images and the image itself are moved into
    ``<directory>/similar_to_<image name without extension>``.

    :param str path: Root directory to process.
    :param db: Hash database; must provide ``checksum(path)`` and whatever
        ``move_file`` requires.
    :param int similarity: Minimum similarity, in percent, for two images to
        be grouped together.
    :returns: tuple ``(summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, _dirnames, filenames, _level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path, so the test must look at its last
        # component to recognise directories created by a previous run.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        file_paths = {os.path.join(dirname, filename)
                      for filename in filenames}
        ci = CompareImages(file_paths, logger=self.logger)

        # Iterate over a snapshot: ci.file_paths is modified below.
        for image in set(ci.get_images()):
            # The image may already have been moved as a "similar" of a
            # previously processed image.
            if not os.path.isfile(image):
                continue

            checksum1 = db.checksum(image)
            # TODO compare metadata
            name = os.path.splitext(os.path.basename(image))[0]
            directory_name = 'similar_to_' + name
            dest_directory = None  # stays None when nothing similar is found
            moved_imgs = set()

            for img_path in ci.find_similar(image, similarity):
                checksum2 = db.checksum(img_path)
                dest_directory = os.path.join(os.path.dirname(img_path),
                                              directory_name)
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(img_path))

                # Move the similar file into the destination directory.
                if self.create_directory(dest_directory):
                    # move_file() returns True when an error was recorded.
                    if self.move_file(img_path, dest_path, checksum2, db):
                        has_errors = True
                    moved_imgs.add(img_path)
                else:
                    has_errors = True

            if dest_directory is not None:
                # At least one similar image: the reference image joins them.
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(image))
                if self.move_file(image, dest_path, checksum1, db):
                    has_errors = True
                moved_imgs.add(image)

            # Moved files must no longer be offered as comparison candidates.
            for moved_img in moved_imgs:
                ci.file_paths.discard(moved_img)

    return self.summary, has_errors
def revert_compare(self, path, db):
    """Undo ``sort_similar_images``.

    Every file found directly inside a ``similar_to*`` sub-directory is moved
    back into the parent directory, then the emptied sub-directory is
    removed.  In dry-run mode no directory is removed.

    :param str path: Root directory to process.
    :param db: Hash database; must provide ``checksum(path)`` and whatever
        ``move_file`` requires.
    :returns: tuple ``(summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, _filenames, _level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path: look at its last component only.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        for subdir in dirnames:
            if not subdir.startswith('similar_to'):
                continue

            similar_dir = os.path.join(dirname, subdir)
            for file_name in os.listdir(similar_dir):
                # move file to initial folder
                img_path = os.path.join(similar_dir, file_name)
                if os.path.isdir(img_path):
                    continue
                checksum = db.checksum(img_path)
                dest_path = os.path.join(dirname, file_name)
                # move_file() returns True when an error was recorded.
                if self.move_file(img_path, dest_path, checksum, db):
                    has_errors = True

            # A dry run must leave the filesystem untouched.
            if self.dry_run:
                continue

            # remove directory
            try:
                os.rmdir(similar_dir)
            except OSError as error:
                self.logger.error(error)

    return self.summary, has_errors
def process_file(self, _file, destination, db, media, album_from_folder,
mode, **kwargs):
allow_duplicate = False

View File

@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
"""
import imagehash
import imghdr
import logging
import numpy as np
import os
from PIL import Image
import time
from .media import Media
@ -81,3 +85,88 @@ class Photo(Media):
return False
return extension in self.extensions
class CompareImages:
    """Compare a collection of image files using average perceptual hashes.

    :param file_paths: Iterable of file paths to consider; non-image files
        are filtered out by ``get_images``.
    :param int hash_size: Edge length of the hash; a hash has
        ``hash_size ** 2`` bits.
    :param logger: Logger used for reporting.  Its level is left untouched
        so the caller's logging configuration is respected.
    """

    def __init__(self, file_paths, hash_size=8, logger=logging.getLogger()):
        self.file_paths = file_paths
        self.hash_size = hash_size
        self.logger = logger

    def get_images(self):
        '''
        :returns: img_path generator
        '''
        for img_path in self.file_paths:
            if imghdr.what(img_path) is not None:
                yield img_path

    def find_duplicates(self):
        """
        Find duplicates

        :returns: list of image paths whose hash equals the hash of an image
            seen earlier in the iteration.
        """
        hashes = {}
        duplicates = []
        # get_images() already yields image files only.
        for img_path in self.get_images():
            with Image.open(img_path) as img:
                temp_hash = imagehash.average_hash(img, self.hash_size)
            if temp_hash in hashes:
                self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
                duplicates.append(img_path)
            else:
                hashes[temp_hash] = img_path

        return duplicates

    def remove_duplicates(self, duplicates):
        """Delete every path in *duplicates*; failures are logged, not raised."""
        for duplicate in duplicates:
            try:
                os.remove(duplicate)
            except OSError as error:
                self.logger.error(error)

    def remove_duplicates_interactive(self, duplicates):
        """Ask for confirmation on stdin, then delete *duplicates*.

        Only an answer of ``y`` (any case, surrounding blanks ignored)
        triggers the deletion.
        """
        if duplicates:
            answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
            if answer.strip().lower() == 'y':
                self.remove_duplicates(duplicates)
                self.logger.info(f'{duplicates} deleted successfully!')
        else:
            self.logger.info("No duplicates found")

    def find_similar(self, image, similarity=80):
        '''
        Find similar images

        :param str image: Reference image path.
        :param similarity: Minimum similarity in percent.
        :returns: img_path generator; yields nothing when *image* is not an
            image file.
        '''
        bits = self.hash_size ** 2
        # Largest number of differing hash bits still considered similar.
        # Computed from (100 - similarity) directly so that exact limits such
        # as 20 of 100 bits are not lost to float rounding.
        diff_limit = int((100 - similarity) * bits / 100)

        if imghdr.what(image) is None:
            # Without a reference hash there is nothing to compare against.
            self.logger.warning(f'{image} is not an image, nothing to compare')
            return

        with Image.open(image) as img:
            hash1 = imagehash.average_hash(img, self.hash_size).hash

        self.logger.info(f'Finding similar images to {image}')
        for img_path in self.get_images():
            if img_path == image:
                continue
            with Image.open(img_path) as img:
                hash2 = imagehash.average_hash(img, self.hash_size).hash

            diff_images = np.count_nonzero(hash1 != hash2)
            if diff_images <= diff_limit:
                threshold_img = diff_images / bits
                similarity_img = round((1 - threshold_img) * 100)
                self.logger.info(f'{img_path} image found {similarity_img}% similar to {image}')
                yield img_path

View File

@ -1,4 +1,5 @@
click==6.6
imagehash==4.2.1
requests==2.20.0
Send2Trash==1.3.0
configparser==3.5.0