diff --git a/elodie.py b/elodie.py
index fa5c815..d14a196 100755
--- a/elodie.py
+++ b/elodie.py
@@ -21,6 +21,7 @@ from elodie import log
 from elodie.compatability import _decode
 from elodie.config import load_config
 from elodie.filesystem import FileSystem
+from elodie.gui import CompareImageApp
 from elodie.localstorage import Db
 from elodie.media.media import Media, get_all_subclasses
 from elodie.media.audio import Audio
@@ -474,11 +475,68 @@ def _update(album, location, time, title, paths, debug):
         sys.exit(1)
 
 
+@click.command('compare')
+@click.option('--debug', default=False, is_flag=True,
+              help='Override the value in constants.py with True.')
+@click.option('--dry-run', default=False, is_flag=True,
+              help='Dry run only, no change made to the filesystem.')
+@click.option('--find-duplicates', '-f', default=False, is_flag=True)
+@click.option('--output-dir', '-o', default=False, is_flag=True,
+              help='output dir')
+@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
+@click.option('--revert-compare', '-R', default=False, is_flag=True,
+              help='Revert compare')
+@click.option('--similar-to', '-s', default=False,
+              help='Similar to given image')
+@click.option('--similarity', '-S', default=80,
+              help='Similarity level for images')
+@click.option('--verbose', '-v', default=False, is_flag=True,
+              help='True if you want to see details of file processing')
+@click.argument('path', nargs=1, required=True)
+def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
+        revert_compare, similar_to, similarity, verbose, path):
+    '''Compare files in directories.
+
+    Similar images found under PATH are moved into `similar_to_<name>`
+    directories; with --revert-compare they are moved back instead.
+    Exits with status 1 when an error was reported.
+    '''
+    # NOTE: find_duplicates, output_dir, remove_duplicates and similar_to
+    # are accepted but not acted upon here.
+    logger = logging.getLogger('elodie')
+    if debug:
+        logger.setLevel(logging.DEBUG)
+    elif verbose:
+        logger.setLevel(logging.INFO)
+    else:
+        logger.setLevel(logging.ERROR)
+
+    # Initialize Db
+    db = Db(path)
+
+    # dry_run is handed to FileSystem once here; revert_compare() and
+    # sort_similar_images() do not accept it as an argument.
+    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)
+
+    if revert_compare:
+        summary, has_errors = filesystem.revert_compare(path, db)
+    else:
+        summary, has_errors = filesystem.sort_similar_images(path, db,
+                similarity)
+
+    if verbose or debug:
+        summary.write()
+
+    if has_errors:
+        sys.exit(1)
+
+
 @click.group()
 def main():
     pass
 
 
+main.add_command(_compare)
 main.add_command(_import)
 main.add_command(_sort)
 main.add_command(_update)
diff --git a/elodie/filesystem.py b/elodie/filesystem.py
index 17df748..7f5044a 100644
--- a/elodie/filesystem.py
+++ b/elodie/filesystem.py
@@ -21,6 +21,7 @@ from elodie import constants
 
 from elodie.localstorage import Db
 from elodie.media.media import get_media_class, get_all_subclasses
+from elodie.media.photo import CompareImages
 from elodie.plugins.plugins import Plugins
 from elodie.summary import Summary
 
@@ -893,6 +894,193 @@ class FileSystem(object):
 
         return self.summary, has_errors
 
+    def check_path(self, path):
+        """Return `path` as an absolute, user-expanded path.
+
+        Logs an error and exits the process with status 1 when the
+        resulting path does not exist.
+        """
+        path = os.path.abspath(os.path.expanduser(path))
+
+        # some error checking
+        if not os.path.exists(path):
+            self.logger.error(f'Directory {path} does not exist')
+            sys.exit(1)
+
+        return path
+
+    def set_hash(self, result, src_path, dest_path, checksum, db):
+        """Verify a transferred file and record it in the hash db.
+
+        :param result: truthy when the preceding file operation
+            succeeded.
+        :param src_path: path the file came from.
+        :param dest_path: path the file was written to.
+        :param checksum: checksum passed to `checkcomp` along with
+            `dest_path`, then stored in `db` for `dest_path`.
+        :param db: hash db, left untouched in dry-run mode.
+        :returns: True when an error occurred, False otherwise.
+        """
+        if not result:
+            self.summary.append((src_path, False))
+            return True
+
+        # Check if file remain the same
+        if not self.checkcomp(dest_path, checksum):
+            self.logger.error(
+                f'Files {src_path} and {dest_path} are not identical')
+            self.summary.append((src_path, False))
+            return True
+
+        if not self.dry_run:
+            db.add_hash(checksum, dest_path)
+            db.update_hash_db()
+
+        if dest_path:
+            self.logger.info(f'{src_path} -> {dest_path}')
+
+        self.summary.append((src_path, dest_path))
+        return False
+
+    def move_file(self, img_path, dest_path, checksum, db):
+        """Move `img_path` to `dest_path` and record the result.
+
+        Nothing is moved in dry-run mode.
+
+        :returns: True when an error occurred, False otherwise (the
+            same convention as `set_hash`).
+        """
+        if not self.dry_run:
+            try:
+                shutil.move(img_path, dest_path)
+            except OSError as error:
+                self.logger.error(error)
+                # The file did not move: report the failure instead of
+                # verifying and registering a destination that is not
+                # there.
+                return self.set_hash(False, img_path, dest_path, checksum, db)
+
+        # NOTE(review): in dry-run mode nothing was moved, yet set_hash()
+        # still runs checkcomp() on dest_path - confirm that is intended.
+        self.logger.info(f'move: {img_path} -> {dest_path}')
+        return self.set_hash(True, img_path, dest_path, checksum, db)
+
+    def sort_similar_images(self, path, db, similarity=80):
+        """Move look-alike images into `similar_to_<name>` directories.
+
+        Each directory yielded by `walklevel` is handled on its own: for
+        every image that has similar files next to it, those files and
+        the image itself are moved into a `similar_to_<name>` directory
+        created beside them, `<name>` being the image's file name
+        without extension.
+
+        :param path: root directory to scan.
+        :param db: hash db used for checksums and updated after moves.
+        :param similarity: minimum similarity, in percent, passed to
+            `CompareImages.find_similar`.
+        :returns: tuple (summary, has_errors).
+        """
+        has_errors = False
+        path = self.check_path(path)
+        for dirname, dirnames, filenames, level in self.walklevel(path, None):
+            if dirname == os.path.join(path, '.elodie'):
+                continue
+            # dirname is a full path (see the comparison above), so the
+            # prefix has to be looked for in its last component.
+            if os.path.basename(dirname).startswith('similar_to'):
+                continue
+
+            file_paths = {os.path.join(dirname, filename)
+                          for filename in filenames}
+            ci = CompareImages(file_paths, logger=self.logger)
+
+            for image in set(ci.get_images()):
+                if not os.path.isfile(image):
+                    # e.g. already moved as a match of an earlier image
+                    continue
+                checksum1 = db.checksum(image)
+                # TODO compare metadata
+
+                # image and its matches all come from dirname, so they
+                # share a single destination directory.
+                name = os.path.splitext(os.path.basename(image))[0]
+                dest_directory = os.path.join(os.path.dirname(image),
+                                              'similar_to_' + name)
+
+                similar = False
+                moved_imgs = set()
+                for img_path in ci.find_similar(image, similarity):
+                    similar = True
+                    checksum2 = db.checksum(img_path)
+                    dest_path = os.path.join(dest_directory,
+                                             os.path.basename(img_path))
+
+                    # Move the similar file into the destination directory
+                    if not self.create_directory(dest_directory):
+                        has_errors = True
+                        continue
+                    # move_file() returns True on error
+                    if self.move_file(img_path, dest_path, checksum2, db):
+                        has_errors = True
+                    moved_imgs.add(img_path)
+
+                if similar:
+                    dest_path = os.path.join(dest_directory,
+                                             os.path.basename(image))
+                    if self.move_file(image, dest_path, checksum1, db):
+                        has_errors = True
+                    moved_imgs.add(image)
+
+                for moved_img in moved_imgs:
+                    ci.file_paths.remove(moved_img)
+
+        return self.summary, has_errors
+
+    def revert_compare(self, path, db):
+        """Undo `sort_similar_images`.
+
+        Files found directly inside a `similar_to*` sub-directory are
+        moved back to its parent directory, then the sub-directory is
+        removed (it is kept in dry-run mode).
+
+        :param path: root directory to scan.
+        :param db: hash db used for checksums and updated after moves.
+        :returns: tuple (summary, has_errors).
+        """
+        has_errors = False
+        path = self.check_path(path)
+        for dirname, dirnames, filenames, level in self.walklevel(path, None):
+            if dirname == os.path.join(path, '.elodie'):
+                continue
+            # dirname is a full path, so test its last component.
+            if os.path.basename(dirname).startswith('similar_to'):
+                continue
+
+            for subdir in dirnames:
+                if not subdir.startswith('similar_to'):
+                    continue
+                similar_dir = os.path.join(dirname, subdir)
+                for file_name in os.listdir(similar_dir):
+                    # move file to initial folder
+                    img_path = os.path.join(similar_dir, file_name)
+                    if os.path.isdir(img_path):
+                        continue
+                    checksum = db.checksum(img_path)
+                    dest_path = os.path.join(dirname, file_name)
+                    # move_file() returns True on error
+                    if self.move_file(img_path, dest_path, checksum, db):
+                        has_errors = True
+
+                if self.dry_run:
+                    continue
+                # remove directory
+                try:
+                    os.rmdir(similar_dir)
+                except OSError as error:
+                    self.logger.error(error)
+
+        return self.summary, has_errors
+
     def process_file(self, _file, destination, db, media, album_from_folder,
             mode, **kwargs):
         allow_duplicate = False
diff --git a/elodie/media/photo.py b/elodie/media/photo.py
index 7475230..a4bb3b0 100644
--- a/elodie/media/photo.py
+++ b/elodie/media/photo.py
@@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
 
 .. moduleauthor:: Jaisen Mathai
 """
+import imagehash
 import imghdr
+import logging
+import numpy as np
 import os
+from PIL import Image
 import time
 
 from .media import Media
@@ -81,3 +85,109 @@ class Photo(Media):
             return False
 
         return extension in self.extensions
+
+
+class CompareImages:
+    """Find duplicate and similar images through average hashes.
+
+    :param file_paths: collection of candidate file paths.
+    :param hash_size: size given to `imagehash.average_hash`; a hash
+        holds hash_size**2 positions.
+    :param logger: logger that receives progress and error messages.
+    """
+
+    def __init__(self, file_paths, hash_size=8, logger=logging.getLogger()):
+        self.file_paths = file_paths
+        self.hash_size = hash_size
+        # The level is left to the caller: forcing INFO here would
+        # override what the command line configured on a shared logger.
+        self.logger = logger
+
+    def get_images(self):
+        """Yield the paths from `file_paths` that `imghdr` recognizes.
+
+        :returns: img_path generator
+        """
+        for img_path in self.file_paths:
+            if imghdr.what(img_path) is not None:
+                yield img_path
+
+    def _average_hash(self, img_path):
+        """Return the average hash of the image stored at `img_path`."""
+        with Image.open(img_path) as img:
+            return imagehash.average_hash(img, self.hash_size)
+
+    def find_duplicates(self):
+        """Return the images whose hash equals that of an earlier one.
+
+        The first image seen for a given hash is kept as the reference;
+        later ones are reported as duplicates.
+
+        :returns: list of duplicate image paths
+        """
+        hashes = {}
+        duplicates = []
+        # get_images() has already filtered out non-image files.
+        for img_path in self.get_images():
+            temp_hash = self._average_hash(img_path)
+            if temp_hash in hashes:
+                self.logger.info("Duplicate {} \nfound for image {}\n".format(
+                    img_path, hashes[temp_hash]))
+                duplicates.append(img_path)
+            else:
+                hashes[temp_hash] = img_path
+
+        return duplicates
+
+    def remove_duplicates(self, duplicates):
+        """Delete every file in `duplicates`, logging failures."""
+        for duplicate in duplicates:
+            try:
+                os.remove(duplicate)
+            except OSError as error:
+                self.logger.error(error)
+
+    def remove_duplicates_interactive(self, duplicates):
+        """Ask for confirmation, then delete `duplicates`."""
+        if not duplicates:
+            self.logger.info("No duplicates found")
+            return
+
+        answer = input(
+            f"Do you want to delete these {duplicates} images? Y/n: ")
+        if answer.strip().lower() == 'y':
+            self.remove_duplicates(duplicates)
+            self.logger.info(f'{duplicates} deleted successfully!')
+
+    def find_similar(self, image, similarity=80):
+        """Yield the images that look like `image`.
+
+        Two images match when the number of differing positions between
+        their average hashes is at most (1 - similarity / 100) of the
+        hash_size**2 positions.
+
+        :param image: path of the reference image.
+        :param similarity: minimum similarity, in percent.
+        :returns: img_path generator
+        """
+        if imghdr.what(image) is None:
+            # Not an image: there is no hash to compare against.
+            return
+
+        hash_bits = self.hash_size ** 2
+        diff_limit = int((1 - similarity / 100) * hash_bits)
+        hash1 = self._average_hash(image).hash
+
+        self.logger.info(f'Finding similar images to {image}')
+        for img_path in self.get_images():
+            if img_path == image:
+                continue
+            hash2 = self._average_hash(img_path).hash
+
+            diff_images = np.count_nonzero(hash1 != hash2)
+            if diff_images <= diff_limit:
+                similarity_img = round((1 - diff_images / hash_bits) * 100)
+                self.logger.info(
+                    f'{img_path} image found {similarity_img}% similar '
+                    f'to {image}')
+                yield img_path
diff --git a/requirements.txt b/requirements.txt
index a828291..6eae61c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 click==6.6
+imagehash==4.2.1
 requests==2.20.0
 Send2Trash==1.3.0
 configparser==3.5.0