Add compare command
This commit is contained in:
		
							parent
							
								
									1de9e963aa
								
							
						
					
					
						commit
						b4a8cc88cb
					
				
							
								
								
									
										50
									
								
								elodie.py
									
									
									
									
									
								
							
							
						
						
									
										50
									
								
								elodie.py
									
									
									
									
									
								
							@ -21,6 +21,7 @@ from elodie import log
 | 
			
		||||
from elodie.compatability import _decode
 | 
			
		||||
from elodie.config import load_config
 | 
			
		||||
from elodie.filesystem import FileSystem
 | 
			
		||||
from elodie.gui import CompareImageApp
 | 
			
		||||
from elodie.localstorage import Db
 | 
			
		||||
from elodie.media.media import Media, get_all_subclasses
 | 
			
		||||
from elodie.media.audio import Audio
 | 
			
		||||
@ -474,11 +475,60 @@ def _update(album, location, time, title, paths, debug):
 | 
			
		||||
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@click.command('compare')
@click.option('--debug', default=False, is_flag=True,
              help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
              help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True,
              help='output dir')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True,
              help='Revert compare')
@click.option('--similar-to', '-s', default=False,
              help='Similar to given image')
@click.option('--similarity', '-S', default=80,
              help='Similarity level for images')
@click.option('--verbose', '-v', default=False, is_flag=True,
              help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
             revert_compare, similar_to, similarity, verbose, path):
    '''Compare files in directories'''
    # NOTE: the docstring above is what click prints as the command help, so
    # the longer description lives in comments instead.
    #
    # Groups similar images found under ``path`` into ``similar_to_*``
    # folders, or (with --revert-compare) moves them back.  Exits with
    # status 1 when the filesystem layer reports errors.
    #
    # find_duplicates, output_dir, remove_duplicates and similar_to are
    # accepted on the command line but are not used by this function yet.

    # Verbosity: --debug wins over --verbose; otherwise only errors are shown.
    logger = logging.getLogger('elodie')
    if debug:
        level = logging.DEBUG
    elif verbose:
        level = logging.INFO
    else:
        level = logging.ERROR
    logger.setLevel(level)

    # Initialize Db
    db = Db(path)

    # dry_run is handed to the FileSystem instance here; the methods below
    # read it from the instance and do not take it as an argument.
    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)

    if revert_compare:
        summary, has_errors = filesystem.revert_compare(path, db)
    else:
        summary, has_errors = filesystem.sort_similar_images(path, db,
                                                             similarity)

    if verbose or debug:
        summary.write()

    if has_errors:
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@click.group()
def main():
    # Empty group callback: it only serves as the parent of the sub-commands
    # registered below.  (No docstring on purpose: click would show it as
    # help text.)
    return None


# Attach every sub-command to the group, in the same order as before.
for _subcommand in (_compare, _import, _sort, _update):
    main.add_command(_subcommand)
del _subcommand
 | 
			
		||||
 | 
			
		||||
@ -21,6 +21,7 @@ from elodie import constants
 | 
			
		||||
 | 
			
		||||
from elodie.localstorage import Db
 | 
			
		||||
from elodie.media.media import get_media_class, get_all_subclasses
 | 
			
		||||
from elodie.media.photo import CompareImages
 | 
			
		||||
from elodie.plugins.plugins import Plugins
 | 
			
		||||
from elodie.summary import Summary
 | 
			
		||||
 | 
			
		||||
@ -893,6 +894,149 @@ class FileSystem(object):
 | 
			
		||||
            return self.summary, has_errors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def check_path(self, path):
        """Return the absolute, user-expanded form of ``path``.

        Logs an error and terminates the process with exit status 1 when
        the resulting path does not exist.
        """
        resolved = os.path.abspath(os.path.expanduser(path))

        if os.path.exists(resolved):
            return resolved

        # Nothing to work on: report and abort.
        path = resolved
        self.logger.error(f'Directory {path} does not exist')
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def set_hash(self, result, src_path, dest_path, checksum, db):
        """Record the outcome of a file operation and return ``has_errors``.

        :param result: truthy when the preceding file operation succeeded.
        :param src_path: original location of the file.
        :param dest_path: location the file was written to.
        :param checksum: checksum used for verification and stored in ``db``.
        :param db: hash database exposing ``add_hash`` and ``update_hash_db``.
        :returns: ``True`` when an error was recorded, ``False`` otherwise.
        """
        # The operation itself already failed: just note it in the summary.
        if not result:
            self.summary.append((src_path, False))
            return True

        # Check that the file remained the same after the operation
        # (checkcomp presumably compares dest_path against checksum).
        if not self.checkcomp(dest_path, checksum):
            self.logger.error(f'Files {src_path} and {dest_path} are not identical')
            self.summary.append((src_path, False))
            return True

        # The hash database is only touched outside of dry-run mode.
        if not self.dry_run:
            db.add_hash(checksum, dest_path)
            db.update_hash_db()

        if dest_path:
            self.logger.info(f'{src_path} -> {dest_path}')

        self.summary.append((src_path, dest_path))
        return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def move_file(self, img_path, dest_path, checksum, db):
        """Move ``img_path`` to ``dest_path`` and record the outcome.

        In dry-run mode nothing is moved, but the outcome is still recorded
        through ``set_hash``.

        :param img_path: current location of the file.
        :param dest_path: location to move the file to.
        :param checksum: checksum of the file, forwarded to ``set_hash``.
        :param db: hash database, forwarded to ``set_hash``.
        :returns: ``True`` on success, ``False`` when the move or the
            follow-up bookkeeping failed.
        """
        moved = True
        if not self.dry_run:
            try:
                shutil.move(img_path, dest_path)
            except OSError as error:
                self.logger.error(error)
                moved = False

        if moved:
            self.logger.info(f'move: {img_path} -> {dest_path}')

        # set_hash returns a ``has_errors`` flag, while callers of move_file
        # test the return value as a success flag, so invert it here.  A
        # failed move is passed on as a failed result instead of being
        # verified as if it had succeeded.
        return not self.set_hash(moved, img_path, dest_path, checksum, db)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def sort_similar_images(self, path, db, similarity=80):
        """Group similar images under ``path`` into ``similar_to_*`` folders.

        For every directory yielded by ``walklevel``, each image is compared
        with the other images of the same directory.  Images reported as
        similar by ``CompareImages.find_similar`` are moved, together with
        the reference image, into a ``similar_to_<reference name>`` folder
        created next to them.

        :param path: root directory to process.
        :param db: hash database providing ``checksum`` and used by
            ``move_file``.
        :param similarity: similarity level forwarded to ``find_similar``.
        :returns: tuple ``(summary, has_errors)``.
        """
        has_errors = False
        path = self.check_path(path)
        for dirname, dirnames, filenames, level in self.walklevel(path, None):
            if dirname == os.path.join(path, '.elodie'):
                continue
            # dirname is a full path, so the prefix test has to be made on
            # its last component; otherwise folders created by a previous
            # run are never skipped.
            if os.path.basename(dirname).startswith('similar_to'):
                continue

            file_paths = {os.path.join(dirname, filename)
                          for filename in filenames}
            ci = CompareImages(file_paths, logger=self.logger)

            # Snapshot the images first: ci.file_paths shrinks while files
            # are being moved.
            for image in set(ci.get_images()):
                # Already moved as a match of an earlier reference image.
                if not os.path.isfile(image):
                    continue
                checksum1 = db.checksum(image)
                # TODO compare metadata

                # Folder name only depends on the reference image.
                name = os.path.splitext(os.path.basename(image))[0]
                directory_name = 'similar_to_' + name

                dest_directory = None  # set once a similar image is found
                moved_imgs = set()
                for img_path in ci.find_similar(image, similarity):
                    checksum2 = db.checksum(img_path)
                    # move image into directory
                    dest_directory = os.path.join(os.path.dirname(img_path),
                                                  directory_name)
                    dest_path = os.path.join(dest_directory,
                                             os.path.basename(img_path))

                    result = self.create_directory(dest_directory)
                    # Move the similar file into the destination directory
                    if result:
                        result = self.move_file(img_path, dest_path,
                                                checksum2, db)
                        moved_imgs.add(img_path)
                        # A falsy result from move_file counts as an error.
                        if not result:
                            has_errors = True
                    else:
                        has_errors = True

                # At least one match: the reference image joins its matches.
                if dest_directory is not None:
                    dest_path = os.path.join(dest_directory,
                                             os.path.basename(image))
                    result = self.move_file(image, dest_path, checksum1, db)
                    moved_imgs.add(image)
                    if not result:
                        has_errors = True

                # Moved files must not be compared again.
                for moved_img in moved_imgs:
                    ci.file_paths.remove(moved_img)

        return self.summary, has_errors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def revert_compare(self, path, db):
        """Undo ``sort_similar_images``: empty the ``similar_to_*`` folders.

        Every regular file found directly inside a ``similar_to*``
        sub-directory is moved back to the parent directory, then the
        sub-directory is removed.

        :param path: root directory to process.
        :param db: hash database providing ``checksum`` and used by
            ``move_file``.
        :returns: tuple ``(summary, has_errors)``.
        """
        has_errors = False
        path = self.check_path(path)
        for dirname, dirnames, filenames, level in self.walklevel(path, None):
            if dirname == os.path.join(path, '.elodie'):
                continue
            # dirname is a full path: test its last component, not the
            # beginning of the whole path.
            if os.path.basename(dirname).startswith('similar_to'):
                continue

            for subdir in dirnames:
                if not subdir.startswith('similar_to'):
                    continue

                similar_dir = os.path.join(dirname, subdir)
                for file_name in os.listdir(os.path.abspath(similar_dir)):
                    # move file to initial folder
                    img_path = os.path.join(similar_dir, file_name)
                    if os.path.isdir(img_path):
                        continue
                    checksum = db.checksum(img_path)
                    dest_path = os.path.join(dirname,
                                             os.path.basename(img_path))
                    result = self.move_file(img_path, dest_path, checksum, db)
                    # A falsy result from move_file counts as an error.
                    if not result:
                        has_errors = True

                # A dry run must leave the filesystem untouched, so the
                # (possibly empty) folder is kept in that case.
                if self.dry_run:
                    continue

                # remove directory
                try:
                    os.rmdir(similar_dir)
                except OSError as error:
                    self.logger.error(error)

        return self.summary, has_errors
 | 
			
		||||
 | 
			
		||||
    def process_file(self, _file, destination, db, media, album_from_folder,
 | 
			
		||||
            mode, **kwargs):
 | 
			
		||||
        allow_duplicate = False
 | 
			
		||||
 | 
			
		||||
@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
 | 
			
		||||
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import imagehash
 | 
			
		||||
import imghdr
 | 
			
		||||
import logging
 | 
			
		||||
import numpy as np
 | 
			
		||||
import os
 | 
			
		||||
from PIL import Image
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from .media import Media
 | 
			
		||||
@ -81,3 +85,88 @@ class Photo(Media):
 | 
			
		||||
                        return False
 | 
			
		||||
 | 
			
		||||
        return extension in self.extensions
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CompareImages:
    """Find duplicate and similar images among a collection of file paths.

    Images are compared through their average hash (``imagehash``), computed
    with ``hash_size`` as the hash size.
    """

    def __init__(self, file_paths, hash_size=8, logger=logging.getLogger()):
        """
        :param file_paths: collection of candidate file paths; it is iterated
            by ``get_images`` and may be modified by the caller.
        :param hash_size: hash size passed to ``imagehash.average_hash``.
        :param logger: logger used for all messages.
        """
        self.file_paths = file_paths
        self.hash_size = hash_size
        self.logger = logger
        # NOTE(review): this forces INFO on the logger passed in, which
        # overrides a level configured by the caller -- confirm it is wanted.
        logger.setLevel(logging.INFO)

    def get_images(self):
        '''
        :returns: img_path generator
        '''
        for img_path in self.file_paths:
            # Keep only the files imghdr recognizes as images.
            if imghdr.what(img_path) is not None:
                yield img_path

    def find_duplicates(self):
        """
        Find duplicates

        :returns: list of paths whose hash equals the hash of an image seen
            earlier in the iteration.
        """
        hashes = {}
        duplicates = []
        # get_images() already filters out non-image files.
        for img_path in self.get_images():
            with Image.open(img_path) as img:
                temp_hash = imagehash.average_hash(img, self.hash_size)

            if temp_hash in hashes:
                self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
                duplicates.append(img_path)
            else:
                hashes[temp_hash] = img_path

        return duplicates

    def remove_duplicates(self, duplicates):
        """Delete every path in ``duplicates``; failures are only logged."""
        for duplicate in duplicates:
            try:
                os.remove(duplicate)
            except OSError as error:
                self.logger.error(error)

    def remove_duplicates_interactive(self, duplicates):
        """Ask for confirmation on stdin before deleting ``duplicates``."""
        if not duplicates:
            self.logger.info("No duplicates found")
            return

        answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
        if answer.strip().lower() == 'y':
            self.remove_duplicates(duplicates)
            # remove_duplicates only logs failures, so report as deleted
            # the files that are really gone.
            for duplicate in duplicates:
                if not os.path.exists(duplicate):
                    self.logger.info(f'{duplicate} deleted successfully!')

    def find_similar(self, image, similarity=80):
        '''
        Find similar images

        :param image: path of the reference image.
        :param similarity: minimum similarity, in percent, for a match.
        :returns: img_path generator
        '''
        hash_bits = self.hash_size ** 2
        threshold = 1 - similarity/100
        # Highest number of differing hash bits still counted as similar.
        diff_limit = int(threshold * hash_bits)

        # Without a reference hash there is nothing meaningful to compare.
        if imghdr.what(image) is None:
            self.logger.warning(f'{image} is not a recognized image')
            return

        with Image.open(image) as img:
            hash1 = imagehash.average_hash(img, self.hash_size).hash

        self.logger.info(f'Finding similar images to {image}')
        for img_path in self.get_images():
            if img_path == image:
                continue
            with Image.open(img_path) as img:
                hash2 = imagehash.average_hash(img, self.hash_size).hash

            # Number of positions where the two hashes differ.
            diff_images = np.count_nonzero(hash1 != hash2)
            if diff_images <= diff_limit:
                threshold_img = diff_images / hash_bits
                similarity_img = round((1 - threshold_img) * 100)
                self.logger.info(f'{img_path} image found {similarity_img}% similar to {image}')
                yield img_path
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
click==6.6
 | 
			
		||||
imagehash==4.2.1
 | 
			
		||||
requests==2.20.0
 | 
			
		||||
Send2Trash==1.3.0
 | 
			
		||||
configparser==3.5.0
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user