Add compare command
This commit is contained in:
parent
1de9e963aa
commit
b4a8cc88cb
50
elodie.py
50
elodie.py
|
@ -21,6 +21,7 @@ from elodie import log
|
|||
from elodie.compatability import _decode
|
||||
from elodie.config import load_config
|
||||
from elodie.filesystem import FileSystem
|
||||
from elodie.gui import CompareImageApp
|
||||
from elodie.localstorage import Db
|
||||
from elodie.media.media import Media, get_all_subclasses
|
||||
from elodie.media.audio import Audio
|
||||
|
@ -474,11 +475,60 @@ def _update(album, location, time, title, paths, debug):
|
|||
sys.exit(1)
|
||||
|
||||
|
||||
@click.command('compare')
@click.option('--debug', default=False, is_flag=True,
              help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
              help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True,
              help='output dir')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True,
              help='Revert compare')
@click.option('--similar-to', '-s', default=False,
              help='Similar to given image')
@click.option('--similarity', '-S', default=80,
              help='Similarity level for images')
@click.option('--verbose', '-v', default=False, is_flag=True,
              help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
             revert_compare, similar_to, similarity, verbose, path):
    '''Compare files in directories'''
    # The docstring above is kept verbatim on purpose: click shows it as the
    # command help text.
    #
    # NOTE(review): find_duplicates, output_dir, remove_duplicates and
    # similar_to are accepted on the command line but are not used anywhere
    # in this function yet.

    # --debug takes precedence over --verbose; without either only errors
    # are reported.
    logger = logging.getLogger('elodie')
    if debug:
        logger.setLevel(logging.DEBUG)
    elif verbose:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)

    # Initialize Db
    db = Db(path)

    # dry_run is handed to the FileSystem constructor only; the two methods
    # called below accept (path, db) and (path, db, similarity), so passing
    # dry_run again as an extra positional argument raises TypeError.
    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)

    if revert_compare:
        summary, has_errors = filesystem.revert_compare(path, db)
    else:
        summary, has_errors = filesystem.sort_similar_images(path, db,
                                                             similarity)

    if verbose or debug:
        summary.write()

    # A non-zero exit status lets callers detect a failed run.
    if has_errors:
        sys.exit(1)
|
||||
|
||||
|
||||
@click.group()
def main():
    # Root command group; it does nothing itself and only hosts subcommands.
    pass


# Attach every subcommand to the root group, keeping the original order.
for _command in (_compare, _import, _sort, _update):
    main.add_command(_command)
|
||||
|
|
|
@ -21,6 +21,7 @@ from elodie import constants
|
|||
|
||||
from elodie.localstorage import Db
|
||||
from elodie.media.media import get_media_class, get_all_subclasses
|
||||
from elodie.media.photo import CompareImages
|
||||
from elodie.plugins.plugins import Plugins
|
||||
from elodie.summary import Summary
|
||||
|
||||
|
@ -893,6 +894,149 @@ class FileSystem(object):
|
|||
return self.summary, has_errors
|
||||
|
||||
|
||||
def check_path(self, path):
    '''
    Normalize ``path`` and make sure it exists.

    :param str path: path to check; ``~`` is expanded and the result is
        made absolute.
    :returns: str, the expanded absolute path.
    :raises SystemExit: with status 1, after logging an error, when the
        resolved path does not exist.
    '''
    resolved = os.path.expanduser(path)
    resolved = os.path.abspath(resolved)

    if os.path.exists(resolved):
        return resolved

    # Nothing to work on: report and abort the program.
    self.logger.error(f'Directory {resolved} does not exist')
    sys.exit(1)
|
||||
|
||||
|
||||
def set_hash(self, result, src_path, dest_path, checksum, db):
    '''
    Record the outcome of a file operation in the summary and hash db.

    :param result: truthy when the preceding file operation succeeded.
    :param str src_path: original location of the file.
    :param str dest_path: location the file was written to.
    :param checksum: checksum passed to ``self.checkcomp`` together with
        ``dest_path``, and stored in the db on success.
    :param db: object providing ``add_hash`` and ``update_hash_db``.
    :returns: bool, True when an error occurred, False otherwise.
    '''
    # The preceding operation already failed: just record the failure.
    if not result:
        self.summary.append((src_path, False))
        return True

    # Check if file remain the same
    if not self.checkcomp(dest_path, checksum):
        self.logger.error(f'Files {src_path} and {dest_path} are not identical')
        self.summary.append((src_path, False))
        return True

    # The hash db is only written outside of dry-run mode.
    if not self.dry_run:
        db.add_hash(checksum, dest_path)
        db.update_hash_db()

    if dest_path:
        self.logger.info(f'{src_path} -> {dest_path}')

    self.summary.append((src_path, dest_path))
    return False
|
||||
|
||||
|
||||
def move_file(self, img_path, dest_path, checksum, db):
    '''
    Move ``img_path`` to ``dest_path`` and record the outcome.

    In dry-run mode nothing is moved, but the outcome is still handed to
    ``self.set_hash`` as a success.

    :param str img_path: file to move.
    :param str dest_path: full destination path.
    :param checksum: checksum of the file, forwarded to ``self.set_hash``.
    :param db: hash db object, forwarded to ``self.set_hash``.
    :returns: the value returned by ``self.set_hash``, i.e. True when an
        error occurred and False on success.
    '''
    moved = True
    if not self.dry_run:
        try:
            shutil.move(img_path, dest_path)
        except OSError as error:
            self.logger.error(error)
            # Do not report a move that did not happen as successful.
            moved = False

    if moved:
        self.logger.info(f'move: {img_path} -> {dest_path}')

    # set_hash records a failure without verifying the destination when it
    # is told the move failed.
    return self.set_hash(moved, img_path, dest_path, checksum, db)
|
||||
|
||||
|
||||
def sort_similar_images(self, path, db, similarity=80):
    '''
    Group similar images of each directory into ``similar_to_<name>``
    sub-directories.

    For every image found in a directory, the images of the same directory
    reported as similar by ``CompareImages.find_similar`` are moved,
    together with the reference image, into a sub-directory named after
    the reference image.

    :param str path: root directory to walk.
    :param db: hash db object providing ``checksum``; also forwarded to
        ``self.move_file``.
    :param int similarity: minimum similarity, in percent, forwarded to
        ``CompareImages.find_similar``.
    :returns: tuple ``(self.summary, has_errors)``.
    '''
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, filenames, level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path, so the prefix has to be tested on its
        # last component; testing the whole path never matches.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        file_paths = {os.path.join(dirname, filename)
                      for filename in filenames}
        ci = CompareImages(file_paths, logger=self.logger)

        # Snapshot of the images: files get moved and ci.file_paths
        # shrinks while this loop runs.
        images = set(ci.get_images())
        for image in images:
            # Skip files that are no longer at this path (e.g. already
            # moved as a match of an earlier image).
            if not os.path.isfile(image):
                continue
            checksum1 = db.checksum(image)
            # TODO compare metadata

            # All candidates live in dirname, so the destination only
            # depends on the reference image.
            name = os.path.splitext(os.path.basename(image))[0]
            dest_directory = os.path.join(dirname, 'similar_to_' + name)

            moved_imgs = set()
            directory_ready = False
            for img_path in ci.find_similar(image, similarity):
                checksum2 = db.checksum(img_path)
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(img_path))

                if not self.create_directory(dest_directory):
                    has_errors = True
                    continue
                directory_ready = True

                # Move the similar file into the destination directory.
                # move_file returns True when an error occurred.
                moved_imgs.add(img_path)
                if self.move_file(img_path, dest_path, checksum2, db):
                    has_errors = True

            # Move the reference image next to its matches, but only when
            # the destination directory could be created.
            if directory_ready:
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(image))
                moved_imgs.add(image)
                if self.move_file(image, dest_path, checksum1, db):
                    has_errors = True

            # Handled files must not be matched against later images.
            ci.file_paths.difference_update(moved_imgs)

    return self.summary, has_errors
|
||||
|
||||
|
||||
def revert_compare(self, path, db):
    '''
    Undo ``sort_similar_images``: move the files found in every
    ``similar_to*`` sub-directory back into its parent directory, then
    remove the emptied sub-directory.

    :param str path: root directory to walk.
    :param db: hash db object providing ``checksum``; also forwarded to
        ``self.move_file``.
    :returns: tuple ``(self.summary, has_errors)``.
    '''
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, filenames, level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path, so the prefix has to be tested on its
        # last component; testing the whole path never matches.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        for subdir in dirnames:
            if not subdir.startswith('similar_to'):
                continue

            similar_dir = os.path.join(dirname, subdir)
            for file_name in os.listdir(similar_dir):
                # move file to initial folder
                img_path = os.path.join(similar_dir, file_name)
                if os.path.isdir(img_path):
                    continue
                checksum = db.checksum(img_path)
                dest_path = os.path.join(dirname, file_name)
                # move_file returns True when an error occurred.
                if self.move_file(img_path, dest_path, checksum, db):
                    has_errors = True

            # A dry run must not change the filesystem, so the directory
            # is left in place.
            if self.dry_run:
                continue

            # remove directory; a failure (e.g. directory not empty) is
            # logged without aborting the run.
            try:
                os.rmdir(similar_dir)
            except OSError as error:
                self.logger.error(error)

    return self.summary, has_errors
|
||||
|
||||
def process_file(self, _file, destination, db, media, album_from_folder,
|
||||
mode, **kwargs):
|
||||
allow_duplicate = False
|
||||
|
|
|
@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
|
|||
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
|
||||
"""
|
||||
|
||||
import imagehash
|
||||
import imghdr
|
||||
import logging
|
||||
import numpy as np
|
||||
import os
|
||||
from PIL import Image
|
||||
import time
|
||||
|
||||
from .media import Media
|
||||
|
@ -81,3 +85,88 @@ class Photo(Media):
|
|||
return False
|
||||
|
||||
return extension in self.extensions
|
||||
|
||||
|
||||
class CompareImages:
    '''
    Find duplicate and similar images among a collection of file paths by
    comparing their average hashes (``imagehash.average_hash``).
    '''

    def __init__(self, file_paths, hash_size=8, logger=logging.getLogger()):
        '''
        :param file_paths: iterable of candidate file paths. The object is
            stored as-is, so callers may update it between calls.
        :param int hash_size: size passed to ``imagehash.average_hash``;
            hashes are compared over ``hash_size**2`` bits.
        :param logger: logger used for reporting. Its level is left
            untouched so the caller's verbosity settings apply.
        '''
        self.file_paths = file_paths
        self.hash_size = hash_size
        self.logger = logger

    def get_images(self):
        '''
        :returns: img_path generator
        '''
        # NOTE: imghdr is deprecated since Python 3.11 and removed in 3.13.
        for img_path in self.file_paths:
            if imghdr.what(img_path) is not None:
                yield img_path

    def _average_hash(self, img_path):
        '''Return the average hash of the image stored at ``img_path``.'''
        with Image.open(img_path) as img:
            return imagehash.average_hash(img, self.hash_size)

    def find_duplicates(self):
        '''
        Find duplicates

        :returns: list of the paths whose average hash equals the hash of
            an image seen earlier in the iteration.
        '''
        hashes = {}
        duplicates = []
        # get_images() already filters out non-image files.
        for img_path in self.get_images():
            temp_hash = self._average_hash(img_path)
            if temp_hash in hashes:
                self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
                duplicates.append(img_path)
            else:
                hashes[temp_hash] = img_path

        return duplicates

    def remove_duplicates(self, duplicates):
        '''
        Delete the given files; failures are logged and skipped.

        :param duplicates: iterable of file paths to delete.
        :returns: list of the paths that were actually deleted.
        '''
        removed = []
        for duplicate in duplicates:
            try:
                os.remove(duplicate)
            except OSError as error:
                self.logger.error(error)
            else:
                removed.append(duplicate)

        return removed

    def remove_duplicates_interactive(self, duplicates):
        '''
        Ask for confirmation on standard input, then delete ``duplicates``.

        Only an answer of ``y`` (case-insensitive, surrounding whitespace
        ignored) triggers the deletion.

        :param duplicates: sequence of file paths to delete.
        '''
        if len(duplicates) != 0:
            answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
            if answer.strip().lower() == 'y':
                # Report only the files that were really deleted.
                for duplicate in self.remove_duplicates(duplicates):
                    self.logger.info(f'{duplicate} deleted successfully!')
        else:
            self.logger.info("No duplicates found")

    def find_similar(self, image, similarity=80):
        '''
        Find similar images

        :param str image: path of the reference image.
        :param similarity: minimum similarity, in percent, for another
            image to be reported.
        :returns: img_path generator
        '''
        hash_bits = self.hash_size**2
        threshold = 1 - similarity/100
        # Maximum number of differing hash bits still counted as similar.
        diff_limit = int(threshold*hash_bits)

        # Without a readable reference image there is nothing to compare.
        if imghdr.what(image) is None:
            self.logger.warning(f'{image} is not a recognized image')
            return
        hash1 = self._average_hash(image).hash

        self.logger.info(f'Finding similar images to {image}')
        for img_path in self.get_images():
            if img_path == image:
                continue
            hash2 = self._average_hash(img_path).hash

            # Number of positions where the two hashes differ.
            diff_images = np.count_nonzero(hash1 != hash2)
            if diff_images <= diff_limit:
                threshold_img = diff_images / hash_bits
                similarity_img = round((1 - threshold_img) * 100)
                self.logger.info(f'{img_path} image found {similarity_img}% similar to {image}')
                yield img_path
|
||||
|
||||
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
click==6.6
|
||||
imagehash==4.2.1
|
||||
requests==2.20.0
|
||||
Send2Trash==1.3.0
|
||||
configparser==3.5.0
|
||||
|
|
Loading…
Reference in New Issue