Add compare command
This commit is contained in:
parent
1de9e963aa
commit
b4a8cc88cb
50
elodie.py
50
elodie.py
|
@ -21,6 +21,7 @@ from elodie import log
|
||||||
from elodie.compatability import _decode
|
from elodie.compatability import _decode
|
||||||
from elodie.config import load_config
|
from elodie.config import load_config
|
||||||
from elodie.filesystem import FileSystem
|
from elodie.filesystem import FileSystem
|
||||||
|
from elodie.gui import CompareImageApp
|
||||||
from elodie.localstorage import Db
|
from elodie.localstorage import Db
|
||||||
from elodie.media.media import Media, get_all_subclasses
|
from elodie.media.media import Media, get_all_subclasses
|
||||||
from elodie.media.audio import Audio
|
from elodie.media.audio import Audio
|
||||||
|
@ -474,11 +475,60 @@ def _update(album, location, time, title, paths, debug):
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@click.command('compare')
@click.option('--debug', default=False, is_flag=True,
              help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
              help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
# NOTE(review): declared as a boolean flag although the name suggests a
# directory argument -- confirm the intended type before wiring it up.
@click.option('--output-dir', '-o', default=False, is_flag=True,
              help='output dir')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True,
              help='Revert compare')
@click.option('--similar-to', '-s', default=False,
              help='Similar to given image')
@click.option('--similarity', '-S', default=80,
              help='Similarity level for images')
@click.option('--verbose', '-v', default=False, is_flag=True,
              help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
             revert_compare, similar_to, similarity, verbose, path):
    '''Compare files in directories'''
    # The docstring above is what click shows as the command help, so it is
    # kept short; implementation notes live in comments instead.
    #
    # find_duplicates, output_dir, remove_duplicates and similar_to are
    # accepted on the command line but not used by this function body.

    # Log level precedence: --debug, then --verbose, otherwise errors only.
    logger = logging.getLogger('elodie')
    if debug:
        logger.setLevel(logging.DEBUG)
    elif verbose:
        logger.setLevel(logging.INFO)
    else:
        logger.setLevel(logging.ERROR)

    # Initialize Db
    db = Db(path)

    # dry_run is handed to the FileSystem instance here; the compare methods
    # read it from the instance and do not take it as an argument.
    filesystem = FileSystem(mode='move', dry_run=dry_run, logger=logger)

    if revert_compare:
        summary, has_errors = filesystem.revert_compare(path, db)
    else:
        summary, has_errors = filesystem.sort_similar_images(path, db,
                                                             similarity)

    if verbose or debug:
        summary.write()

    # Non-zero exit status when any file could not be processed.
    if has_errors:
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
@click.group()
def main():
    # Root command group: it does nothing by itself, the sub-commands
    # registered below carry the behaviour.
    pass


# Register every sub-command on the root group.
main.add_command(_compare)
main.add_command(_import)
main.add_command(_sort)
main.add_command(_update)
|
||||||
|
|
|
@ -21,6 +21,7 @@ from elodie import constants
|
||||||
|
|
||||||
from elodie.localstorage import Db
|
from elodie.localstorage import Db
|
||||||
from elodie.media.media import get_media_class, get_all_subclasses
|
from elodie.media.media import get_media_class, get_all_subclasses
|
||||||
|
from elodie.media.photo import CompareImages
|
||||||
from elodie.plugins.plugins import Plugins
|
from elodie.plugins.plugins import Plugins
|
||||||
from elodie.summary import Summary
|
from elodie.summary import Summary
|
||||||
|
|
||||||
|
@ -893,6 +894,149 @@ class FileSystem(object):
|
||||||
return self.summary, has_errors
|
return self.summary, has_errors
|
||||||
|
|
||||||
|
|
||||||
|
def check_path(self, path):
    """Return ``path`` with ``~`` expanded and made absolute.

    When the resulting path does not exist, an error is logged on
    ``self.logger`` and the process exits with status 1.
    """
    resolved = os.path.abspath(os.path.expanduser(path))

    if os.path.exists(resolved):
        return resolved

    # Nothing to work on: report the problem and stop the program.
    self.logger.error(f'Directory {resolved} does not exist')
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def set_hash(self, result, src_path, dest_path, checksum, db):
    """Record the outcome of a file operation and report whether it failed.

    :param result: truthy when the preceding file operation succeeded.
    :param src_path: path the file came from.
    :param dest_path: path the file was written to.
    :param checksum: checksum the destination is compared against through
        ``self.checkcomp``.
    :param db: hash database exposing ``add_hash`` and ``update_hash_db``.
    :returns: ``True`` when an error was recorded, ``False`` otherwise.
    """
    # The operation itself failed: only note the failure in the summary.
    if not result:
        self.summary.append((src_path, False))
        return True

    # Check that the destination still matches the expected checksum.
    if not self.checkcomp(dest_path, checksum):
        self.logger.error(f'Files {src_path} and {dest_path} are not identical')
        self.summary.append((src_path, False))
        return True

    # The hash database is left untouched during a dry run.
    if not self.dry_run:
        db.add_hash(checksum, dest_path)
        db.update_hash_db()

    if dest_path:
        self.logger.info(f'{src_path} -> {dest_path}')

    self.summary.append((src_path, dest_path))
    return False
|
||||||
|
|
||||||
|
|
||||||
|
def move_file(self, img_path, dest_path, checksum, db):
    """Move ``img_path`` to ``dest_path`` and record the outcome.

    During a dry run (``self.dry_run``) nothing is moved, but the operation
    is still logged and handed to ``self.set_hash``.

    :param img_path: file to move.
    :param dest_path: destination path.
    :param checksum: checksum forwarded to ``self.set_hash``.
    :param db: hash database forwarded to ``self.set_hash``.
    :returns: the value of ``self.set_hash`` (truthy when an error occurred).
    """
    if not self.dry_run:
        try:
            shutil.move(img_path, dest_path)
        except OSError as error:
            self.logger.error(error)
            # The move failed: report it as a failure instead of logging a
            # move that never happened and hashing a missing destination.
            return self.set_hash(False, img_path, dest_path, checksum, db)

    self.logger.info(f'move: {img_path} -> {dest_path}')
    return self.set_hash(True, img_path, dest_path, checksum, db)
|
||||||
|
|
||||||
|
|
||||||
|
def sort_similar_images(self, path, db, similarity=80):
    """Group similar images into ``similar_to_<name>`` sub-directories.

    Every directory yielded by ``self.walklevel`` is scanned. For each image
    that has at least one similar image in the same directory, the similar
    images and the image itself are moved into a ``similar_to_<name>``
    directory created next to them, ``<name>`` being the image file name
    without its extension.

    :param path: root directory to scan (validated by ``self.check_path``).
    :param db: hash database; must provide ``checksum(path)``.
    :param similarity: similarity level passed to
        ``CompareImages.find_similar``.
    :returns: ``(self.summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, filenames, level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path, so the prefix has to be tested on its last
        # component to actually skip directories created by a previous run.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        file_paths = {os.path.join(dirname, filename)
                      for filename in filenames}
        ci = CompareImages(file_paths, logger=self.logger)

        # Iterate over a snapshot: ci.file_paths shrinks as images are moved.
        for image in set(ci.get_images()):
            # Skip images already grouped with an earlier one. In a dry run
            # they are still on disk, hence the membership test.
            if image not in ci.file_paths or not os.path.isfile(image):
                continue
            checksum1 = db.checksum(image)
            # TODO compare metadata

            name = os.path.splitext(os.path.basename(image))[0]
            dest_directory = os.path.join(os.path.dirname(image),
                                          'similar_to_' + name)

            similar = False
            moved_imgs = set()
            for img_path in ci.find_similar(image, similarity):
                similar = True
                checksum2 = db.checksum(img_path)
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(img_path))

                # Move the similar file into the destination directory.
                if self.create_directory(dest_directory):
                    moved_imgs.add(img_path)
                    # move_file returns a truthy value when it FAILED.
                    if self.move_file(img_path, dest_path, checksum2, db):
                        has_errors = True
                else:
                    has_errors = True

            if similar:
                # The reference image joins the images it is similar to.
                dest_path = os.path.join(dest_directory,
                                         os.path.basename(image))
                moved_imgs.add(image)
                if self.move_file(image, dest_path, checksum1, db):
                    has_errors = True

            ci.file_paths.difference_update(moved_imgs)

    return self.summary, has_errors
|
||||||
|
|
||||||
|
|
||||||
|
def revert_compare(self, path, db):
    """Undo ``sort_similar_images``.

    Files found in every ``similar_to*`` sub-directory are moved back into
    the directory that contains it, then the sub-directory is removed.

    :param path: root directory to scan (validated by ``self.check_path``).
    :param db: hash database; must provide ``checksum(path)``.
    :returns: ``(self.summary, has_errors)``.
    """
    has_errors = False
    path = self.check_path(path)
    for dirname, dirnames, filenames, level in self.walklevel(path, None):
        if dirname == os.path.join(path, '.elodie'):
            continue
        # dirname is a full path: test the prefix on its last component.
        if os.path.basename(dirname).startswith('similar_to'):
            continue

        for subdir in dirnames:
            if not subdir.startswith('similar_to'):
                continue

            similar_dir = os.path.join(dirname, subdir)
            for file_name in os.listdir(os.path.abspath(similar_dir)):
                # move file to initial folder
                img_path = os.path.join(similar_dir, file_name)
                if os.path.isdir(img_path):
                    continue
                checksum = db.checksum(img_path)
                dest_path = os.path.join(dirname, file_name)
                # move_file returns a truthy value when it FAILED.
                if self.move_file(img_path, dest_path, checksum, db):
                    has_errors = True

            # Remove the directory. A dry run moved nothing, so the
            # directory is left in place rather than attempting a removal.
            if not self.dry_run:
                try:
                    os.rmdir(similar_dir)
                except OSError as error:
                    self.logger.error(error)

    return self.summary, has_errors
|
||||||
|
|
||||||
def process_file(self, _file, destination, db, media, album_from_folder,
|
def process_file(self, _file, destination, db, media, album_from_folder,
|
||||||
mode, **kwargs):
|
mode, **kwargs):
|
||||||
allow_duplicate = False
|
allow_duplicate = False
|
||||||
|
|
|
@ -5,8 +5,12 @@ image objects (JPG, DNG, etc.).
|
||||||
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
|
.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import imagehash
|
||||||
import imghdr
|
import imghdr
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
import os
|
import os
|
||||||
|
from PIL import Image
|
||||||
import time
|
import time
|
||||||
|
|
||||||
from .media import Media
|
from .media import Media
|
||||||
|
@ -81,3 +85,88 @@ class Photo(Media):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return extension in self.extensions
|
return extension in self.extensions
|
||||||
|
|
||||||
|
|
||||||
|
class CompareImages:
    """Find duplicate and similar images among a collection of file paths.

    Images are compared through ``imagehash.average_hash`` computed with
    ``hash_size``.
    """

    def __init__(self, file_paths, hash_size=8, logger=None):
        """
        :param file_paths: collection of candidate file paths. It is stored
            as is, not copied.
        :param hash_size: size passed to ``imagehash.average_hash``.
        :param logger: logger to report on; the root logger when omitted.
            Its level is left as configured by the caller.
        """
        self.file_paths = file_paths
        self.hash_size = hash_size
        self.logger = logging.getLogger() if logger is None else logger

    def get_images(self):
        '''
        :returns: img_path generator
        '''
        # Only paths that imghdr recognises as an image are yielded.
        for img_path in self.file_paths:
            if imghdr.what(img_path) is not None:
                yield img_path

    def find_duplicates(self):
        """
        Find duplicates

        :returns: list of image paths whose average hash equals the hash of
            an image seen earlier in the iteration.
        """
        hashes = {}
        duplicates = []
        # Searching for duplicates.
        for img_path in self.get_images():
            with Image.open(img_path) as img:
                temp_hash = imagehash.average_hash(img, self.hash_size)
            if temp_hash in hashes:
                self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
                duplicates.append(img_path)
            else:
                hashes[temp_hash] = img_path

        return duplicates

    def remove_duplicates(self, duplicates):
        """Delete every path in ``duplicates``; failures are logged."""
        for duplicate in duplicates:
            try:
                os.remove(duplicate)
            except OSError as error:
                self.logger.error(error)

    def remove_duplicates_interactive(self, duplicates):
        """Ask for confirmation on stdin before deleting ``duplicates``.

        Only the answer ``y`` (case-insensitive, surrounding blanks ignored)
        triggers the deletion.
        """
        if len(duplicates) != 0:
            answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
            if answer.strip().lower() == 'y':
                self.remove_duplicates(duplicates)
                self.logger.info(f'{duplicates} deleted successfully!')
        else:
            self.logger.info("No duplicates found")

    def find_similar(self, image, similarity=80):
        '''
        Find similar images

        :param image: path of the reference image.
        :param similarity: minimum similarity, in percent, for a match.
        :returns: img_path generator
        '''
        hash_bits = self.hash_size ** 2
        # Largest number of differing hash bits still counted as similar.
        # Computed from the percentage directly so that float rounding of
        # ``1 - similarity/100`` cannot push the limit one bit low.
        diff_limit = int((100 - similarity) * hash_bits // 100)

        # Without a readable reference image there is nothing to compare.
        if imghdr.what(image) is None:
            return

        with Image.open(image) as img:
            hash1 = imagehash.average_hash(img, self.hash_size).hash

        self.logger.info(f'Finding similar images to {image}')
        for img_path in self.get_images():
            if img_path == image:
                continue
            with Image.open(img_path) as img:
                hash2 = imagehash.average_hash(img, self.hash_size).hash

            # Count the positions where the two hash arrays differ.
            diff_images = np.count_nonzero(hash1 != hash2)
            if diff_images <= diff_limit:
                threshold_img = diff_images / hash_bits
                similarity_img = round((1 - threshold_img) * 100)
                self.logger.info(f'{img_path} image found {similarity_img}% similar to {image}')
                yield img_path
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
click==6.6
|
click==6.6
|
||||||
|
imagehash==4.2.1
|
||||||
requests==2.20.0
|
requests==2.20.0
|
||||||
Send2Trash==1.3.0
|
Send2Trash==1.3.0
|
||||||
configparser==3.5.0
|
configparser==3.5.0
|
||||||
|
|
Loading…
Reference in New Issue