Add glob option for filtering paths

Cédric Leporcq 2021-09-18 22:06:34 +02:00
parent 6af9d5d879
commit 63b154b8f3
9 changed files with 381 additions and 237 deletions

View File

@ -9,8 +9,8 @@ dirs_path={%Y}/{%m-%b}-{city}-{folder}
name={%Y%m%d-%H%M%S}-%u{original_name}.%l{ext} name={%Y%m%d-%H%M%S}-%u{original_name}.%l{ext}
[Exclusions] [Exclusions]
name1=.directory path1=**/.directory
name2=.DS_Store path2=**/.DS_Store
[Geolocation] [Geolocation]
geocoder=Nominatim geocoder=Nominatim
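The [Exclusions] keys change from bare file names (name1, name2) to glob patterns matched against the whole path (path1, path2). A minimal sketch of how such patterns behave, assuming fnmatch-style matching as used by the collection module below (the example paths are invented):

    from fnmatch import fnmatch

    # '**/.directory' matches any path that ends in '/.directory'
    print(fnmatch('photos/2021/.directory', '**/.directory'))    # True
    print(fnmatch('photos/2021/IMG_0001.jpg', '**/.directory'))  # False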

ordigi.py
View File

@ -3,7 +3,6 @@
import os import os
import re import re
import sys import sys
from datetime import datetime
import click import click
@ -16,17 +15,56 @@ from ordigi.media import Media, get_all_subclasses
from ordigi.summary import Summary from ordigi.summary import Summary
_logger_options = [
click.option('--debug', default=False, is_flag=True,
help='Override the value in constants.py with True.'),
click.option('--verbose', '-v', default=False, is_flag=True,
help='True if you want to see details of file processing')
]
_dry_run_options = [
click.option('--dry-run', default=False, is_flag=True,
help='Dry run only, no change made to the filesystem.')
]
_filter_option = [
click.option('--exclude', '-e', default=set(), multiple=True,
help='Directories or files to exclude.'),
click.option('--filter-by-ext', '-f', default=set(), multiple=True,
help="""Use filename
extension to filter files for sorting. If value is '*', use
common media file extension for filtering. Ignored files remain in
the same directory structure""" ),
click.option('--glob', '-g', default='**/*',
help='Glob file selection')
]
def print_help(command): def print_help(command):
click.echo(command.get_help(click.Context(sort))) click.echo(command.get_help(click.Context(sort)))
def add_options(options):
def _add_options(func):
for option in reversed(options):
func = option(func)
return func
return _add_options
def _get_exclude(opt, exclude):
# If no exclude list was passed in, fall back to the config value
if len(exclude) == 0:
exclude = opt['exclude']
return set(exclude)
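The shared --debug/--verbose, --dry-run and filter options are attached to each command through the add_options decorator factory, and the commands now receive everything via **kwargs. A short sketch of the pattern, reusing the helpers defined above with a hypothetical demo command that is not part of this commit:

    import click

    @click.command('demo')
    @add_options(_logger_options)
    @add_options(_dry_run_options)
    def demo(**kwargs):
        # every shared option arrives as a keyword argument
        click.echo(f"debug={kwargs['debug']} dry_run={kwargs['dry_run']}")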
@click.command('sort') @click.command('sort')
@add_options(_logger_options)
@add_options(_dry_run_options)
@add_options(_filter_option)
@click.option('--album-from-folder', default=False, is_flag=True, @click.option('--album-from-folder', default=False, is_flag=True,
help="Use images' folders as their album names.") help="Use images' folders as their album names.")
@click.option('--debug', default=False, is_flag=True,
help='Override the value in constants.py with True.')
@click.option('--dry-run', default=False, is_flag=True,
help='Dry run only, no change made to the filesystem.')
@click.option('--destination', '-d', type=click.Path(file_okay=False), @click.option('--destination', '-d', type=click.Path(file_okay=False),
default=None, help='Sort files into this directory.') default=None, help='Sort files into this directory.')
@click.option('--clean', '-C', default=False, is_flag=True, @click.option('--clean', '-C', default=False, is_flag=True,
@ -34,16 +72,10 @@ def print_help(command):
@click.option('--copy', '-c', default=False, is_flag=True, @click.option('--copy', '-c', default=False, is_flag=True,
help='True if you want files to be copied over from src_dir to\ help='True if you want files to be copied over from src_dir to\
dest_dir rather than moved') dest_dir rather than moved')
@click.option('--exclude-regex', '-e', default=set(), multiple=True, @click.option('--ignore-tags', '-I', default=set(), multiple=True,
help='Regular expression for directories or files to exclude.')
@click.option('--filter-by-ext', '-f', default=set(), multiple=True, help='''Use filename
extension to filter files for sorting. If value is '*', use
common media file extension for filtering. Ignored files remain in
the same directory structure''' )
@click.option('--ignore-tags', '-i', default=set(), multiple=True,
help='Specific tags or group that will be ignored when\ help='Specific tags or group that will be ignored when\
searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' ) searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' )
@click.option('--interactive', default=False, is_flag=True, @click.option('--interactive', '-i', default=False, is_flag=True,
help="Interactive mode") help="Interactive mode")
@click.option('--max-deep', '-m', default=None, @click.option('--max-deep', '-m', default=None,
help='Maximum level to proceed. Number from 0 to desired level.') help='Maximum level to proceed. Number from 0 to desired level.')
@ -52,28 +84,31 @@ def print_help(command):
and a file hash') and a file hash')
@click.option('--reset-cache', '-r', default=False, is_flag=True, @click.option('--reset-cache', '-r', default=False, is_flag=True,
help='Regenerate the hash.json and location.json database ') help='Regenerate the hash.json and location.json database ')
@click.option('--verbose', '-v', default=False, is_flag=True,
help='True if you want to see details of file processing')
@click.argument('paths', required=True, nargs=-1, type=click.Path()) @click.argument('paths', required=True, nargs=-1, type=click.Path())
def _sort(album_from_folder, debug, dry_run, destination, clean, copy, def sort(**kwargs):
exclude_regex, interactive, filter_by_ext, ignore_tags,
max_deep, remove_duplicates, reset_cache, verbose, paths):
"""Sort files or directories by reading their EXIF and organizing them """Sort files or directories by reading their EXIF and organizing them
according to ordigi.conf preferences. according to ordigi.conf preferences.
""" """
if copy: debug = kwargs['debug']
destination = kwargs['destination']
verbose = kwargs['verbose']
paths = kwargs['paths']
if kwargs['copy']:
mode = 'copy' mode = 'copy'
else: else:
mode = 'move' mode = 'move'
logger = log.get_logger(verbose, debug) logger = log.get_logger(verbose, debug)
max_deep = kwargs['max_deep']
if max_deep is not None: if max_deep is not None:
max_deep = int(max_deep) max_deep = int(max_deep)
cache = True cache = True
if reset_cache: if kwargs['reset_cache']:
cache = False cache = False
if len(paths) > 1: if len(paths) > 1:
@ -89,28 +124,25 @@ def _sort(album_from_folder, debug, dry_run, destination, clean, copy,
sys.exit(1) sys.exit(1)
paths = set(paths) paths = set(paths)
filter_by_ext = set(filter_by_ext)
config = Config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
opt = config.get_options() opt = config.get_options()
# if no exclude list was passed in we check if there's a config exclude = _get_exclude(opt, kwargs['exclude'])
if len(exclude_regex) == 0: filter_by_ext = set(kwargs['filter_by_ext'])
exclude_regex = opt['exclude_regex']
exclude_regex_list = set(exclude_regex)
collection = Collection(destination, opt['path_format'], collection = Collection(destination, opt['path_format'],
album_from_folder, cache, opt['day_begins'], dry_run, kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'],
exclude_regex_list, filter_by_ext, interactive, exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'],
logger, max_deep, mode) logger, max_deep, mode)
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout']) opt['timeout'])
summary, has_errors = collection.sort_files(paths, loc, summary, has_errors = collection.sort_files(paths, loc,
remove_duplicates, ignore_tags) kwargs['remove_duplicates'], kwargs['ignore_tags'])
if clean: if kwargs['clean']:
remove_empty_folders(destination, logger) remove_empty_folders(destination, logger)
if verbose or debug: if verbose or debug:
@ -141,12 +173,11 @@ def remove_empty_folders(path, logger, remove_root=True):
@click.command('clean') @click.command('clean')
@click.option('--debug', default=False, is_flag=True, @add_options(_logger_options)
help='Override the value in constants.py with True.') @add_options(_dry_run_options)
@add_options(_filter_option)
@click.option('--dedup-regex', '-d', default=set(), multiple=True, @click.option('--dedup-regex', '-d', default=set(), multiple=True,
help='Regex to match duplicate strings parts') help='Regex to match duplicate strings parts')
@click.option('--dry-run', default=False, is_flag=True,
help='Dry run only, no change made to the filesystem.')
@click.option('--folders', '-f', default=False, is_flag=True, @click.option('--folders', '-f', default=False, is_flag=True,
help='Remove empty folders') help='Remove empty folders')
@click.option('--max-deep', '-m', default=None, @click.option('--max-deep', '-m', default=None,
@ -158,15 +189,20 @@ def remove_empty_folders(path, logger, remove_root=True):
and a file hash') and a file hash')
@click.option('--root', '-r', type=click.Path(file_okay=False), @click.option('--root', '-r', type=click.Path(file_okay=False),
default=None, help='Root dir of media collection. If not set, use path') default=None, help='Root dir of media collection. If not set, use path')
@click.option('--verbose', '-v', default=False,
help='True if you want to see details of file processing')
@click.argument('path', required=True, nargs=1, type=click.Path()) @click.argument('path', required=True, nargs=1, type=click.Path())
def _clean(debug, dedup_regex, dry_run, folders, max_deep, path_string, remove_duplicates, root, verbose, path): def clean(**kwargs):
"""Remove empty folders """Remove empty folders
Usage: clean [--verbose|--debug] directory [removeRoot]""" Usage: clean [--verbose|--debug] directory [removeRoot]"""
logger = log.get_logger(verbose, debug) debug = kwargs['debug']
dry_run = kwargs['dry_run']
folders = kwargs['folders']
root = kwargs['root']
verbose = kwargs['verbose']
path = kwargs['path']
logger = log.get_logger(verbose, debug)
clean_all = False clean_all = False
if not folders: if not folders:
clean_all = True clean_all = True
@ -176,10 +212,15 @@ def _clean(debug, dedup_regex, dry_run, folders, max_deep, path_string, remove_d
config = Config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
opt = config.get_options() opt = config.get_options()
if path_string: exclude = _get_exclude(opt, kwargs['exclude'])
collection = Collection(root, opt['path_format'], dry_run=dry_run, logger=logger, max_deep=max_deep, mode='move') filter_by_ext = set(kwargs['filter_by_ext'])
dedup_regex = list(dedup_regex)
summary, has_errors = collection.dedup_regex(path, dedup_regex, logger, remove_duplicates) if kwargs['path_string']:
collection = Collection(root, opt['path_format'], dry_run=dry_run,
exclude=exclude, filter_by_ext=filter_by_ext, glob=kwargs['glob'],
logger=logger, max_deep=kwargs['max_deep'], mode='move')
dedup_regex = list(kwargs['dedup_regex'])
summary, has_errors = collection.dedup_regex(path, dedup_regex, logger, kwargs['remove_duplicates'])
if clean_all or folders: if clean_all or folders:
remove_empty_folders(path, logger) remove_empty_folders(path, logger)
@ -192,11 +233,10 @@ def _clean(debug, dedup_regex, dry_run, folders, max_deep, path_string, remove_d
@click.command('generate-db') @click.command('generate-db')
@add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False), @click.option('--path', type=click.Path(file_okay=False),
required=True, help='Path of your photo library.') required=True, help='Path of your photo library.')
@click.option('--debug', default=False, is_flag=True, def generate_db(**kwargs):
help='Override the value in constants.py with True.')
def _generate_db(path, debug):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files. """Regenerate the hash.json database which contains all of the sha256 signatures of media files.
""" """
# TODO # TODO
@ -204,21 +244,19 @@ def _generate_db(path, debug):
@click.command('verify') @click.command('verify')
@add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False), @click.option('--path', type=click.Path(file_okay=False),
required=True, help='Path of your photo library.') required=True, help='Path of your photo library.')
@click.option('--debug', default=False, is_flag=True, def verify(**kwargs):
help='Override the value in constants.py with True.')
def _verify(path, debug):
"""Verify hashes""" """Verify hashes"""
# TODO # TODO
pass pass
@click.command('compare') @click.command('compare')
@click.option('--debug', default=False, is_flag=True, @add_options(_logger_options)
help='Override the value in constants.py with True.') @add_options(_dry_run_options)
@click.option('--dry-run', default=False, is_flag=True, @add_options(_filter_option)
help='Dry run only, no change made to the filesystem.')
@click.option('--find-duplicates', '-f', default=False, is_flag=True) @click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True, help='output\ @click.option('--output-dir', '-o', default=False, is_flag=True, help='output\
dir') dir')
@ -231,27 +269,35 @@ def _verify(path, debug):
image') image')
@click.option('--similarity', '-S', default=80, help='Similarity level for\ @click.option('--similarity', '-S', default=80, help='Similarity level for\
images') images')
@click.option('--verbose', '-v', default=False, is_flag=True,
help='True if you want to see details of file processing')
@click.argument('path', nargs=1, required=True) @click.argument('path', nargs=1, required=True)
def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates, def compare(**kwargs):
revert_compare, root, similar_to, similarity, verbose, path):
'''Compare files in directories''' '''Compare files in directories'''
logger = log.get_logger(verbose, debug) debug = kwargs['debug']
dry_run = kwargs['dry_run']
root = kwargs['root']
verbose = kwargs['verbose']
path = kwargs['path']
logger = log.get_logger(verbose, debug)
if not root: if not root:
root = path root = kwargs['path']
config = Config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
opt = config.get_options() opt = config.get_options()
collection = Collection(root, None, mode='move', dry_run=dry_run, logger=logger) exclude = _get_exclude(opt, kwargs['exclude'])
filter_by_ext = set(kwargs['filter_by_ext'])
if revert_compare: collection = Collection(root, None, exclude=exclude,
summary, has_errors = collection.revert_compare(path, dry_run) filter_by_ext=filter_by_ext, glob=kwargs['glob'],
mode='move', dry_run=dry_run, logger=logger)
if kwargs['revert_compare']:
summary, has_errors = collection.revertcompare(path, dry_run)
else: else:
summary, has_errors = collection.sort_similar_images(path, similarity) summary, has_errors = collection.sort_similar_images(path, kwargs['similarity'])
if verbose or debug: if verbose or debug:
summary.write() summary.write()
@ -261,16 +307,17 @@ def _compare(debug, dry_run, find_duplicates, output_dir, remove_duplicates,
@click.group() @click.group()
def main(): def main(**kwargs):
pass pass
main.add_command(_clean) main.add_command(clean)
main.add_command(_compare) main.add_command(compare)
main.add_command(_sort) main.add_command(sort)
main.add_command(_generate_db) main.add_command(generate_db)
main.add_command(_verify) main.add_command(verify)
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -4,10 +4,11 @@ General file system methods.
from builtins import object from builtins import object
import filecmp import filecmp
from fnmatch import fnmatch
import hashlib import hashlib
import logging import logging
import os import os
from pathlib import Path from pathlib import Path, PurePath
import re import re
import sys import sys
import shutil import shutil
@ -16,7 +17,7 @@ from datetime import datetime, timedelta
from ordigi import media from ordigi import media
from ordigi.database import Sqlite from ordigi.database import Sqlite
from ordigi.media import Media, get_all_subclasses from ordigi.media import Media, get_all_subclasses
from ordigi.images import Images from ordigi.images import Image, Images
from ordigi.summary import Summary from ordigi.summary import Summary
from ordigi.utils import get_date_regex, camel2snake from ordigi.utils import get_date_regex, camel2snake
@ -25,9 +26,9 @@ class Collection(object):
"""Class of the media collection.""" """Class of the media collection."""
def __init__(self, root, path_format, album_from_folder=False, def __init__(self, root, path_format, album_from_folder=False,
cache=False, day_begins=0, dry_run=False, exclude_regex_list=set(), cache=False, day_begins=0, dry_run=False, exclude=set(),
filter_by_ext=set(), interactive=False, logger=logging.getLogger(), filter_by_ext=set(), glob='**/*', interactive=False,
max_deep=None, mode='copy'): logger=logging.getLogger(), max_deep=None, mode='copy'):
# Attributes # Attributes
self.root = Path(root).expanduser().absolute() self.root = Path(root).expanduser().absolute()
@ -43,7 +44,7 @@ class Collection(object):
self.cache = cache self.cache = cache
self.day_begins = day_begins self.day_begins = day_begins
self.dry_run = dry_run self.dry_run = dry_run
self.exclude_regex_list = exclude_regex_list self.exclude = exclude
if '%media' in filter_by_ext: if '%media' in filter_by_ext:
filter_by_ext.remove('%media') filter_by_ext.remove('%media')
@ -51,6 +52,7 @@ class Collection(object):
else: else:
self.filter_by_ext = filter_by_ext self.filter_by_ext = filter_by_ext
self.glob = glob
self.items = self.get_items() self.items = self.get_items()
self.interactive = interactive self.interactive = interactive
self.logger = logger self.logger = logger
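With the new signature, the exclusion patterns and the glob are plain constructor arguments. An illustrative instantiation under assumed values (the root path and path format string are made up for the example):

    collection = Collection('~/collection', '{%Y}/{folder}/{name}.{ext}',
                            exclude={'**/.directory', '**/.DS_Store'},
                            glob='**/*', max_deep=1, mode='copy')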
@ -91,6 +93,47 @@ class Collection(object):
return date return date
def _get_folders(self, folders, mask):
"""
Get the folder parts selected by the mask
:params: list folders, str mask
:returns: list
"""
n = len(folders) - 1
if not re.search(r':', mask):
a = re.compile(r'[0-9]')
match = re.search(a, mask)
if match:
# single folder example: folders[1]
i = int(match[0])
if i > n:
# i is out of range, use ''
return ['']
else:
return folders[i]
else:
# all folders example: folders
return folders
else:
# multiple folder selection: example folders[1:3]
a = re.compile(r'[0-9]:')
b = re.compile(r':[0-9]')
begin = int(re.search(a, mask)[0][0])
end = int(re.search(b, mask)[0][1])
if begin > n:
# no matched folders
return ['']
if end > n:
end = n
if begin >= end:
return ['']
else:
# select matched folders
return folders[begin:end]
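_get_folders replaces the previous eval() of the mask with an explicit parser; a mask selects either all folder parts, one part by index, or a slice of parts. A small illustration, assuming the subdirectory parts below:

    folders = ('a', 'b', 'c', 'd')

    # mask 'folders'      -> all parts          -> ('a', 'b', 'c', 'd')
    # mask 'folders[1]'   -> one part by index  -> 'b'
    # mask 'folders[1:3]' -> a slice of parts   -> ('b', 'c')
    # an out-of-range index or empty slice falls back to ['']
    print(folders[1], folders[1:3])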
def get_part(self, item, mask, metadata, subdirs): def get_part(self, item, mask, metadata, subdirs):
"""Parse a specific folder's name given a mask and metadata. """Parse a specific folder's name given a mask and metadata.
@ -123,9 +166,8 @@ class Collection(object):
part = os.path.basename(subdirs) part = os.path.basename(subdirs)
elif item == 'folders': elif item == 'folders':
folders = Path(subdirs).parts folders = subdirs.parts
folders = eval(mask) folders = self._get_folders(folders, mask)
part = os.path.join(*folders) part = os.path.join(*folders)
elif item in ('album','camera_make', 'camera_model', 'city', 'country', elif item in ('album','camera_make', 'camera_model', 'city', 'country',
@ -169,7 +211,7 @@ class Collection(object):
return this_part return this_part
def get_path(self, metadata, subdirs='', whitespace_sub='_'): def get_path(self, metadata, subdirs, whitespace_sub='_'):
"""path_format: {%Y-%d-%m}/%u{city}/{album} """path_format: {%Y-%d-%m}/%u{city}/{album}
Returns file path. Returns file path.
@ -295,28 +337,6 @@ class Collection(object):
return self.summary, has_errors return self.summary, has_errors
def should_exclude(self, path, regex_list=set()):
if(len(regex_list) == 0):
return False
return any(regex.search(path) for regex in regex_list)
def walklevel(self, src_path, maxlevel=None):
"""
Walk into input directory recursively until desired maxlevel
source: https://stackoverflow.com/questions/229186/os-walk-without-digging-into-directories-below
"""
src_path = src_path.rstrip(os.path.sep)
if not os.path.isdir(src_path):
return None
num_sep = src_path.count(os.path.sep)
for root, dirs, files in os.walk(src_path):
level = root.count(os.path.sep) - num_sep
yield root, dirs, files, level
if maxlevel is not None and level >= maxlevel:
del dirs[:]
def remove(self, file_path): def remove(self, file_path):
if not self.dry_run: if not self.dry_run:
os.remove(file_path) os.remove(file_path)
@ -421,43 +441,90 @@ class Collection(object):
return items return items
def get_files_in_path(self, path, extensions=set()): def walklevel(self, src_path, maxlevel=None):
"""
Walk into input directory recursively until desired maxlevel
source: https://stackoverflow.com/questions/229186/os-walk-without-digging-into-directories-below
"""
src_path = str(src_path)
if not os.path.isdir(src_path):
return None
num_sep = src_path.count(os.path.sep)
for root, dirs, files in os.walk(src_path):
level = root.count(os.path.sep) - num_sep
yield root, dirs, files, level
if maxlevel is not None and level >= maxlevel:
del dirs[:]
def level(self, path):
"""
:param: Path
:return: int
"""
# if isinstance(path, str):
# # To remove trailing '/' chars
# path = Path(path)
# path = str(path)
return len(path.parts) - 1
# TODO move to utils.. or CPath..
def _get_files_in_path(self, path, glob='**/*', maxlevel=None, extensions=set()):
"""Recursively get files which match a path and extension. """Recursively get files which match a path and extension.
:param str path string: Path to start recursive file listing :param str path string: Path to start recursive file listing
:param tuple(str) extensions: File extensions to include (whitelist) :param tuple(str) extensions: File extensions to include (whitelist)
:returns: file_path, subdirs :returns: Path file_path
""" """
file_list = set() for path0 in path.glob(glob):
if os.path.isfile(path): if path0.is_dir():
file_list.add((path, ''))
# Create a list of compiled regular expressions to match against the file path
compiled_regex_list = [re.compile(regex) for regex in self.exclude_regex_list]
subdirs = ''
for dirname, dirnames, filenames, level in self.walklevel(path,
self.max_deep):
should_exclude_dir = self.should_exclude(dirname, compiled_regex_list)
if dirname == os.path.join(path, '.ordigi') or should_exclude_dir:
continue continue
else:
file_path = path0
parts = file_path.parts
subdirs = file_path.relative_to(path).parent
if glob == '*':
level = 0
else:
level = len(subdirs.parts)
if level > 0: if file_path.parts[0] == '.ordigi':
subdirs = os.path.join(subdirs, os.path.basename(dirname)) continue
if maxlevel is not None:
if level > maxlevel:
continue
for exclude in self.exclude:
if fnmatch(file_path, exclude):
continue
for filename in filenames:
# If file extension is in `extensions`
# And if file path is not in exclude regexes
# Then append to the list
filename_path = os.path.join(dirname, filename)
if ( if (
extensions == set() extensions == set()
or os.path.splitext(filename)[1][1:].lower() in extensions or PurePath(file_path).suffix.lower() in extensions
and not self.should_exclude(filename, compiled_regex_list)
): ):
file_list.add((filename, subdirs)) # yield the matching file path
yield file_path
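File discovery now goes through pathlib globbing instead of os.walk, and the depth of a match is derived from its path relative to the starting directory. A minimal sketch of that relationship (directory names are invented):

    from pathlib import Path

    path = Path('collection')
    for file_path in path.glob('**/*'):
        if file_path.is_dir():
            continue
        subdirs = file_path.relative_to(path).parent
        level = len(subdirs.parts)  # 0 for files directly under 'collection'
        print(file_path, subdirs, level)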
return file_list def _create_directory(self, directory_path):
"""Create a directory if it does not already exist.
:param Path: A fully qualified path of the directory to create.
:returns: bool
"""
try:
if directory_path.exists():
return True
else:
if not self.dry_run:
directory_path.mkdir(parents=True, exist_ok=True)
self.logger.info(f'Create {directory_path}')
return True
except OSError:
# OSError is thrown for cases like no permission
pass
return False
def create_directory(self, directory_path): def create_directory(self, directory_path):
"""Create a directory if it does not already exist. """Create a directory if it does not already exist.
@ -480,6 +547,20 @@ class Collection(object):
return False return False
def _check_path(self, path):
"""
:param: str path
:return: Path path
"""
path = Path(path).expanduser().absolute()
# some error checking
if not path.exists():
self.logger.error(f'Directory {path} does not exist')
sys.exit(1)
return path
def check_path(self, path): def check_path(self, path):
path = os.path.abspath(os.path.expanduser(path)) path = os.path.abspath(os.path.expanduser(path))
@ -500,7 +581,7 @@ class Collection(object):
def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False): def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False):
# cycle through files # cycle through files
has_errors = False has_errors = False
path = self.check_path(path) path = self._check_path(path)
# Delimiter regex # Delimiter regex
delim = r'[-_ .]' delim = r'[-_ .]'
# Numeric date item regex # Numeric date item regex
@ -518,11 +599,9 @@ class Collection(object):
] ]
conflict_file_list = [] conflict_file_list = []
for filename, subdirs in self.get_files_in_path(path): for src_path in self._get_files_in_path(path, glob=self.glob):
file_path = os.path.join(path, subdirs, filename)
src_checksum = self.checksum(src_path) src_checksum = self.checksum(src_path)
file_path = Path(src_path).relative_to(self.root) path_parts = src_path.relative_to(self.root).parts
path_parts = file_path.parts
dedup_path = [] dedup_path = []
for path_part in path_parts: for path_part in path_parts:
items = [] items = []
@ -536,8 +615,11 @@ class Collection(object):
dedup_path.append(''.join(filtered_items)) dedup_path.append(''.join(filtered_items))
# Dedup path # Dedup path
dest_path = os.path.join(self.root, *dedup_path) dest_path = self.root.joinpath(*dedup_path)
self.create_directory(os.path.dirname(dest_path)) self._create_directory(dest_path.parent)
src_path = str(src_path)
dest_path = str(dest_path)
result = self.sort_file(src_path, dest_path, remove_duplicates) result = self.sort_file(src_path, dest_path, remove_duplicates)
if result: if result:
@ -563,28 +645,29 @@ class Collection(object):
""" """
has_errors = False has_errors = False
for path in paths: for path in paths:
path = self.check_path(path) path = self._check_path(path)
conflict_file_list = [] conflict_file_list = []
for filename, subdirs in self.get_files_in_path(path, for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext): extensions=self.filter_by_ext):
src_path = os.path.join(path, subdirs, filename) subdirs = src_path.relative_to(path).parent
# Process files # Process files
src_checksum = self.checksum(src_path) src_checksum = self.checksum(src_path)
media = Media(path, subdirs, filename, self.album_from_folder, ignore_tags, media = Media(src_path, path, self.album_from_folder, ignore_tags,
self.interactive, self.logger) self.interactive, self.logger)
if media: if media:
metadata = media.get_metadata(loc, self.db, self.cache) metadata = media.get_metadata(loc, self.db, self.cache)
# Get the destination path according to metadata # Get the destination path according to metadata
file_path = self.get_path(metadata, subdirs=subdirs) file_path = Path(self.get_path(metadata, subdirs))
else: else:
# Keep same directory structure # Keep same directory structure
file_path = os.path.relpath(src_path, path) file_path = src_path.relative_to(path)
dest_directory = os.path.join(self.root, dest_directory = self.root / file_path.parent
os.path.dirname(file_path)) self._create_directory(dest_directory)
dest_path = os.path.join(self.root, file_path)
self.create_directory(dest_directory) # Convert paths to string
src_path = str(src_path)
dest_path = str(self.root / file_path)
result = self.sort_file(src_path, dest_path, remove_duplicates) result = self.sort_file(src_path, dest_path, remove_duplicates)
@ -640,65 +723,70 @@ class Collection(object):
self.logger.info(f'move: {img_path} -> {dest_path}') self.logger.info(f'move: {img_path} -> {dest_path}')
return self.set_hash(True, img_path, dest_path, checksum) return self.set_hash(True, img_path, dest_path, checksum)
def sort_similar_images(self, path, similarity=80): def _get_images(self, path):
"""
:returns: iter
"""
for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext):
dirname = src_path.parent.name
has_errors = False
path = self.check_path(path)
for dirname, dirnames, filenames, level in self.walklevel(path, None):
if dirname == os.path.join(path, '.ordigi'):
continue
if dirname.find('similar_to') == 0: if dirname.find('similar_to') == 0:
continue continue
file_paths = set() image = Image(src_path)
for filename in filenames:
file_paths.add(os.path.join(dirname, filename))
i = Images(file_paths, logger=self.logger) if image.is_image():
yield src_path
images = set([ i for i in i.get_images() ]) def sort_similar_images(self, path, similarity=80):
for image in images:
if not os.path.isfile(image):
continue
checksum1 = self.checksum(image)
# Process files
# media = Media(src_path, False, self.logger)
# TODO compare metadata
# if media:
# metadata = media.get_metadata()
similar = False
moved_imgs = set()
for img_path in i.find_similar(image, similarity):
similar = True
checksum2 = self.checksum(img_path)
# move image into directory
name = os.path.splitext(os.path.basename(image))[0]
directory_name = 'similar_to_' + name
dest_directory = os.path.join(os.path.dirname(img_path),
directory_name)
dest_path = os.path.join(dest_directory, os.path.basename(img_path))
result = self.create_directory(dest_directory) has_errors = False
# Move the similar file into the destination directory path = self._check_path(path)
if result: img_paths = set([ x for x in self._get_images(path) ])
result = self.move_file(img_path, dest_path, checksum2) i = Images(img_paths, logger=self.logger)
moved_imgs.add(img_path) for image in img_paths:
if not result: if not os.path.isfile(image):
has_errors = True continue
else: checksum1 = self.checksum(image)
has_errors = True # Process files
# media = Media(src_path, False, self.logger)
# TODO compare metadata
# if media:
# metadata = media.get_metadata()
similar = False
moved_imgs = set()
for img_path in i.find_similar(image, similarity):
similar = True
checksum2 = self.checksum(img_path)
# move image into directory
name = os.path.splitext(os.path.basename(image))[0]
directory_name = 'similar_to_' + name
dest_directory = os.path.join(os.path.dirname(img_path),
directory_name)
dest_path = os.path.join(dest_directory, os.path.basename(img_path))
result = self.create_directory(dest_directory)
if similar: # Move the similar file into the destination directory
dest_path = os.path.join(dest_directory, if result:
os.path.basename(image)) result = self.move_file(img_path, dest_path, checksum2)
result = self.move_file(image, dest_path, checksum1) moved_imgs.add(img_path)
moved_imgs.add(image)
if not result: if not result:
has_errors = True has_errors = True
else:
has_errors = True
# for moved_img in moved_imgs:
# os.remove(moved_img) if similar:
dest_path = os.path.join(dest_directory,
os.path.basename(image))
result = self.move_file(image, dest_path, checksum1)
moved_imgs.add(image)
if not result:
has_errors = True
# for moved_img in moved_imgs:
# os.remove(moved_img)
return self.summary, has_errors return self.summary, has_errors

View File

@ -86,7 +86,7 @@ class Config:
options['day_begins'] = 0 options['day_begins'] = 0
if 'Exclusions' in self.conf: if 'Exclusions' in self.conf:
options['exclude_regex'] = [value for key, value in self.conf.items('Exclusions')] options['exclude'] = [value for key, value in self.conf.items('Exclusions')]
return options return options

View File

@ -75,33 +75,33 @@ class Images():
#: Valid extensions for image files. #: Valid extensions for image files.
extensions = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2') extensions = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
def __init__(self, file_paths=None, hash_size=8, logger=logging.getLogger()): def __init__(self, img_paths=set(), hash_size=8, logger=logging.getLogger()):
self.file_paths = file_paths self.img_paths = img_paths
self.hash_size = hash_size
self.duplicates = [] self.duplicates = []
self.hash_size = hash_size
self.logger = logger self.logger = logger
def get_images(self): def add_images(self, file_paths):
''':returns: img_path generator ''':returns: img_path generator
''' '''
for img_path in self.file_paths: for img_path in file_paths:
image = Image(img_path) image = Image(img_path)
if image.is_image(): if image.is_image():
yield img_path self.img_paths.add(img_path)
def get_images_hashes(self): def get_images_hashes(self):
"""Get image hashes""" """Get image hashes"""
hashes = {} hashes = {}
# Searching for duplicates. # Searching for duplicates.
for img_path in self.get_images(): for img_path in self.img_paths:
with img.open(img_path) as img: with img.open(img_path) as img:
yield imagehash.average_hash(img, self.hash_size) yield imagehash.average_hash(img, self.hash_size)
def find_duplicates(self, img_path): def find_duplicates(self, img_path):
"""Find duplicates""" """Find duplicates"""
duplicates = [] duplicates = []
for temp_hash in get_images_hashes(self.file_paths): for temp_hash in get_images_hashes(self.img_paths):
if temp_hash in hashes: if temp_hash in hashes:
self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash])) self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
duplicates.append(img_path) duplicates.append(img_path)
@ -150,7 +150,7 @@ class Images():
threshold = 1 - similarity/100 threshold = 1 - similarity/100
diff_limit = int(threshold*(self.hash_size**2)) diff_limit = int(threshold*(self.hash_size**2))
for img_path in self.get_images(): for img_path in self.img_paths:
if img_path == image: if img_path == image:
continue continue
hash2 = image.get_hash() hash2 = image.get_hash()
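In the similarity check, the similarity percentage is converted into a maximum difference between perceptual hashes. A worked example with the defaults shown in this commit (hash_size=8 in Images, similarity=80 from the compare command):

    hash_size = 8
    similarity = 80                                  # percent
    threshold = 1 - similarity / 100                 # 0.2
    diff_limit = int(threshold * (hash_size ** 2))   # int(0.2 * 64) = 12
    # presumably hashes differing by at most 12 bits count as similar
    print(diff_limit)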

View File

@ -30,12 +30,16 @@ class Media():
extensions = PHOTO + AUDIO + VIDEO extensions = PHOTO + AUDIO + VIDEO
def __init__(self, path, subdirs, filename, album_from_folder=False, ignore_tags=set(), def __init__(self, file_path, root, album_from_folder=False, ignore_tags=set(),
interactive=False, logger=logging.getLogger()): interactive=False, logger=logging.getLogger()):
self.path = path """
self.subdirs = subdirs :params: Path, Path, bool, set, bool, Logger
self.filename = filename """
self.file_path = os.path.join(path, subdirs, filename) self.file_path = str(file_path)
self.root = str(root)
self.subdirs = str(file_path.relative_to(root).parent)
self.folder = str(file_path.parent.name)
self.filename = str(file_path.name)
self.album_from_folder = album_from_folder self.album_from_folder = album_from_folder
self.ignore_tags = ignore_tags self.ignore_tags = ignore_tags
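Media now receives the file path and the collection root as pathlib objects and derives its path attributes from them. A quick illustration of those derivations (the example paths are invented):

    from pathlib import Path

    root = Path('/collection')
    file_path = Path('/collection/2021/holidays/photo.jpg')

    print(file_path.relative_to(root).parent)  # 2021/holidays -> subdirs
    print(file_path.parent.name)               # holidays      -> folder
    print(file_path.name)                      # photo.jpg     -> filename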
@ -262,14 +266,14 @@ class Media():
self.metadata[key] = formated_data self.metadata[key] = formated_data
self.metadata['src_path'] = self.path self.metadata['src_path'] = self.root
self.metadata['subdirs'] = self.subdirs self.metadata['subdirs'] = self.subdirs
self.metadata['filename'] = self.filename self.metadata['filename'] = self.filename
self.metadata['date_taken'] = self.get_date_taken() self.metadata['date_taken'] = self.get_date_taken()
if self.album_from_folder: if self.album_from_folder:
album = self.metadata['album'] album = self.metadata['album']
folder = os.path.basename(self.subdirs) folder = self.folder
if album and album != '': if album and album != '':
if self.interactive: if self.interactive:
print(f"Conflict for file: {self.file_path}") print(f"Conflict for file: {self.file_path}")
@ -351,7 +355,7 @@ class Media():
:returns: value (str) :returns: value (str)
""" """
return ExifTool(self.file_path, self.logger).setvalue(tag, value) return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)
def set_date_taken(self, date_key, time): def set_date_taken(self, date_key, time):
"""Set the date/time a photo was taken. """Set the date/time a photo was taken.
@ -400,9 +404,7 @@ class Media():
:returns: bool :returns: bool
""" """
folder = os.path.basename(os.path.dirname(self.file_path)) return self.set_value('album', self.folder)
return self.set_value('album', folder)
def get_all_subclasses(cls=None): def get_all_subclasses(cls=None):

View File

@ -22,12 +22,11 @@ def reset_singletons():
@pytest.fixture(scope="session") @pytest.fixture(scope="session")
def sample_files_paths(tmpdir_factory): def sample_files_paths(tmpdir_factory):
tmp_path = tmpdir_factory.mktemp("ordigi-src-") tmp_path = Path(tmpdir_factory.mktemp("ordigi-src-"))
paths = Path(ORDIGI_PATH, 'samples/test_exif').glob('*') path = Path(ORDIGI_PATH, 'samples/test_exif')
shutil.copytree(path, tmp_path / path.name)
paths = Path(tmp_path).glob('**/*')
file_paths = [x for x in paths if x.is_file()] file_paths = [x for x in paths if x.is_file()]
for file_path in file_paths:
source_path = tmp_path.join(file_path.name)
shutil.copyfile(file_path, source_path)
return tmp_path, file_paths return tmp_path, file_paths

View File

@ -22,7 +22,7 @@ class TestCollection:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_class(cls, sample_files_paths): def setup_class(cls, sample_files_paths):
cls.src_paths, cls.file_paths = sample_files_paths cls.src_path, cls.file_paths = sample_files_paths
cls.path_format = constants.default_path + '/' + constants.default_name cls.path_format = constants.default_path + '/' + constants.default_name
def teardown_class(self): def teardown_class(self):
@ -57,9 +57,9 @@ class TestCollection:
'{%Y-%m-%b}' '{%Y-%m-%b}'
] ]
subdirs = Path('a', 'b', 'c', 'd')
for file_path in self.file_paths: for file_path in self.file_paths:
media = Media(os.path.dirname(file_path), '', os.path.basename(file_path)) media = Media(file_path, self.src_path)
subdirs = file_path.relative_to(self.src_path).parent
exif_tags = {} exif_tags = {}
for key in ('album', 'camera_make', 'camera_model', 'latitude', for key in ('album', 'camera_make', 'camera_model', 'latitude',
'longitude', 'original_name', 'title'): 'longitude', 'original_name', 'title'):
@ -83,10 +83,7 @@ class TestCollection:
elif item == 'folder': elif item == 'folder':
assert part == subdirs.name, file_path assert part == subdirs.name, file_path
elif item == 'folders': elif item == 'folders':
if platform == "win32": assert part in str(subdirs)
assert '\\' in part, file_path
else:
assert '/' in part, file_path
elif item == 'ext': elif item == 'ext':
assert part == file_path.suffix[1:], file_path assert part == file_path.suffix[1:], file_path
elif item == 'name': elif item == 'name':
@ -115,7 +112,7 @@ class TestCollection:
collection = Collection(tmp_path, self.path_format) collection = Collection(tmp_path, self.path_format)
for file_path in self.file_paths: for file_path in self.file_paths:
exif_data = ExifToolCaching(str(file_path)).asdict() exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(os.path.dirname(file_path), '', os.path.basename(file_path)) media = Media(file_path, self.src_path)
metadata = media.get_metadata() metadata = media.get_metadata()
date_taken = media.get_date_taken() date_taken = media.get_date_taken()
@ -139,22 +136,22 @@ class TestCollection:
def test_sort_files(self, tmp_path): def test_sort_files(self, tmp_path):
collection = Collection(tmp_path, self.path_format, album_from_folder=True) collection = Collection(tmp_path, self.path_format, album_from_folder=True)
loc = GeoLocation() loc = GeoLocation()
summary, has_errors = collection.sort_files([self.src_paths], loc) summary, has_errors = collection.sort_files([self.src_path], loc)
# Summary is created and there is no errors # Summary is created and there is no errors
assert summary, summary assert summary, summary
assert not has_errors, has_errors assert not has_errors, has_errors
for file_path in tmp_path.glob('*/**/*.*'): for file_path in tmp_path.glob('**/*'):
if '.db' not in str(file_path): if '.db' not in str(file_path):
media = Media(os.path.dirname(file_path), '', os.path.basename(file_path), album_from_folder=True) media = Media(file_path, tmp_path, album_from_folder=True)
media.get_exif_metadata() media.get_exif_metadata()
for value in media._get_key_values('album'): for value in media._get_key_values('album'):
assert value != '' or None assert value != '' or None
# test with populated dest dir # test with populated dest dir
randomize_files(tmp_path) randomize_files(tmp_path)
summary, has_errors = collection.sort_files([self.src_paths], loc) summary, has_errors = collection.sort_files([self.src_path], loc)
assert summary, summary assert summary, summary
assert not has_errors, has_errors assert not has_errors, has_errors
@ -165,14 +162,14 @@ class TestCollection:
loc = GeoLocation() loc = GeoLocation()
randomize_db(tmp_path) randomize_db(tmp_path)
with pytest.raises(sqlite3.DatabaseError) as e: with pytest.raises(sqlite3.DatabaseError) as e:
summary, has_errors = collection.sort_files([self.src_paths], loc) summary, has_errors = collection.sort_files([self.src_path], loc)
def test_sort_file(self, tmp_path): def test_sort_file(self, tmp_path):
for mode in 'copy', 'move': for mode in 'copy', 'move':
collection = Collection(tmp_path, self.path_format, mode=mode) collection = Collection(tmp_path, self.path_format, mode=mode)
# copy mode # copy mode
src_path = Path(self.src_paths, 'photo.png') src_path = Path(self.src_path, 'test_exif', 'photo.png')
name = 'photo_' + mode + '.png' name = 'photo_' + mode + '.png'
dest_path = Path(tmp_path, name) dest_path = Path(tmp_path, name)
src_checksum = collection.checksum(src_path) src_checksum = collection.checksum(src_path)
@ -191,6 +188,15 @@ class TestCollection:
# TODO check date # TODO check date
#- Sort similar images into a directory def test__get_files_in_path(self, tmp_path):
collection = Collection(tmp_path, self.path_format, exclude='**/*.dng')
paths = [x for x in collection._get_files_in_path(self.src_path,
maxlevel=1, glob='**/photo*')]
assert len(paths) == 6
for path in paths:
assert isinstance(path, Path)
# TODO Sort similar images into a directory
# collection.sort_similar # collection.sort_similar

View File

@ -18,14 +18,14 @@ class TestMetadata:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_class(cls, sample_files_paths): def setup_class(cls, sample_files_paths):
cls.src_paths, cls.file_paths = sample_files_paths cls.src_path, cls.file_paths = sample_files_paths
cls.ignore_tags = ('EXIF:CreateDate', 'File:FileModifyDate', cls.ignore_tags = ('EXIF:CreateDate', 'File:FileModifyDate',
'File:FileAccessDate', 'EXIF:Make', 'Composite:LightValue') 'File:FileAccessDate', 'EXIF:Make', 'Composite:LightValue')
def get_media(self): def get_media(self):
for file_path in self.file_paths: for file_path in self.file_paths:
self.exif_data = ExifTool(str(file_path)).asdict() self.exif_data = ExifTool(file_path).asdict()
yield file_path, Media(os.path.dirname(file_path), '', os.path.basename(file_path), album_from_folder=True, ignore_tags=self.ignore_tags) yield file_path, Media(file_path, self.src_path, album_from_folder=True, ignore_tags=self.ignore_tags)
def test_get_metadata(self): def test_get_metadata(self):
for file_path, media in self.get_media(): for file_path, media in self.get_media():
@ -51,8 +51,10 @@ class TestMetadata:
assert value is None assert value is None
if key == 'album': if key == 'album':
if 'with-album' in str(file_path): for album in media._get_key_values('album'):
assert value == "Test Album" if album is not None and album != '':
assert value == album
break
else: else:
assert value == file_path.parent.name assert value == file_path.parent.name