Refactoring cli and collection modules and fix tests
This commit is contained in:
parent
bfb2153eb8
commit
0b81f89eee
|
@ -110,7 +110,7 @@ _sort_options = [
|
|||
]
|
||||
|
||||
def print_help(command):
|
||||
click.echo(command.get_help(click.Context(sort)))
|
||||
click.echo(command.get_help(click.Context(command)))
|
||||
|
||||
|
||||
def add_options(options):
|
||||
|
@ -140,6 +140,16 @@ def _get_paths(paths, root):
|
|||
|
||||
return paths, root
|
||||
|
||||
def _get_subpaths(relpaths, root):
|
||||
if not relpaths:
|
||||
paths = {root}
|
||||
else:
|
||||
paths = set()
|
||||
for relpath in relpaths:
|
||||
paths.add(os.path.join(root, relpath))
|
||||
|
||||
return paths, root
|
||||
|
||||
|
||||
@click.command('import')
|
||||
@add_options(_logger_options)
|
||||
|
@ -238,10 +248,7 @@ def _sort(**kwargs):
|
|||
|
||||
subdirs = kwargs['subdirs']
|
||||
root = kwargs['dest']
|
||||
subpaths, root = _get_paths(subdirs, root)
|
||||
paths = set()
|
||||
for subpath in subpaths:
|
||||
paths.add(os.path.join(root, subpath))
|
||||
paths, root = _get_subpaths(subdirs, root)
|
||||
|
||||
cache = True
|
||||
if kwargs['reset_cache']:
|
||||
|
@ -317,10 +324,9 @@ def _sort(**kwargs):
|
|||
help='True to remove files that are exactly the same in name and a file hash',
|
||||
)
|
||||
@click.argument('subdirs', required=False, nargs=-1, type=click.Path())
|
||||
@click.argument('dest', required=True, nargs=1, type=click.Path())
|
||||
@click.argument('collection', required=True, nargs=1, type=click.Path())
|
||||
def _clean(**kwargs):
|
||||
"""Remove empty folders
|
||||
Usage: clean [--verbose|--debug] directory [removeRoot]"""
|
||||
"""Remove empty folders"""
|
||||
|
||||
dry_run = kwargs['dry_run']
|
||||
folders = kwargs['folders']
|
||||
|
@ -328,9 +334,8 @@ def _clean(**kwargs):
|
|||
logger = log.get_logger(level=log_level)
|
||||
|
||||
subdirs = kwargs['subdirs']
|
||||
root = kwargs['dest']
|
||||
paths, root = _get_paths(subdirs, root)
|
||||
paths = os.path.join(root, subdirs)
|
||||
root = kwargs['collection']
|
||||
paths, root = _get_subpaths(subdirs, root)
|
||||
|
||||
clean_all = False
|
||||
if not folders:
|
||||
|
@ -352,13 +357,13 @@ def _clean(**kwargs):
|
|||
max_deep=opt['max_deep'],
|
||||
)
|
||||
|
||||
for path in paths:
|
||||
if kwargs['path_string']:
|
||||
dedup_regex = list(kwargs['dedup_regex'])
|
||||
dedup_regex = set(kwargs['dedup_regex'])
|
||||
collection.dedup_regex(
|
||||
path, dedup_regex, kwargs['remove_duplicates']
|
||||
paths, dedup_regex, kwargs['remove_duplicates']
|
||||
)
|
||||
|
||||
for path in paths:
|
||||
if clean_all or folders:
|
||||
collection.remove_empty_folders(path)
|
||||
|
||||
|
@ -446,21 +451,7 @@ def _check(**kwargs):
|
|||
@add_options(_dry_run_options)
|
||||
@add_options(_filter_options)
|
||||
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
|
||||
@click.option(
|
||||
'--output-dir',
|
||||
'-o',
|
||||
default=False,
|
||||
is_flag=True,
|
||||
help='output dir',
|
||||
)
|
||||
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
|
||||
@click.option(
|
||||
'--revert-compare',
|
||||
'-R',
|
||||
default=False,
|
||||
is_flag=True,
|
||||
help='Revert compare',
|
||||
)
|
||||
@click.option(
|
||||
'--similar-to',
|
||||
'-s',
|
||||
|
@ -474,7 +465,7 @@ def _check(**kwargs):
|
|||
help='Similarity level for images',
|
||||
)
|
||||
@click.argument('subdirs', required=False, nargs=-1, type=click.Path())
|
||||
@click.argument('dest', required=True, nargs=1, type=click.Path())
|
||||
@click.argument('collection', required=True, nargs=1, type=click.Path())
|
||||
def _compare(**kwargs):
|
||||
"""
|
||||
Sort similar images in directories
|
||||
|
@ -482,18 +473,11 @@ def _compare(**kwargs):
|
|||
|
||||
dry_run = kwargs['dry_run']
|
||||
log_level = log.level(kwargs['verbose'], kwargs['debug'])
|
||||
logger = log.get_logger(level=log_level)
|
||||
|
||||
subdirs = kwargs['subdirs']
|
||||
root = kwargs['dest']
|
||||
paths, root = _get_paths(subdirs, root)
|
||||
paths = os.path.join(root, subdirs)
|
||||
|
||||
path = kwargs['path']
|
||||
root = kwargs['root']
|
||||
|
||||
logger = log.get_logger(level=log_level)
|
||||
if not root:
|
||||
root = kwargs['path']
|
||||
root = kwargs['collection']
|
||||
paths, root = _get_subpaths(subdirs, root)
|
||||
|
||||
config = get_collection_config(root)
|
||||
opt = config.get_options()
|
||||
|
@ -511,9 +495,6 @@ def _compare(**kwargs):
|
|||
)
|
||||
|
||||
for path in paths:
|
||||
if kwargs['revert_compare']:
|
||||
collection.revert_compare(path)
|
||||
else:
|
||||
collection.sort_similar_images(path, kwargs['similarity'])
|
||||
|
||||
summary = collection.summary
|
||||
|
@ -537,7 +518,3 @@ main.add_command(_init)
|
|||
main.add_command(_import)
|
||||
main.add_command(_sort)
|
||||
main.add_command(_update)
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# main()
|
||||
|
|
|
@ -35,7 +35,7 @@ class FPath:
|
|||
def get_items(self):
|
||||
return {
|
||||
'album': '{album}',
|
||||
'basename': '{basename}',
|
||||
'stem': '{stem}',
|
||||
'camera_make': '{camera_make}',
|
||||
'camera_model': '{camera_model}',
|
||||
'city': '{city}',
|
||||
|
@ -123,15 +123,16 @@ class FPath:
|
|||
# Each item has its own custom logic and we evaluate a single item and return
|
||||
# the evaluated string.
|
||||
part = ''
|
||||
basename = os.path.splitext(metadata['filename'])[0]
|
||||
if item == 'basename':
|
||||
part = basename
|
||||
filename = metadata['filename']
|
||||
stem = os.path.splitext(filename)[0]
|
||||
if item == 'stem':
|
||||
part = stem
|
||||
elif item == 'ext':
|
||||
part = os.path.splitext(metadata['filename'])[1][1:]
|
||||
part = os.path.splitext(filename)[1][1:]
|
||||
elif item == 'name':
|
||||
# Remove date prefix added to the name.
|
||||
part = basename
|
||||
for i, rx in utils.get_date_regex(basename):
|
||||
part = stem
|
||||
for i, rx in utils.get_date_regex(stem):
|
||||
part = re.sub(rx, '', part)
|
||||
elif item == 'date':
|
||||
date = metadata['date_media']
|
||||
|
@ -173,11 +174,11 @@ class FPath:
|
|||
u_regex = '%u' + regex
|
||||
l_regex = '%l' + regex
|
||||
if re.search(u_regex, this_part):
|
||||
this_part = re.sub(u_regex, part.upper(), this_part)
|
||||
elif re.search(l_regex, this_part):
|
||||
this_part = re.sub(l_regex, part.lower(), this_part)
|
||||
else:
|
||||
this_part = re.sub(regex, part, this_part)
|
||||
return re.sub(u_regex, part.upper(), this_part)
|
||||
if re.search(l_regex, this_part):
|
||||
return re.sub(l_regex, part.lower(), this_part)
|
||||
|
||||
return re.sub(regex, part, this_part)
|
||||
|
||||
def get_path_part(self, this_part, metadata):
|
||||
"""Build path part
|
||||
|
@ -194,7 +195,7 @@ class FPath:
|
|||
regex = '[-_ .]?(%[ul])?' + regex
|
||||
this_part = re.sub(regex, part, this_part)
|
||||
else:
|
||||
self._set_case(regex, part, this_part)
|
||||
this_part = self._set_case(regex, part, this_part)
|
||||
|
||||
# Delete separator char at the begining of the string if any:
|
||||
if this_part:
|
||||
|
@ -341,7 +342,7 @@ class Collection:
|
|||
media.set_album_from_folder()
|
||||
updated = True
|
||||
if media.metadata['original_name'] in (False, ''):
|
||||
media.set_value('original_name', self.filename)
|
||||
media.set_value('original_name', media.metadata['filename'])
|
||||
updated = True
|
||||
if self.album_from_folder:
|
||||
album = media.metadata['album']
|
||||
|
@ -633,90 +634,58 @@ class Collection:
|
|||
else:
|
||||
return 0
|
||||
|
||||
def dedup_regex(self, path, dedup_regex, remove_duplicates=False):
|
||||
# cycle throught files
|
||||
result = False
|
||||
path = self._check_path(path)
|
||||
# Delimiter regex
|
||||
delim = r'[-_ .]'
|
||||
# Numeric date item regex
|
||||
d = r'\d{2}'
|
||||
# Numeric date regex
|
||||
|
||||
if len(dedup_regex) == 0:
|
||||
date_num2 = re.compile(
|
||||
fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
|
||||
)
|
||||
date_num3 = re.compile(
|
||||
fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
|
||||
)
|
||||
default = re.compile(r'([^-_ .]+[-_ .])')
|
||||
dedup_regex = [date_num3, date_num2, default]
|
||||
def _sort_medias(self, files_data, import_mode=None, remove_duplicates=False):
|
||||
"""
|
||||
sort files and solve conflicts
|
||||
"""
|
||||
# Create directories
|
||||
for media, relpath in files_data:
|
||||
dest_directory = self.root / relpath.parent
|
||||
self._create_directory(dest_directory, media)
|
||||
|
||||
conflicts = []
|
||||
self.src_list = [
|
||||
x
|
||||
for x in self._get_files_in_path(
|
||||
path, glob=self.glob,
|
||||
extensions=self.filter_by_ext,
|
||||
)
|
||||
]
|
||||
for src_path in self.src_list:
|
||||
# TODO to test it
|
||||
media = Media(src_path, path, logger=self.logger)
|
||||
path_parts = src_path.relative_to(self.root).parts
|
||||
dedup_path = []
|
||||
for path_part in path_parts:
|
||||
items = []
|
||||
items = self._split_part(dedup_regex.copy(), path_part, items)
|
||||
for media, relpath in files_data:
|
||||
src_path = media.file_path
|
||||
dest_path = self.root / relpath
|
||||
|
||||
filtered_items = []
|
||||
for item in items:
|
||||
if item not in filtered_items:
|
||||
filtered_items.append(item)
|
||||
conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
|
||||
|
||||
dedup_path.append(''.join(filtered_items))
|
||||
|
||||
# Dedup path
|
||||
dest_path = self.root.joinpath(*dedup_path)
|
||||
self._create_directory(dest_path.parent.name, media)
|
||||
|
||||
conflicts = self.check_conflicts(src_path, dest_path, remove_duplicates)
|
||||
|
||||
result = False
|
||||
if not conflict:
|
||||
record = self._record_file(src_path, dest_path, media)
|
||||
self.sort_file(
|
||||
src_path, dest_path, media, import_mode=import_mode
|
||||
)
|
||||
elif conflict == 1:
|
||||
# There is conflict and file are different
|
||||
conflicts.append((src_path, dest_path, media))
|
||||
elif conflict in (2, 3):
|
||||
result = True
|
||||
|
||||
if result:
|
||||
# result is true or None
|
||||
elif conflict == 3:
|
||||
# Same file checksum
|
||||
if import_mode == 'move':
|
||||
self._remove(src_path)
|
||||
self.dest_list.append(dest_path)
|
||||
elif conflict == 2:
|
||||
# File already sorted
|
||||
self.dest_list.append(dest_path)
|
||||
|
||||
if conflicts != []:
|
||||
files_data, conflict = self._solve_conflicts(conflicts, remove_duplicates)
|
||||
src_path, dest_path, media = file_data
|
||||
for files_data, conflict in self._solve_conflicts(conflicts,
|
||||
remove_duplicates):
|
||||
src_path, dest_path, media = files_data
|
||||
|
||||
result = False
|
||||
if not conflict:
|
||||
self._record_file(src_path, dest_path, media)
|
||||
self.sort_file(
|
||||
src_path, dest_path, media, import_mode=import_mode
|
||||
)
|
||||
elif conflict == 1:
|
||||
# There is unresolved conflict
|
||||
self.summary.append((src_path, False))
|
||||
elif conflict in (2, 3):
|
||||
result = True
|
||||
|
||||
if result:
|
||||
# result is true or None
|
||||
elif conflict == 3:
|
||||
# Same file checksum
|
||||
if import_mode == 'move':
|
||||
self._remove(src_path)
|
||||
self.dest_list.append(dest_path)
|
||||
elif conflict == 2:
|
||||
# File already sorted
|
||||
self.dest_list.append(dest_path)
|
||||
|
||||
if not self._check_processed():
|
||||
self.summary.append((None, False))
|
||||
|
||||
return self.summary
|
||||
|
||||
def _modify_selection(self):
|
||||
"""
|
||||
|
@ -764,11 +733,67 @@ class Collection:
|
|||
# Finally check if are files are successfully processed
|
||||
n_fail = len(self.src_list) - len(self.dest_list)
|
||||
if n_fail != 0:
|
||||
self.logger.error("{n_fail} files have not be processed")
|
||||
self.logger.error(f"{n_fail} files have not be processed")
|
||||
return False
|
||||
|
||||
return self.check_db()
|
||||
|
||||
def _get_medias_data(
|
||||
self, src_dirs, import_mode=None, ignore_tags=set(), loc=None
|
||||
):
|
||||
"""Get medias data"""
|
||||
src_dir_in_collection = False
|
||||
for src_dir in src_dirs:
|
||||
self.dest_list = []
|
||||
src_dir = self._check_path(src_dir)
|
||||
self.src_list = self._get_path_list(src_dir)
|
||||
|
||||
# Get medias and src_dirs
|
||||
for src_path in self.src_list:
|
||||
if self.root in src_path.parents:
|
||||
src_dir_in_collection = True
|
||||
else:
|
||||
if not import_mode:
|
||||
self.logger.error(f"""{src_path} not in {self.root}
|
||||
collection, use `ordigi import`""")
|
||||
sys.exit(1)
|
||||
|
||||
# Get file metadata
|
||||
media = Media(
|
||||
src_path,
|
||||
src_dir,
|
||||
self.album_from_folder,
|
||||
ignore_tags,
|
||||
self.interactive,
|
||||
self.logger,
|
||||
self.use_date_filename,
|
||||
self.use_file_dates,
|
||||
)
|
||||
media.get_metadata(self.root, loc, self.db, self.cache)
|
||||
|
||||
yield media, src_dir_in_collection
|
||||
|
||||
def _init_check_db(self, loc=None, ignore_tags=set()):
|
||||
if self.db.is_empty('metadata'):
|
||||
self.init(loc, ignore_tags)
|
||||
elif not self.check_db():
|
||||
self.logger.error('Db data is not accurate run `ordigi update`')
|
||||
sys.exit(1)
|
||||
|
||||
def _get_path_list(self, path):
|
||||
src_list = [
|
||||
x
|
||||
for x in self._get_files_in_path(
|
||||
path, glob=self.glob,
|
||||
extensions=self.filter_by_ext,
|
||||
)
|
||||
]
|
||||
if self.interactive:
|
||||
src_list = self._modify_selection()
|
||||
print('Processing...')
|
||||
|
||||
return src_list
|
||||
|
||||
def get_medias(self, loc, ignore_tags=set()):
|
||||
for file_path in self._get_all_files():
|
||||
media = Media(
|
||||
|
@ -790,25 +815,6 @@ class Collection:
|
|||
|
||||
return self.summary
|
||||
|
||||
def _init_check_db(self, loc=None, ignore_tags=set()):
|
||||
if self.db.is_empty('metadata'):
|
||||
self.init(loc, ignore_tags)
|
||||
elif not self.check_db():
|
||||
self.logger.error('Db data is not accurate run `ordigi update`')
|
||||
sys.exit(1)
|
||||
|
||||
def check_files(self):
|
||||
for file_path in self._get_all_files():
|
||||
checksum = utils.checksum(file_path)
|
||||
relpath = file_path.relative_to(self.root)
|
||||
if checksum == self.db.get_checksum(relpath):
|
||||
self.summary.append((file_path, 'check'))
|
||||
else:
|
||||
self.logger.error('{file_path} is corrupted')
|
||||
self.summary.append((file_path, False))
|
||||
|
||||
return self.summary
|
||||
|
||||
def update(self, loc, ignore_tags=set()):
|
||||
file_paths = [x for x in self._get_all_files()]
|
||||
db_rows = [row for row in self.db.get_rows('metadata')]
|
||||
|
@ -857,10 +863,23 @@ class Collection:
|
|||
|
||||
return self.summary
|
||||
|
||||
def check_files(self):
|
||||
for file_path in self._get_all_files():
|
||||
checksum = utils.checksum(file_path)
|
||||
relpath = file_path.relative_to(self.root)
|
||||
if checksum == self.db.get_checksum(relpath):
|
||||
self.summary.append((file_path, 'check'))
|
||||
else:
|
||||
self.logger.error('{file_path} is corrupted')
|
||||
self.summary.append((file_path, False))
|
||||
|
||||
return self.summary
|
||||
|
||||
def remove_empty_subdirs(self, directories):
|
||||
parents = set()
|
||||
for directory in directories:
|
||||
# if folder empty, delete it
|
||||
if directory.is_dir():
|
||||
files = os.listdir(directory)
|
||||
if len(files) == 0:
|
||||
if not self.dry_run:
|
||||
|
@ -872,20 +891,6 @@ class Collection:
|
|||
if parents != set():
|
||||
self.remove_empty_subdirs(parents)
|
||||
|
||||
def _get_path_list(self, path):
|
||||
src_list = [
|
||||
x
|
||||
for x in self._get_files_in_path(
|
||||
path, glob=self.glob,
|
||||
extensions=self.filter_by_ext,
|
||||
)
|
||||
]
|
||||
if self.interactive:
|
||||
src_list = self._modify_selection()
|
||||
print('Processing...')
|
||||
|
||||
return src_list
|
||||
|
||||
def sort_file(self, src_path, dest_path, media, import_mode=False):
|
||||
if import_mode == 'copy':
|
||||
self._copy(src_path, dest_path)
|
||||
|
@ -909,96 +914,33 @@ class Collection:
|
|||
|
||||
return self.summary
|
||||
|
||||
def sort_files(self, src_dirs, path_format, loc, import_mode=False, remove_duplicates=False, ignore_tags=set()):
|
||||
def sort_files(
|
||||
self, src_dirs, path_format, loc,
|
||||
import_mode=False, remove_duplicates=False, ignore_tags=set()
|
||||
):
|
||||
"""
|
||||
Sort files into appropriate folder
|
||||
"""
|
||||
# Check db
|
||||
self._init_check_db(loc, ignore_tags)
|
||||
|
||||
# Get medias data
|
||||
files_data = []
|
||||
src_dirs_in_collection = set()
|
||||
for src_dir in src_dirs:
|
||||
self.dest_list = []
|
||||
src_dir = self._check_path(src_dir)
|
||||
conflicts = []
|
||||
self.src_list = self._get_path_list(src_dir)
|
||||
|
||||
# Get medias and src_dirs
|
||||
for src_path in self.src_list:
|
||||
if self.root in src_path.parents:
|
||||
src_dirs_in_collection.add(src_path.parent)
|
||||
else:
|
||||
if not import_mode:
|
||||
self.logger.error(f"""{src_path} not in {self.root}
|
||||
collection, use `ordigi import`""")
|
||||
sys.exit(1)
|
||||
|
||||
# Get file metadata
|
||||
media = Media(
|
||||
src_path,
|
||||
src_dir,
|
||||
self.album_from_folder,
|
||||
ignore_tags,
|
||||
self.interactive,
|
||||
self.logger,
|
||||
self.use_date_filename,
|
||||
self.use_file_dates,
|
||||
)
|
||||
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
|
||||
for media, src_dir_in_collection in self._get_medias_data(
|
||||
src_dirs,
|
||||
import_mode=import_mode, ignore_tags=ignore_tags, loc=loc,
|
||||
):
|
||||
# Get the destination path according to metadata
|
||||
fpath = FPath(path_format, self.day_begins, self.logger)
|
||||
relpath = Path(fpath.get_path(metadata))
|
||||
relpath = Path(fpath.get_path(media.metadata))
|
||||
if src_dir_in_collection:
|
||||
src_dirs_in_collection.add(media.file_path.parent)
|
||||
|
||||
files_data.append((copy(media), relpath))
|
||||
|
||||
# Create directories
|
||||
for media, relpath in files_data:
|
||||
dest_directory = self.root / relpath.parent
|
||||
self._create_directory(dest_directory, media)
|
||||
|
||||
# sort files and solve conflicts
|
||||
for media, relpath in files_data:
|
||||
src_path = media.file_path
|
||||
dest_path = self.root / relpath
|
||||
|
||||
conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
|
||||
|
||||
if not conflict:
|
||||
self.sort_file(
|
||||
src_path, dest_path, media, import_mode=import_mode
|
||||
)
|
||||
elif conflict == 1:
|
||||
# There is conflict and file are different
|
||||
conflicts.append((src_path, dest_path, media))
|
||||
elif conflict == 3:
|
||||
# Same file checksum
|
||||
if import_mode == 'move':
|
||||
self._remove(src_path)
|
||||
self.dest_list.append(dest_path)
|
||||
elif conflict == 2:
|
||||
# File already sorted
|
||||
self.dest_list.append(dest_path)
|
||||
|
||||
if conflicts != []:
|
||||
files_data, conflict = self._solve_conflicts(conflicts, remove_duplicates)
|
||||
src_path, dest_path, media = file_data
|
||||
|
||||
if not conflict:
|
||||
self.sort_file(
|
||||
src_path, dest_path, media, import_mode=import_mode
|
||||
)
|
||||
elif conflict == 1:
|
||||
# There is unresolved conflict
|
||||
self.summary.append((src_path, False))
|
||||
elif conflict == 3:
|
||||
# Same file checksum
|
||||
if import_mode == 'move':
|
||||
self._remove(src_path)
|
||||
self.dest_list.append(dest_path)
|
||||
elif conflict == 2:
|
||||
# File already sorted
|
||||
self.dest_list.append(dest_path)
|
||||
# Sort files and solve conflicts
|
||||
self._sort_medias(files_data, import_mode, remove_duplicates)
|
||||
|
||||
self.remove_empty_subdirs(src_dirs_in_collection)
|
||||
|
||||
|
@ -1007,6 +949,56 @@ class Collection:
|
|||
|
||||
return self.summary
|
||||
|
||||
def dedup_regex(self, paths, dedup_regex, remove_duplicates=False):
|
||||
"""Deduplicate file path parts"""
|
||||
# Check db
|
||||
self._init_check_db()
|
||||
|
||||
# Delimiter regex
|
||||
delim = r'[-_ .]'
|
||||
# Numeric date item regex
|
||||
d = r'\d{2}'
|
||||
|
||||
# Numeric date regex
|
||||
if len(dedup_regex) == 0:
|
||||
date_num2 = re.compile(
|
||||
fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
|
||||
)
|
||||
date_num3 = re.compile(
|
||||
fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
|
||||
)
|
||||
default = re.compile(r'([^-_ .]+[-_ .])')
|
||||
dedup_regex = [date_num3, date_num2, default]
|
||||
|
||||
# Get medias data
|
||||
files_data = []
|
||||
for media, _ in self._get_medias_data(paths):
|
||||
# Deduplicate the path
|
||||
src_path = media.file_path
|
||||
path_parts = src_path.relative_to(self.root).parts
|
||||
dedup_path = []
|
||||
for path_part in path_parts:
|
||||
items = []
|
||||
items = self._split_part(dedup_regex.copy(), path_part, items)
|
||||
|
||||
filtered_items = []
|
||||
for item in items:
|
||||
if item not in filtered_items:
|
||||
filtered_items.append(item)
|
||||
|
||||
dedup_path.append(''.join(filtered_items))
|
||||
|
||||
relpath = Path(*dedup_path)
|
||||
files_data.append((copy(media), relpath))
|
||||
|
||||
# Sort files and solve conflicts
|
||||
self._sort_medias(files_data, remove_duplicates=remove_duplicates)
|
||||
|
||||
if not self._check_processed():
|
||||
self.summary.append((None, False))
|
||||
|
||||
return self.summary
|
||||
|
||||
def remove_empty_folders(self, directory, remove_root=True):
|
||||
'Function to remove empty folders'
|
||||
if not os.path.isdir(directory):
|
||||
|
@ -1049,106 +1041,61 @@ class Collection:
|
|||
if image.is_image():
|
||||
yield image
|
||||
|
||||
def sort_similar_images(self, path, similarity=80):
|
||||
|
||||
# Check db
|
||||
if not self.check_db():
|
||||
self.logger.error('Db data is not accurate run `ordigi init`')
|
||||
sys.exit(1)
|
||||
|
||||
path = self._check_path(path)
|
||||
images = set(x for x in self._get_images(path))
|
||||
i = Images(images, logger=self.logger)
|
||||
nb_row_ini = self.db.len('metadata')
|
||||
for image in images:
|
||||
if not image.img_path.is_file():
|
||||
continue
|
||||
media_ref = Media(image.img_path, path, self.logger)
|
||||
# Todo: compare metadata?
|
||||
metadata = media_ref.get_metadata(self.root, db=self.db, cache=self.cache)
|
||||
similar = False
|
||||
moved_imgs = set()
|
||||
for img_path in i.find_similar(image, similarity):
|
||||
similar = True
|
||||
def _get_media_data(self, img_path, path):
|
||||
media = Media(img_path, path, self.logger)
|
||||
metadata = media.get_metadata(self.root, db=self.db, cache=self.cache)
|
||||
# move image into directory
|
||||
name = img_path.stem
|
||||
directory_name = 'similar_to_' + name
|
||||
dest_directory = img_path.parent / directory_name
|
||||
dest_path = dest_directory / img_path.name
|
||||
dest_directory.mkdir(exist_ok=True)
|
||||
media.get_metadata(self.root, db=self.db, cache=self.cache)
|
||||
|
||||
# Move the simlars file into the destination directory
|
||||
self._move(img_path, dest_path)
|
||||
moved_imgs.add(img_path)
|
||||
if self._record_file(img_path, dest_path, media):
|
||||
self.summary.append((img_path, 'sort'))
|
||||
else:
|
||||
self.summary.append((img_path, False))
|
||||
return media
|
||||
|
||||
if similar:
|
||||
img_path = image.img_path
|
||||
dest_path = dest_directory / img_path.name
|
||||
self._move(img_path, dest_path)
|
||||
moved_imgs.add(img_path)
|
||||
if self._record_file(img_path, dest_path, media_ref):
|
||||
self.summary.append((img_path, 'sort'))
|
||||
else:
|
||||
self.summary.append((img_path, False))
|
||||
def _find_similar_images(self, image, images, path, dest_dir, similarity=80):
|
||||
files_data = []
|
||||
if not image.img_path.is_file():
|
||||
return files_data
|
||||
|
||||
nb_row_end = self.db.len('metadata')
|
||||
if nb_row_ini and nb_row_ini != nb_row_end:
|
||||
self.logger.error('Nb of row have changed unexpectedly')
|
||||
name = image.img_path.stem
|
||||
directory_name = dest_dir / name.replace('.', '_')
|
||||
|
||||
if result:
|
||||
result = self.check_db()
|
||||
self.summary.append((None, False))
|
||||
for img_path in images.find_similar(image, similarity):
|
||||
self.src_list.append(img_path)
|
||||
|
||||
return self.summary
|
||||
media = self._get_media_data(img_path, path)
|
||||
relpath = directory_name / img_path.name
|
||||
|
||||
def revert_compare(self, path):
|
||||
files_data.append((copy(media), relpath))
|
||||
|
||||
if not self.check_db():
|
||||
self.logger.error('Db data is not accurate run `ordigi init`')
|
||||
sys.exit(1)
|
||||
if files_data:
|
||||
# Found similar images to image
|
||||
self.src_list.append(image.img_path)
|
||||
media = self._get_media_data(image.img_path, path)
|
||||
relpath = directory_name / image.img_path.name
|
||||
files_data.insert(0, (copy(media), relpath))
|
||||
|
||||
return files_data
|
||||
|
||||
def sort_similar_images(self, path, similarity=80, remove_duplicates=False):
|
||||
"""Sort similar images using imagehash library"""
|
||||
# Check db
|
||||
self._init_check_db()
|
||||
|
||||
dest_dir = self.root / 'similar_images'
|
||||
path = self._check_path(path)
|
||||
dirnames = set()
|
||||
moved_files = set()
|
||||
|
||||
images_paths = set(x for x in self._get_images(path))
|
||||
images = Images(images_paths, logger=self.logger)
|
||||
nb_row_ini = self.db.len('metadata')
|
||||
for src_path in self._get_files_in_path(
|
||||
path, glob=self.glob,
|
||||
extensions=self.filter_by_ext,
|
||||
):
|
||||
dirname = src_path.parent.name
|
||||
if dirname.find('similar_to') == 0:
|
||||
dirnames.add(src_path.parent)
|
||||
|
||||
# move file to initial folder and update metadata
|
||||
media = Media(src_path, path, self.logger)
|
||||
metadata = media.get_metadata(self.root, db=self.db, cache=self.cache)
|
||||
dest_path = Path(src_path.parent.parent, src_path.name)
|
||||
self._move(src_path, dest_path)
|
||||
moved_files.add(src_path)
|
||||
if self._record_file(src_path, dest_path, media):
|
||||
self.summary.append((src_path, 'sort'))
|
||||
else:
|
||||
self.summary.append((src_path, False))
|
||||
|
||||
for dirname in dirnames:
|
||||
# remove 'similar_to*' directories
|
||||
try:
|
||||
dirname.rmdir()
|
||||
except OSError as error:
|
||||
self.logger.error(error)
|
||||
for image in images_paths:
|
||||
files_data = self._find_similar_images(
|
||||
image, images, path, dest_dir, similarity
|
||||
)
|
||||
if files_data:
|
||||
# Move the simlars file into the destination directory
|
||||
self._sort_medias(files_data, remove_duplicates=remove_duplicates)
|
||||
|
||||
nb_row_end = self.db.len('metadata')
|
||||
if nb_row_ini and nb_row_ini != nb_row_end:
|
||||
self.logger.error('Nb of row have changed unexpectedly')
|
||||
|
||||
if result:
|
||||
result = self.check_db()
|
||||
if not self._check_processed():
|
||||
self.summary.append((None, False))
|
||||
|
||||
return self.summary
|
||||
|
@ -1218,4 +1165,3 @@ class Collection:
|
|||
self.summary.append((file_path, 'update'))
|
||||
|
||||
return self.summary
|
||||
|
||||
|
|
|
@ -233,12 +233,13 @@ class Media:
|
|||
if self.metadata is None:
|
||||
return None
|
||||
|
||||
basename = os.path.splitext(self.metadata['filename'])[0]
|
||||
filename = self.metadata['filename']
|
||||
stem = os.path.splitext(filename)[0]
|
||||
date_original = self.metadata['date_original']
|
||||
if self.metadata['original_name']:
|
||||
date_filename = self.get_date_format(self.metadata['original_name'])
|
||||
else:
|
||||
date_filename = self.get_date_format(basename)
|
||||
date_filename = self.get_date_format(stem)
|
||||
|
||||
date_original = self.metadata['date_original']
|
||||
date_created = self.metadata['date_created']
|
||||
|
@ -246,7 +247,7 @@ class Media:
|
|||
if self.metadata['date_original']:
|
||||
if date_filename and date_filename != date_original:
|
||||
self.logger.warning(
|
||||
f"{basename} time mark is different from {date_original}"
|
||||
f"{filename} time mark is different from {date_original}"
|
||||
)
|
||||
if self.interactive:
|
||||
# Ask for keep date taken, filename time, or neither
|
||||
|
@ -268,7 +269,7 @@ class Media:
|
|||
)
|
||||
if date_created and date_filename > date_created:
|
||||
self.logger.warning(
|
||||
f"{basename} time mark is more recent than {date_created}"
|
||||
f"{filename} time mark is more recent than {date_created}"
|
||||
)
|
||||
if self.interactive:
|
||||
choices = [
|
||||
|
@ -335,6 +336,7 @@ class Media:
|
|||
else:
|
||||
return answers['album']
|
||||
|
||||
# TODO use methods _get_metadata_from_db and _get_metadata_from_exif
|
||||
def get_metadata(self, root, loc=None, db=None, cache=False):
|
||||
"""Get a dictionary of metadata from exif.
|
||||
All keys will be present and have a value of None if not obtained.
|
||||
|
|
|
@ -20,7 +20,7 @@ def reset_singletons():
|
|||
_ExifToolProc.instance = None
|
||||
|
||||
|
||||
@pytest.fixture(scope="session")
|
||||
@pytest.fixture(scope="module")
|
||||
def sample_files_paths(tmpdir_factory):
|
||||
tmp_path = Path(tmpdir_factory.mktemp("ordigi-src-"))
|
||||
path = Path(ORDIGI_PATH, 'samples/test_exif')
|
||||
|
|
|
@ -1,13 +1,23 @@
|
|||
from imp import load_source
|
||||
from pathlib import Path
|
||||
import shutil
|
||||
from click.testing import CliRunner
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
from ordigi import cli
|
||||
|
||||
CONTENT = "content"
|
||||
|
||||
import ipdb; ipdb.set_trace()
|
||||
ordigi = load_source('cli', str(Path(__file__).parent.parent) + 'cli.py')
|
||||
ORDIGI_PATH = Path(__file__).parent.parent
|
||||
|
||||
|
||||
def get_arg_options_list(arg_options):
|
||||
arg_options_list = []
|
||||
for opt, arg in arg_options:
|
||||
arg_options_list.append(opt)
|
||||
arg_options_list.append(arg)
|
||||
|
||||
return arg_options_list
|
||||
|
||||
|
||||
class TestOrdigi:
|
||||
|
||||
|
@ -15,10 +25,168 @@ class TestOrdigi:
|
|||
def setup_class(cls, sample_files_paths):
|
||||
cls.runner = CliRunner()
|
||||
cls.src_path, cls.file_paths = sample_files_paths
|
||||
cls.logger_options = (
|
||||
'--debug',
|
||||
'--verbose',
|
||||
)
|
||||
cls.filter_options = (
|
||||
('--exclude', '.DS_Store'),
|
||||
('--filter-by-ext', 'jpg'),
|
||||
('--glob', '*'),
|
||||
)
|
||||
cls.sort_options = (
|
||||
'--album-from-folder',
|
||||
'--ignore-tags',
|
||||
'--path-format',
|
||||
'--remove-duplicates',
|
||||
'--use-date-filename',
|
||||
'--use-file-dates',
|
||||
)
|
||||
|
||||
def test__sort(self):
|
||||
import ipdb; ipdb.set_trace()
|
||||
result = self.runner.invoke(cli._sort, [str(self.src_path)])
|
||||
def assert_cli(self, command, paths):
|
||||
result = self.runner.invoke(command, [*paths])
|
||||
assert result.exit_code == 0
|
||||
|
||||
def assert_options(self, command, bool_options, arg_options, paths):
|
||||
for bool_option in bool_options:
|
||||
self.assert_cli(command, [bool_option, *paths])
|
||||
|
||||
for opt, arg in arg_options:
|
||||
self.assert_cli(command, [opt, arg, *paths])
|
||||
|
||||
def assert_all_options(self, command, bool_options, arg_options, paths):
|
||||
arg_options_list = get_arg_options_list(arg_options)
|
||||
self.assert_cli(command, [
|
||||
*bool_options, *arg_options_list, *paths,
|
||||
])
|
||||
|
||||
def test_sort(self):
|
||||
bool_options = (
|
||||
*self.logger_options,
|
||||
# '--interactive',
|
||||
'--dry-run',
|
||||
'--album-from-folder',
|
||||
'--remove-duplicates',
|
||||
'--use-date-filename',
|
||||
'--use-file-dates',
|
||||
'--clean',
|
||||
)
|
||||
|
||||
arg_options = (
|
||||
*self.filter_options,
|
||||
('--ignore-tags', 'CreateDate'),
|
||||
('--path-format', '{%Y}/{folder}/{name}.{ext}'),
|
||||
|
||||
)
|
||||
|
||||
paths = (str(self.src_path),)
|
||||
|
||||
self.assert_cli(cli._sort, paths)
|
||||
|
||||
self.assert_options(cli._sort, bool_options, arg_options, paths)
|
||||
self.assert_all_options(cli._sort, bool_options, arg_options, paths)
|
||||
|
||||
def assert_init(self):
|
||||
for bool_option in self.logger_options:
|
||||
result = self.runner.invoke(
|
||||
cli._init, [bool_option, str(self.src_path
|
||||
)])
|
||||
assert result.exit_code == 0, bool_option
|
||||
|
||||
def assert_update(self):
|
||||
file_path = Path(ORDIGI_PATH, 'samples/test_exif/photo.cr2')
|
||||
dest_path = self.src_path / 'photo_moved.cr2'
|
||||
shutil.copyfile(file_path, dest_path)
|
||||
for bool_option in self.logger_options:
|
||||
result = self.runner.invoke(
|
||||
cli._update, [bool_option, str(self.src_path
|
||||
)])
|
||||
assert result.exit_code == 0, bool_option
|
||||
|
||||
def assert_check(self):
|
||||
for bool_option in self.logger_options:
|
||||
result = self.runner.invoke(
|
||||
cli._check, [bool_option, str(self.src_path
|
||||
)])
|
||||
assert result.exit_code == 0, bool_option
|
||||
|
||||
def assert_clean(self):
|
||||
bool_options = (
|
||||
*self.logger_options,
|
||||
# '--interactive',
|
||||
'--dry-run',
|
||||
'--delete-excluded',
|
||||
'--folders',
|
||||
'--path-string',
|
||||
'--remove-duplicates',
|
||||
)
|
||||
|
||||
arg_options = (
|
||||
*self.filter_options,
|
||||
('--dedup-regex', r'\d{4}-\d{2}'),
|
||||
)
|
||||
|
||||
paths = ('test_exif', str(self.src_path))
|
||||
self.assert_cli(cli._clean, paths)
|
||||
|
||||
paths = (str(self.src_path),)
|
||||
self.assert_cli(cli._clean, paths)
|
||||
|
||||
self.assert_options(cli._clean, bool_options, arg_options, paths)
|
||||
self.assert_all_options(cli._clean, bool_options, arg_options, paths)
|
||||
|
||||
def test_init_update_check_clean(self):
|
||||
self.assert_init()
|
||||
self.assert_update()
|
||||
self.assert_check()
|
||||
self.assert_clean()
|
||||
|
||||
def test_import(self, tmp_path):
|
||||
bool_options = (
|
||||
*self.logger_options,
|
||||
# '--interactive',
|
||||
'--dry-run',
|
||||
'--album-from-folder',
|
||||
'--remove-duplicates',
|
||||
'--use-date-filename',
|
||||
'--use-file-dates',
|
||||
'--copy',
|
||||
)
|
||||
|
||||
arg_options = (
|
||||
*self.filter_options,
|
||||
('--ignore-tags', 'CreateDate'),
|
||||
('--path-format', '{%Y}/{folder}/{stem}.{ext}'),
|
||||
|
||||
)
|
||||
|
||||
paths = (str(self.src_path), str(tmp_path))
|
||||
|
||||
result = self.runner.invoke(cli._import, ['--copy', *paths])
|
||||
assert result.exit_code == 0
|
||||
|
||||
self.assert_options(cli._import, bool_options, arg_options, paths)
|
||||
self.assert_all_options(cli._import, bool_options, arg_options, paths)
|
||||
|
||||
def test_compare(self):
|
||||
bool_options = (
|
||||
*self.logger_options,
|
||||
# '--interactive',
|
||||
'--dry-run',
|
||||
'--find-duplicates',
|
||||
'--remove-duplicates',
|
||||
)
|
||||
|
||||
arg_options = (
|
||||
*self.filter_options,
|
||||
# ('--similar-to', ''),
|
||||
('--similarity', '65'),
|
||||
)
|
||||
|
||||
paths = (str(self.src_path),)
|
||||
|
||||
self.assert_cli(cli._compare, paths)
|
||||
self.assert_options(cli._compare, bool_options, arg_options, paths)
|
||||
|
||||
|
||||
def test_needsfiles(tmpdir):
|
||||
|
|
|
@ -239,11 +239,6 @@ class TestCollection:
|
|||
# Summary is created and there is no errors
|
||||
assert not summary.errors
|
||||
|
||||
summary = collection.revert_compare(path)
|
||||
|
||||
# Summary is created and there is no errors
|
||||
assert not summary.errors
|
||||
|
||||
@pytest.mark.skip()
|
||||
def test_fill_data(self, tmp_path, monkeypatch):
|
||||
path = tmp_path / 'collection'
|
||||
|
|
Loading…
Reference in New Issue