Refactor cli and collection modules and fix tests

This commit is contained in:
Cédric Leporcq 2021-10-31 15:09:40 +01:00
parent bfb2153eb8
commit 0b81f89eee
6 changed files with 446 additions and 358 deletions

View File

@ -110,7 +110,7 @@ _sort_options = [
] ]
def print_help(command): def print_help(command):
click.echo(command.get_help(click.Context(sort))) click.echo(command.get_help(click.Context(command)))
def add_options(options): def add_options(options):
@ -140,6 +140,16 @@ def _get_paths(paths, root):
return paths, root return paths, root
def _get_subpaths(relpaths, root):
    """Resolve sub-directories given relative to ``root``.

    :param relpaths: iterable of paths relative to ``root`` (may be empty)
    :param root: base directory
    :returns: ``(paths, root)`` where ``paths`` is the set of ``root`` joined
        with each entry of ``relpaths``, or ``{root}`` when none was given
    """
    if relpaths:
        return {os.path.join(root, relpath) for relpath in relpaths}, root

    # No sub-directory requested: work on the root itself.
    return {root}, root
@click.command('import') @click.command('import')
@add_options(_logger_options) @add_options(_logger_options)
@ -238,10 +248,7 @@ def _sort(**kwargs):
subdirs = kwargs['subdirs'] subdirs = kwargs['subdirs']
root = kwargs['dest'] root = kwargs['dest']
subpaths, root = _get_paths(subdirs, root) paths, root = _get_subpaths(subdirs, root)
paths = set()
for subpath in subpaths:
paths.add(os.path.join(root, subpath))
cache = True cache = True
if kwargs['reset_cache']: if kwargs['reset_cache']:
@ -317,10 +324,9 @@ def _sort(**kwargs):
help='True to remove files that are exactly the same in name and a file hash', help='True to remove files that are exactly the same in name and a file hash',
) )
@click.argument('subdirs', required=False, nargs=-1, type=click.Path()) @click.argument('subdirs', required=False, nargs=-1, type=click.Path())
@click.argument('dest', required=True, nargs=1, type=click.Path()) @click.argument('collection', required=True, nargs=1, type=click.Path())
def _clean(**kwargs): def _clean(**kwargs):
"""Remove empty folders """Remove empty folders"""
Usage: clean [--verbose|--debug] directory [removeRoot]"""
dry_run = kwargs['dry_run'] dry_run = kwargs['dry_run']
folders = kwargs['folders'] folders = kwargs['folders']
@ -328,9 +334,8 @@ def _clean(**kwargs):
logger = log.get_logger(level=log_level) logger = log.get_logger(level=log_level)
subdirs = kwargs['subdirs'] subdirs = kwargs['subdirs']
root = kwargs['dest'] root = kwargs['collection']
paths, root = _get_paths(subdirs, root) paths, root = _get_subpaths(subdirs, root)
paths = os.path.join(root, subdirs)
clean_all = False clean_all = False
if not folders: if not folders:
@ -352,13 +357,13 @@ def _clean(**kwargs):
max_deep=opt['max_deep'], max_deep=opt['max_deep'],
) )
for path in paths:
if kwargs['path_string']: if kwargs['path_string']:
dedup_regex = list(kwargs['dedup_regex']) dedup_regex = set(kwargs['dedup_regex'])
collection.dedup_regex( collection.dedup_regex(
path, dedup_regex, kwargs['remove_duplicates'] paths, dedup_regex, kwargs['remove_duplicates']
) )
for path in paths:
if clean_all or folders: if clean_all or folders:
collection.remove_empty_folders(path) collection.remove_empty_folders(path)
@ -446,21 +451,7 @@ def _check(**kwargs):
@add_options(_dry_run_options) @add_options(_dry_run_options)
@add_options(_filter_options) @add_options(_filter_options)
@click.option('--find-duplicates', '-f', default=False, is_flag=True) @click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option(
'--output-dir',
'-o',
default=False,
is_flag=True,
help='output dir',
)
@click.option('--remove-duplicates', '-r', default=False, is_flag=True) @click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option(
'--revert-compare',
'-R',
default=False,
is_flag=True,
help='Revert compare',
)
@click.option( @click.option(
'--similar-to', '--similar-to',
'-s', '-s',
@ -474,7 +465,7 @@ def _check(**kwargs):
help='Similarity level for images', help='Similarity level for images',
) )
@click.argument('subdirs', required=False, nargs=-1, type=click.Path()) @click.argument('subdirs', required=False, nargs=-1, type=click.Path())
@click.argument('dest', required=True, nargs=1, type=click.Path()) @click.argument('collection', required=True, nargs=1, type=click.Path())
def _compare(**kwargs): def _compare(**kwargs):
""" """
Sort similar images in directories Sort similar images in directories
@ -482,18 +473,11 @@ def _compare(**kwargs):
dry_run = kwargs['dry_run'] dry_run = kwargs['dry_run']
log_level = log.level(kwargs['verbose'], kwargs['debug']) log_level = log.level(kwargs['verbose'], kwargs['debug'])
logger = log.get_logger(level=log_level)
subdirs = kwargs['subdirs'] subdirs = kwargs['subdirs']
root = kwargs['dest'] root = kwargs['collection']
paths, root = _get_paths(subdirs, root) paths, root = _get_subpaths(subdirs, root)
paths = os.path.join(root, subdirs)
path = kwargs['path']
root = kwargs['root']
logger = log.get_logger(level=log_level)
if not root:
root = kwargs['path']
config = get_collection_config(root) config = get_collection_config(root)
opt = config.get_options() opt = config.get_options()
@ -511,9 +495,6 @@ def _compare(**kwargs):
) )
for path in paths: for path in paths:
if kwargs['revert_compare']:
collection.revert_compare(path)
else:
collection.sort_similar_images(path, kwargs['similarity']) collection.sort_similar_images(path, kwargs['similarity'])
summary = collection.summary summary = collection.summary
@ -537,7 +518,3 @@ main.add_command(_init)
main.add_command(_import) main.add_command(_import)
main.add_command(_sort) main.add_command(_sort)
main.add_command(_update) main.add_command(_update)
# if __name__ == '__main__':
# main()

View File

@ -35,7 +35,7 @@ class FPath:
def get_items(self): def get_items(self):
return { return {
'album': '{album}', 'album': '{album}',
'basename': '{basename}', 'stem': '{stem}',
'camera_make': '{camera_make}', 'camera_make': '{camera_make}',
'camera_model': '{camera_model}', 'camera_model': '{camera_model}',
'city': '{city}', 'city': '{city}',
@ -123,15 +123,16 @@ class FPath:
# Each item has its own custom logic and we evaluate a single item and return # Each item has its own custom logic and we evaluate a single item and return
# the evaluated string. # the evaluated string.
part = '' part = ''
basename = os.path.splitext(metadata['filename'])[0] filename = metadata['filename']
if item == 'basename': stem = os.path.splitext(filename)[0]
part = basename if item == 'stem':
part = stem
elif item == 'ext': elif item == 'ext':
part = os.path.splitext(metadata['filename'])[1][1:] part = os.path.splitext(filename)[1][1:]
elif item == 'name': elif item == 'name':
# Remove date prefix added to the name. # Remove date prefix added to the name.
part = basename part = stem
for i, rx in utils.get_date_regex(basename): for i, rx in utils.get_date_regex(stem):
part = re.sub(rx, '', part) part = re.sub(rx, '', part)
elif item == 'date': elif item == 'date':
date = metadata['date_media'] date = metadata['date_media']
@ -173,11 +174,11 @@ class FPath:
u_regex = '%u' + regex u_regex = '%u' + regex
l_regex = '%l' + regex l_regex = '%l' + regex
if re.search(u_regex, this_part): if re.search(u_regex, this_part):
this_part = re.sub(u_regex, part.upper(), this_part) return re.sub(u_regex, part.upper(), this_part)
elif re.search(l_regex, this_part): if re.search(l_regex, this_part):
this_part = re.sub(l_regex, part.lower(), this_part) return re.sub(l_regex, part.lower(), this_part)
else:
this_part = re.sub(regex, part, this_part) return re.sub(regex, part, this_part)
def get_path_part(self, this_part, metadata): def get_path_part(self, this_part, metadata):
"""Build path part """Build path part
@ -194,7 +195,7 @@ class FPath:
regex = '[-_ .]?(%[ul])?' + regex regex = '[-_ .]?(%[ul])?' + regex
this_part = re.sub(regex, part, this_part) this_part = re.sub(regex, part, this_part)
else: else:
self._set_case(regex, part, this_part) this_part = self._set_case(regex, part, this_part)
# Delete separator char at the begining of the string if any: # Delete separator char at the begining of the string if any:
if this_part: if this_part:
@ -341,7 +342,7 @@ class Collection:
media.set_album_from_folder() media.set_album_from_folder()
updated = True updated = True
if media.metadata['original_name'] in (False, ''): if media.metadata['original_name'] in (False, ''):
media.set_value('original_name', self.filename) media.set_value('original_name', media.metadata['filename'])
updated = True updated = True
if self.album_from_folder: if self.album_from_folder:
album = media.metadata['album'] album = media.metadata['album']
@ -633,90 +634,58 @@ class Collection:
else: else:
return 0 return 0
def dedup_regex(self, path, dedup_regex, remove_duplicates=False): def _sort_medias(self, files_data, import_mode=None, remove_duplicates=False):
# cycle throught files """
result = False sort files and solve conflicts
path = self._check_path(path) """
# Delimiter regex # Create directories
delim = r'[-_ .]' for media, relpath in files_data:
# Numeric date item regex dest_directory = self.root / relpath.parent
d = r'\d{2}' self._create_directory(dest_directory, media)
# Numeric date regex
if len(dedup_regex) == 0:
date_num2 = re.compile(
fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
)
date_num3 = re.compile(
fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
)
default = re.compile(r'([^-_ .]+[-_ .])')
dedup_regex = [date_num3, date_num2, default]
conflicts = [] conflicts = []
self.src_list = [ for media, relpath in files_data:
x src_path = media.file_path
for x in self._get_files_in_path( dest_path = self.root / relpath
path, glob=self.glob,
extensions=self.filter_by_ext,
)
]
for src_path in self.src_list:
# TODO to test it
media = Media(src_path, path, logger=self.logger)
path_parts = src_path.relative_to(self.root).parts
dedup_path = []
for path_part in path_parts:
items = []
items = self._split_part(dedup_regex.copy(), path_part, items)
filtered_items = [] conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
for item in items:
if item not in filtered_items:
filtered_items.append(item)
dedup_path.append(''.join(filtered_items))
# Dedup path
dest_path = self.root.joinpath(*dedup_path)
self._create_directory(dest_path.parent.name, media)
conflicts = self.check_conflicts(src_path, dest_path, remove_duplicates)
result = False
if not conflict: if not conflict:
record = self._record_file(src_path, dest_path, media) self.sort_file(
src_path, dest_path, media, import_mode=import_mode
)
elif conflict == 1: elif conflict == 1:
# There is conflict and file are different # There is conflict and file are different
conflicts.append((src_path, dest_path, media)) conflicts.append((src_path, dest_path, media))
elif conflict in (2, 3): elif conflict == 3:
result = True # Same file checksum
if import_mode == 'move':
if result: self._remove(src_path)
# result is true or None self.dest_list.append(dest_path)
elif conflict == 2:
# File already sorted
self.dest_list.append(dest_path) self.dest_list.append(dest_path)
if conflicts != []: if conflicts != []:
files_data, conflict = self._solve_conflicts(conflicts, remove_duplicates) for files_data, conflict in self._solve_conflicts(conflicts,
src_path, dest_path, media = file_data remove_duplicates):
src_path, dest_path, media = files_data
result = False
if not conflict: if not conflict:
self._record_file(src_path, dest_path, media) self.sort_file(
src_path, dest_path, media, import_mode=import_mode
)
elif conflict == 1: elif conflict == 1:
# There is unresolved conflict # There is unresolved conflict
self.summary.append((src_path, False)) self.summary.append((src_path, False))
elif conflict in (2, 3): elif conflict == 3:
result = True # Same file checksum
if import_mode == 'move':
if result: self._remove(src_path)
# result is true or None self.dest_list.append(dest_path)
elif conflict == 2:
# File already sorted
self.dest_list.append(dest_path) self.dest_list.append(dest_path)
if not self._check_processed():
self.summary.append((None, False))
return self.summary
def _modify_selection(self): def _modify_selection(self):
""" """
@ -764,11 +733,67 @@ class Collection:
# Finally check if are files are successfully processed # Finally check if are files are successfully processed
n_fail = len(self.src_list) - len(self.dest_list) n_fail = len(self.src_list) - len(self.dest_list)
if n_fail != 0: if n_fail != 0:
self.logger.error("{n_fail} files have not be processed") self.logger.error(f"{n_fail} files have not be processed")
return False return False
return self.check_db() return self.check_db()
def _get_medias_data(
    self, src_dirs, import_mode=None, ignore_tags=None, loc=None
):
    """Yield a ``Media`` object for every file found under ``src_dirs``.

    For each source directory ``self.dest_list`` is reset and
    ``self.src_list`` is rebuilt with ``self._get_path_list``; the metadata
    of every listed file is then loaded.

    :param src_dirs: iterable of directories to scan
    :param import_mode: when falsy, every file must already be located under
        ``self.root``; otherwise an error is logged and the program exits
        with status 1
    :param ignore_tags: tags forwarded to ``Media``; ``None`` (the default)
        stands for an empty set
    :param loc: forwarded to ``Media.get_metadata``
    :returns: generator of ``(media, src_dir_in_collection)`` tuples, the
        flag telling whether that very file is located under ``self.root``
    """
    # Build a fresh set per call instead of sharing one mutable default.
    if ignore_tags is None:
        ignore_tags = set()

    for src_dir in src_dirs:
        self.dest_list = []
        src_dir = self._check_path(src_dir)
        self.src_list = self._get_path_list(src_dir)

        # Get medias and src_dirs
        for src_path in self.src_list:
            # Evaluate the flag for each file: a value carried over from a
            # previous file would wrongly mark files outside of the
            # collection as being inside it.
            src_dir_in_collection = self.root in src_path.parents
            if not src_dir_in_collection and not import_mode:
                self.logger.error(f"""{src_path} not in {self.root}
collection, use `ordigi import`""")
                sys.exit(1)

            # Get file metadata
            media = Media(
                src_path,
                src_dir,
                self.album_from_folder,
                ignore_tags,
                self.interactive,
                self.logger,
                self.use_date_filename,
                self.use_file_dates,
            )
            media.get_metadata(self.root, loc, self.db, self.cache)

            yield media, src_dir_in_collection
def _init_check_db(self, loc=None, ignore_tags=set()):
    """Initialise an empty database, or make sure the existing one is valid.

    When the ``metadata`` table is empty the collection is initialised with
    ``self.init(loc, ignore_tags)``. Otherwise ``self.check_db()`` must
    succeed; if it does not, an error is logged and the program exits with
    status 1.
    """
    if self.db.is_empty('metadata'):
        self.init(loc, ignore_tags)
        return

    if self.check_db():
        return

    self.logger.error('Db data is not accurate run `ordigi update`')
    sys.exit(1)
def _get_path_list(self, path):
    """Return the list of files to process under ``path``.

    Files come from ``self._get_files_in_path`` using the ``glob`` and
    ``filter_by_ext`` settings of the collection. In interactive mode the
    list is replaced by the result of ``self._modify_selection()``.
    """
    found = self._get_files_in_path(
        path, glob=self.glob, extensions=self.filter_by_ext
    )
    files = list(found)
    if not self.interactive:
        return files

    files = self._modify_selection()
    print('Processing...')

    return files
def get_medias(self, loc, ignore_tags=set()): def get_medias(self, loc, ignore_tags=set()):
for file_path in self._get_all_files(): for file_path in self._get_all_files():
media = Media( media = Media(
@ -790,25 +815,6 @@ class Collection:
return self.summary return self.summary
def _init_check_db(self, loc=None, ignore_tags=set()):
if self.db.is_empty('metadata'):
self.init(loc, ignore_tags)
elif not self.check_db():
self.logger.error('Db data is not accurate run `ordigi update`')
sys.exit(1)
def check_files(self):
for file_path in self._get_all_files():
checksum = utils.checksum(file_path)
relpath = file_path.relative_to(self.root)
if checksum == self.db.get_checksum(relpath):
self.summary.append((file_path, 'check'))
else:
self.logger.error('{file_path} is corrupted')
self.summary.append((file_path, False))
return self.summary
def update(self, loc, ignore_tags=set()): def update(self, loc, ignore_tags=set()):
file_paths = [x for x in self._get_all_files()] file_paths = [x for x in self._get_all_files()]
db_rows = [row for row in self.db.get_rows('metadata')] db_rows = [row for row in self.db.get_rows('metadata')]
@ -857,10 +863,23 @@ class Collection:
return self.summary return self.summary
def check_files(self):
    """Compare the checksum of every collection file with the database.

    A ``(file_path, 'check')`` entry is appended to ``self.summary`` for each
    file whose checksum matches the stored one; otherwise an error is logged
    and ``(file_path, False)`` is appended.

    :returns: ``self.summary``
    """
    for file_path in self._get_all_files():
        checksum = utils.checksum(file_path)
        relpath = file_path.relative_to(self.root)
        if checksum == self.db.get_checksum(relpath):
            self.summary.append((file_path, 'check'))
        else:
            # f-string, so that the offending path really shows up in the
            # log instead of the literal text '{file_path}'.
            self.logger.error(f'{file_path} is corrupted')
            self.summary.append((file_path, False))

    return self.summary
def remove_empty_subdirs(self, directories): def remove_empty_subdirs(self, directories):
parents = set() parents = set()
for directory in directories: for directory in directories:
# if folder empty, delete it # if folder empty, delete it
if directory.is_dir():
files = os.listdir(directory) files = os.listdir(directory)
if len(files) == 0: if len(files) == 0:
if not self.dry_run: if not self.dry_run:
@ -872,20 +891,6 @@ class Collection:
if parents != set(): if parents != set():
self.remove_empty_subdirs(parents) self.remove_empty_subdirs(parents)
def _get_path_list(self, path):
src_list = [
x
for x in self._get_files_in_path(
path, glob=self.glob,
extensions=self.filter_by_ext,
)
]
if self.interactive:
src_list = self._modify_selection()
print('Processing...')
return src_list
def sort_file(self, src_path, dest_path, media, import_mode=False): def sort_file(self, src_path, dest_path, media, import_mode=False):
if import_mode == 'copy': if import_mode == 'copy':
self._copy(src_path, dest_path) self._copy(src_path, dest_path)
@ -909,96 +914,33 @@ class Collection:
return self.summary return self.summary
def sort_files(self, src_dirs, path_format, loc, import_mode=False, remove_duplicates=False, ignore_tags=set()): def sort_files(
self, src_dirs, path_format, loc,
import_mode=False, remove_duplicates=False, ignore_tags=set()
):
""" """
Sort files into appropriate folder Sort files into appropriate folder
""" """
# Check db # Check db
self._init_check_db(loc, ignore_tags) self._init_check_db(loc, ignore_tags)
# Get medias data
files_data = [] files_data = []
src_dirs_in_collection = set() src_dirs_in_collection = set()
for src_dir in src_dirs: for media, src_dir_in_collection in self._get_medias_data(
self.dest_list = [] src_dirs,
src_dir = self._check_path(src_dir) import_mode=import_mode, ignore_tags=ignore_tags, loc=loc,
conflicts = [] ):
self.src_list = self._get_path_list(src_dir)
# Get medias and src_dirs
for src_path in self.src_list:
if self.root in src_path.parents:
src_dirs_in_collection.add(src_path.parent)
else:
if not import_mode:
self.logger.error(f"""{src_path} not in {self.root}
collection, use `ordigi import`""")
sys.exit(1)
# Get file metadata
media = Media(
src_path,
src_dir,
self.album_from_folder,
ignore_tags,
self.interactive,
self.logger,
self.use_date_filename,
self.use_file_dates,
)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
# Get the destination path according to metadata # Get the destination path according to metadata
fpath = FPath(path_format, self.day_begins, self.logger) fpath = FPath(path_format, self.day_begins, self.logger)
relpath = Path(fpath.get_path(metadata)) relpath = Path(fpath.get_path(media.metadata))
if src_dir_in_collection:
src_dirs_in_collection.add(media.file_path.parent)
files_data.append((copy(media), relpath)) files_data.append((copy(media), relpath))
# Create directories # Sort files and solve conflicts
for media, relpath in files_data: self._sort_medias(files_data, import_mode, remove_duplicates)
dest_directory = self.root / relpath.parent
self._create_directory(dest_directory, media)
# sort files and solve conflicts
for media, relpath in files_data:
src_path = media.file_path
dest_path = self.root / relpath
conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
if not conflict:
self.sort_file(
src_path, dest_path, media, import_mode=import_mode
)
elif conflict == 1:
# There is conflict and file are different
conflicts.append((src_path, dest_path, media))
elif conflict == 3:
# Same file checksum
if import_mode == 'move':
self._remove(src_path)
self.dest_list.append(dest_path)
elif conflict == 2:
# File already sorted
self.dest_list.append(dest_path)
if conflicts != []:
files_data, conflict = self._solve_conflicts(conflicts, remove_duplicates)
src_path, dest_path, media = file_data
if not conflict:
self.sort_file(
src_path, dest_path, media, import_mode=import_mode
)
elif conflict == 1:
# There is unresolved conflict
self.summary.append((src_path, False))
elif conflict == 3:
# Same file checksum
if import_mode == 'move':
self._remove(src_path)
self.dest_list.append(dest_path)
elif conflict == 2:
# File already sorted
self.dest_list.append(dest_path)
self.remove_empty_subdirs(src_dirs_in_collection) self.remove_empty_subdirs(src_dirs_in_collection)
@ -1007,6 +949,56 @@ class Collection:
return self.summary return self.summary
def dedup_regex(self, paths, dedup_regex, remove_duplicates=False):
    """Remove repeated items from the path parts of the files in ``paths``.

    Every part of a file path (relative to ``self.root``) is split with
    ``self._split_part`` and rebuilt keeping only the first occurrence of
    each item. The files are then handed to ``self._sort_medias`` together
    with their deduplicated relative path.

    :param paths: directories passed on to ``self._get_medias_data``
    :param dedup_regex: patterns passed to ``self._split_part``; when empty,
        built-in date and word patterns are used
    :param remove_duplicates: forwarded to ``self._sort_medias``
    :returns: ``self.summary``
    """
    # Check db
    self._init_check_db()

    if len(dedup_regex) == 0:
        # Delimiter regex
        delim = r'[-_ .]'
        # Numeric date item  regex
        d = r'\d{2}'

        # Numeric date regex
        date_num2 = re.compile(
            fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
        )
        date_num3 = re.compile(
            fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
        )
        default = re.compile(r'([^-_ .]+[-_ .])')
        dedup_regex = [date_num3, date_num2, default]

    def dedup_part(path_part):
        # Each part gets its own copy of the patterns and a fresh result list.
        items = self._split_part(dedup_regex.copy(), path_part, [])
        # dict keys keep the first occurrence of each item, in order.
        return ''.join(dict.fromkeys(items))

    # Get medias data
    files_data = []
    for media, _ in self._get_medias_data(paths):
        # Deduplicate the path
        parts = media.file_path.relative_to(self.root).parts
        relpath = Path(*[dedup_part(part) for part in parts])
        files_data.append((copy(media), relpath))

    # Sort files and solve conflicts
    self._sort_medias(files_data, remove_duplicates=remove_duplicates)

    if not self._check_processed():
        self.summary.append((None, False))

    return self.summary
def remove_empty_folders(self, directory, remove_root=True): def remove_empty_folders(self, directory, remove_root=True):
'Function to remove empty folders' 'Function to remove empty folders'
if not os.path.isdir(directory): if not os.path.isdir(directory):
@ -1049,106 +1041,61 @@ class Collection:
if image.is_image(): if image.is_image():
yield image yield image
def sort_similar_images(self, path, similarity=80): def _get_media_data(self, img_path, path):
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
path = self._check_path(path)
images = set(x for x in self._get_images(path))
i = Images(images, logger=self.logger)
nb_row_ini = self.db.len('metadata')
for image in images:
if not image.img_path.is_file():
continue
media_ref = Media(image.img_path, path, self.logger)
# Todo: compare metadata?
metadata = media_ref.get_metadata(self.root, db=self.db, cache=self.cache)
similar = False
moved_imgs = set()
for img_path in i.find_similar(image, similarity):
similar = True
media = Media(img_path, path, self.logger) media = Media(img_path, path, self.logger)
metadata = media.get_metadata(self.root, db=self.db, cache=self.cache) media.get_metadata(self.root, db=self.db, cache=self.cache)
# move image into directory
name = img_path.stem
directory_name = 'similar_to_' + name
dest_directory = img_path.parent / directory_name
dest_path = dest_directory / img_path.name
dest_directory.mkdir(exist_ok=True)
# Move the simlars file into the destination directory return media
self._move(img_path, dest_path)
moved_imgs.add(img_path)
if self._record_file(img_path, dest_path, media):
self.summary.append((img_path, 'sort'))
else:
self.summary.append((img_path, False))
if similar: def _find_similar_images(self, image, images, path, dest_dir, similarity=80):
img_path = image.img_path files_data = []
dest_path = dest_directory / img_path.name if not image.img_path.is_file():
self._move(img_path, dest_path) return files_data
moved_imgs.add(img_path)
if self._record_file(img_path, dest_path, media_ref):
self.summary.append((img_path, 'sort'))
else:
self.summary.append((img_path, False))
nb_row_end = self.db.len('metadata') name = image.img_path.stem
if nb_row_ini and nb_row_ini != nb_row_end: directory_name = dest_dir / name.replace('.', '_')
self.logger.error('Nb of row have changed unexpectedly')
if result: for img_path in images.find_similar(image, similarity):
result = self.check_db() self.src_list.append(img_path)
self.summary.append((None, False))
return self.summary media = self._get_media_data(img_path, path)
relpath = directory_name / img_path.name
def revert_compare(self, path): files_data.append((copy(media), relpath))
if not self.check_db(): if files_data:
self.logger.error('Db data is not accurate run `ordigi init`') # Found similar images to image
sys.exit(1) self.src_list.append(image.img_path)
media = self._get_media_data(image.img_path, path)
relpath = directory_name / image.img_path.name
files_data.insert(0, (copy(media), relpath))
return files_data
def sort_similar_images(self, path, similarity=80, remove_duplicates=False):
"""Sort similar images using imagehash library"""
# Check db
self._init_check_db()
dest_dir = self.root / 'similar_images'
path = self._check_path(path) path = self._check_path(path)
dirnames = set()
moved_files = set() images_paths = set(x for x in self._get_images(path))
images = Images(images_paths, logger=self.logger)
nb_row_ini = self.db.len('metadata') nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path( for image in images_paths:
path, glob=self.glob, files_data = self._find_similar_images(
extensions=self.filter_by_ext, image, images, path, dest_dir, similarity
): )
dirname = src_path.parent.name if files_data:
if dirname.find('similar_to') == 0: # Move the simlars file into the destination directory
dirnames.add(src_path.parent) self._sort_medias(files_data, remove_duplicates=remove_duplicates)
# move file to initial folder and update metadata
media = Media(src_path, path, self.logger)
metadata = media.get_metadata(self.root, db=self.db, cache=self.cache)
dest_path = Path(src_path.parent.parent, src_path.name)
self._move(src_path, dest_path)
moved_files.add(src_path)
if self._record_file(src_path, dest_path, media):
self.summary.append((src_path, 'sort'))
else:
self.summary.append((src_path, False))
for dirname in dirnames:
# remove 'similar_to*' directories
try:
dirname.rmdir()
except OSError as error:
self.logger.error(error)
nb_row_end = self.db.len('metadata') nb_row_end = self.db.len('metadata')
if nb_row_ini and nb_row_ini != nb_row_end: if nb_row_ini and nb_row_ini != nb_row_end:
self.logger.error('Nb of row have changed unexpectedly') self.logger.error('Nb of row have changed unexpectedly')
if result: if not self._check_processed():
result = self.check_db()
self.summary.append((None, False)) self.summary.append((None, False))
return self.summary return self.summary
@ -1218,4 +1165,3 @@ class Collection:
self.summary.append((file_path, 'update')) self.summary.append((file_path, 'update'))
return self.summary return self.summary

View File

@ -233,12 +233,13 @@ class Media:
if self.metadata is None: if self.metadata is None:
return None return None
basename = os.path.splitext(self.metadata['filename'])[0] filename = self.metadata['filename']
stem = os.path.splitext(filename)[0]
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
if self.metadata['original_name']: if self.metadata['original_name']:
date_filename = self.get_date_format(self.metadata['original_name']) date_filename = self.get_date_format(self.metadata['original_name'])
else: else:
date_filename = self.get_date_format(basename) date_filename = self.get_date_format(stem)
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
date_created = self.metadata['date_created'] date_created = self.metadata['date_created']
@ -246,7 +247,7 @@ class Media:
if self.metadata['date_original']: if self.metadata['date_original']:
if date_filename and date_filename != date_original: if date_filename and date_filename != date_original:
self.logger.warning( self.logger.warning(
f"{basename} time mark is different from {date_original}" f"{filename} time mark is different from {date_original}"
) )
if self.interactive: if self.interactive:
# Ask for keep date taken, filename time, or neither # Ask for keep date taken, filename time, or neither
@ -268,7 +269,7 @@ class Media:
) )
if date_created and date_filename > date_created: if date_created and date_filename > date_created:
self.logger.warning( self.logger.warning(
f"{basename} time mark is more recent than {date_created}" f"{filename} time mark is more recent than {date_created}"
) )
if self.interactive: if self.interactive:
choices = [ choices = [
@ -335,6 +336,7 @@ class Media:
else: else:
return answers['album'] return answers['album']
# TODO use methods _get_metadata_from_db and _get_metadata_from_exif
def get_metadata(self, root, loc=None, db=None, cache=False): def get_metadata(self, root, loc=None, db=None, cache=False):
"""Get a dictionary of metadata from exif. """Get a dictionary of metadata from exif.
All keys will be present and have a value of None if not obtained. All keys will be present and have a value of None if not obtained.

View File

@ -20,7 +20,7 @@ def reset_singletons():
_ExifToolProc.instance = None _ExifToolProc.instance = None
@pytest.fixture(scope="session") @pytest.fixture(scope="module")
def sample_files_paths(tmpdir_factory): def sample_files_paths(tmpdir_factory):
tmp_path = Path(tmpdir_factory.mktemp("ordigi-src-")) tmp_path = Path(tmpdir_factory.mktemp("ordigi-src-"))
path = Path(ORDIGI_PATH, 'samples/test_exif') path = Path(ORDIGI_PATH, 'samples/test_exif')

View File

@ -1,13 +1,23 @@
from imp import load_source import shutil
from pathlib import Path
from click.testing import CliRunner from click.testing import CliRunner
from pathlib import Path
import pytest import pytest
import sys
from ordigi import cli
CONTENT = "content" CONTENT = "content"
import ipdb; ipdb.set_trace() ORDIGI_PATH = Path(__file__).parent.parent
ordigi = load_source('cli', str(Path(__file__).parent.parent) + 'cli.py')
def get_arg_options_list(arg_options):
    """Flatten ``(option, argument)`` pairs into a single list of CLI tokens."""
    return [token for opt, arg in arg_options for token in (opt, arg)]
class TestOrdigi: class TestOrdigi:
@ -15,10 +25,168 @@ class TestOrdigi:
def setup_class(cls, sample_files_paths): def setup_class(cls, sample_files_paths):
cls.runner = CliRunner() cls.runner = CliRunner()
cls.src_path, cls.file_paths = sample_files_paths cls.src_path, cls.file_paths = sample_files_paths
cls.logger_options = (
'--debug',
'--verbose',
)
cls.filter_options = (
('--exclude', '.DS_Store'),
('--filter-by-ext', 'jpg'),
('--glob', '*'),
)
cls.sort_options = (
'--album-from-folder',
'--ignore-tags',
'--path-format',
'--remove-duplicates',
'--use-date-filename',
'--use-file-dates',
)
def test__sort(self): def assert_cli(self, command, paths):
import ipdb; ipdb.set_trace() result = self.runner.invoke(command, [*paths])
result = self.runner.invoke(cli._sort, [str(self.src_path)]) assert result.exit_code == 0
def assert_options(self, command, bool_options, arg_options, paths):
    """Invoke ``command`` once per option, each option being tested alone.

    Flags come first, then the ``(option, argument)`` pairs; ``paths`` is
    appended to every invocation.
    """
    targets = list(paths)
    for flag in bool_options:
        self.assert_cli(command, [flag] + targets)

    for name, value in arg_options:
        self.assert_cli(command, [name, value] + targets)
def assert_all_options(self, command, bool_options, arg_options, paths):
    """Invoke ``command`` once with every option enabled at the same time."""
    flattened = get_arg_options_list(arg_options)
    cli_args = list(bool_options) + flattened + list(paths)
    self.assert_cli(command, cli_args)
def test_sort(self):
    """Run ``sort`` bare, with each option alone, then with all of them."""
    flags = (
        *self.logger_options,
        # '--interactive',
        '--dry-run',
        '--album-from-folder',
        '--remove-duplicates',
        '--use-date-filename',
        '--use-file-dates',
        '--clean',
    )
    valued_options = (
        *self.filter_options,
        ('--ignore-tags', 'CreateDate'),
        ('--path-format', '{%Y}/{folder}/{name}.{ext}'),
    )
    targets = (str(self.src_path),)

    self.assert_cli(cli._sort, targets)
    self.assert_options(cli._sort, flags, valued_options, targets)
    self.assert_all_options(cli._sort, flags, valued_options, targets)
def assert_init(self):
    """Run ``init`` on the sample directory once per logger option."""
    for flag in self.logger_options:
        cli_args = [flag, str(self.src_path)]
        outcome = self.runner.invoke(cli._init, cli_args)
        assert outcome.exit_code == 0, flag
def assert_update(self):
    """Copy a sample file into the collection, then run ``update``.

    The command is invoked once per logger option.
    """
    sample = Path(ORDIGI_PATH, 'samples/test_exif/photo.cr2')
    copied = self.src_path / 'photo_moved.cr2'
    shutil.copyfile(sample, copied)
    for flag in self.logger_options:
        cli_args = [flag, str(self.src_path)]
        outcome = self.runner.invoke(cli._update, cli_args)
        assert outcome.exit_code == 0, flag
def assert_check(self):
    """Run ``check`` on the sample directory once per logger option."""
    for flag in self.logger_options:
        cli_args = [flag, str(self.src_path)]
        outcome = self.runner.invoke(cli._check, cli_args)
        assert outcome.exit_code == 0, flag
def assert_clean(self):
    """Run ``clean`` with a sub-directory, bare, and with its options."""
    flags = (
        *self.logger_options,
        # '--interactive',
        '--dry-run',
        '--delete-excluded',
        '--folders',
        '--path-string',
        '--remove-duplicates',
    )
    valued_options = (
        *self.filter_options,
        ('--dedup-regex', r'\d{4}-\d{2}'),
    )

    # First with a sub-directory given before the collection path.
    self.assert_cli(cli._clean, ('test_exif', str(self.src_path)))

    # Then on the collection path alone.
    targets = (str(self.src_path),)
    self.assert_cli(cli._clean, targets)
    self.assert_options(cli._clean, flags, valued_options, targets)
    self.assert_all_options(cli._clean, flags, valued_options, targets)
def test_init_update_check_clean(self):
    """Chain the init, update, check and clean checks, in that order."""
    self.assert_init()
    self.assert_update()

    self.assert_check()
    self.assert_clean()
def test_import(self, tmp_path):
    """Run ``import`` with ``--copy``, each option alone, then all of them."""
    flags = (
        *self.logger_options,
        # '--interactive',
        '--dry-run',
        '--album-from-folder',
        '--remove-duplicates',
        '--use-date-filename',
        '--use-file-dates',
        '--copy',
    )
    valued_options = (
        *self.filter_options,
        ('--ignore-tags', 'CreateDate'),
        ('--path-format', '{%Y}/{folder}/{stem}.{ext}'),
    )
    targets = (str(self.src_path), str(tmp_path))

    outcome = self.runner.invoke(cli._import, ['--copy', *targets])
    assert outcome.exit_code == 0

    self.assert_options(cli._import, flags, valued_options, targets)
    self.assert_all_options(cli._import, flags, valued_options, targets)
def test_compare(self):
    """Run ``compare`` bare, then with each of its options alone."""
    flags = (
        *self.logger_options,
        # '--interactive',
        '--dry-run',
        '--find-duplicates',
        '--remove-duplicates',
    )
    valued_options = (
        *self.filter_options,
        # ('--similar-to', ''),
        ('--similarity', '65'),
    )
    targets = (str(self.src_path),)

    self.assert_cli(cli._compare, targets)
    self.assert_options(cli._compare, flags, valued_options, targets)
def test_needsfiles(tmpdir): def test_needsfiles(tmpdir):

View File

@ -239,11 +239,6 @@ class TestCollection:
# Summary is created and there is no errors # Summary is created and there is no errors
assert not summary.errors assert not summary.errors
summary = collection.revert_compare(path)
# Summary is created and there is no errors
assert not summary.errors
@pytest.mark.skip() @pytest.mark.skip()
def test_fill_data(self, tmp_path, monkeypatch): def test_fill_data(self, tmp_path, monkeypatch):
path = tmp_path / 'collection' path = tmp_path / 'collection'