From 513adb28904a1747525ece9f5b864c25adc42736 Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Sun, 17 Oct 2021 17:55:13 +0200 Subject: [PATCH] Add path format and fix empty dir and exclude files --- ordigi.py | 43 +++++++++++------------ ordigi/collection.py | 74 ++++++++++++++++++++++++++++++++-------- ordigi/config.py | 12 ++++--- tests/test_collection.py | 4 +-- 4 files changed, 90 insertions(+), 43 deletions(-) diff --git a/ordigi.py b/ordigi.py index 625dceb..090e31d 100755 --- a/ordigi.py +++ b/ordigi.py @@ -11,8 +11,6 @@ from ordigi import constants from ordigi import log from ordigi.collection import Collection from ordigi.geolocation import GeoLocation -# from ordigi.media import Media, get_all_subclasses -# from ordigi.summary import Summary _logger_options = [ @@ -120,10 +118,10 @@ def _get_exclude(opt, exclude): '--interactive', '-i', default=False, is_flag=True, help="Interactive mode" ) @click.option( - '--max-deep', - '-m', + '--path-format', + '-p', default=None, - help='Maximum level to proceed. 
Number from 0 to desired level.', + help='set custom featured path format', ) @click.option( '--remove-duplicates', @@ -172,10 +170,6 @@ def sort(**kwargs): logger = log.get_logger(level=log_level) - max_deep = kwargs['max_deep'] - if max_deep is not None: - max_deep = int(max_deep) - cache = True if kwargs['reset_cache']: cache = False @@ -197,12 +191,16 @@ def sort(**kwargs): config = Config(constants.CONFIG_FILE) opt = config.get_options() + path_format = opt['path_format'] + if kwargs['path_format']: + path_format = kwargs['path_format'] + exclude = _get_exclude(opt, kwargs['exclude']) filter_by_ext = set(kwargs['filter_by_ext']) collection = Collection( destination, - opt['path_format'], + path_format, kwargs['album_from_folder'], cache, opt['day_begins'], @@ -212,7 +210,7 @@ def sort(**kwargs): kwargs['glob'], kwargs['interactive'], logger, - max_deep, + opt['max_deep'], mode, kwargs['use_date_filename'], kwargs['use_file_dates'], @@ -246,13 +244,10 @@ def sort(**kwargs): help='Regex to match duplicate strings parts', ) @click.option( - '--folders', '-f', default=False, is_flag=True, help='Remove empty folders' + '--delete-excluded', '-d', default=False, is_flag=True, help='Remove excluded files' ) @click.option( - '--max-deep', - '-m', - default=None, - help='Maximum level to proceed. 
Number from 0 to desired level.', + '--folders', '-f', default=False, is_flag=True, help='Remove empty folders' ) @click.option( '--path-string', '-p', default=False, is_flag=True, help='Deduplicate path string' @@ -276,6 +271,7 @@ def clean(**kwargs): """Remove empty folders Usage: clean [--verbose|--debug] directory [removeRoot]""" + result = True dry_run = kwargs['dry_run'] folders = kwargs['folders'] log_level = log.level(kwargs['verbose'], kwargs['debug']) @@ -304,7 +300,7 @@ def clean(**kwargs): filter_by_ext=filter_by_ext, glob=kwargs['glob'], logger=logger, - max_deep=kwargs['max_deep'], + max_deep=opt['max_deep'], mode='move', ) @@ -317,6 +313,9 @@ def clean(**kwargs): if clean_all or folders: collection.remove_empty_folders(path) + if kwargs['delete_excluded']: + collection.remove_excluded_files() + if log_level < 30: summary.print() @@ -335,7 +334,7 @@ def init(**kwargs): logger = log.get_logger(level=log_level) loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout']) - collection = Collection(kwargs['path'], None, mode='move', logger=logger) + collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger) summary = collection.init(loc) if log_level < 30: @@ -353,7 +352,7 @@ def update(**kwargs): logger = log.get_logger(level=log_level) loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout']) - collection = Collection(kwargs['path'], None, mode='move', logger=logger) + collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger) summary = collection.update(loc) if log_level < 30: @@ -367,7 +366,9 @@ def check(**kwargs): """check db and verify hashes""" log_level = log.level(kwargs['verbose'], kwargs['debug']) logger = log.get_logger(level=log_level) - collection = Collection(kwargs['path'], None, mode='move', logger=logger) + config = Config(constants.CONFIG_FILE) + opt = 
config.get_options() + collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger) result = collection.check_db() if result: summary, result = collection.check_files() @@ -376,7 +377,7 @@ def check(**kwargs): if not result: sys.exit(1) else: - self.logger.error('Db data is not accurate run `ordigi init`') + logger.error('Db data is not accurate run `ordigi update`') sys.exit(1) diff --git a/ordigi/collection.py b/ordigi/collection.py index e252bcf..0c812e9 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -377,7 +377,7 @@ class Collection: self._add_db_data(media.metadata) if self.mode == 'move': # Delete file path entry in db when file is moved inside collection - if str(self.root) in str(src_path): + if self.root in src_path.parents: self.db.delete_filepath(str(src_path.relative_to(self.root))) self.summary.append((src_path, dest_path)) @@ -394,6 +394,20 @@ class Collection: os.remove(file_path) self.logger.info(f'remove: {file_path}') + def remove_excluded_files(self): + result = True + for file_path in self.root.glob(self.glob): + if file_path.is_dir(): + continue + else: + if self.root / '.ordigi' in file_path.parents: + continue + + for exclude in self.exclude: + if fnmatch(file_path, exclude): + self.remove(file_path) + break + def sort_file(self, src_path, dest_path, remove_duplicates=False): ''' Copy or move file to dest_path. @@ -526,7 +540,7 @@ class Collection: """ return len(path.parts) - 1 - def _get_files_in_path(self, path, glob='**/*', maxlevel=None, extensions=set()): + def _get_files_in_path(self, path, glob='**/*', extensions=set()): """Recursively get files which match a path and extension. 
:param str path string: Path to start recursive file listing @@ -545,12 +559,11 @@ class Collection: else: level = len(subdirs.parts) - if subdirs.parts != (): - if subdirs.parts[0] == '.ordigi': - continue + if self.root / '.ordigi' in file_path.parents: + continue - if maxlevel is not None: - if level > maxlevel: + if self.max_deep is not None: + if level > self.max_deep: continue matched = False @@ -558,7 +571,6 @@ class Collection: if fnmatch(file_path, exclude): matched = True break - if matched: continue @@ -647,7 +659,13 @@ class Collection: dedup_regex = [date_num3, date_num2, default] conflict_file_list = [] - self.src_list = [x for x in self._get_files_in_path(path, glob=self.glob)] + self.src_list = [ + x + for x in self._get_files_in_path( + path, glob=self.glob, + extensions=self.filter_by_ext, + ) + ] for src_path in self.src_list: # TODO to test it media = Media(src_path, path, logger=self.logger) @@ -819,17 +837,35 @@ class Collection: return self.summary + def remove_empty_subdirs(self, directories): + parents = set() + for directory in directories: + # if folder empty, delete it + files = os.listdir(directory) + if len(files) == 0: + self.logger.info(f"Removing empty folder: {directory}") + directory.rmdir() + + if self.root in directory.parent.parents: + parents.add(directory.parent) + + if parents != set(): + self.remove_empty_subdirs(parents) + def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()): """ Sort files into appropriate folder """ # Check db - if not self.check_db(): - self.logger.error('Db data is not accurate run `ordigi init`') + if [x for x in self.db.get_rows('metadata')] == []: + self.init(loc, ignore_tags) + elif not self.check_db(): + self.logger.error('Db data is not accurate run `ordigi update`') sys.exit(1) result = False files_data = [] + src_dirs_in_collection = set() for path in paths: self.dest_list = [] path = self._check_path(path) @@ -837,7 +873,8 @@ class Collection: self.src_list = [ x for x 
in self._get_files_in_path( - path, glob=self.glob, extensions=self.filter_by_ext + path, glob=self.glob, + extensions=self.filter_by_ext, ) ] if self.interactive: @@ -846,6 +883,9 @@ class Collection: # Get medias and paths for src_path in self.src_list: + # List all src_dirs in collection + if self.root in src_path.parents: + src_dirs_in_collection.add(src_path.parent) # Process files media = Media( src_path, @@ -892,12 +932,14 @@ class Collection: if conflict_file_list != []: record = self._solve_conflicts(conflict_file_list, remove_duplicates) + self.remove_empty_subdirs(src_dirs_in_collection) + if not self._check_processed(): record = False return self.summary, record - def remove_empty_folders(path, remove_root=True): + def remove_empty_folders(self, path, remove_root=True): 'Function to remove empty folders' if not os.path.isdir(path): return @@ -927,7 +969,8 @@ class Collection: :returns: iter """ for src_path in self._get_files_in_path( - path, glob=self.glob, extensions=self.filter_by_ext + path, glob=self.glob, + extensions=self.filter_by_ext, ): dirname = src_path.parent.name @@ -1012,7 +1055,8 @@ class Collection: moved_files = set() nb_row_ini = self.db.len('metadata') for src_path in self._get_files_in_path( - path, glob=self.glob, extensions=self.filter_by_ext + path, glob=self.glob, + extensions=self.filter_by_ext, ): dirname = src_path.parent.name if dirname.find('similar_to') == 0: diff --git a/ordigi/config.py b/ordigi/config.py index 0722d39..9096f51 100644 --- a/ordigi/config.py +++ b/ordigi/config.py @@ -79,11 +79,13 @@ class Config: options['path_format'] = self.get_path_definition() - if 'Path' in self.conf and 'day_begins' in self.conf['Path']: - config_directory = self.conf['Path'] - options['day_begins'] = int(config_directory['day_begins']) - else: - options['day_begins'] = 0 + options['day_begins'] = 0 + options['max_deep'] = None + if 'Path' in self.conf: + if 'day_begins' in self.conf['Path']: + options['day_begins'] = 
int(self.conf['Path']['day_begins']) + if 'max_deep' in self.conf['Path']: + options['max_deep'] = int(self.conf['Path']['max_deep']) if 'Exclusions' in self.conf: options['exclude'] = [value for key, value in self.conf.items('Exclusions')] diff --git a/tests/test_collection.py b/tests/test_collection.py index 8f6d104..02e88ea 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -201,10 +201,10 @@ class TestCollection: def test__get_files_in_path(self, tmp_path): collection = Collection(tmp_path, self.path_format, - exclude={'**/*.dng',}, + exclude={'**/*.dng',}, max_deep=1, use_date_filename=True, use_file_dates=True) paths = [x for x in collection._get_files_in_path(self.src_path, - maxlevel=1, glob='**/photo*')] + glob='**/photo*')] assert len(paths) == 6 for path in paths: assert isinstance(path, Path)