Add --path-format option, fix empty directory removal, and add deletion of excluded files

This commit is contained in:
Cédric Leporcq 2021-10-17 17:55:13 +02:00
parent 5c255093e3
commit 513adb2890
4 changed files with 91 additions and 43 deletions

View File

@ -11,8 +11,6 @@ from ordigi import constants
from ordigi import log from ordigi import log
from ordigi.collection import Collection from ordigi.collection import Collection
from ordigi.geolocation import GeoLocation from ordigi.geolocation import GeoLocation
# from ordigi.media import Media, get_all_subclasses
# from ordigi.summary import Summary
_logger_options = [ _logger_options = [
@ -120,10 +118,10 @@ def _get_exclude(opt, exclude):
'--interactive', '-i', default=False, is_flag=True, help="Interactive mode" '--interactive', '-i', default=False, is_flag=True, help="Interactive mode"
) )
@click.option( @click.option(
'--max-deep', '--path-format',
'-m', '-p',
default=None, default=None,
help='Maximum level to proceed. Number from 0 to desired level.', help='set custom featured path format',
) )
@click.option( @click.option(
'--remove-duplicates', '--remove-duplicates',
@ -172,10 +170,6 @@ def sort(**kwargs):
logger = log.get_logger(level=log_level) logger = log.get_logger(level=log_level)
max_deep = kwargs['max_deep']
if max_deep is not None:
max_deep = int(max_deep)
cache = True cache = True
if kwargs['reset_cache']: if kwargs['reset_cache']:
cache = False cache = False
@ -197,12 +191,16 @@ def sort(**kwargs):
config = Config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
opt = config.get_options() opt = config.get_options()
path_format = opt['path_format']
if kwargs['path_format']:
path_format = kwargs['path_format']
exclude = _get_exclude(opt, kwargs['exclude']) exclude = _get_exclude(opt, kwargs['exclude'])
filter_by_ext = set(kwargs['filter_by_ext']) filter_by_ext = set(kwargs['filter_by_ext'])
collection = Collection( collection = Collection(
destination, destination,
opt['path_format'], path_format,
kwargs['album_from_folder'], kwargs['album_from_folder'],
cache, cache,
opt['day_begins'], opt['day_begins'],
@ -212,7 +210,7 @@ def sort(**kwargs):
kwargs['glob'], kwargs['glob'],
kwargs['interactive'], kwargs['interactive'],
logger, logger,
max_deep, opt['max_deep'],
mode, mode,
kwargs['use_date_filename'], kwargs['use_date_filename'],
kwargs['use_file_dates'], kwargs['use_file_dates'],
@ -246,13 +244,10 @@ def sort(**kwargs):
help='Regex to match duplicate strings parts', help='Regex to match duplicate strings parts',
) )
@click.option( @click.option(
'--folders', '-f', default=False, is_flag=True, help='Remove empty folders' '--delete-excluded', '-d', default=False, is_flag=True, help='Remove excluded files'
) )
@click.option( @click.option(
'--max-deep', '--folders', '-f', default=False, is_flag=True, help='Remove empty folders'
'-m',
default=None,
help='Maximum level to proceed. Number from 0 to desired level.',
) )
@click.option( @click.option(
'--path-string', '-p', default=False, is_flag=True, help='Deduplicate path string' '--path-string', '-p', default=False, is_flag=True, help='Deduplicate path string'
@ -276,6 +271,8 @@ def clean(**kwargs):
"""Remove empty folders """Remove empty folders
Usage: clean [--verbose|--debug] directory [removeRoot]""" Usage: clean [--verbose|--debug] directory [removeRoot]"""
import ipdb; ipdb.set_trace()
result = True
dry_run = kwargs['dry_run'] dry_run = kwargs['dry_run']
folders = kwargs['folders'] folders = kwargs['folders']
log_level = log.level(kwargs['verbose'], kwargs['debug']) log_level = log.level(kwargs['verbose'], kwargs['debug'])
@ -304,7 +301,7 @@ def clean(**kwargs):
filter_by_ext=filter_by_ext, filter_by_ext=filter_by_ext,
glob=kwargs['glob'], glob=kwargs['glob'],
logger=logger, logger=logger,
max_deep=kwargs['max_deep'], max_deep=opt['max_deep'],
mode='move', mode='move',
) )
@ -317,6 +314,9 @@ def clean(**kwargs):
if clean_all or folders: if clean_all or folders:
collection.remove_empty_folders(path) collection.remove_empty_folders(path)
if kwargs['delete_excluded']:
collection.remove_excluded_files()
if log_level < 30: if log_level < 30:
summary.print() summary.print()
@ -335,7 +335,7 @@ def init(**kwargs):
logger = log.get_logger(level=log_level) logger = log.get_logger(level=log_level)
loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout']) loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout'])
collection = Collection(kwargs['path'], None, mode='move', logger=logger) collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
summary = collection.init(loc) summary = collection.init(loc)
if log_level < 30: if log_level < 30:
@ -353,7 +353,7 @@ def update(**kwargs):
logger = log.get_logger(level=log_level) logger = log.get_logger(level=log_level)
loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout']) loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout'])
collection = Collection(kwargs['path'], None, mode='move', logger=logger) collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
summary = collection.update(loc) summary = collection.update(loc)
if log_level < 30: if log_level < 30:
@ -367,7 +367,9 @@ def check(**kwargs):
"""check db and verify hashes""" """check db and verify hashes"""
log_level = log.level(kwargs['verbose'], kwargs['debug']) log_level = log.level(kwargs['verbose'], kwargs['debug'])
logger = log.get_logger(level=log_level) logger = log.get_logger(level=log_level)
collection = Collection(kwargs['path'], None, mode='move', logger=logger) config = Config(constants.CONFIG_FILE)
opt = config.get_options()
collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
result = collection.check_db() result = collection.check_db()
if result: if result:
summary, result = collection.check_files() summary, result = collection.check_files()
@ -376,7 +378,7 @@ def check(**kwargs):
if not result: if not result:
sys.exit(1) sys.exit(1)
else: else:
self.logger.error('Db data is not accurate run `ordigi init`') logger.error('Db data is not accurate run `ordigi update`')
sys.exit(1) sys.exit(1)

View File

@ -377,7 +377,7 @@ class Collection:
self._add_db_data(media.metadata) self._add_db_data(media.metadata)
if self.mode == 'move': if self.mode == 'move':
# Delete file path entry in db when file is moved inside collection # Delete file path entry in db when file is moved inside collection
if str(self.root) in str(src_path): if self.root in src_path.parents:
self.db.delete_filepath(str(src_path.relative_to(self.root))) self.db.delete_filepath(str(src_path.relative_to(self.root)))
self.summary.append((src_path, dest_path)) self.summary.append((src_path, dest_path))
@ -394,6 +394,20 @@ class Collection:
os.remove(file_path) os.remove(file_path)
self.logger.info(f'remove: {file_path}') self.logger.info(f'remove: {file_path}')
def remove_excluded_files(self):
    """Remove files of the collection that match an exclude pattern.

    Every path returned by ``self.root.glob(self.glob)`` is examined.
    Directories and anything located inside ``<root>/.ordigi`` are skipped.
    A remaining file is handed to ``self.remove`` as soon as it matches one
    of the ``fnmatch`` patterns in ``self.exclude``; a file matching several
    patterns is still removed only once.

    :returns: None
    """
    # Loop-invariant: the collection's own data directory is never touched.
    ordigi_dir = self.root / '.ordigi'

    # Snapshot the matches first so that deleting files cannot interfere
    # with the directory walk performed by glob().
    for file_path in list(self.root.glob(self.glob)):
        if file_path.is_dir() or ordigi_dir in file_path.parents:
            continue

        if any(fnmatch(file_path, pattern) for pattern in self.exclude):
            self.remove(file_path)
def sort_file(self, src_path, dest_path, remove_duplicates=False): def sort_file(self, src_path, dest_path, remove_duplicates=False):
''' '''
Copy or move file to dest_path. Copy or move file to dest_path.
@ -526,7 +540,7 @@ class Collection:
""" """
return len(path.parts) - 1 return len(path.parts) - 1
def _get_files_in_path(self, path, glob='**/*', maxlevel=None, extensions=set()): def _get_files_in_path(self, path, glob='**/*', extensions=set()):
"""Recursively get files which match a path and extension. """Recursively get files which match a path and extension.
:param str path string: Path to start recursive file listing :param str path string: Path to start recursive file listing
@ -545,12 +559,11 @@ class Collection:
else: else:
level = len(subdirs.parts) level = len(subdirs.parts)
if subdirs.parts != (): if self.root / '.ordigi' in file_path.parents:
if subdirs.parts[0] == '.ordigi': continue
continue
if maxlevel is not None: if self.max_deep is not None:
if level > maxlevel: if level > self.max_deep:
continue continue
matched = False matched = False
@ -558,7 +571,6 @@ class Collection:
if fnmatch(file_path, exclude): if fnmatch(file_path, exclude):
matched = True matched = True
break break
if matched: if matched:
continue continue
@ -647,7 +659,13 @@ class Collection:
dedup_regex = [date_num3, date_num2, default] dedup_regex = [date_num3, date_num2, default]
conflict_file_list = [] conflict_file_list = []
self.src_list = [x for x in self._get_files_in_path(path, glob=self.glob)] self.src_list = [
x
for x in self._get_files_in_path(
path, glob=self.glob,
extensions=self.filter_by_ext,
)
]
for src_path in self.src_list: for src_path in self.src_list:
# TODO to test it # TODO to test it
media = Media(src_path, path, logger=self.logger) media = Media(src_path, path, logger=self.logger)
@ -819,17 +837,35 @@ class Collection:
return self.summary return self.summary
def remove_empty_subdirs(self, directories):
    """Remove empty directories, climbing up towards the collection root.

    Each directory of ``directories`` that contains no entry is deleted.
    When the parent of a deleted directory lies strictly below ``self.root``
    it is checked on the next pass, so a chain of nested empty folders is
    removed bottom-up. ``self.root`` itself is never queued by this climb.

    :param directories: iterable of ``pathlib.Path`` directories to check
    :returns: None
    """
    pending = set(directories)
    while pending:
        parents = set()
        for directory in pending:
            try:
                is_empty = not os.listdir(directory)
            except FileNotFoundError:
                # The directory was already removed, e.g. it was listed
                # together with one of its children and deleted during a
                # previous pass; there is nothing left to do for it.
                continue

            if not is_empty:
                continue

            self.logger.info(f"Removing empty folder: {directory}")
            directory.rmdir()

            # Only climb while the parent is still inside the collection.
            if self.root in directory.parent.parents:
                parents.add(directory.parent)

        pending = parents
def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()): def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()):
""" """
Sort files into appropriate folder Sort files into appropriate folder
""" """
# Check db # Check db
if not self.check_db(): if [x for x in self.db.get_rows('metadata')] == []:
self.logger.error('Db data is not accurate run `ordigi init`') self.init(loc, ignore_tags)
elif not self.check_db():
self.logger.error('Db data is not accurate run `ordigi update`')
sys.exit(1) sys.exit(1)
result = False result = False
files_data = [] files_data = []
src_dirs_in_collection = set()
for path in paths: for path in paths:
self.dest_list = [] self.dest_list = []
path = self._check_path(path) path = self._check_path(path)
@ -837,7 +873,8 @@ class Collection:
self.src_list = [ self.src_list = [
x x
for x in self._get_files_in_path( for x in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext path, glob=self.glob,
extensions=self.filter_by_ext,
) )
] ]
if self.interactive: if self.interactive:
@ -846,6 +883,9 @@ class Collection:
# Get medias and paths # Get medias and paths
for src_path in self.src_list: for src_path in self.src_list:
# List all src_dirs in collection
if self.root in src_path.parents:
src_dirs_in_collection.add(src_path.parent)
# Process files # Process files
media = Media( media = Media(
src_path, src_path,
@ -892,12 +932,14 @@ class Collection:
if conflict_file_list != []: if conflict_file_list != []:
record = self._solve_conflicts(conflict_file_list, remove_duplicates) record = self._solve_conflicts(conflict_file_list, remove_duplicates)
self.remove_empty_subdirs(src_dirs_in_collection)
if not self._check_processed(): if not self._check_processed():
record = False record = False
return self.summary, record return self.summary, record
def remove_empty_folders(path, remove_root=True): def remove_empty_folders(self, path, remove_root=True):
'Function to remove empty folders' 'Function to remove empty folders'
if not os.path.isdir(path): if not os.path.isdir(path):
return return
@ -927,7 +969,8 @@ class Collection:
:returns: iter :returns: iter
""" """
for src_path in self._get_files_in_path( for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext path, glob=self.glob,
extensions=self.filter_by_ext,
): ):
dirname = src_path.parent.name dirname = src_path.parent.name
@ -1012,7 +1055,8 @@ class Collection:
moved_files = set() moved_files = set()
nb_row_ini = self.db.len('metadata') nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path( for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext path, glob=self.glob,
extensions=self.filter_by_ext,
): ):
dirname = src_path.parent.name dirname = src_path.parent.name
if dirname.find('similar_to') == 0: if dirname.find('similar_to') == 0:

View File

@ -79,11 +79,13 @@ class Config:
options['path_format'] = self.get_path_definition() options['path_format'] = self.get_path_definition()
if 'Path' in self.conf and 'day_begins' in self.conf['Path']: options['day_begins'] = 0
config_directory = self.conf['Path'] options['max_deep'] = None
options['day_begins'] = int(config_directory['day_begins']) if 'Path' in self.conf:
else: if 'day_begins' in self.conf['Path']:
options['day_begins'] = 0 options['day_begins'] = int(self.conf['Path']['day_begins'])
if 'max_deep' in self.conf['Path']:
options['max_deep'] = int(self.conf['Path']['max_deep'])
if 'Exclusions' in self.conf: if 'Exclusions' in self.conf:
options['exclude'] = [value for key, value in self.conf.items('Exclusions')] options['exclude'] = [value for key, value in self.conf.items('Exclusions')]

View File

@ -201,10 +201,10 @@ class TestCollection:
def test__get_files_in_path(self, tmp_path): def test__get_files_in_path(self, tmp_path):
collection = Collection(tmp_path, self.path_format, collection = Collection(tmp_path, self.path_format,
exclude={'**/*.dng',}, exclude={'**/*.dng',}, max_deep=1,
use_date_filename=True, use_file_dates=True) use_date_filename=True, use_file_dates=True)
paths = [x for x in collection._get_files_in_path(self.src_path, paths = [x for x in collection._get_files_in_path(self.src_path,
maxlevel=1, glob='**/photo*')] glob='**/photo*')]
assert len(paths) == 6 assert len(paths) == 6
for path in paths: for path in paths:
assert isinstance(path, Path) assert isinstance(path, Path)