Add a --path-format option; fix empty-directory removal and excluded-file handling

This commit is contained in:
Cédric Leporcq 2021-10-17 17:55:13 +02:00
parent 5c255093e3
commit 513adb2890
4 changed files with 91 additions and 43 deletions

View File

@ -11,8 +11,6 @@ from ordigi import constants
from ordigi import log
from ordigi.collection import Collection
from ordigi.geolocation import GeoLocation
# from ordigi.media import Media, get_all_subclasses
# from ordigi.summary import Summary
_logger_options = [
@ -120,10 +118,10 @@ def _get_exclude(opt, exclude):
'--interactive', '-i', default=False, is_flag=True, help="Interactive mode"
)
@click.option(
'--max-deep',
'-m',
'--path-format',
'-p',
default=None,
help='Maximum level to proceed. Number from 0 to desired level.',
help='set custom featured path format',
)
@click.option(
'--remove-duplicates',
@ -172,10 +170,6 @@ def sort(**kwargs):
logger = log.get_logger(level=log_level)
max_deep = kwargs['max_deep']
if max_deep is not None:
max_deep = int(max_deep)
cache = True
if kwargs['reset_cache']:
cache = False
@ -197,12 +191,16 @@ def sort(**kwargs):
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
path_format = opt['path_format']
if kwargs['path_format']:
path_format = kwargs['path_format']
exclude = _get_exclude(opt, kwargs['exclude'])
filter_by_ext = set(kwargs['filter_by_ext'])
collection = Collection(
destination,
opt['path_format'],
path_format,
kwargs['album_from_folder'],
cache,
opt['day_begins'],
@ -212,7 +210,7 @@ def sort(**kwargs):
kwargs['glob'],
kwargs['interactive'],
logger,
max_deep,
opt['max_deep'],
mode,
kwargs['use_date_filename'],
kwargs['use_file_dates'],
@ -246,13 +244,10 @@ def sort(**kwargs):
help='Regex to match duplicate strings parts',
)
@click.option(
'--folders', '-f', default=False, is_flag=True, help='Remove empty folders'
'--delete-excluded', '-d', default=False, is_flag=True, help='Remove excluded files'
)
@click.option(
'--max-deep',
'-m',
default=None,
help='Maximum level to proceed. Number from 0 to desired level.',
'--folders', '-f', default=False, is_flag=True, help='Remove empty folders'
)
@click.option(
'--path-string', '-p', default=False, is_flag=True, help='Deduplicate path string'
@ -276,6 +271,8 @@ def clean(**kwargs):
"""Remove empty folders
Usage: clean [--verbose|--debug] directory [removeRoot]"""
import ipdb; ipdb.set_trace()
result = True
dry_run = kwargs['dry_run']
folders = kwargs['folders']
log_level = log.level(kwargs['verbose'], kwargs['debug'])
@ -304,7 +301,7 @@ def clean(**kwargs):
filter_by_ext=filter_by_ext,
glob=kwargs['glob'],
logger=logger,
max_deep=kwargs['max_deep'],
max_deep=opt['max_deep'],
mode='move',
)
@ -317,6 +314,9 @@ def clean(**kwargs):
if clean_all or folders:
collection.remove_empty_folders(path)
if kwargs['delete_excluded']:
collection.remove_excluded_files()
if log_level < 30:
summary.print()
@ -335,7 +335,7 @@ def init(**kwargs):
logger = log.get_logger(level=log_level)
loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout'])
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
summary = collection.init(loc)
if log_level < 30:
@ -353,7 +353,7 @@ def update(**kwargs):
logger = log.get_logger(level=log_level)
loc = GeoLocation(opt['geocoder'], logger, opt['prefer_english_names'], opt['timeout'])
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
summary = collection.update(loc)
if log_level < 30:
@ -367,7 +367,9 @@ def check(**kwargs):
"""check db and verify hashes"""
log_level = log.level(kwargs['verbose'], kwargs['debug'])
logger = log.get_logger(level=log_level)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
collection = Collection(kwargs['path'], None, exclude=opt['exclude'], mode='move', logger=logger)
result = collection.check_db()
if result:
summary, result = collection.check_files()
@ -376,7 +378,7 @@ def check(**kwargs):
if not result:
sys.exit(1)
else:
self.logger.error('Db data is not accurate run `ordigi init`')
logger.error('Db data is not accurate run `ordigi update`')
sys.exit(1)

View File

@ -377,7 +377,7 @@ class Collection:
self._add_db_data(media.metadata)
if self.mode == 'move':
# Delete file path entry in db when file is moved inside collection
if str(self.root) in str(src_path):
if self.root in src_path.parents:
self.db.delete_filepath(str(src_path.relative_to(self.root)))
self.summary.append((src_path, dest_path))
@ -394,6 +394,20 @@ class Collection:
os.remove(file_path)
self.logger.info(f'remove: {file_path}')
def remove_excluded_files(self):
    """Remove files under the collection root that match an exclude pattern.

    Iterates over ``self.root.glob(self.glob)``, skips directories and every
    path located below ``self.root / '.ordigi'``, and calls ``self.remove``
    on each remaining file whose path matches at least one pattern of
    ``self.exclude`` (tested with ``fnmatch``).
    """
    # Loop-invariant: build the protected directory path once.
    ordigi_dir = self.root / '.ordigi'

    for file_path in self.root.glob(self.glob):
        if file_path.is_dir() or ordigi_dir in file_path.parents:
            continue

        # One matching pattern is enough; any() stops at the first hit.
        if any(fnmatch(file_path, pattern) for pattern in self.exclude):
            self.remove(file_path)
def sort_file(self, src_path, dest_path, remove_duplicates=False):
'''
Copy or move file to dest_path.
@ -526,7 +540,7 @@ class Collection:
"""
return len(path.parts) - 1
def _get_files_in_path(self, path, glob='**/*', maxlevel=None, extensions=set()):
def _get_files_in_path(self, path, glob='**/*', extensions=set()):
"""Recursively get files which match a path and extension.
:param str path string: Path to start recursive file listing
@ -545,12 +559,11 @@ class Collection:
else:
level = len(subdirs.parts)
if subdirs.parts != ():
if subdirs.parts[0] == '.ordigi':
if self.root / '.ordigi' in file_path.parents:
continue
if maxlevel is not None:
if level > maxlevel:
if self.max_deep is not None:
if level > self.max_deep:
continue
matched = False
@ -558,7 +571,6 @@ class Collection:
if fnmatch(file_path, exclude):
matched = True
break
if matched:
continue
@ -647,7 +659,13 @@ class Collection:
dedup_regex = [date_num3, date_num2, default]
conflict_file_list = []
self.src_list = [x for x in self._get_files_in_path(path, glob=self.glob)]
self.src_list = [
x
for x in self._get_files_in_path(
path, glob=self.glob,
extensions=self.filter_by_ext,
)
]
for src_path in self.src_list:
# TODO to test it
media = Media(src_path, path, logger=self.logger)
@ -819,17 +837,35 @@ class Collection:
return self.summary
def remove_empty_subdirs(self, directories):
    """Remove empty directories, then any parents left empty by the removal.

    :param directories: iterable of ``pathlib.Path`` directories to inspect.

    Each empty directory is removed and logged. Its parent is then queued
    for the next pass, provided the parent lies strictly below
    ``self.root``, so ``self.root`` itself is never removed.
    """
    pending = set(directories)
    while pending:
        parents = set()
        for directory in pending:
            # A directory can be queued twice: once on its own and once as
            # the parent of a nested directory removed in an earlier pass.
            # By then it may already be gone, and listing it would raise
            # FileNotFoundError.
            if not directory.is_dir():
                continue

            # Still populated: leave it alone. Its ancestors cannot be
            # empty either, so there is nothing to queue.
            if os.listdir(directory):
                continue

            self.logger.info(f"Removing empty folder: {directory}")
            directory.rmdir()

            parent = directory.parent
            if self.root in parent.parents:
                parents.add(parent)

        pending = parents
def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()):
"""
Sort files into appropriate folder
"""
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
if [x for x in self.db.get_rows('metadata')] == []:
self.init(loc, ignore_tags)
elif not self.check_db():
self.logger.error('Db data is not accurate run `ordigi update`')
sys.exit(1)
result = False
files_data = []
src_dirs_in_collection = set()
for path in paths:
self.dest_list = []
path = self._check_path(path)
@ -837,7 +873,8 @@ class Collection:
self.src_list = [
x
for x in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
path, glob=self.glob,
extensions=self.filter_by_ext,
)
]
if self.interactive:
@ -846,6 +883,9 @@ class Collection:
# Get medias and paths
for src_path in self.src_list:
# List all src_dirs in collection
if self.root in src_path.parents:
src_dirs_in_collection.add(src_path.parent)
# Process files
media = Media(
src_path,
@ -892,12 +932,14 @@ class Collection:
if conflict_file_list != []:
record = self._solve_conflicts(conflict_file_list, remove_duplicates)
self.remove_empty_subdirs(src_dirs_in_collection)
if not self._check_processed():
record = False
return self.summary, record
def remove_empty_folders(path, remove_root=True):
def remove_empty_folders(self, path, remove_root=True):
'Function to remove empty folders'
if not os.path.isdir(path):
return
@ -927,7 +969,8 @@ class Collection:
:returns: iter
"""
for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
path, glob=self.glob,
extensions=self.filter_by_ext,
):
dirname = src_path.parent.name
@ -1012,7 +1055,8 @@ class Collection:
moved_files = set()
nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
path, glob=self.glob,
extensions=self.filter_by_ext,
):
dirname = src_path.parent.name
if dirname.find('similar_to') == 0:

View File

@ -79,11 +79,13 @@ class Config:
options['path_format'] = self.get_path_definition()
if 'Path' in self.conf and 'day_begins' in self.conf['Path']:
config_directory = self.conf['Path']
options['day_begins'] = int(config_directory['day_begins'])
else:
options['day_begins'] = 0
options['max_deep'] = None
if 'Path' in self.conf:
if 'day_begins' in self.conf['Path']:
options['day_begins'] = int(self.conf['Path']['day_begins'])
if 'max_deep' in self.conf['Path']:
options['max_deep'] = int(self.conf['Path']['max_deep'])
if 'Exclusions' in self.conf:
options['exclude'] = [value for key, value in self.conf.items('Exclusions')]

View File

@ -201,10 +201,10 @@ class TestCollection:
def test__get_files_in_path(self, tmp_path):
collection = Collection(tmp_path, self.path_format,
exclude={'**/*.dng',},
exclude={'**/*.dng',}, max_deep=1,
use_date_filename=True, use_file_dates=True)
paths = [x for x in collection._get_files_in_path(self.src_path,
maxlevel=1, glob='**/photo*')]
glob='**/photo*')]
assert len(paths) == 6
for path in paths:
assert isinstance(path, Path)