From 9f6eb52ebca14bf22a62d944363e1f40260ce6ec Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Thu, 29 Jul 2021 18:42:31 +0200 Subject: [PATCH] Change file paths definitions and parsing --- config.ini-sample | 14 +- elodie.py | 24 +- elodie/config.py | 22 ++ elodie/constants.py | 3 + elodie/filesystem.py | 231 ++++++++++++++++++-- elodie/media/media.py | 4 +- elodie/plugins/googlephotos/googlephotos.py | 2 +- tests/test_filesystem.py | 3 +- 8 files changed, 257 insertions(+), 46 deletions(-) diff --git a/config.ini-sample b/config.ini-sample index d3733ce..7bb5a35 100644 --- a/config.ini-sample +++ b/config.ini-sample @@ -1,16 +1,16 @@ -[Directory] +[Path] # day_begins: what hour of the day you want the day to begin (only for # classification purposes). Defaults at 0 as midnight. Can be # used to group early morning photos with the previous day. Must # be a number between 0-23') day_begins=4 -location=%city, %state -year=%Y -month=%B -# date=%Y -# custom=%date %album -full_path=%year/%month/%location +dirs_path=%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder} +name={%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext} + +[Exclusions] +name1=.directory +name2=.DS_Store [Geolocation] # geocoder: Nominatim or MapQuest diff --git a/elodie.py b/elodie.py index d14a196..3ea7baa 100755 --- a/elodie.py +++ b/elodie.py @@ -19,6 +19,7 @@ from elodie import constants from elodie import geolocation from elodie import log from elodie.compatability import _decode +from elodie import config from elodie.config import load_config from elodie.filesystem import FileSystem from elodie.gui import CompareImageApp @@ -182,8 +183,6 @@ def _import(destination, source, file, album_from_folder, trash, @click.option('--ignore-tags', '-i', default=set(), multiple=True, help='Specific tags or group that will be ignored when\ searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' ) -@click.option('--keep-folders', '-k', default=None, - help='Folder from given level are keep back') @click.option('--max-deep', '-m', default=None, help='Maximum level to proceed. Number from 0 to desired level.') @click.option('--remove-duplicates', '-r', default=False, is_flag=True, @@ -193,7 +192,7 @@ def _import(destination, source, file, album_from_folder, trash, help='True if you want to see details of file processing') @click.argument('paths', required=True, nargs=-1, type=click.Path()) def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignore_tags, - keep_folders, max_deep, remove_duplicates, verbose, paths): + max_deep, remove_duplicates, verbose, paths): """Sort files or directories by reading their EXIF and organizing them according to config.ini preferences. """ @@ -210,9 +209,6 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor else: constants.debug = logging.ERROR - if keep_folders is not None: - keep_folders = int(keep_folders) - if max_deep is not None: max_deep = int(max_deep) @@ -232,24 +228,26 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor if not os.path.exists(destination): logger.error(f'Directory {destination} does not exist') + conf = config.load_config(constants.CONFIG_FILE) + path_format = config.get_path_definition(conf) + # if no exclude list was passed in we check if there's a config if len(exclude_regex) == 0: - config = load_config(constants.CONFIG_FILE) - if 'Exclusions' in config: - exclude_regex = [value for key, value in config.items('Exclusions')] + if 'Exclusions' in conf: + exclude_regex = [value for key, value in conf.items('Exclusions')] exclude_regex_list = set(exclude_regex) # Initialize Db db = Db(destination) - if 'Directory' in config and 'day_begins' in config['Directory']: - config_directory = config['Directory'] + if 'Directory' in conf and 'day_begins' in conf['Directory']: + config_directory = conf['Directory'] day_begins = config_directory['day_begins'] else: day_begins = 0 - filesystem = FileSystem(mode, dry_run, exclude_regex_list, logger, - day_begins, filter_by_ext, keep_folders, max_deep) + filesystem = FileSystem(day_begins, dry_run, exclude_regex_list, + filter_by_ext, logger, max_deep, mode, path_format) summary, has_errors = filesystem.sort_files(paths, destination, db, remove_duplicates, ignore_tags) diff --git a/elodie/config.py b/elodie/config.py index d8f5c9a..37e38ea 100644 --- a/elodie/config.py +++ b/elodie/config.py @@ -1,6 +1,7 @@ """Load config file as a singleton.""" from configparser import RawConfigParser from os import path +from elodie import constants def load_config(file): @@ -33,3 +34,24 @@ def load_config_for_plugin(name, file): return config[key] return {} + + +def get_path_definition(config): + """Returns a list of folder definitions. + + Each element in the list represents a folder. + Fallback folders are supported and are nested lists. + + :returns: string + """ + + if 'Path' in config: + if 'format' in config['Path']: + return config['Path']['format'] + elif 'dirs_path' and 'name' in config['Path']: + return path.join(config['Path']['dirs_path'], + config['Path']['name']) + + return path.join(constants.default_path, constants.default_name) + + diff --git a/elodie/constants.py b/elodie/constants.py index 4bddc14..18756b9 100644 --- a/elodie/constants.py +++ b/elodie/constants.py @@ -16,6 +16,9 @@ if ( ): application_directory = environ['ELODIE_APPLICATION_DIRECTORY'] +default_path = '{%Y-%m-%b}/{album}|{city}|{"Unknown Location"}' +default_name = '{%Y-%m-%d_%H-%M-%S}-{original_name}-{title}.{ext}' + #: File in which to store details about media Elodie has seen. hash_db = 'hash.json' # TODO will be removed eventualy later diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 7f5044a..7642dd3 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -9,7 +9,9 @@ import filecmp import hashlib import logging import os +import pathlib import re +import sys import shutil import time from datetime import datetime, timedelta @@ -29,9 +31,9 @@ from elodie.summary import Summary class FileSystem(object): """A class for interacting with the file system.""" - def __init__(self, mode='copy', dry_run=False, exclude_regex_list=set(), - logger=logging.getLogger(), day_begins=0, filter_by_ext=(), - keep_folders=None, max_deep=None): + def __init__(self, day_begins=0, dry_run=False, exclude_regex_list=set(), + filter_by_ext=(), logger=logging.getLogger(), max_deep=None, + mode='copy', path_format=None): # The default folder path is along the lines of 2017-06-17_01-04-14-dsc_1234-some-title.jpg self.default_file_name_definition = { 'date': '%Y-%m-%d_%H-%M-%S', @@ -51,6 +53,26 @@ class FileSystem(object): # It captures some additional characters like the unicode checkmark \u2713. # See build failures in Python3 here. # https://travis-ci.org/jmathai/elodie/builds/483012902 + + self.items = { + 'album': '{album}', + 'basename': '{basename}', + 'camera_make': '{camera_make}', + 'camera_model': '{camera_model}', + 'city': '{city}', + 'custom': '{".*"}', + 'country': '{country}', + # 'folder': '{folder[<>]?[-+]?[1-9]?}', + 'folder': '{folder}', + 'folders': '{folders(\[[0-9:]{0,3}\])?}', + 'location': '{location}', + 'ext': '{ext}', + 'original_name': '{original_name}', + 'state': '{state}', + 'title': '{title}', + 'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string + } + self.whitespace_regex = '[ \t\n\r\f\v]+' self.dry_run = dry_run @@ -60,8 +82,12 @@ class FileSystem(object): self.summary = Summary() self.day_begins = day_begins self.filter_by_ext = filter_by_ext - self.keep_folders = keep_folders self.max_deep = max_deep + if path_format: + self.path_format = path_format + else: + self.path_format = os.path.join(constants.default_path, + constants.default_name) # Instantiate a plugins object self.plugins = Plugins() @@ -201,7 +227,7 @@ class FileSystem(object): # [ # [('date', '%Y-%m-%d_%H-%M-%S')], # [('original_name', '')], [('title', '')], // contains a fallback - # [('extension', '')] + # [('ext', '')] # ] name_template, definition = self.get_file_name_definition() @@ -232,8 +258,12 @@ class FileSystem(object): ) break elif part in ('album', 'extension', 'title'): - if metadata[part]: - this_value = re.sub(self.whitespace_regex, '-', metadata[part].strip()) + key = part + if part == 'extension': + key = 'ext' + if metadata[key]: + this_value = re.sub(self.whitespace_regex, '-', + metadata[key].strip()) break elif part in ('original_name'): # First we check if we have metadata['original_name']. @@ -297,7 +327,7 @@ class FileSystem(object): [ ('date', '%Y-%m-%d'), [ - ('location', '%city'), + ('default', '%city'), ('album', ''), ('"Unknown Location", '') ] @@ -320,7 +350,7 @@ class FileSystem(object): # Find all subpatterns of name that map to the components of the file's # name. - # I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'extension'] #noqa + # I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'ext'] #noqa path_parts = re.findall( '(\%[a-z_]+)', config_file['name'] @@ -357,7 +387,7 @@ class FileSystem(object): [ ('date', '%Y-%m-%d'), [ - ('location', '%city'), + ('default', '%city'), ('album', ''), ('"Unknown Location", '') ] @@ -407,6 +437,7 @@ class FileSystem(object): return self.cached_folder_path_definition + def get_folder_path(self, metadata, db, path_parts=None): """Given a media's metadata this function returns the folder path as a string. @@ -433,6 +464,167 @@ class FileSystem(object): break return os.path.join(*path) + + def get_location_part(self, mask, part, place_name): + """Takes a mask for a location and interpolates the actual place names. + + Given these parameters here are the outputs. + + mask = 'city' + part = 'city-random' + place_name = {'city': u'Sunnyvale'} + return 'Sunnyvale' + + mask = 'location' + part = 'location' + place_name = {'default': u'Sunnyvale', 'city': u'Sunnyvale'} + return 'Sunnyvale' + + :returns: str + """ + folder_name = part + if(mask in place_name): + replace_target = mask + replace_with = place_name[mask] + else: + replace_target = part + replace_with = '' + + folder_name = folder_name.replace( + replace_target, + replace_with, + ) + + return folder_name + + + + def get_part(self, item, mask, metadata, db, subdirs): + """Parse a specific folder's name given a mask and metadata. + + :param item: Name of the item as defined in the path (i.e. date from %date) + :param mask: Mask representing the template for the path (i.e. %city %state + :param metadata: Metadata dictionary. + :returns: str + """ + + # Each item has its own custom logic and we evaluate a single item and return + # the evaluated string. + if item in ('basename'): + return os.path.basename(metadata['base_name']) + elif item is 'date': + date = self.get_date_taken(metadata) + # early morning photos can be grouped with previous day + date = self.check_for_early_morning_photos(date) + if date is not None: + return date.strftime(mask) + else: + return '' + + elif item in ('location', 'city', 'state', 'country'): + place_name = geolocation.place_name( + metadata['latitude'], + metadata['longitude'], + db + ) + if item == 'location': + mask = 'default' + + return self.get_location_part(mask, item, place_name) + elif item in ('folder'): + return os.path.basename(subdirs) + + elif item in ('folders'): + folders = pathlib.Path(subdirs).parts + folders = eval(mask) + + return os.path.join(*folders) + + elif item in ('album','camera_make', 'camera_model', 'ext', + 'title'): + if metadata[item]: + # return metadata[item] + return re.sub(self.whitespace_regex, '_', metadata[item].strip()) + elif item in ('original_name'): + # First we check if we have metadata['original_name']. + # We have to do this for backwards compatibility because + # we original did not store this back into EXIF. + if metadata[item]: + part = os.path.splitext(metadata['original_name'])[0] + else: + # We didn't always store original_name so this is + # for backwards compatability. + # We want to remove the hardcoded date prefix we used + # to add to the name. + # This helps when re-running the program on file + # which were already processed. + part = re.sub( + '^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-', + '', + metadata['base_name'] + ) + if(len(part) == 0): + part = metadata['base_name'] + # Lastly we want to sanitize the name + return re.sub(self.whitespace_regex, '_', part.strip()) + elif item in 'custom': + # Fallback string + return mask[1:-1] + + return '' + + + def get_path(self, metadata, db, subdirs=''): + """path_format: {%Y-%d-%m}/%u{city}/{album} + + Returns file path. + + :returns: string""" + + # if self.path_format is None: + # path_format = self.get_path_definition() + path_format = self.path_format + # self.cached_folder_path_definition = [] + path = [] + path_parts = path_format.split('/') + for path_part in path_parts: + this_parts = path_part.split('|') + # p = [] + for this_part in this_parts: + # parts = '' + for item, mask in self.items.items(): + matched = re.search(mask, this_part) + if matched: + # parts = re.split(mask, this_part) + # parts = this_part.split('%')[1:] + part = self.get_part(item, matched.group()[1:-1], metadata, db, + subdirs) + + # Capitalization + umask = '%u' + mask + lmask = '%l' + mask + if re.search(umask, this_part): + this_part = re.sub(umask, part.upper(), this_part) + elif re.search(lmask, this_part): + this_part = re.sub(lmask, part.lower(), this_part) + else: + this_part = re.sub(mask, part, this_part) + + if this_part: + # Check if all masks are substituted + if True in [c in this_part for c in '{}']: + self.logger.error(f'Format path part invalid: \ + {this_part}') + sys.exit(1) + + path.append(this_part.strip()) + # We break as soon as we have a value to append + break + # Else we continue for fallbacks + + return os.path.join(*path) + + def get_date_from_string(self, string, user_regex=None): # If missing datetime from EXIF data check if filename is in datetime format. # For this use a user provided regex if possible. @@ -811,11 +1003,8 @@ class FileSystem(object): self.max_deep): if dirname == os.path.join(path, '.elodie'): continue - if self.keep_folders is not None: - if level < self.keep_folders: - subdirs = '' - else: - subdirs = os.path.join(subdirs, os.path.basename(dirname)) + + subdirs = os.path.join(subdirs, os.path.basename(dirname)) for filename in filenames: # If file extension is in `extensions` @@ -850,16 +1039,14 @@ class FileSystem(object): if media: metadata = media.get_metadata() # Get the destination path according to metadata - directory_name = self.get_folder_path(metadata, db) - file_name = self.get_file_name(metadata) + file_path = self.get_path(metadata, db, subdirs=subdirs) else: # Keep same directory structure - directory_name = os.path.dirname(os.path.relpath(src_path, - path)) - file_name = os.path.basename(src_path) + file_path = os.path.relpath(src_path, path) - dest_directory = os.path.join(destination, directory_name) - dest_path = os.path.join(dest_directory, subdirs, file_name) + dest_directory = os.path.join(destination, + os.path.dirname(file_path)) + dest_path = os.path.join(destination, file_path) self.create_directory(dest_directory) result = self.sort_file(src_path, dest_path, remove_duplicates) if result: diff --git a/elodie/media/media.py b/elodie/media/media.py index 50051ac..9c10ac9 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -111,7 +111,7 @@ class Media(): return None source = self.source - return os.path.splitext(source)[1][1:].lower() + return os.path.splitext(source)[1][1:] def get_metadata(self, update_cache=False, album_from_folder=False): @@ -146,7 +146,7 @@ class Media(): 'mime_type': self.get_mimetype(), 'original_name': self.get_original_name(), 'base_name': os.path.basename(os.path.splitext(source)[0]), - 'extension': self.get_extension(), + 'ext': self.get_extension(), 'directory_path': os.path.dirname(source) } diff --git a/elodie/plugins/googlephotos/googlephotos.py b/elodie/plugins/googlephotos/googlephotos.py index d63b3e4..c9e19d5 100644 --- a/elodie/plugins/googlephotos/googlephotos.py +++ b/elodie/plugins/googlephotos/googlephotos.py @@ -61,7 +61,7 @@ class GooglePhotos(PluginBase): self.session = None def after(self, file_path, destination_folder, final_file_path, metadata): - extension = metadata['extension'] + extension = metadata['ext'] if(extension in Photo.extensions or extension in Video.extensions): self.log(u'Added {} to db.'.format(final_file_path)) self.db.set(final_file_path, metadata['original_name']) diff --git a/tests/test_filesystem.py b/tests/test_filesystem.py index b050d2c..9fcddc5 100644 --- a/tests/test_filesystem.py +++ b/tests/test_filesystem.py @@ -778,7 +778,8 @@ def test_sort_files(): temporary_folder_destination, folder_destination = helper.create_working_folder() db = Db(folder) - filesystem = FileSystem() + path_format = os.path.join(constants.default_path, constants.default_name) + filesystem = FileSystem(path_format=path_format) filenames = ['photo.png', 'plain.jpg', 'text.txt', 'withoutextension'] for src_file in filenames: