Change file paths definitions and parsing

This commit is contained in:
Cédric Leporcq 2021-07-29 18:42:31 +02:00
parent b4a8cc88cb
commit 9f6eb52ebc
8 changed files with 257 additions and 46 deletions

View File

@ -1,16 +1,16 @@
[Directory] [Path]
# day_begins: what hour of the day you want the day to begin (only for # day_begins: what hour of the day you want the day to begin (only for
# classification purposes). Defaults at 0 as midnight. Can be # classification purposes). Defaults at 0 as midnight. Can be
# used to group early morning photos with the previous day. Must # used to group early morning photos with the previous day. Must
# be a number between 0-23') # be a number between 0-23')
day_begins=4 day_begins=4
location=%city, %state dirs_path=%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}
year=%Y name={%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}
month=%B
# date=%Y [Exclusions]
# custom=%date %album name1=.directory
full_path=%year/%month/%location name2=.DS_Store
[Geolocation] [Geolocation]
# geocoder: Nominatim or MapQuest # geocoder: Nominatim or MapQuest

View File

@ -19,6 +19,7 @@ from elodie import constants
from elodie import geolocation from elodie import geolocation
from elodie import log from elodie import log
from elodie.compatability import _decode from elodie.compatability import _decode
from elodie import config
from elodie.config import load_config from elodie.config import load_config
from elodie.filesystem import FileSystem from elodie.filesystem import FileSystem
from elodie.gui import CompareImageApp from elodie.gui import CompareImageApp
@ -182,8 +183,6 @@ def _import(destination, source, file, album_from_folder, trash,
@click.option('--ignore-tags', '-i', default=set(), multiple=True, @click.option('--ignore-tags', '-i', default=set(), multiple=True,
help='Specific tags or group that will be ignored when\ help='Specific tags or group that will be ignored when\
searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' ) searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' )
@click.option('--keep-folders', '-k', default=None,
help='Folder from given level are keep back')
@click.option('--max-deep', '-m', default=None, @click.option('--max-deep', '-m', default=None,
help='Maximum level to proceed. Number from 0 to desired level.') help='Maximum level to proceed. Number from 0 to desired level.')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True, @click.option('--remove-duplicates', '-r', default=False, is_flag=True,
@ -193,7 +192,7 @@ def _import(destination, source, file, album_from_folder, trash,
help='True if you want to see details of file processing') help='True if you want to see details of file processing')
@click.argument('paths', required=True, nargs=-1, type=click.Path()) @click.argument('paths', required=True, nargs=-1, type=click.Path())
def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignore_tags, def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignore_tags,
keep_folders, max_deep, remove_duplicates, verbose, paths): max_deep, remove_duplicates, verbose, paths):
"""Sort files or directories by reading their EXIF and organizing them """Sort files or directories by reading their EXIF and organizing them
according to config.ini preferences. according to config.ini preferences.
""" """
@ -210,9 +209,6 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor
else: else:
constants.debug = logging.ERROR constants.debug = logging.ERROR
if keep_folders is not None:
keep_folders = int(keep_folders)
if max_deep is not None: if max_deep is not None:
max_deep = int(max_deep) max_deep = int(max_deep)
@ -232,24 +228,26 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor
if not os.path.exists(destination): if not os.path.exists(destination):
logger.error(f'Directory {destination} does not exist') logger.error(f'Directory {destination} does not exist')
conf = config.load_config(constants.CONFIG_FILE)
path_format = config.get_path_definition(conf)
# if no exclude list was passed in we check if there's a config # if no exclude list was passed in we check if there's a config
if len(exclude_regex) == 0: if len(exclude_regex) == 0:
config = load_config(constants.CONFIG_FILE) if 'Exclusions' in conf:
if 'Exclusions' in config: exclude_regex = [value for key, value in conf.items('Exclusions')]
exclude_regex = [value for key, value in config.items('Exclusions')]
exclude_regex_list = set(exclude_regex) exclude_regex_list = set(exclude_regex)
# Initialize Db # Initialize Db
db = Db(destination) db = Db(destination)
if 'Directory' in config and 'day_begins' in config['Directory']: if 'Directory' in conf and 'day_begins' in conf['Directory']:
config_directory = config['Directory'] config_directory = conf['Directory']
day_begins = config_directory['day_begins'] day_begins = config_directory['day_begins']
else: else:
day_begins = 0 day_begins = 0
filesystem = FileSystem(mode, dry_run, exclude_regex_list, logger, filesystem = FileSystem(day_begins, dry_run, exclude_regex_list,
day_begins, filter_by_ext, keep_folders, max_deep) filter_by_ext, logger, max_deep, mode, path_format)
summary, has_errors = filesystem.sort_files(paths, destination, db, summary, has_errors = filesystem.sort_files(paths, destination, db,
remove_duplicates, ignore_tags) remove_duplicates, ignore_tags)

View File

@ -1,6 +1,7 @@
"""Load config file as a singleton.""" """Load config file as a singleton."""
from configparser import RawConfigParser from configparser import RawConfigParser
from os import path from os import path
from elodie import constants
def load_config(file): def load_config(file):
@ -33,3 +34,24 @@ def load_config_for_plugin(name, file):
return config[key] return config[key]
return {} return {}
def get_path_definition(config):
    """Return the path format string configured in the ``[Path]`` section.

    Lookup order:

    1. ``format`` -- used verbatim when present.
    2. ``dirs_path`` and ``name`` -- joined into one path, only when BOTH
       keys are present.
    3. Otherwise the defaults ``constants.default_path`` and
       ``constants.default_name`` joined into one path.

    :param config: Mapping of section names to mappings of options.
    :returns: string
    """
    if 'Path' in config:
        section = config['Path']
        if 'format' in section:
            return section['format']
        # Both keys must be tested explicitly: the expression
        # `'dirs_path' and 'name' in section` only tests 'name', because a
        # non-empty string literal is always truthy.
        if 'dirs_path' in section and 'name' in section:
            return path.join(section['dirs_path'], section['name'])

    return path.join(constants.default_path, constants.default_name)

View File

@ -16,6 +16,9 @@ if (
): ):
application_directory = environ['ELODIE_APPLICATION_DIRECTORY'] application_directory = environ['ELODIE_APPLICATION_DIRECTORY']
default_path = '{%Y-%m-%b}/{album}|{city}|{"Unknown Location"}'
default_name = '{%Y-%m-%d_%H-%M-%S}-{original_name}-{title}.{ext}'
#: File in which to store details about media Elodie has seen. #: File in which to store details about media Elodie has seen.
hash_db = 'hash.json' hash_db = 'hash.json'
# TODO will be removed eventually later # TODO will be removed eventually later

View File

@ -9,7 +9,9 @@ import filecmp
import hashlib import hashlib
import logging import logging
import os import os
import pathlib
import re import re
import sys
import shutil import shutil
import time import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
@ -29,9 +31,9 @@ from elodie.summary import Summary
class FileSystem(object): class FileSystem(object):
"""A class for interacting with the file system.""" """A class for interacting with the file system."""
def __init__(self, mode='copy', dry_run=False, exclude_regex_list=set(), def __init__(self, day_begins=0, dry_run=False, exclude_regex_list=set(),
logger=logging.getLogger(), day_begins=0, filter_by_ext=(), filter_by_ext=(), logger=logging.getLogger(), max_deep=None,
keep_folders=None, max_deep=None): mode='copy', path_format=None):
# The default folder path is along the lines of 2017-06-17_01-04-14-dsc_1234-some-title.jpg # The default folder path is along the lines of 2017-06-17_01-04-14-dsc_1234-some-title.jpg
self.default_file_name_definition = { self.default_file_name_definition = {
'date': '%Y-%m-%d_%H-%M-%S', 'date': '%Y-%m-%d_%H-%M-%S',
@ -51,6 +53,26 @@ class FileSystem(object):
# It captures some additional characters like the unicode checkmark \u2713. # It captures some additional characters like the unicode checkmark \u2713.
# See build failures in Python3 here. # See build failures in Python3 here.
# https://travis-ci.org/jmathai/elodie/builds/483012902 # https://travis-ci.org/jmathai/elodie/builds/483012902
self.items = {
'album': '{album}',
'basename': '{basename}',
'camera_make': '{camera_make}',
'camera_model': '{camera_model}',
'city': '{city}',
'custom': '{".*"}',
'country': '{country}',
# 'folder': '{folder[<>]?[-+]?[1-9]?}',
'folder': '{folder}',
'folders': '{folders(\[[0-9:]{0,3}\])?}',
'location': '{location}',
'ext': '{ext}',
'original_name': '{original_name}',
'state': '{state}',
'title': '{title}',
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string
}
self.whitespace_regex = '[ \t\n\r\f\v]+' self.whitespace_regex = '[ \t\n\r\f\v]+'
self.dry_run = dry_run self.dry_run = dry_run
@ -60,8 +82,12 @@ class FileSystem(object):
self.summary = Summary() self.summary = Summary()
self.day_begins = day_begins self.day_begins = day_begins
self.filter_by_ext = filter_by_ext self.filter_by_ext = filter_by_ext
self.keep_folders = keep_folders
self.max_deep = max_deep self.max_deep = max_deep
if path_format:
self.path_format = path_format
else:
self.path_format = os.path.join(constants.default_path,
constants.default_name)
# Instantiate a plugins object # Instantiate a plugins object
self.plugins = Plugins() self.plugins = Plugins()
@ -201,7 +227,7 @@ class FileSystem(object):
# [ # [
# [('date', '%Y-%m-%d_%H-%M-%S')], # [('date', '%Y-%m-%d_%H-%M-%S')],
# [('original_name', '')], [('title', '')], // contains a fallback # [('original_name', '')], [('title', '')], // contains a fallback
# [('extension', '')] # [('ext', '')]
# ] # ]
name_template, definition = self.get_file_name_definition() name_template, definition = self.get_file_name_definition()
@ -232,8 +258,12 @@ class FileSystem(object):
) )
break break
elif part in ('album', 'extension', 'title'): elif part in ('album', 'extension', 'title'):
if metadata[part]: key = part
this_value = re.sub(self.whitespace_regex, '-', metadata[part].strip()) if part == 'extension':
key = 'ext'
if metadata[key]:
this_value = re.sub(self.whitespace_regex, '-',
metadata[key].strip())
break break
elif part in ('original_name'): elif part in ('original_name'):
# First we check if we have metadata['original_name']. # First we check if we have metadata['original_name'].
@ -297,7 +327,7 @@ class FileSystem(object):
[ [
('date', '%Y-%m-%d'), ('date', '%Y-%m-%d'),
[ [
('location', '%city'), ('default', '%city'),
('album', ''), ('album', ''),
('"Unknown Location", '') ('"Unknown Location", '')
] ]
@ -320,7 +350,7 @@ class FileSystem(object):
# Find all subpatterns of name that map to the components of the file's # Find all subpatterns of name that map to the components of the file's
# name. # name.
# I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'extension'] #noqa # I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'ext'] #noqa
path_parts = re.findall( path_parts = re.findall(
'(\%[a-z_]+)', '(\%[a-z_]+)',
config_file['name'] config_file['name']
@ -357,7 +387,7 @@ class FileSystem(object):
[ [
('date', '%Y-%m-%d'), ('date', '%Y-%m-%d'),
[ [
('location', '%city'), ('default', '%city'),
('album', ''), ('album', ''),
('"Unknown Location", '') ('"Unknown Location", '')
] ]
@ -407,6 +437,7 @@ class FileSystem(object):
return self.cached_folder_path_definition return self.cached_folder_path_definition
def get_folder_path(self, metadata, db, path_parts=None): def get_folder_path(self, metadata, db, path_parts=None):
"""Given a media's metadata this function returns the folder path as a string. """Given a media's metadata this function returns the folder path as a string.
@ -433,6 +464,167 @@ class FileSystem(object):
break break
return os.path.join(*path) return os.path.join(*path)
def get_location_part(self, mask, part, place_name):
    """Substitute a place name into a location placeholder.

    When ``mask`` is a key of ``place_name``, every occurrence of ``mask``
    inside ``part`` is replaced by ``place_name[mask]``; text of ``part``
    that is not ``mask`` is kept. When ``mask`` is not a key, the result
    is an empty string.

    Examples:
        mask = 'city', part = 'city', place_name = {'city': u'Sunnyvale'}
        returns 'Sunnyvale'

        mask = 'city', part = 'city-random', place_name = {'city': u'Sunnyvale'}
        returns 'Sunnyvale-random'

        mask = 'state', part = 'state', place_name = {'city': u'Sunnyvale'}
        returns ''

    :param mask: Key looked up in ``place_name``.
    :param part: Text in which ``mask`` is replaced.
    :param place_name: Mapping of location keys to place names.
    :returns: str
    """
    if mask not in place_name:
        # No value for this key: the whole part is blanked out.
        return part.replace(part, '')

    return part.replace(mask, place_name[mask])
def get_part(self, item, mask, metadata, db, subdirs):
    """Resolve one placeholder of the path format to its text value.

    :param item: Name of the placeholder (a key of ``self.items``), e.g.
        ``date``, ``city``, ``folders`` or ``custom``.
    :param mask: Text found between the braces of the placeholder, e.g.
        ``%Y-%m`` for a date, ``folders[:1]`` for folders or
        ``"Unknown Location"`` (quotes included) for a custom string.
    :param metadata: Metadata dictionary.
    :param db: Database handle passed on to ``geolocation.place_name``.
    :param subdirs: Sub-directory path used by ``folder`` and ``folders``.
    :returns: str -- an empty string when the item has no value.
    :raises ValueError: if a ``folders`` mask is not of the form
        ``folders`` or ``folders[<index-or-slice>]``.
    """
    # Each item has its own custom logic; exactly one branch is evaluated.
    # Items are compared with == / tuple membership: `is` on string literals
    # depends on interning and `x in ('name')` is a substring test.
    if item == 'basename':
        return os.path.basename(metadata['base_name'])

    if item == 'date':
        date = self.get_date_taken(metadata)
        # early morning photos can be grouped with previous day
        date = self.check_for_early_morning_photos(date)
        if date is not None:
            return date.strftime(mask)
        return ''

    if item in ('location', 'city', 'state', 'country'):
        place_name = geolocation.place_name(
            metadata['latitude'],
            metadata['longitude'],
            db
        )
        # {location} maps to the 'default' entry of the place name. The key
        # is passed as both mask and part so that it is replaced by its
        # value (or by '' when the key is missing).
        key = 'default' if item == 'location' else item
        return self.get_location_part(key, key, place_name)

    if item == 'folder':
        return os.path.basename(subdirs)

    if item == 'folders':
        folders = pathlib.Path(subdirs).parts
        # Parse the optional [index] / [start:stop:step] selector explicitly
        # instead of eval()-ing text that comes from the config file.
        selector = re.fullmatch(
            r'folders(?:\[([0-9]*(?::[0-9]*){0,2})\])?', mask)
        if selector is None:
            raise ValueError(f'Invalid folders mask: {mask}')
        spec = selector.group(1)
        if spec:
            if ':' in spec:
                bounds = [int(bound) if bound else None
                          for bound in spec.split(':')]
                folders = folders[slice(*bounds)]
            else:
                # A single index keeps one folder; slicing keeps it a tuple
                # and yields nothing when the index is out of range.
                index = int(spec)
                folders = folders[index:index + 1]
        # os.path.join() needs at least one component.
        return os.path.join(*folders) if folders else ''

    if item in ('album', 'camera_make', 'camera_model', 'ext', 'title'):
        if metadata[item]:
            return re.sub(self.whitespace_regex, '_', metadata[item].strip())
        return ''

    if item == 'original_name':
        # First we check if we have metadata['original_name'].
        # We have to do this for backwards compatibility because
        # we originally did not store this back into EXIF.
        if metadata[item]:
            part = os.path.splitext(metadata['original_name'])[0]
        else:
            # We didn't always store original_name so this is
            # for backwards compatibility.
            # We want to remove the hardcoded date prefix we used
            # to add to the name.
            # This helps when re-running the program on files
            # which were already processed.
            part = re.sub(
                r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
                '',
                metadata['base_name']
            )
            if len(part) == 0:
                part = metadata['base_name']
        # Lastly we want to sanitize the name
        return re.sub(self.whitespace_regex, '_', part.strip())

    if item == 'custom':
        # Fallback string: drop the surrounding quotes.
        return mask[1:-1]

    return ''
def get_path(self, metadata, db, subdirs=''):
    """Build the relative file path from ``self.path_format``.

    The format is split on ``/`` into path levels and each level on ``|``
    into fallback alternatives, e.g.
    ``{%Y-%m}/{city}|{"Unknown Location"}/{basename}.%l{ext}``.
    In every alternative each placeholder known to ``self.items`` is
    replaced by the value returned by ``get_part``; a placeholder prefixed
    with ``%u`` or ``%l`` is replaced by the upper- or lower-cased value.
    For each level the first alternative that is not empty after
    substitution is kept; a level whose alternatives are all empty adds
    nothing to the path.

    Logs an error and exits the program (``sys.exit(1)``) when a brace is
    left in an alternative after substitution.

    :param metadata: Metadata dictionary passed on to ``get_part``.
    :param db: Database handle passed on to ``get_part``.
    :param subdirs: Sub-directory path passed on to ``get_part``.
    :returns: string
    """
    path = []
    for path_part in self.path_format.split('/'):
        for this_part in path_part.split('|'):
            for item, mask in self.items.items():
                matched = re.search(mask, this_part)
                if not matched:
                    continue

                # Strip the surrounding braces of the matched placeholder.
                part = self.get_part(item, matched.group()[1:-1], metadata,
                                     db, subdirs)

                # Capitalization
                umask = '%u' + mask
                lmask = '%l' + mask
                if re.search(umask, this_part):
                    pattern, value = umask, part.upper()
                elif re.search(lmask, this_part):
                    pattern, value = lmask, part.lower()
                else:
                    pattern, value = mask, part

                # A callable replacement inserts the value literally. A plain
                # string would be parsed as a template, so a backslash coming
                # from metadata or from a Windows folder path would be read
                # as an escape sequence.
                this_part = re.sub(
                    pattern, lambda _match, text=value: text, this_part)

            if this_part:
                # Check if all masks are substituted
                if '{' in this_part or '}' in this_part:
                    self.logger.error(
                        f'Format path part invalid: {this_part}')
                    sys.exit(1)

                path.append(this_part.strip())
                # We break as soon as we have a value to append
                break
            # Else we continue for fallbacks

    return os.path.join(*path)
def get_date_from_string(self, string, user_regex=None): def get_date_from_string(self, string, user_regex=None):
# If missing datetime from EXIF data check if filename is in datetime format. # If missing datetime from EXIF data check if filename is in datetime format.
# For this use a user provided regex if possible. # For this use a user provided regex if possible.
@ -811,11 +1003,8 @@ class FileSystem(object):
self.max_deep): self.max_deep):
if dirname == os.path.join(path, '.elodie'): if dirname == os.path.join(path, '.elodie'):
continue continue
if self.keep_folders is not None:
if level < self.keep_folders: subdirs = os.path.join(subdirs, os.path.basename(dirname))
subdirs = ''
else:
subdirs = os.path.join(subdirs, os.path.basename(dirname))
for filename in filenames: for filename in filenames:
# If file extension is in `extensions` # If file extension is in `extensions`
@ -850,16 +1039,14 @@ class FileSystem(object):
if media: if media:
metadata = media.get_metadata() metadata = media.get_metadata()
# Get the destination path according to metadata # Get the destination path according to metadata
directory_name = self.get_folder_path(metadata, db) file_path = self.get_path(metadata, db, subdirs=subdirs)
file_name = self.get_file_name(metadata)
else: else:
# Keep same directory structure # Keep same directory structure
directory_name = os.path.dirname(os.path.relpath(src_path, file_path = os.path.relpath(src_path, path)
path))
file_name = os.path.basename(src_path)
dest_directory = os.path.join(destination, directory_name) dest_directory = os.path.join(destination,
dest_path = os.path.join(dest_directory, subdirs, file_name) os.path.dirname(file_path))
dest_path = os.path.join(destination, file_path)
self.create_directory(dest_directory) self.create_directory(dest_directory)
result = self.sort_file(src_path, dest_path, remove_duplicates) result = self.sort_file(src_path, dest_path, remove_duplicates)
if result: if result:

View File

@ -111,7 +111,7 @@ class Media():
return None return None
source = self.source source = self.source
return os.path.splitext(source)[1][1:].lower() return os.path.splitext(source)[1][1:]
def get_metadata(self, update_cache=False, album_from_folder=False): def get_metadata(self, update_cache=False, album_from_folder=False):
@ -146,7 +146,7 @@ class Media():
'mime_type': self.get_mimetype(), 'mime_type': self.get_mimetype(),
'original_name': self.get_original_name(), 'original_name': self.get_original_name(),
'base_name': os.path.basename(os.path.splitext(source)[0]), 'base_name': os.path.basename(os.path.splitext(source)[0]),
'extension': self.get_extension(), 'ext': self.get_extension(),
'directory_path': os.path.dirname(source) 'directory_path': os.path.dirname(source)
} }

View File

@ -61,7 +61,7 @@ class GooglePhotos(PluginBase):
self.session = None self.session = None
def after(self, file_path, destination_folder, final_file_path, metadata): def after(self, file_path, destination_folder, final_file_path, metadata):
extension = metadata['extension'] extension = metadata['ext']
if(extension in Photo.extensions or extension in Video.extensions): if(extension in Photo.extensions or extension in Video.extensions):
self.log(u'Added {} to db.'.format(final_file_path)) self.log(u'Added {} to db.'.format(final_file_path))
self.db.set(final_file_path, metadata['original_name']) self.db.set(final_file_path, metadata['original_name'])

View File

@ -778,7 +778,8 @@ def test_sort_files():
temporary_folder_destination, folder_destination = helper.create_working_folder() temporary_folder_destination, folder_destination = helper.create_working_folder()
db = Db(folder) db = Db(folder)
filesystem = FileSystem() path_format = os.path.join(constants.default_path, constants.default_name)
filesystem = FileSystem(path_format=path_format)
filenames = ['photo.png', 'plain.jpg', 'text.txt', 'withoutextension'] filenames = ['photo.png', 'plain.jpg', 'text.txt', 'withoutextension']
for src_file in filenames: for src_file in filenames: