Change file paths definitions and parsing

This commit is contained in:
Cédric Leporcq 2021-07-29 18:42:31 +02:00
parent b4a8cc88cb
commit 9f6eb52ebc
8 changed files with 257 additions and 46 deletions

View File

@ -1,16 +1,16 @@
[Directory]
[Path]
# day_begins: the hour of the day at which a new day begins (used for
# classification purposes only). Defaults to 0 (midnight). Can be
# used to group early morning photos with the previous day. Must
# be a number between 0 and 23.
day_begins=4
location=%city, %state
year=%Y
month=%B
# date=%Y
# custom=%date %album
full_path=%year/%month/%location
dirs_path=%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}
name={%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}
[Exclusions]
name1=.directory
name2=.DS_Store
[Geolocation]
# geocoder: Nominatim or MapQuest

View File

@ -19,6 +19,7 @@ from elodie import constants
from elodie import geolocation
from elodie import log
from elodie.compatability import _decode
from elodie import config
from elodie.config import load_config
from elodie.filesystem import FileSystem
from elodie.gui import CompareImageApp
@ -182,8 +183,6 @@ def _import(destination, source, file, album_from_folder, trash,
@click.option('--ignore-tags', '-i', default=set(), multiple=True,
help='Specific tags or group that will be ignored when\
searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' )
@click.option('--keep-folders', '-k', default=None,
help='Folder from given level are keep back')
@click.option('--max-deep', '-m', default=None,
help='Maximum level to proceed. Number from 0 to desired level.')
@click.option('--remove-duplicates', '-r', default=False, is_flag=True,
@ -193,7 +192,7 @@ def _import(destination, source, file, album_from_folder, trash,
help='True if you want to see details of file processing')
@click.argument('paths', required=True, nargs=-1, type=click.Path())
def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignore_tags,
keep_folders, max_deep, remove_duplicates, verbose, paths):
max_deep, remove_duplicates, verbose, paths):
"""Sort files or directories by reading their EXIF and organizing them
according to config.ini preferences.
"""
@ -210,9 +209,6 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor
else:
constants.debug = logging.ERROR
if keep_folders is not None:
keep_folders = int(keep_folders)
if max_deep is not None:
max_deep = int(max_deep)
@ -232,24 +228,26 @@ def _sort(debug, dry_run, destination, copy, exclude_regex, filter_by_ext, ignor
if not os.path.exists(destination):
logger.error(f'Directory {destination} does not exist')
conf = config.load_config(constants.CONFIG_FILE)
path_format = config.get_path_definition(conf)
# if no exclude list was passed in we check if there's a config
if len(exclude_regex) == 0:
config = load_config(constants.CONFIG_FILE)
if 'Exclusions' in config:
exclude_regex = [value for key, value in config.items('Exclusions')]
if 'Exclusions' in conf:
exclude_regex = [value for key, value in conf.items('Exclusions')]
exclude_regex_list = set(exclude_regex)
# Initialize Db
db = Db(destination)
if 'Directory' in config and 'day_begins' in config['Directory']:
config_directory = config['Directory']
if 'Directory' in conf and 'day_begins' in conf['Directory']:
config_directory = conf['Directory']
day_begins = config_directory['day_begins']
else:
day_begins = 0
filesystem = FileSystem(mode, dry_run, exclude_regex_list, logger,
day_begins, filter_by_ext, keep_folders, max_deep)
filesystem = FileSystem(day_begins, dry_run, exclude_regex_list,
filter_by_ext, logger, max_deep, mode, path_format)
summary, has_errors = filesystem.sort_files(paths, destination, db,
remove_duplicates, ignore_tags)

View File

@ -1,6 +1,7 @@
"""Load config file as a singleton."""
from configparser import RawConfigParser
from os import path
from elodie import constants
def load_config(file):
@ -33,3 +34,24 @@ def load_config_for_plugin(name, file):
return config[key]
return {}
def get_path_definition(config):
    """Return the path format string defined by the configuration.

    Lookup order inside the ``[Path]`` section:

    1. ``format``: used verbatim when present.
    2. ``dirs_path`` and ``name``: joined together, only when BOTH keys are
       present.
    3. Otherwise (or when there is no ``[Path]`` section) the default
       ``constants.default_path`` / ``constants.default_name`` pair is used.

    :param config: Mapping-like configuration object supporting ``in`` and
        item access by section name.
    :returns: string
    """
    if 'Path' in config:
        path_section = config['Path']
        if 'format' in path_section:
            return path_section['format']
        # Both keys must be tested explicitly: the expression
        # `'dirs_path' and 'name' in section` only checks 'name'.
        if 'dirs_path' in path_section and 'name' in path_section:
            return path.join(path_section['dirs_path'], path_section['name'])

    return path.join(constants.default_path, constants.default_name)

View File

@ -16,6 +16,9 @@ if (
):
application_directory = environ['ELODIE_APPLICATION_DIRECTORY']
default_path = '{%Y-%m-%b}/{album}|{city}|{"Unknown Location"}'
default_name = '{%Y-%m-%d_%H-%M-%S}-{original_name}-{title}.{ext}'
#: File in which to store details about media Elodie has seen.
hash_db = 'hash.json'
# TODO will eventually be removed

View File

@ -9,7 +9,9 @@ import filecmp
import hashlib
import logging
import os
import pathlib
import re
import sys
import shutil
import time
from datetime import datetime, timedelta
@ -29,9 +31,9 @@ from elodie.summary import Summary
class FileSystem(object):
"""A class for interacting with the file system."""
def __init__(self, mode='copy', dry_run=False, exclude_regex_list=set(),
logger=logging.getLogger(), day_begins=0, filter_by_ext=(),
keep_folders=None, max_deep=None):
def __init__(self, day_begins=0, dry_run=False, exclude_regex_list=set(),
filter_by_ext=(), logger=logging.getLogger(), max_deep=None,
mode='copy', path_format=None):
# The default folder path is along the lines of 2017-06-17_01-04-14-dsc_1234-some-title.jpg
self.default_file_name_definition = {
'date': '%Y-%m-%d_%H-%M-%S',
@ -51,6 +53,26 @@ class FileSystem(object):
# It captures some additional characters like the unicode checkmark \u2713.
# See build failures in Python3 here.
# https://travis-ci.org/jmathai/elodie/builds/483012902
self.items = {
'album': '{album}',
'basename': '{basename}',
'camera_make': '{camera_make}',
'camera_model': '{camera_model}',
'city': '{city}',
'custom': '{".*"}',
'country': '{country}',
# 'folder': '{folder[<>]?[-+]?[1-9]?}',
'folder': '{folder}',
'folders': '{folders(\[[0-9:]{0,3}\])?}',
'location': '{location}',
'ext': '{ext}',
'original_name': '{original_name}',
'state': '{state}',
'title': '{title}',
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string
}
self.whitespace_regex = '[ \t\n\r\f\v]+'
self.dry_run = dry_run
@ -60,8 +82,12 @@ class FileSystem(object):
self.summary = Summary()
self.day_begins = day_begins
self.filter_by_ext = filter_by_ext
self.keep_folders = keep_folders
self.max_deep = max_deep
if path_format:
self.path_format = path_format
else:
self.path_format = os.path.join(constants.default_path,
constants.default_name)
# Instantiate a plugins object
self.plugins = Plugins()
@ -201,7 +227,7 @@ class FileSystem(object):
# [
# [('date', '%Y-%m-%d_%H-%M-%S')],
# [('original_name', '')], [('title', '')], // contains a fallback
# [('extension', '')]
# [('ext', '')]
# ]
name_template, definition = self.get_file_name_definition()
@ -232,8 +258,12 @@ class FileSystem(object):
)
break
elif part in ('album', 'extension', 'title'):
if metadata[part]:
this_value = re.sub(self.whitespace_regex, '-', metadata[part].strip())
key = part
if part == 'extension':
key = 'ext'
if metadata[key]:
this_value = re.sub(self.whitespace_regex, '-',
metadata[key].strip())
break
elif part in ('original_name'):
# First we check if we have metadata['original_name'].
@ -297,7 +327,7 @@ class FileSystem(object):
[
('date', '%Y-%m-%d'),
[
('location', '%city'),
('default', '%city'),
('album', ''),
('"Unknown Location", '')
]
@ -320,7 +350,7 @@ class FileSystem(object):
# Find all subpatterns of name that map to the components of the file's
# name.
# I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'extension'] #noqa
# I.e. %date-%original_name-%title.%extension => ['date', 'original_name', 'title', 'ext'] #noqa
path_parts = re.findall(
'(\%[a-z_]+)',
config_file['name']
@ -357,7 +387,7 @@ class FileSystem(object):
[
('date', '%Y-%m-%d'),
[
('location', '%city'),
('default', '%city'),
('album', ''),
('"Unknown Location", '')
]
@ -407,6 +437,7 @@ class FileSystem(object):
return self.cached_folder_path_definition
def get_folder_path(self, metadata, db, path_parts=None):
"""Given a media's metadata this function returns the folder path as a string.
@ -433,6 +464,167 @@ class FileSystem(object):
break
return os.path.join(*path)
def get_location_part(self, mask, part, place_name):
    """Interpolate a place name into a location path part.

    When ``mask`` is a key of ``place_name``, every occurrence of ``mask``
    found in ``part`` is replaced by ``place_name[mask]``. When it is not,
    ``part`` is replaced by an empty string.

    Examples of the actual results:

    mask = 'city'
    part = 'city'
    place_name = {'city': u'Sunnyvale'}
    return 'Sunnyvale'

    mask = 'city'
    part = 'city-random'
    place_name = {'city': u'Sunnyvale'}
    return 'Sunnyvale-random'

    mask = 'location'
    part = 'location'
    place_name = {'default': u'Sunnyvale', 'city': u'Sunnyvale'}
    return ''

    :param mask: Key looked up in ``place_name``.
    :param part: Text in which the mask is substituted.
    :param place_name: Mapping of location field names to place names.
    :returns: str
    """
    known = mask in place_name
    # Unknown masks blank the whole part by replacing it with nothing.
    target = mask if known else part
    substitute = place_name[mask] if known else ''
    return part.replace(target, substitute)
def get_part(self, item, mask, metadata, db, subdirs):
    """Evaluate a single path item and return its string value.

    :param item: Name of the item as defined in the path format
        (i.e. 'date', 'city', 'folders', 'custom'...).
    :param mask: Text found between the braces of the matched item
        (i.e. '%Y-%m' for a date, 'folders[:1]' for folders,
        '"Unknown Location"' for a custom string).
    :param metadata: Metadata dictionary of the file being processed.
    :param db: Database object handed to ``geolocation.place_name``.
    :param subdirs: Relative sub-directory path of the file, used by the
        'folder' and 'folders' items.
    :raises ValueError: If a 'folders' mask is not ``folders``,
        ``folders[N]`` or ``folders[A:B[:C]]``.
    :returns: str, empty when the item has no value or is unknown.
    """
    # Each item has its own custom logic; exactly one branch is evaluated.
    # Items are compared with `==` / tuple membership: `item is 'date'` relies
    # on string interning and `item in ('basename')` is a substring test.
    if item == 'basename':
        return os.path.basename(metadata['base_name'])

    if item == 'date':
        date = self.get_date_taken(metadata)
        # Early morning photos can be grouped with the previous day.
        # NOTE(review): the helper is called before the None check, as in the
        # original code; confirm it accepts a missing date.
        date = self.check_for_early_morning_photos(date)
        if date is not None:
            return date.strftime(mask)
        return ''

    if item in ('location', 'city', 'state', 'country'):
        place_name = geolocation.place_name(
            metadata['latitude'],
            metadata['longitude'],
            db
        )
        if item == 'location':
            mask = 'default'
        # The mask is passed as the text to interpolate too: with
        # part='location' and mask='default' nothing would be replaced and
        # the literal word 'location' would end up in the path.
        return self.get_location_part(mask, mask, place_name)

    if item == 'folder':
        return os.path.basename(subdirs)

    if item == 'folders':
        folders = pathlib.Path(subdirs).parts
        # The selector is parsed by hand instead of eval()-ing text that
        # comes from the configuration file.
        spec = re.fullmatch(r'folders(?:\[([0-9:]+)\])?', mask)
        if spec is None:
            raise ValueError(f'Invalid folders mask: {mask}')
        selector = spec.group(1)
        if selector is None:
            selected = folders
        elif ':' in selector:
            bounds = [int(bound) if bound else None
                      for bound in selector.split(':')]
            if len(bounds) > 3:
                raise ValueError(f'Invalid folders mask: {mask}')
            selected = folders[slice(*bounds)]
        else:
            # A single index is kept as a one-element tuple so that the
            # folder name is not unpacked character by character below;
            # an out-of-range index selects nothing.
            index = int(selector)
            selected = folders[index:index + 1]
        # os.path.join() needs at least one argument.
        return os.path.join(*selected) if selected else ''

    if item in ('album', 'camera_make', 'camera_model', 'ext', 'title'):
        if metadata[item]:
            return re.sub(self.whitespace_regex, '_', metadata[item].strip())
        return ''

    if item == 'original_name':
        # First we check if we have metadata['original_name'].
        # We have to do this for backwards compatibility because
        # we originally did not store this back into EXIF.
        if metadata[item]:
            part = os.path.splitext(metadata['original_name'])[0]
        else:
            # We didn't always store original_name so this is
            # for backwards compatibility.
            # We want to remove the hardcoded date prefix we used
            # to add to the name.
            # This helps when re-running the program on files
            # which were already processed.
            part = re.sub(
                r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
                '',
                metadata['base_name']
            )
            if len(part) == 0:
                part = metadata['base_name']
        # Lastly we want to sanitize the name
        return re.sub(self.whitespace_regex, '_', part.strip())

    if item == 'custom':
        # Fallback string: drop the surrounding quotes.
        return mask[1:-1]

    return ''
def get_path(self, metadata, db, subdirs=''):
    """Build the relative destination path of a file from ``self.path_format``.

    Example of path format: ``{%Y-%d-%m}/%u{city}/{album}``

    The format is split on '/' into path parts. Each part may list
    fallback alternatives separated by '|'; the first alternative that
    evaluates to a non-empty string is kept. Inside an alternative, every
    mask known to ``self.items`` is replaced by the value returned by
    ``self.get_part``; a ``%u`` or ``%l`` prefix upper-cases or lower-cases
    that value.

    If a kept alternative still contains '{' or '}' after substitution, an
    error is logged and the program exits with status 1.

    :param metadata: Metadata dictionary of the file being processed.
    :param db: Database object forwarded to ``self.get_part``.
    :param subdirs: Relative sub-directory path forwarded to
        ``self.get_part``.
    :returns: string
    """
    path = []
    for path_part in self.path_format.split('/'):
        for this_part in path_part.split('|'):
            for item, mask in self.items.items():
                # Optional capitalization prefix followed by the item mask.
                pattern = '(?P<case>%[ul])?(?P<mask>' + mask + ')'

                def substitute(match, item=item):
                    # Each occurrence is evaluated with its own mask, and
                    # returning the value from a function keeps backslashes
                    # in metadata from being read as regex escapes.
                    value = self.get_part(item, match.group('mask')[1:-1],
                                          metadata, db, subdirs)
                    case = match.group('case')
                    if case == '%u':
                        return value.upper()
                    if case == '%l':
                        return value.lower()
                    return value

                this_part = re.sub(pattern, substitute, this_part)

            if this_part:
                # Check if all masks are substituted
                if any(brace in this_part for brace in '{}'):
                    self.logger.error(
                        f'Format path part invalid: {this_part}')
                    sys.exit(1)

                path.append(this_part.strip())
                # We break as soon as we have a value to append
                break
            # Else we continue for fallbacks

    return os.path.join(*path)
def get_date_from_string(self, string, user_regex=None):
# If missing datetime from EXIF data check if filename is in datetime format.
# For this use a user provided regex if possible.
@ -811,11 +1003,8 @@ class FileSystem(object):
self.max_deep):
if dirname == os.path.join(path, '.elodie'):
continue
if self.keep_folders is not None:
if level < self.keep_folders:
subdirs = ''
else:
subdirs = os.path.join(subdirs, os.path.basename(dirname))
subdirs = os.path.join(subdirs, os.path.basename(dirname))
for filename in filenames:
# If file extension is in `extensions`
@ -850,16 +1039,14 @@ class FileSystem(object):
if media:
metadata = media.get_metadata()
# Get the destination path according to metadata
directory_name = self.get_folder_path(metadata, db)
file_name = self.get_file_name(metadata)
file_path = self.get_path(metadata, db, subdirs=subdirs)
else:
# Keep same directory structure
directory_name = os.path.dirname(os.path.relpath(src_path,
path))
file_name = os.path.basename(src_path)
file_path = os.path.relpath(src_path, path)
dest_directory = os.path.join(destination, directory_name)
dest_path = os.path.join(dest_directory, subdirs, file_name)
dest_directory = os.path.join(destination,
os.path.dirname(file_path))
dest_path = os.path.join(destination, file_path)
self.create_directory(dest_directory)
result = self.sort_file(src_path, dest_path, remove_duplicates)
if result:

View File

@ -111,7 +111,7 @@ class Media():
return None
source = self.source
return os.path.splitext(source)[1][1:].lower()
return os.path.splitext(source)[1][1:]
def get_metadata(self, update_cache=False, album_from_folder=False):
@ -146,7 +146,7 @@ class Media():
'mime_type': self.get_mimetype(),
'original_name': self.get_original_name(),
'base_name': os.path.basename(os.path.splitext(source)[0]),
'extension': self.get_extension(),
'ext': self.get_extension(),
'directory_path': os.path.dirname(source)
}

View File

@ -61,7 +61,7 @@ class GooglePhotos(PluginBase):
self.session = None
def after(self, file_path, destination_folder, final_file_path, metadata):
extension = metadata['extension']
extension = metadata['ext']
if(extension in Photo.extensions or extension in Video.extensions):
self.log(u'Added {} to db.'.format(final_file_path))
self.db.set(final_file_path, metadata['original_name'])

View File

@ -778,7 +778,8 @@ def test_sort_files():
temporary_folder_destination, folder_destination = helper.create_working_folder()
db = Db(folder)
filesystem = FileSystem()
path_format = os.path.join(constants.default_path, constants.default_name)
filesystem = FileSystem(path_format=path_format)
filenames = ['photo.png', 'plain.jpg', 'text.txt', 'withoutextension']
for src_file in filenames: