From 9dfb870704a7a47c57fdcf7abf775da3c83d73d8 Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Sun, 8 Aug 2021 13:09:13 +0200 Subject: [PATCH] Fix get_path, get_part and change get_date_from_string functions --- dozo/filesystem.py | 135 ++++++++++++++++++++++---------------------- dozo/media/media.py | 4 +- 2 files changed, 69 insertions(+), 70 deletions(-) diff --git a/dozo/filesystem.py b/dozo/filesystem.py index 3896945..5e55c85 100644 --- a/dozo/filesystem.py +++ b/dozo/filesystem.py @@ -197,17 +197,20 @@ class FileSystem(object): # Each item has its own custom logic and we evaluate a single item and return # the evaluated string. - if item in ('basename'): - return os.path.basename(metadata['base_name']) - elif item is 'date': + part = '' + if item == 'basename': + part = os.path.basename(metadata['base_name']) + elif item == 'name': + # Remove date prefix added to the name. + part = metadata['base_name'] + for i, rx in self.match_date_from_string(metadata['base_name']): + part = re.sub(rx, '', part) + elif item == 'date': date = self.get_date_taken(metadata) # early morning photos can be grouped with previous day date = self.check_for_early_morning_photos(date) if date is not None: - return date.strftime(mask) - else: - return '' - + part = date.strftime(mask) elif item in ('location', 'city', 'state', 'country'): place_name = geolocation.place_name( metadata['latitude'], @@ -219,51 +222,33 @@ class FileSystem(object): if item == 'location': mask = 'default' - return self.get_location_part(mask, item, place_name) - elif item in ('folder'): - return os.path.basename(subdirs) + part = self.get_location_part(mask, item, place_name) + elif item == 'folder': + part = os.path.basename(subdirs) - elif item in ('folders'): + elif item == 'folders': folders = pathlib.Path(subdirs).parts folders = eval(mask) - return os.path.join(*folders) + part = os.path.join(*folders) elif item in ('album','camera_make', 'camera_model', 'ext', 'title'): if metadata[item]: - # return metadata[item] - return re.sub(self.whitespace_regex, '_', metadata[item].strip()) + part = metadata[item] elif item in ('original_name'): # First we check if we have metadata['original_name']. # We have to do this for backwards compatibility because # we original did not store this back into EXIF. if metadata[item]: part = os.path.splitext(metadata['original_name'])[0] - else: - # We didn't always store original_name so this is - # for backwards compatability. - # We want to remove the hardcoded date prefix we used - # to add to the name. - # This helps when re-running the program on file - # which were already processed. - part = re.sub( - '^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-', - '', - metadata['base_name'] - ) - if(len(part) == 0): - part = metadata['base_name'] - # Lastly we want to sanitize the name - return re.sub(self.whitespace_regex, '_', part.strip()) elif item in 'custom': # Fallback string - return mask[1:-1] + part = mask[1:-1] - return '' + return part - - def get_path(self, metadata, db, subdirs=''): + def get_path(self, metadata, db, subdirs='', whitespace_sub='_'): """path_format: {%Y-%d-%m}/%u{city}/{album} Returns file path. @@ -278,23 +263,26 @@ class FileSystem(object): # p = [] for this_part in this_parts: # parts = '' - for item, mask in self.items.items(): - matched = re.search(mask, this_part) + for item, regex in self.items.items(): + matched = re.search(regex, this_part) if matched: # parts = re.split(mask, this_part) # parts = this_part.split('%')[1:] part = self.get_part(item, matched.group()[1:-1], metadata, db, subdirs) + part = part.strip() + # Capitalization - umask = '%u' + mask - lmask = '%l' + mask - if re.search(umask, this_part): - this_part = re.sub(umask, part.upper(), this_part) - elif re.search(lmask, this_part): - this_part = re.sub(lmask, part.lower(), this_part) + u_regex = '%u' + regex + l_regex = '%l' + regex + if re.search(u_regex, this_part): + this_part = re.sub(u_regex, part.upper(), this_part) + elif re.search(l_regex, this_part): + this_part = re.sub(l_regex, part.lower(), this_part) else: - this_part = re.sub(mask, part, this_part) + this_part = re.sub(regex, part, this_part) + if this_part: # Check if all masks are substituted @@ -308,14 +296,18 @@ class FileSystem(object): break # Else we continue for fallbacks - return os.path.join(*path) + if(len(path[-1]) == 0): + path[-1] = metadata['base_name'] + path_string = os.path.join(*path) - def get_date_from_string(self, string, user_regex=None): - # If missing datetime from EXIF data check if filename is in datetime format. - # For this use a user provided regex if possible. - # Otherwise assume a filename such as IMG_20160915_123456.jpg as default. + if whitespace_sub != ' ': + # Lastly we want to sanitize the name + path_string = re.sub(self.whitespace_regex, whitespace_sub, path_string) + return path_string + + def match_date_from_string(self, string, user_regex=None): if user_regex is not None: matches = re.findall(user_regex, string) else: @@ -325,30 +317,38 @@ class FileSystem(object): 'a': re.compile( r'.*[_-]?(?P\d{4})[_-]?(?P\d{2})[_-]?(?P\d{2})[_-]?(?P\d{2})[_-]?(?P\d{2})[_-]?(?P\d{2})'), 'b': re.compile ( - '[-_./](?P\d{4})[-_.]?(?P\d{2})[-_.]?(?P\d{2})[-_./]'), + r'[-_./](?P\d{4})[-_.]?(?P\d{2})[-_.]?(?P\d{2})[-_./]'), # not very accurate 'c': re.compile ( - '[-_./](?P\d{2})[-_.]?(?P\d{2})[-_.]?(?P\d{2})[-_./]'), + r'[-_./](?P\d{2})[-_.]?(?P\d{2})[-_.]?(?P\d{2})[-_./]'), 'd': re.compile ( - '[-_./](?P\d{2})[-_.](?P\d{2})[-_.](?P\d{4})[-_./]') + r'[-_./](?P\d{2})[-_.](?P\d{2})[-_.](?P\d{4})[-_./]') } - matches = [] for i, rx in regex.items(): - match = re.findall(rx, string) - if match != []: - if i == 'c': - match = [('20'+match[0][0],match[0][1],match[0][2])] - elif i == 'd': - # reorder items - match = [(match[0][2],match[0][1],match[0][0])] - # matches = match + matches - if len(match) != 1: - # The time string is not uniq - continue - matches.append((match[0], rx)) - # We want only the first match for the moment - break + yield i, rx + + def get_date_from_string(self, string, user_regex=None): + # If missing datetime from EXIF data check if filename is in datetime format. + # For this use a user provided regex if possible. + # Otherwise assume a filename such as IMG_20160915_123456.jpg as default. + + matches = [] + for i, rx in self.match_date_from_string(string, user_regex): + match = re.findall(rx, string) + if match != []: + if i == 'c': + match = [('20' + match[0][0], match[0][1], match[0][2])] + elif i == 'd': + # reorder items + match = [(match[0][2], match[0][1], match[0][0])] + # matches = match + matches + if len(match) != 1: + # The time string is not uniq + continue + matches.append((match[0], rx)) + # We want only the first match for the moment + break # check if there is only one result if len(set(matches)) == 1: @@ -368,7 +368,6 @@ class FileSystem(object): return None - def get_date_taken(self, metadata): ''' Get the date taken from metadata or filename @@ -397,8 +396,8 @@ class FileSystem(object): self.logger.warn(f"{basename} time mark is more recent than {date_created}") return date_filename if True: + # TODO warm and ask for confirmation if date_created is not None: - # TODO warm and ask for confirmation return date_created elif metadata['date_modified'] is not None: return metadata['date_modified'] diff --git a/dozo/media/media.py b/dozo/media/media.py index bad6605..2ff8579 100644 --- a/dozo/media/media.py +++ b/dozo/media/media.py @@ -352,9 +352,9 @@ class Media(): try: if(key in exif): # correct nasty formated date - regex = re.compile('(\d{4}):(\d{2}):(\d{2})') + regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})') if(re.match(regex , exif[key]) is not None): # noqa - exif[key] = re.sub(regex ,'\g<1>-\g<2>-\g<3>',exif[key]) + exif[key] = re.sub(regex , r'\g<1>-\g<2>-\g<3>', exif[key]) return parse(exif[key]) # if(re.match('\d{4}(-|:)\d{2}(-|:)\d{2}', exif[key]) is not None): # noqa # dt, tm = exif[key].split(' ')