From da764e94e63f5345b4dc0e66053a6c2f1d3274e9 Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Sat, 17 Apr 2021 05:08:58 +0200 Subject: [PATCH] Change date taken behaviour and use datetime format --- elodie.py | 2 +- elodie/filesystem.py | 158 +++++++++++++++++++++++-------- elodie/media/base.py | 4 +- elodie/media/media.py | 53 +++++++++-- elodie/media/photo.py | 47 --------- elodie/media/video.py | 59 ++---------- elodie/tests/filesystem_test.py | 11 +++ elodie/tests/media/audio_test.py | 18 ++-- elodie/tests/media/photo_test.py | 48 ++++------ 9 files changed, 216 insertions(+), 184 deletions(-) diff --git a/elodie.py b/elodie.py index ba13d1d..b0206b5 100755 --- a/elodie.py +++ b/elodie.py @@ -232,7 +232,7 @@ def update_time(media, file_path, time_string): sys.exit(1) time = datetime.strptime(time_string, time_format) - media.set_date_taken(time) + media.set_date_original(time) return True diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 125047f..3c3e8df 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -10,6 +10,7 @@ import os import re import shutil import time +from datetime import datetime from elodie import compatability from elodie import geolocation @@ -18,7 +19,7 @@ from elodie.config import load_config from elodie import constants from elodie.localstorage import Db -from elodie.media.base import Base, get_all_subclasses +from elodie.media import base from elodie.plugins.plugins import Plugins class FileSystem(object): @@ -95,7 +96,7 @@ class FileSystem(object): # If extensions is None then we get all supported extensions if not extensions: extensions = set() - subclasses = get_all_subclasses(Base) + subclasses = base.get_all_subclasses() for cls in subclasses: extensions.update(cls.extensions) @@ -155,7 +156,11 @@ class FileSystem(object): for this_part in parts: part, mask = this_part if part in ('date', 'day', 'month', 'year'): - this_value = time.strftime(mask, metadata['date_taken']) + date = 
self.get_date_taken(metadata) + if date is not None: + this_value = date.strftime(mask) + else: + this_value='' break elif part in ('location', 'city', 'state', 'country'): place_name = geolocation.place_name( @@ -371,6 +376,98 @@ class FileSystem(object): break return os.path.join(*path) + def get_date_from_string(self, string, user_regex=None): + # If missing datetime from EXIF data check if filename is in datetime format. + # For this use a user provided regex if possible. + # Otherwise assume a filename such as IMG_20160915_123456.jpg as default. + + if user_regex is not None: + matches = re.findall(user_regex, string) + else: + regex = { + # regex to match date format type %Y%m%d, %y%m%d, %d%m%Y, + # etc... + 'a': re.compile( + r'.*[_-]?(?P<year>\d{4})[_-]?(?P<month>\d{2})[_-]?(?P<day>\d{2})[_-]?(?P<hour>\d{2})[_-]?(?P<minute>\d{2})[_-]?(?P<second>\d{2})'), + 'b': re.compile ( + '[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'), + # not very accurate + 'c': re.compile ( + '[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'), + 'd': re.compile ( + '[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]') + } + + matches = [] + for i, rx in regex.items(): + match = re.findall(rx, string) + if match != []: + if i == 'c': + match = [('20'+match[0][0],match[0][1],match[0][2])] + elif i == 'd': + # reorder items + match = [(match[0][2],match[0][1],match[0][0])] + # matches = match + matches + if len(match) != 1: + # The time string is not uniq + continue + matches.append((match[0], rx)) + # We want only the first match for the moment + break + + # check if there is only one result + if len(set(matches)) == 1: + try: + # Convert str to int + date_object = tuple(map(int, matches[0][0])) + + time = False + if len(date_object) > 3: + time = True + + date = datetime(*date_object) + except (KeyError, ValueError): + return None + + return date + + return None + + + def get_date_taken(self, metadata): + ''' + Get the date taken from metadata or filename + :returns: datetime or None. 
+ ''' + if metadata is None: + return None + + basename = metadata['base_name'] + date_original = metadata['date_original'] + if metadata['original_name'] is not None: + date_filename = self.get_date_from_string(metadata['original_name']) + else: + date_filename = self.get_date_from_string(basename) + + date_created = metadata['date_created'] + if metadata['date_original'] is not None: + if (date_filename is not None and + date_filename != date_original): + log.warn(f"{basename} time mark is different from {date_original}") + # TODO ask for keep date taken, filename time, or neither + return metadata['date_original'] + elif True: + if date_filename is not None: + if date_created is not None and date_filename > date_created: + log.warn(f"{basename} time mark is more recent than {date_created}") + return date_filename + if True: + if date_created is not None: + # TODO warm and ask for confirmation + return date_created + elif metadata['date_modified'] is not None: + return metadata['date_modified'] + def get_dynamic_path(self, part, mask, metadata): """Parse a specific folder's name given a mask and metadata. @@ -382,6 +479,16 @@ class FileSystem(object): # Each part has its own custom logic and we evaluate a single part and return # the evaluated string. 
+ if part in ('date'): + # If Directory is in the config we assume full_path and its + # corresponding values (date, location) are also present + config_directory = self.default_folder_path_definition + if('Directory' in config): + config_directory = config['Directory'] + # Get date mask from config + mask = '' + if 'date' in config_directory: + mask = config_directory['date'] if part in ('custom'): custom_parts = re.findall('(%[a-z_]+)', mask) folder = mask @@ -391,19 +498,12 @@ class FileSystem(object): self.get_dynamic_path(i[1:], i, metadata) ) return folder - elif part in ('date'): - config = load_config(constants.CONFIG_FILE) - # If Directory is in the config we assume full_path and its - # corresponding values (date, location) are also present - config_directory = self.default_folder_path_definition - if('Directory' in config): - config_directory = config['Directory'] - date_mask = '' - if 'date' in config_directory: - date_mask = config_directory['date'] - return time.strftime(date_mask, metadata['date_taken']) - elif part in ('day', 'month', 'year'): - return time.strftime(mask, metadata['date_taken']) + elif part in ('date', 'day', 'month', 'year'): + date = self.get_date_taken(metadata) + if date is not None: + return date.strftime(mask) + else: + return '' elif part in ('location', 'city', 'state', 'country'): place_name = geolocation.place_name( metadata['latitude'], @@ -576,7 +676,6 @@ class FileSystem(object): if(exif_original_file_exists is True): # We can remove it as we don't need the initial file. os.remove(exif_original_file) - os.utime(dest_path, (stat.st_atime, stat.st_mtime)) else: if(exif_original_file_exists is True): # Move the newly processed file with any updated tags to the @@ -590,8 +689,8 @@ class FileSystem(object): # Set the utime based on what the original file contained # before we made any changes. # Then set the utime on the destination file based on metadata. 
- os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime)) - self.set_utime_from_metadata(metadata, dest_path) + date_taken = self.get_date_taken(metadata) + self.set_utime_from_metadata(date_taken, dest_path) db = Db() db.add_hash(checksum, dest_path) @@ -607,32 +706,15 @@ class FileSystem(object): return dest_path - def set_utime_from_metadata(self, metadata, file_path): + def set_utime_from_metadata(self, date_taken, file_path): """ Set the modification time on the file based on the file name. """ # Initialize date taken to what's returned from the metadata function. + os.utime(file_path, (int(datetime.now().timestamp()), int(date_taken.timestamp()))) # If the folder and file name follow a time format of # YYYY-MM-DD_HH-MM-SS-IMG_0001.JPG then we override the date_taken - date_taken = metadata['date_taken'] - base_name = metadata['base_name'] - year_month_day_match = re.search( - '^(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})', - base_name - ) - if(year_month_day_match is not None): - (year, month, day, hour, minute, second) = year_month_day_match.groups() # noqa - date_taken = time.strptime( - '{}-{}-{} {}:{}:{}'.format(year, month, day, hour, minute, second), # noqa - '%Y-%m-%d %H:%M:%S' - ) - os.utime(file_path, (time.time(), time.mktime(date_taken))) - else: - # We don't make any assumptions about time zones and - # assume local time zone. 
- date_taken_in_seconds = time.mktime(date_taken) - os.utime(file_path, (time.time(), (date_taken_in_seconds))) def should_exclude(self, path, regex_list=set(), needs_compiled=False): if(len(regex_list) == 0): diff --git a/elodie/media/base.py b/elodie/media/base.py index 83f29c5..3eae82d 100644 --- a/elodie/media/base.py +++ b/elodie/media/base.py @@ -90,7 +90,9 @@ class Base(object): source = self.source self.metadata = { - 'date_taken': self.get_date_taken(), + 'date_original': self.get_date_attribute(self.date_original), + 'date_created': self.get_date_attribute(self.date_created), + 'date_modified': self.get_date_attribute(self.date_modified), 'camera_make': self.get_camera_make(), 'camera_model': self.get_camera_model(), 'latitude': self.get_coordinate('latitude'), diff --git a/elodie/media/media.py b/elodie/media/media.py index 788b670..b9c82ec 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -14,6 +14,9 @@ import os import six # load modules +from elodie import log +from dateutil.parser import parse +import re from elodie.external.pyexiftool import ExifTool from elodie.media.base import Base @@ -33,13 +36,9 @@ class Media(Base): def __init__(self, source=None): super(Media, self).__init__(source) - self.exif_map = { - 'date_taken': [ - 'EXIF:DateTimeOriginal', - 'EXIF:CreateDate', - 'EXIF:ModifyDate' - ] - } + self.date_original = ['EXIF:DateTimeOriginal'] + self.date_created = ['EXIF:CreateDate'] + self.date_modified = ['File:FileModifyDate'] self.camera_make_keys = ['EXIF:Make', 'QuickTime:Make'] self.camera_model_keys = ['EXIF:Model', 'QuickTime:Model'] self.album_keys = ['XMP-xmpDM:Album', 'XMP:Album'] @@ -132,6 +131,42 @@ class Media(Base): return self.exif_metadata + + def get_date_attribute(self, tag): + """Get a date attribute. + :returns: time object or None + """ + exif = self.get_exiftool_attributes() + if not exif: + return None + # We need to parse a string from EXIF into a timestamp. 
+ # EXIF DateTimeOriginal and EXIF DateTime are both stored + # in %Y:%m:%d %H:%M:%S format + # we split on a space and then r':|-' -> convert to int -> .timetuple() + # the conversion in the local timezone + # EXIF DateTime is already stored as a timestamp + # Sourced from https://github.com/photo/frontend/blob/master/src/libraries/models/Photo.php#L500 # noqa + for key in tag: + try: + if(key in exif): + # correct nasty formated date + regex = re.compile('(\d{4}):(\d{2}):(\d{2})') + if(re.match(regex , exif[key]) is not None): # noqa + exif[key] = re.sub(regex ,'\g<1>-\g<2>-\g<3>',exif[key]) + return parse(exif[key]) + # if(re.match('\d{4}(-|:)\d{2}(-|:)\d{2}', exif[key]) is not None): # noqa + # dt, tm = exif[key].split(' ') + # dt_list = compile(r'-|:').split(dt) + # dt_list = dt_list + compile(r'-|:').split(tm) + # dt_list = map(int, dt_list) + # return datetime(*dt_list) + except BaseException or dateutil.parser._parser.ParserError as e: + log.error(e) + return None + + return None + + def get_camera_make(self): """Get the camera make stored in EXIF. @@ -228,7 +263,7 @@ class Media(Base): return status - def set_date_taken(self, time): + def set_date_original(self, time): """Set the date/time a photo was taken. 
:param datetime time: datetime object of when the photo was taken @@ -239,7 +274,7 @@ class Media(Base): tags = {} formatted_time = time.strftime('%Y:%m:%d %H:%M:%S') - for key in self.exif_map['date_taken']: + for key in self.date_original: tags[key] = formatted_time status = self.__set_tags(tags) diff --git a/elodie/media/photo.py b/elodie/media/photo.py index 07f3e04..7ba7244 100644 --- a/elodie/media/photo.py +++ b/elodie/media/photo.py @@ -9,12 +9,8 @@ from __future__ import absolute_import import imghdr import os -import re import time -from datetime import datetime -from re import compile -from elodie import log from .media import Media @@ -45,49 +41,6 @@ class Photo(Media): except ImportError: pass - def get_date_taken(self): - """Get the date which the photo was taken. - - The date value returned is defined by the min() of mtime and ctime. - - :returns: time object or None for non-photo files or 0 timestamp - """ - if(not self.is_valid()): - return None - - source = self.source - seconds_since_epoch = min(os.path.getmtime(source), os.path.getctime(source)) # noqa - - exif = self.get_exiftool_attributes() - if not exif: - return seconds_since_epoch - - # We need to parse a string from EXIF into a timestamp. 
- # EXIF DateTimeOriginal and EXIF DateTime are both stored - # in %Y:%m:%d %H:%M:%S format - # we split on a space and then r':|-' -> convert to int -> .timetuple() - # the conversion in the local timezone - # EXIF DateTime is already stored as a timestamp - # Sourced from https://github.com/photo/frontend/blob/master/src/libraries/models/Photo.php#L500 # noqa - for key in self.exif_map['date_taken']: - try: - if(key in exif): - if(re.match('\d{4}(-|:)\d{2}(-|:)\d{2}', exif[key]) is not None): # noqa - dt, tm = exif[key].split(' ') - dt_list = compile(r'-|:').split(dt) - dt_list = dt_list + compile(r'-|:').split(tm) - dt_list = map(int, dt_list) - time_tuple = datetime(*dt_list).timetuple() - seconds_since_epoch = time.mktime(time_tuple) - break - except BaseException as e: - log.error(e) - pass - - if(seconds_since_epoch == 0): - return None - - return time.gmtime(seconds_since_epoch) def is_valid(self): """Check the file extension against valid file extensions. diff --git a/elodie/media/video.py b/elodie/media/video.py index a53d238..9af6317 100644 --- a/elodie/media/video.py +++ b/elodie/media/video.py @@ -32,13 +32,18 @@ class Video(Media): def __init__(self, source=None): super(Video, self).__init__(source) - self.exif_map['date_taken'] = [ + self.date_original = [ + 'EXIF:DateTimeOriginal', + 'H264:DateTimeOriginal' + ] + self.date_created = [ + 'EXIF:CreateDate', 'QuickTime:CreationDate', 'QuickTime:CreateDate', 'QuickTime:CreationDate-und-US', - 'QuickTime:MediaCreateDate', - 'H264:DateTimeOriginal' + 'QuickTime:MediaCreateDate' ] + self.date_modified = ['File:FileModifyDate'] self.title_key = 'XMP:DisplayName' self.latitude_keys = [ 'XMP:GPSLatitude', @@ -53,51 +58,3 @@ class Video(Media): self.latitude_ref_key = 'EXIF:GPSLatitudeRef' self.longitude_ref_key = 'EXIF:GPSLongitudeRef' self.set_gps_ref = False - - def get_date_taken(self): - """Get the date which the photo was taken. - - The date value returned is defined by the min() of mtime and ctime. 
- - :returns: time object or None for non-photo files or 0 timestamp - """ - if(not self.is_valid()): - return None - - source = self.source - seconds_since_epoch = min(os.path.getmtime(source), os.path.getctime(source)) # noqa - - exif = self.get_exiftool_attributes() - for date_key in self.exif_map['date_taken']: - if date_key in exif: - # Example date strings we want to parse - # 2015:01:19 12:45:11-08:00 - # 2013:09:30 07:06:05 - date = re.search('([0-9: ]+)([-+][0-9:]+)?', exif[date_key]) - if(date is not None): - date_string = date.group(1) - date_offset = date.group(2) - try: - exif_seconds_since_epoch = time.mktime( - datetime.strptime( - date_string, - '%Y:%m:%d %H:%M:%S' - ).timetuple() - ) - if(exif_seconds_since_epoch < seconds_since_epoch): - seconds_since_epoch = exif_seconds_since_epoch - if date_offset is not None: - offset_parts = date_offset[1:].split(':') - offset_seconds = int(offset_parts[0]) * 3600 - offset_seconds = offset_seconds + int(offset_parts[1]) * 60 # noqa - if date_offset[0] == '-': - seconds_since_epoch - offset_seconds - elif date_offset[0] == '+': - seconds_since_epoch + offset_seconds - except: - pass - - if(seconds_since_epoch == 0): - return None - - return time.gmtime(seconds_since_epoch) diff --git a/elodie/tests/filesystem_test.py b/elodie/tests/filesystem_test.py index 683807f..06575e1 100644 --- a/elodie/tests/filesystem_test.py +++ b/elodie/tests/filesystem_test.py @@ -1324,3 +1324,14 @@ full_path=%year/%album|%month|%"foo"/%month del load_config.config assert path_definition == expected, path_definition + +def test_get_date_taken_without_exif(): + filesystem = FileSystem() + source = helper.get_file('no-exif.jpg') + photo = Photo(source) + date_taken = filesystem.get_date_taken(photo.get_metadata()) + + date_modified = photo.get_metadata()['date_modified'] + + assert date_taken == date_modified, date_taken + diff --git a/elodie/tests/media/audio_test.py b/elodie/tests/media/audio_test.py index e46fea6..fef670e 100644 
--- a/elodie/tests/media/audio_test.py +++ b/elodie/tests/media/audio_test.py @@ -72,11 +72,11 @@ def test_get_coordinate_longitude(): assert helper.isclose(coordinate, -95.3677), coordinate -def test_get_date_taken(): - audio = Audio(helper.get_file('audio.m4a')) - date_taken = audio.get_date_taken() +def test_get_date_original(): + media = Media(helper.get_file('audio.m4a')) + date_original = media.get_date_attribute('date_original') - assert date_taken == (2016, 1, 4, 5, 28, 15, 0, 4, 0), date_taken + assert date_original == (2016, 1, 4, 5, 28, 15, 0, 4, 0), date_original def test_get_exiftool_attributes(): audio = Video(helper.get_file('audio.m4a')) @@ -95,25 +95,25 @@ def test_is_not_valid(): assert not audio.is_valid() -def test_set_date_taken(): +def test_set_date_original(): temporary_folder, folder = helper.create_working_folder() origin = '%s/audio.m4a' % folder shutil.copyfile(helper.get_file('audio.m4a'), origin) - audio = Audio(origin) - status = audio.set_date_taken(datetime.datetime(2013, 9, 30, 7, 6, 5)) + media = Media(origin) + status = media.set_date_original(datetime.datetime(2013, 9, 30, 7, 6, 5)) assert status == True, status audio_new = Audio(origin) metadata = audio_new.get_metadata() - date_taken = metadata['date_taken'] + date_original = metadata['date_original'] shutil.rmtree(folder) - assert date_taken == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_taken'] + assert date_original == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_original'] def test_set_location(): temporary_folder, folder = helper.create_working_folder() diff --git a/elodie/tests/media/photo_test.py b/elodie/tests/media/photo_test.py index dd391ee..7bbcf35 100644 --- a/elodie/tests/media/photo_test.py +++ b/elodie/tests/media/photo_test.py @@ -122,21 +122,12 @@ def test_get_coordinates_with_null_coordinate(): assert latitude is None, latitude assert longitude is None, longitude -def test_get_date_taken(): - photo = Photo(helper.get_file('plain.jpg')) - date_taken 
= photo.get_date_taken() +def test_get_date_original(): + media = Media(helper.get_file('plain.jpg')) + date_original = media.get_date_attribute('date_original') - #assert date_taken == (2015, 12, 5, 0, 59, 26, 5, 339, 0), date_taken - assert date_taken == helper.time_convert((2015, 12, 5, 0, 59, 26, 5, 339, 0)), date_taken - -def test_get_date_taken_without_exif(): - source = helper.get_file('no-exif.jpg') - photo = Photo(source) - date_taken = photo.get_date_taken() - - date_taken_from_file = time.gmtime(min(os.path.getmtime(source), os.path.getctime(source))) - - assert date_taken == date_taken_from_file, date_taken + #assert date_original == (2015, 12, 5, 0, 59, 26, 5, 339, 0), date_original + assert date_original == helper.time_convert((2015, 12, 5, 0, 59, 26, 5, 339, 0)), date_original def test_get_camera_make(): photo = Photo(helper.get_file('with-location.jpg')) @@ -205,7 +196,7 @@ def test_set_album(): assert metadata_new['album'] == 'Test Album', metadata_new['album'] -def test_set_date_taken_with_missing_datetimeoriginal(): +def test_set_date_original_with_missing_datetimeoriginal(): # When datetimeoriginal (or other key) is missing we have to add it gh-74 # https://github.com/jmathai/elodie/issues/74 temporary_folder, folder = helper.create_working_folder() @@ -213,41 +204,42 @@ def test_set_date_taken_with_missing_datetimeoriginal(): origin = '%s/photo.jpg' % folder shutil.copyfile(helper.get_file('no-exif.jpg'), origin) - photo = Photo(origin) - status = photo.set_date_taken(datetime(2013, 9, 30, 7, 6, 5)) + media = Media(origin) + status = media.set_date_original(datetime.now()) assert status == True, status photo_new = Photo(origin) metadata = photo_new.get_metadata() - date_taken = metadata['date_taken'] + date_original = metadata['date_original'] shutil.rmtree(folder) - #assert date_taken == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_taken'] - assert date_taken == helper.time_convert((2013, 9, 30, 7, 6, 5, 0, 273, 0)), 
metadata['date_taken'] + #assert date_original == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_original'] + # assert date_original == helper.time_convert((2013, 9, 30, 7, 6, 5, 0, 273, 0)), metadata['date_original'] + assert date_original == datetime.now(), metadata['date_original'] -def test_set_date_taken(): +def test_set_date_original(): temporary_folder, folder = helper.create_working_folder() origin = '%s/photo.jpg' % folder shutil.copyfile(helper.get_file('plain.jpg'), origin) - photo = Photo(origin) - status = photo.set_date_taken(datetime(2013, 9, 30, 7, 6, 5)) + media = Media(origin) + status = media.set_date_original(datetime(2013, 9, 30, 7, 6, 5)) assert status == True, status photo_new = Photo(origin) metadata = photo_new.get_metadata() - date_taken = metadata['date_taken'] + date_original = metadata['date_original'] shutil.rmtree(folder) - #assert date_taken == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_taken'] - assert date_taken == helper.time_convert((2013, 9, 30, 7, 6, 5, 0, 273, 0)), metadata['date_taken'] + #assert date_original == (2013, 9, 30, 7, 6, 5, 0, 273, 0), metadata['date_original'] + assert date_original == helper.time_convert((2013, 9, 30, 7, 6, 5, 0, 273, 0)), metadata['date_original'] def test_set_location(): temporary_folder, folder = helper.create_working_folder() @@ -389,7 +381,7 @@ def _test_photo_type_get(type, date): shutil.rmtree(folder) - assert metadata['date_taken'] == helper.time_convert(date), '{} date {}'.format(type, metadata['date_taken']) + assert metadata['date_original'] == helper.time_convert(date), '{} date {}'.format(type, metadata['date_original']) def _test_photo_type_set(type, date): temporary_folder, folder = helper.create_working_folder() @@ -417,6 +409,6 @@ def _test_photo_type_set(type, date): shutil.rmtree(folder) - assert metadata['date_taken'] == helper.time_convert(date), '{} date {}'.format(type, metadata['date_taken']) + assert metadata['date_original'] == helper.time_convert(date), '{} 
date {}'.format(type, metadata['date_original']) assert helper.isclose(metadata['latitude'], 11.1111111111), '{} lat {}'.format(type, metadata['latitude']) assert helper.isclose(metadata['longitude'], 99.9999999999), '{} lon {}'.format(type, metadata['latitude'])