Retrieve date from filename using get_date_from_string function

This commit is contained in:
Cédric Leporcq 2022-04-18 20:14:51 +02:00
parent b883d9ca36
commit 114187415f
4 changed files with 75 additions and 21 deletions

View File

@ -37,7 +37,13 @@ class ExifMetadata:
'QuickTime:CreationDate-und-US', 'QuickTime:CreationDate-und-US',
'QuickTime:MediaCreateDate', 'QuickTime:MediaCreateDate',
] ]
tags_keys['date_modified'] = ['File:FileModifyDate', 'QuickTime:ModifyDate'] tags_keys['date_modified'] = [
'EXIF:ModifyDate',
'QuickTime:ModifyDate',
]
tags_keys['file_modify_date'] = [
'File:FileModifyDate',
]
tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make'] tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model'] tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album'] tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
@ -80,9 +86,13 @@ class ExifMetadata:
try: try:
# correct nasty formatted date # correct nasty formatted date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})') regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})[-_ .]')
if re.match(regex, value) is not None: # noqa if re.match(regex, value):
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value) value = re.sub(regex, r'\g<1>-\g<2>-\g<3> ', value)
else:
regex = re.compile(r'(\d{4})(\d{2})(\d{2})[-_ .]?(\d{2})?(\d{2})?(\d{2})?')
if re.match(regex, value):
value = re.sub(regex, r'\g<1>-\g<2>-\g<3> \g<4>:\g<5>:\g<6>', value)
return parser.parse(value) return parser.parse(value)
except BaseException or parser._parser.ParserError as e: except BaseException or parser._parser.ParserError as e:
self.log.warning(e.args, value) self.log.warning(e.args, value)
@ -338,13 +348,15 @@ class Media(ReadExif):
stem = os.path.splitext(filename)[0] stem = os.path.splitext(filename)[0]
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
if self.metadata['original_name']: if self.metadata['original_name']:
date_filename = self.get_date_format(self.metadata['original_name']) date_filename = utils.get_date_from_string(self.metadata['original_name'])
else: else:
date_filename = self.get_date_format(stem) date_filename = utils.get_date_from_string(stem)
self.log.debug(f'date_filename: {date_filename}')
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
date_created = self.metadata['date_created'] date_created = self.metadata['date_created']
date_modified = self.metadata['date_modified'] date_modified = self.metadata['date_modified']
file_modify_date = self.metadata['file_modify_date']
if self.metadata['date_original']: if self.metadata['date_original']:
if date_filename and date_filename != date_original: if date_filename and date_filename != date_original:
self.log.warning( self.log.warning(
@ -372,6 +384,8 @@ class Media(ReadExif):
self.log.warning( self.log.warning(
f"{filename} time mark is more recent than {date_created}" f"{filename} time mark is more recent than {date_created}"
) )
return date_created
if self.interactive: if self.interactive:
choices = [ choices = [
(f"date filename:'{date_filename}'", date_filename), (f"date filename:'{date_filename}'", date_filename),
@ -383,18 +397,24 @@ class Media(ReadExif):
return date_filename return date_filename
if self.use_file_dates: if date_created:
if date_created: self.log.warning(
self.log.warning( f"use date created:{date_created} for {self.file_path}"
f"use date created:{date_created} for {self.file_path}" )
) return date_created
return date_created
if date_modified: if date_modified:
self.log.warning(
f"use date modified:{date_modified} for {self.file_path}"
)
return date_modified
if self.use_file_dates:
if file_modify_date:
self.log.warning( self.log.warning(
f"use date modified:{date_modified} for {self.file_path}" f"use date modified:{file_modify_date} for {self.file_path}"
) )
return date_modified return file_modify_date
elif self.interactive: elif self.interactive:
choices = [] choices = []
@ -404,6 +424,10 @@ class Media(ReadExif):
choices.append((f"date created:'{date_created}'", date_created)) choices.append((f"date created:'{date_created}'", date_created))
if date_modified: if date_modified:
choices.append((f"date modified:'{date_modified}'", date_modified)) choices.append((f"date modified:'{date_modified}'", date_modified))
if file_modify_date:
choices.append(
(f"date modified:'{file_modify_date}'", file_modify_date)
)
choices.append(("custom", None)) choices.append(("custom", None))
default = date_filename default = date_filename
return self._get_date_media_interactive(choices, default) return self._get_date_media_interactive(choices, default)

View File

@ -1,7 +1,10 @@
from math import radians, cos, sqrt from math import radians, cos, sqrt
from datetime import datetime from datetime import datetime
import hashlib import hashlib
import os
import platform
import re import re
import subprocess
def checksum(file_path, blocksize=65536): def checksum(file_path, blocksize=65536):
@ -83,14 +86,17 @@ def get_date_regex(user_regex=None):
return regex return regex
def get_date_from_string(string, user_regex=None): DATE_REGEX = get_date_regex()
def get_date_from_string(string):
"""Retrieve date stamp from string""" """Retrieve date stamp from string"""
# If missing datetime from EXIF data check if filename is in datetime format. # If missing datetime from EXIF data check if filename is in datetime format.
# For this use a user provided regex if possible. # For this use a user provided regex if possible.
# Otherwise assume a filename such as IMG_20160915_123456.jpg as default. # Otherwise assume a filename such as IMG_20160915_123456.jpg as default.
matches = [] matches = []
for i, regex in get_date_regex(user_regex).items(): for i, regex in DATE_REGEX.items():
match = re.findall(regex, string) match = re.findall(regex, string)
if match != []: if match != []:
if i == 'c': if i == 'c':
@ -118,6 +124,13 @@ def get_date_from_string(string, user_regex=None):
return date return date
def match_date_regex(regex, value):
    """Rewrite date-like matches in *value* with hyphen separators.

    If *regex* matches at the start of *value*, every occurrence of *regex*
    in *value* is replaced by its first three capture groups joined with
    hyphens and followed by a trailing hyphen (``g1-g2-g3-``). If there is
    no match at the start, *value* is returned unchanged.

    *regex* must define at least three capture groups for the substitution
    to succeed.
    """
    pattern = re.compile(regex)
    # Guard clause: only rewrite when the string begins with a match.
    if pattern.match(value) is None:
        return value
    return pattern.sub(r'\g<1>-\g<2>-\g<3>-', value)
def split_part(dedup_regex, path_part, items=None): def split_part(dedup_regex, path_part, items=None):
""" """
Split part from regex Split part from regex
@ -161,9 +174,6 @@ def camel2snake(name):
r'(?!^)[A-Z]', lambda x: '_' + x.group(0).lower(), name[1:] r'(?!^)[A-Z]', lambda x: '_' + x.group(0).lower(), name[1:]
) )
import os
import platform
import subprocess
def open_file(path): def open_file(path):
if platform.system() == "Windows": if platform.system() == "Windows":

View File

@ -24,6 +24,7 @@ class TestSqlite:
'DateOriginal': datetime(2013, 3, 27), 'DateOriginal': datetime(2013, 3, 27),
'DateCreated': 'date_created', 'DateCreated': 'date_created',
'DateModified': 'date_modified', 'DateModified': 'date_modified',
'FileModifyDate': 'file_modify_date',
'CameraMake': 'camera_make', 'CameraMake': 'camera_make',
'CameraModel': 'camera_model', 'CameraModel': 'camera_model',
'OriginalName':'original_name', 'OriginalName':'original_name',
@ -64,7 +65,24 @@ class TestSqlite:
def test_add_metadata_data(self): def test_add_metadata_data(self):
result = tuple(self.sqlite.cur.execute("""select * from metadata where result = tuple(self.sqlite.cur.execute("""select * from metadata where
rowid=1""").fetchone()) rowid=1""").fetchone())
assert result == ('file_path', 'checksum', 'album', 'title', 2, '2012-03-27 00:00:00', '2013-03-27 00:00:00', 'date_created', 'date_modified', 'camera_make', 'camera_model', 'original_name', 'src_path', 'subdirs', 'filename') assert result == (
'file_path',
'checksum',
'album',
'title',
2,
'2012-03-27 00:00:00',
'2013-03-27 00:00:00',
'date_created',
'date_modified',
'file_modify_date',
'camera_make',
'camera_model',
'original_name',
'src_path',
'subdirs',
'filename'
)
def test_get_checksum(self): def test_get_checksum(self):
assert not self.sqlite.get_checksum('invalid') assert not self.sqlite.get_checksum('invalid')

View File

@ -103,6 +103,8 @@ class TestMedia:
assert date_media == media.metadata['date_created'] assert date_media == media.metadata['date_created']
elif media.metadata['date_modified']: elif media.metadata['date_modified']:
assert date_media == media.metadata['date_modified'] assert date_media == media.metadata['date_modified']
elif media.metadata['file_modify_date']:
assert date_media == media.metadata['file_modify_date']
# Will be changed to get_metadata # Will be changed to get_metadata
# check if metadata types are correct # check if metadata types are correct