Fix date detection in filenames

This commit is contained in:
Cédric Leporcq 2022-07-23 20:15:34 +02:00
parent 52768f64db
commit 01b47c8c40
5 changed files with 22 additions and 19 deletions

View File

@ -218,7 +218,7 @@ def _check(**kwargs):
@click.argument('subdirs', required=False, nargs=-1, type=click.Path())
@click.argument('collection', required=True, nargs=1, type=click.Path())
def _clean(**kwargs):
"""Remove empty folders"""
"""Clean media collection"""
folders = kwargs['folders']
log_level = log.get_level(kwargs['verbose'])

View File

@ -138,14 +138,12 @@ class FPath:
elif item == 'name':
# Remove date prefix added to the name.
part = stem
for regex in utils.get_date_regex().values():
part = re.sub(regex, '', part)
date_filename, regex, sep = utils.get_date_from_string(stem)
if date_filename:
part = re.sub(regex, sep, part)
# Delete separator
if re.search('^[-_ .]', part):
part = part[1:]
if part != stem:
# We only want to match first result
break
elif item == 'date':
date = metadata['date_media']
# early morning photos can be grouped with previous day
@ -643,7 +641,7 @@ class SortMedias:
conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
for i in range(1, 100):
for i in range(1, 1000):
if conflict != 1:
break

View File

@ -359,9 +359,9 @@ class Media(ReadExif):
stem = os.path.splitext(filename)[0]
date_original = self.metadata['date_original']
if self.metadata['original_name']:
date_filename = utils.get_date_from_string(self.metadata['original_name'])
date_filename, _, _ = utils.get_date_from_string(self.metadata['original_name'])
else:
date_filename = utils.get_date_from_string(stem)
date_filename, _, _ = utils.get_date_from_string(stem)
self.log.debug(f'date_filename: {date_filename}')
date_original = self.metadata['date_original']
@ -387,7 +387,7 @@ class Media(ReadExif):
return self.metadata['date_original']
self.log.warning(f"could not find original date for {self.file_path}")
self.log.warning(f"could not find date original for {self.file_path}")
if self.use_date_filename and date_filename:
self.log.info(

View File

@ -69,17 +69,17 @@ def get_date_regex(user_regex=None):
# regex to match date format type %Y%m%d, %y%m%d, %d%m%Y,
# etc...
'a': re.compile(
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})'
r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})([-_./ ])'
),
'b': re.compile(
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'
r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])'
),
# not very accurate
'c': re.compile(
r'[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'
r'[-_./ ](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])'
),
'd': re.compile(
r'[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]'
r'[-_./ ](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})([-_./ ])'
),
}
@ -96,15 +96,18 @@ def get_date_from_string(string):
# Otherwise assume a filename such as IMG_20160915_123456.jpg as default.
matches = []
sep = ''
for i, regex in DATE_REGEX.items():
match = re.findall(regex, string)
if match != []:
sep = match[0][3]
if i == 'c':
match = [('20' + match[0][0], match[0][1], match[0][2])]
elif i == 'd':
# reorder items
match = [(match[0][2], match[0][1], match[0][0])]
# matches = match + matches
else:
match = [(match[0][0], match[0][1], match[0][2])]
if len(match) != 1:
# The time string is not unique
continue
@ -119,9 +122,11 @@ def get_date_from_string(string):
date_object = tuple(map(int, matches[0][0]))
date = datetime(*date_object)
except (KeyError, ValueError):
return None
return None, matches[0][1], sep
return date
return date, matches[0][1], sep
return None, None, sep
def match_date_regex(regex, value):

View File

@ -90,10 +90,10 @@ class TestMedia:
date_filename = None
for tag in media.tags_keys['original_name']:
if tag in exif_data:
date_filename = get_date_from_string(exif_data[tag])
date_filename, _, _ = get_date_from_string(exif_data[tag])
break
if not date_filename:
date_filename = get_date_from_string(file_path.name)
date_filename, _, _ = get_date_from_string(file_path.name)
if media.metadata['date_original']:
assert date_media == media.metadata['date_original']