Fix date detection in filenames

This commit is contained in:
Cédric Leporcq 2022-07-23 20:15:34 +02:00
parent 22e87223a3
commit dde2f4f66f
5 changed files with 21 additions and 19 deletions

View File

@ -218,7 +218,7 @@ def _check(**kwargs):
@click.argument('subdirs', required=False, nargs=-1, type=click.Path()) @click.argument('subdirs', required=False, nargs=-1, type=click.Path())
@click.argument('collection', required=True, nargs=1, type=click.Path()) @click.argument('collection', required=True, nargs=1, type=click.Path())
def _clean(**kwargs): def _clean(**kwargs):
"""Remove empty folders""" """Clean media collection"""
folders = kwargs['folders'] folders = kwargs['folders']
log_level = log.get_level(kwargs['verbose']) log_level = log.get_level(kwargs['verbose'])

View File

@ -138,14 +138,13 @@ class FPath:
elif item == 'name': elif item == 'name':
# Remove date prefix added to the name. # Remove date prefix added to the name.
part = stem part = stem
for regex in utils.get_date_regex().values(): date_filename, regex = utils.get_date_from_string(stem)
part = re.sub(regex, '', part) if date_filename:
sep = re.findall(regex, stem)[0][3]
part = re.sub(regex, sep, part)
# Delete separator # Delete separator
if re.search('^[-_ .]', part): if re.search('^[-_ .]', part):
part = part[1:] part = part[1:]
if part != stem:
# We only want to match first result
break
elif item == 'date': elif item == 'date':
date = metadata['date_media'] date = metadata['date_media']
# early morning photos can be grouped with previous day # early morning photos can be grouped with previous day
@ -643,7 +642,7 @@ class SortMedias:
conflict = self.check_conflicts(src_path, dest_path, remove_duplicates) conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)
for i in range(1, 100): for i in range(1, 1000):
if conflict != 1: if conflict != 1:
break break

View File

@ -359,9 +359,9 @@ class Media(ReadExif):
stem = os.path.splitext(filename)[0] stem = os.path.splitext(filename)[0]
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
if self.metadata['original_name']: if self.metadata['original_name']:
date_filename = utils.get_date_from_string(self.metadata['original_name']) date_filename, _ = utils.get_date_from_string(self.metadata['original_name'])
else: else:
date_filename = utils.get_date_from_string(stem) date_filename, _ = utils.get_date_from_string(stem)
self.log.debug(f'date_filename: {date_filename}') self.log.debug(f'date_filename: {date_filename}')
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
@ -387,7 +387,7 @@ class Media(ReadExif):
return self.metadata['date_original'] return self.metadata['date_original']
self.log.warning(f"could not find original date for {self.file_path}") self.log.warning(f"could not find date original for {self.file_path}")
if self.use_date_filename and date_filename: if self.use_date_filename and date_filename:
self.log.info( self.log.info(

View File

@ -69,17 +69,17 @@ def get_date_regex(user_regex=None):
# regex to match date format type %Y%m%d, %y%m%d, %d%m%Y, # regex to match date format type %Y%m%d, %y%m%d, %d%m%Y,
# etc... # etc...
'a': re.compile( 'a': re.compile(
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})' r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})([-_./ ])'
), ),
'b': re.compile( 'b': re.compile(
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]' r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])'
), ),
# not very accurate # not very accurate
'c': re.compile( 'c': re.compile(
r'[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]' r'[-_./ ](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])'
), ),
'd': re.compile( 'd': re.compile(
r'[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]' r'[-_./ ](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})([-_./ ])'
), ),
} }
@ -104,7 +104,8 @@ def get_date_from_string(string):
elif i == 'd': elif i == 'd':
# reorder items # reorder items
match = [(match[0][2], match[0][1], match[0][0])] match = [(match[0][2], match[0][1], match[0][0])]
# matches = match + matches else:
match = [(match[0][0], match[0][1], match[0][2])]
if len(match) != 1: if len(match) != 1:
# The time string is not unique # The time string is not unique
continue continue
@ -119,9 +120,11 @@ def get_date_from_string(string):
date_object = tuple(map(int, matches[0][0])) date_object = tuple(map(int, matches[0][0]))
date = datetime(*date_object) date = datetime(*date_object)
except (KeyError, ValueError): except (KeyError, ValueError):
return None return None, matches[0][1]
return date return date, matches[0][1]
return None, None
def match_date_regex(regex, value): def match_date_regex(regex, value):

View File

@ -90,10 +90,10 @@ class TestMedia:
date_filename = None date_filename = None
for tag in media.tags_keys['original_name']: for tag in media.tags_keys['original_name']:
if tag in exif_data: if tag in exif_data:
date_filename = get_date_from_string(exif_data[tag]) date_filename, _ = get_date_from_string(exif_data[tag])
break break
if not date_filename: if not date_filename:
date_filename = get_date_from_string(file_path.name) date_filename, _ = get_date_from_string(file_path.name)
if media.metadata['date_original']: if media.metadata['date_original']:
assert date_media == media.metadata['date_original'] assert date_media == media.metadata['date_original']