From dde2f4f66f9053801deae3191c7acb45fe9104d0 Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Sat, 23 Jul 2022 20:15:34 +0200 Subject: [PATCH] Fix date detection in filenames --- ordigi/cli.py | 2 +- ordigi/collection.py | 11 +++++------ ordigi/media.py | 6 +++--- ordigi/utils.py | 17 ++++++++++------- tests/test_media.py | 4 ++-- 5 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ordigi/cli.py b/ordigi/cli.py index fd066ed..30ae81c 100755 --- a/ordigi/cli.py +++ b/ordigi/cli.py @@ -218,7 +218,7 @@ def _check(**kwargs): @click.argument('subdirs', required=False, nargs=-1, type=click.Path()) @click.argument('collection', required=True, nargs=1, type=click.Path()) def _clean(**kwargs): - """Remove empty folders""" + """Clean media collection""" folders = kwargs['folders'] log_level = log.get_level(kwargs['verbose']) diff --git a/ordigi/collection.py b/ordigi/collection.py index 9bff2f0..3375fef 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -138,14 +138,13 @@ class FPath: elif item == 'name': # Remove date prefix added to the name. 
part = stem - for regex in utils.get_date_regex().values(): - part = re.sub(regex, '', part) + date_filename, regex = utils.get_date_from_string(stem) + if date_filename: + sep = re.findall(regex, stem)[0][3] + part = re.sub(regex, sep, part) # Delete separator if re.search('^[-_ .]', part): part = part[1:] - if part != stem: - # We only want to match first result - break elif item == 'date': date = metadata['date_media'] # early morning photos can be grouped with previous day @@ -643,7 +642,7 @@ class SortMedias: conflict = self.check_conflicts(src_path, dest_path, remove_duplicates) - for i in range(1, 100): + for i in range(1, 1000): if conflict != 1: break diff --git a/ordigi/media.py b/ordigi/media.py index ac8544e..14616f7 100644 --- a/ordigi/media.py +++ b/ordigi/media.py @@ -359,9 +359,9 @@ class Media(ReadExif): stem = os.path.splitext(filename)[0] date_original = self.metadata['date_original'] if self.metadata['original_name']: - date_filename = utils.get_date_from_string(self.metadata['original_name']) + date_filename, _ = utils.get_date_from_string(self.metadata['original_name']) else: - date_filename = utils.get_date_from_string(stem) + date_filename, _ = utils.get_date_from_string(stem) self.log.debug(f'date_filename: {date_filename}') date_original = self.metadata['date_original'] @@ -387,7 +387,7 @@ class Media(ReadExif): return self.metadata['date_original'] - self.log.warning(f"could not find original date for {self.file_path}") + self.log.warning(f"could not find date original for {self.file_path}") if self.use_date_filename and date_filename: self.log.info( diff --git a/ordigi/utils.py b/ordigi/utils.py index 5a54913..2a9529b 100644 --- a/ordigi/utils.py +++ b/ordigi/utils.py @@ -69,17 +69,17 @@ def get_date_regex(user_regex=None): # regex to match date format type %Y%m%d, %y%m%d, %d%m%Y, # etc... 
'a': re.compile( - r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})' + r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_.]?(?P<hour>\d{2})[-_.]?(?P<minute>\d{2})[-_.]?(?P<second>\d{2})([-_./ ])' ), 'b': re.compile( - r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]' + r'[-_./ ](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])' ), # not very accurate 'c': re.compile( - r'[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]' + r'[-_./ ](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})([-_./ ])' ), 'd': re.compile( - r'[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]' + r'[-_./ ](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})([-_./ ])' ), } @@ -104,7 +104,8 @@ def get_date_from_string(string): elif i == 'd': # reorder items match = [(match[0][2], match[0][1], match[0][0])] - # matches = match + matches + else: + match = [(match[0][0], match[0][1], match[0][2])] if len(match) != 1: # The time string is not uniq continue @@ -119,9 +120,11 @@ def get_date_from_string(string): date_object = tuple(map(int, matches[0][0])) date = datetime(*date_object) except (KeyError, ValueError): - return None + return None, matches[0][1] - return date + return date, matches[0][1] + + return None, None def match_date_regex(regex, value): diff --git a/tests/test_media.py b/tests/test_media.py index 9b4a379..cff7e4b 100644 --- a/tests/test_media.py +++ b/tests/test_media.py @@ -90,10 +90,10 @@ class TestMedia: date_filename = None for tag in media.tags_keys['original_name']: if tag in exif_data: - date_filename = get_date_from_string(exif_data[tag]) + date_filename, _ = get_date_from_string(exif_data[tag]) break if not date_filename: - date_filename = get_date_from_string(file_path.name) + date_filename, _ = get_date_from_string(file_path.name) if media.metadata['date_original']: assert date_media == media.metadata['date_original']