From 86d88b72c85a51bd98ef6cf29c7b1f53cc96f566 Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Fri, 24 Sep 2021 18:50:38 +0200 Subject: [PATCH] Add options and interactive mode for handling media date --- ordigi.py | 7 ++- ordigi/collection.py | 28 ++++----- ordigi/database.py | 2 +- ordigi/media.py | 123 +++++++++++++++++++++++++++++---------- tests/test_collection.py | 42 ++++--------- tests/test_media.py | 28 +++++++++ 6 files changed, 154 insertions(+), 76 deletions(-) diff --git a/ordigi.py b/ordigi.py index 9b68f18..ae0352d 100755 --- a/ordigi.py +++ b/ordigi.py @@ -84,6 +84,10 @@ def _get_exclude(opt, exclude): and a file hash') @click.option('--reset-cache', '-r', default=False, is_flag=True, help='Regenerate the hash.json and location.json database ') +@click.option('--use-date-filename', '-f', default=False, is_flag=True, + help="Use filename date for media original date.") +@click.option('--use-file-dates', '-F', default=False, is_flag=True, + help="Use file date created or modified for media original date.") @click.argument('paths', required=True, nargs=-1, type=click.Path()) def sort(**kwargs): """Sort files or directories by reading their EXIF and organizing them @@ -134,7 +138,8 @@ def sort(**kwargs): collection = Collection(destination, opt['path_format'], kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'], exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'], - logger, max_deep, mode) + logger, max_deep, mode, kwargs['use_date_filename'], + kwargs['use_file_dates']) loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout']) diff --git a/ordigi/collection.py b/ordigi/collection.py index 8512ef6..d1357a5 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -30,7 +30,8 @@ class Collection(object): def __init__(self, root, path_format, album_from_folder=False, cache=False, day_begins=0, dry_run=False, exclude=set(), filter_by_ext=set(), glob='**/*', interactive=False, - logger=logging.getLogger(), max_deep=None, mode='copy'): + logger=logging.getLogger(), max_deep=None, mode='copy', + use_date_filename=False, use_file_dates=False): # Attributes self.root = Path(root).expanduser().absolute() @@ -60,8 +61,9 @@ class Collection(object): self.logger = logger self.max_deep = max_deep self.mode = mode - self.summary = Summary() + self.use_date_filename = use_date_filename + self.use_file_dates = use_file_dates self.whitespace_regex = '[ \t\n\r\f\v]+' # Constants @@ -88,13 +90,12 @@ class Collection(object): 'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string } - def check_for_early_morning_photos(self, date): + def _check_for_early_morning_photos(self, date): """check for early hour photos to be grouped with previous day""" - if date.hour < self.day_begins: - self.logger.info('moving this photo to the previous day for\ - classification purposes (day_begins=' + str(self.day_begins) + ')') - date = date - timedelta(hours=date.hour+1) # push it to the day before for classificiation purposes + self.logger.info("moving this photo to the previous day for classification purposes") + # push it to the day before for classification purposes + date = date - timedelta(hours=date.hour+1) return date @@ -162,10 +163,10 @@ class Collection(object): for i, rx in get_date_regex(basename): part = re.sub(rx, '', part) elif item == 'date': - date = metadata['date_taken'] + date = metadata['date_media'] # early morning photos can be grouped with previous day - date = self.check_for_early_morning_photos(date) if date is not None: + date = self._check_for_early_morning_photos(date) part = date.strftime(mask) elif item == 'folder': part = os.path.basename(subdirs) @@ -577,12 +578,12 @@ class Collection(object): return path - def set_utime_from_metadata(self, date_taken, file_path): + def set_utime_from_metadata(self, date_media, file_path): """ Set the modification time on the file based on the file name. """ # Initialize date taken to what's returned from the metadata function. - os.utime(file_path, (int(datetime.now().timestamp()), int(date_taken.timestamp()))) + os.utime(file_path, (int(datetime.now().timestamp()), int(date_media.timestamp()))) def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False): # cycle throught files @@ -678,8 +679,9 @@ class Collection(object): subdirs = src_path.relative_to(path).parent # Process files src_checksum = self.checksum(src_path) - media = Media(src_path, path, self.album_from_folder, ignore_tags, - self.interactive, self.logger) + media = Media(src_path, path, self.album_from_folder, + ignore_tags, self.interactive, self.logger, + self.use_date_filename, self.use_file_dates) if media: metadata = media.get_metadata(loc, self.db, self.cache) # Get the destination path according to metadata diff --git a/ordigi/database.py b/ordigi/database.py index 444cee8..dee0a68 100644 --- a/ordigi/database.py +++ b/ordigi/database.py @@ -46,7 +46,7 @@ class Sqlite: 'Checksum': 'text', 'Album': 'text', 'LocationId': 'integer', - 'DateTaken': 'text', + 'DateMedia': 'text', 'DateOriginal': 'text', 'DateCreated': 'text', 'DateModified': 'text', diff --git a/ordigi/media.py b/ordigi/media.py index 8ade7a1..d8a2999 100644 --- a/ordigi/media.py +++ b/ordigi/media.py @@ -34,8 +34,9 @@ class Media(): extensions = PHOTO + AUDIO + VIDEO - def __init__(self, file_path, root, album_from_folder=False, ignore_tags=set(), - interactive=False, logger=logging.getLogger()): + def __init__(self, file_path, root, album_from_folder=False, + ignore_tags=set(), interactive=False, logger=logging.getLogger(), + use_date_filename=False, use_file_dates=False): """ :params: Path, Path, bool, set, bool, Logger """ @@ -46,12 +47,14 @@ class Media(): self.filename = str(file_path.name) self.album_from_folder = album_from_folder - self.ignore_tags = ignore_tags - self.tags_keys = self.get_tags() self.exif_metadata = None + self.ignore_tags = ignore_tags self.interactive = interactive - self.metadata = None self.logger = logger + self.metadata = None + self.tags_keys = self.get_tags() + self.use_date_filename = use_date_filename + self.use_file_dates = use_file_dates self.theme = request.load_theme() @@ -203,7 +206,27 @@ class Media(): return None - def get_date_taken(self): + def _get_date_media_interactive(self, choices, default): + print(f"Date conflict for file: {self.file_path}") + choices_list = [ + inquirer.List('date_list', + message=f"Choice appropriate original date", + choices=choices, + default=default + ), + ] + prompt = [ + inquirer.Text('date_custom', message="date"), + ] + + answers = inquirer.prompt(choices_list, theme=self.theme) + if not answers['date_list']: + answers = inquirer.prompt(prompt, theme=self.theme) + return get_date_from_string(answers['date_custom']) + else: + return answers['date_list'] + + def get_date_media(self): ''' Get the date taken from self.metadata or filename :returns: datetime or None. @@ -213,42 +236,78 @@ class Media(): basename = os.path.splitext(self.metadata['filename'])[0] date_original = self.metadata['date_original'] - if self.metadata['original_name'] is not None: + if self.metadata['original_name']: date_filename = get_date_from_string(self.metadata['original_name']) else: date_filename = get_date_from_string(basename) + date_original = self.metadata['date_original'] date_created = self.metadata['date_created'] - if self.metadata['date_original'] is not None: - if (date_filename is not None and - date_filename != date_original): - self.logger.warn(f"{basename} time mark is different from {date_original}") - # TODO ask for keep date taken, filename time, or neither + date_modified = self.metadata['date_modified'] + if self.metadata['date_original']: + if (date_filename and date_filename != date_original): + self.logger.warning(f"{basename} time mark is different from {date_original}") + if self.interactive: + # Ask for keep date taken, filename time, or neither + choices = [ + (f"date original:'{date_original}'", date_original), + (f"date filename:'{date_filename}'", date_filename), + ("custom", None), + ] + default = f'{date_original}' + return self._get_date_media_interactive(choices, default) + return self.metadata['date_original'] - elif True: - if date_filename is not None: - if date_created is not None and date_filename > date_created: - self.logger.warn(f"{basename} time mark is more recent than {date_created}") - return date_filename - if True: - # TODO warm and ask for confirmation - if date_created is not None: + + self.logger.warning(f"could not find original date for {self.file_path}") + + if self.use_date_filename and date_filename: + self.logger.info(f"use date from filename:{date_filename} for {self.file_path}") + if date_created and date_filename > date_created: + self.logger.warning(f"{basename} time mark is more recent than {date_created}") + if self.interactive: + choices = [ + (f"date filename:'{date_filename}'", date_filename), + (f"date created:'{date_created}'", date_created), + ("custom", None), + ] + default = date_filename + return self._get_date_media_interactive(choices, default) + + return date_filename + + elif self.use_file_dates: + if date_created: + self.logger.warning(f"use date created:{date_created} for {self.file_path}") return date_created - elif self.metadata['date_modified'] is not None: - return self.metadata['date_modified'] + elif date_modified: + self.logger.warning(f"use date modified:{date_modified} for {self.file_path}") + return date_modified + elif self.interactive: + choices = [] + if date_filename: + choices.append((f"date filename:'{date_filename}'", + date_filename)) + if date_created: + choices.append((f"date created:'{date_created}'", date_created)) + if date_modified: + choices.append((f"date modified:'{date_modified}'", date_modified)) + choices.append(("custom", None)) + default = date_filename + return self._get_date_media_interactive(choices, default) def get_exif_metadata(self): # Get metadata from exiftool. self.exif_metadata = ExifToolCaching(self.file_path, logger=self.logger).asdict() def _set_album(self, album, folder): - print(f"Conflict for file: {self.file_path}") + print(f"Metadata conflict for file: {self.file_path}") choices_list = [ inquirer.List('album', message=f"Exif album is already set to {album}, choices", choices=[ - (f"album:'{album}'", f'{album}'), - (f"folder:'{folder}'", f'{folder}'), + (f"album:'{album}'", album), + (f"folder:'{folder}'", folder), ("custom", None), ], default=f'{album}' @@ -299,7 +358,12 @@ class Media(): self.metadata['src_path'] = self.root self.metadata['subdirs'] = self.subdirs self.metadata['filename'] = self.filename - self.metadata['date_taken'] = self.get_date_taken() + + original_name = self.metadata['original_name'] + if not original_name or original_name == '': + self.set_value('original_name', self.filename) + + self.metadata['date_media'] = self.get_date_media() if self.album_from_folder: album = self.metadata['album'] @@ -307,10 +371,6 @@ class Media(): if album and album != '': if self.interactive: answer = self._set_album(album, folder) - # print(f"Conflict for file: {self.file_path}") - # print(f"Exif album is already set to '{album}'', folder='{folder}'") - # i = f"Choice for 'album': (a) '{album}', (f) '{folder}', (c) custom ?\n" - # answer = input(i) if answer == 'c': self.metadata['album'] = input('album=') self.set_value('album', folder) @@ -321,6 +381,7 @@ class Media(): if not album or album == '': self.metadata['album'] = folder + self.set_value('album', folder) loc_keys = ('latitude', 'longitude', 'city', 'state', 'country', 'default') location_id = None @@ -388,7 +449,7 @@ class Media(): """ return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value) - def set_date_taken(self, date_key, time): + def set_date_media(self, date_key, time): """Set the date/time a photo was taken. :param datetime time: datetime object of when the photo was taken diff --git a/tests/test_collection.py b/tests/test_collection.py index 1165ddb..266ac24 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -2,6 +2,7 @@ from datetime import datetime import os import pytest +import shutil import sqlite3 from pathlib import Path import re @@ -15,7 +16,7 @@ from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exif from ordigi.collection import Collection from ordigi.geolocation import GeoLocation from ordigi.media import Media -from ordigi.utils import get_date_from_string, get_date_regex +from ordigi.utils import get_date_regex class TestCollection: @@ -34,7 +35,8 @@ class TestCollection: Test all parts """ # Item to search for: - collection = Collection(tmp_path, self.path_format) + collection = Collection(tmp_path, self.path_format, + use_date_filename=True, use_file_dates=True) items = collection.get_items() masks = [ '{album}', @@ -58,7 +60,8 @@ class TestCollection: ] for file_path in self.file_paths: - media = Media(file_path, self.src_path) + media = Media(file_path, self.src_path, use_date_filename=True, + use_file_dates=True) subdirs = file_path.relative_to(self.src_path).parent exif_tags = {} for key in ('album', 'camera_make', 'camera_model', 'latitude', @@ -79,6 +82,8 @@ class TestCollection: if item == 'basename': assert part == file_path.stem, file_path elif item == 'date': + if part == '': + media.get_date_media() assert datetime.strptime(part, mask[1:-1]) elif item == 'folder': assert part == subdirs.name, file_path @@ -107,32 +112,6 @@ class TestCollection: else: assert part == '', file_path - - def test_get_date_taken(self, tmp_path): - collection = Collection(tmp_path, self.path_format) - for file_path in self.file_paths: - exif_data = ExifToolCaching(str(file_path)).asdict() - media = Media(file_path, self.src_path) - metadata = media.get_metadata() - date_taken = media.get_date_taken() - - date_filename = None - for tag in media.tags_keys['original_name']: - if tag in exif_data: - date_filename = get_date_from_string(exif_data[tag]) - break - if not date_filename: - date_filename = get_date_from_string(file_path.name) - - if media.metadata['date_original']: - assert date_taken == media.metadata['date_original'] - elif date_filename: - assert date_taken == date_filename - elif media.metadata['date_created']: - assert date_taken == media.metadata['date_created'] - elif media.metadata['date_modified']: - assert date_taken == media.metadata['date_modified'] - def test_sort_files(self, tmp_path): collection = Collection(tmp_path, self.path_format, album_from_folder=True) loc = GeoLocation() @@ -182,6 +161,7 @@ class TestCollection: assert src_path.exists() else: assert not src_path.exists() + shutil.copyfile(dest_path, src_path) # TODO check for conflicts @@ -189,7 +169,9 @@ class TestCollection: # TODO check date def test__get_files_in_path(self, tmp_path): - collection = Collection(tmp_path, self.path_format, exclude='**/*.dng') + collection = Collection(tmp_path, self.path_format, + exclude={'**/*.dng',}, + use_date_filename=True, use_file_dates=True) paths = [x for x in collection._get_files_in_path(self.src_path, maxlevel=1, glob='**/photo*')] assert len(paths) == 6 diff --git a/tests/test_media.py b/tests/test_media.py index dd7249d..a0d9c4f 100644 --- a/tests/test_media.py +++ b/tests/test_media.py @@ -10,6 +10,7 @@ from ordigi import constants from ordigi.media import Media from ordigi.images import Images from ordigi.exiftool import ExifTool, ExifToolCaching +from ordigi.utils import get_date_from_string ORDIGI_PATH = Path(__file__).parent.parent CACHING = True @@ -70,6 +71,33 @@ class TestMetadata: if has_exif_data == False: assert not media.has_exif_data() + def test_get_date_media(self): + # collection = Collection(tmp_path, self.path_format, + # use_date_filename=True, use_file_dates=True) + for file_path in self.file_paths: + exif_data = ExifToolCaching(str(file_path)).asdict() + media = Media(file_path, self.src_path, use_date_filename=True, + use_file_dates=True) + metadata = media.get_metadata() + date_media = media.get_date_media() + + date_filename = None + for tag in media.tags_keys['original_name']: + if tag in exif_data: + date_filename = get_date_from_string(exif_data[tag]) + break + if not date_filename: + date_filename = get_date_from_string(file_path.name) + + if media.metadata['date_original']: + assert date_media == media.metadata['date_original'] + elif date_filename: + assert date_media == date_filename + elif media.metadata['date_created']: + assert date_media == media.metadata['date_created'] + elif media.metadata['date_modified']: + assert date_media == media.metadata['date_modified'] + # Will be changed to get_metadata # check if metatadata type are correct