Add options and interactive mode for handling media date

This commit is contained in:
Cédric Leporcq 2021-09-24 18:50:38 +02:00
parent 6203498f20
commit 86d88b72c8
6 changed files with 154 additions and 76 deletions

View File

@ -84,6 +84,10 @@ def _get_exclude(opt, exclude):
and a file hash')
@click.option('--reset-cache', '-r', default=False, is_flag=True,
help='Regenerate the hash.json and location.json database ')
@click.option('--use-date-filename', '-f', default=False, is_flag=True,
help="Use filename date for media original date.")
@click.option('--use-file-dates', '-F', default=False, is_flag=True,
help="Use file date created or modified for media original date.")
@click.argument('paths', required=True, nargs=-1, type=click.Path())
def sort(**kwargs):
"""Sort files or directories by reading their EXIF and organizing them
@ -134,7 +138,8 @@ def sort(**kwargs):
collection = Collection(destination, opt['path_format'],
kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'],
exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'],
logger, max_deep, mode)
logger, max_deep, mode, kwargs['use_date_filename'],
kwargs['use_file_dates'])
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])

View File

@ -30,7 +30,8 @@ class Collection(object):
def __init__(self, root, path_format, album_from_folder=False,
cache=False, day_begins=0, dry_run=False, exclude=set(),
filter_by_ext=set(), glob='**/*', interactive=False,
logger=logging.getLogger(), max_deep=None, mode='copy'):
logger=logging.getLogger(), max_deep=None, mode='copy',
use_date_filename=False, use_file_dates=False):
# Attributes
self.root = Path(root).expanduser().absolute()
@ -60,8 +61,9 @@ class Collection(object):
self.logger = logger
self.max_deep = max_deep
self.mode = mode
self.summary = Summary()
self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates
self.whitespace_regex = '[ \t\n\r\f\v]+'
# Constants
@ -88,13 +90,12 @@ class Collection(object):
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string
}
def check_for_early_morning_photos(self, date):
def _check_for_early_morning_photos(self, date):
"""check for early hour photos to be grouped with previous day"""
if date.hour < self.day_begins:
self.logger.info('moving this photo to the previous day for\
classification purposes (day_begins=' + str(self.day_begins) + ')')
date = date - timedelta(hours=date.hour+1) # push it to the day before for classificiation purposes
self.logger.info("moving this photo to the previous day for classification purposes")
# push it to the day before for classification purposes
date = date - timedelta(hours=date.hour+1)
return date
@ -162,10 +163,10 @@ class Collection(object):
for i, rx in get_date_regex(basename):
part = re.sub(rx, '', part)
elif item == 'date':
date = metadata['date_taken']
date = metadata['date_media']
# early morning photos can be grouped with previous day
date = self.check_for_early_morning_photos(date)
if date is not None:
date = self._check_for_early_morning_photos(date)
part = date.strftime(mask)
elif item == 'folder':
part = os.path.basename(subdirs)
@ -577,12 +578,12 @@ class Collection(object):
return path
def set_utime_from_metadata(self, date_taken, file_path):
def set_utime_from_metadata(self, date_media, file_path):
""" Set the modification time on the file based on the file name.
"""
# Access time is set to the current time; modification time is set to the date passed in.
os.utime(file_path, (int(datetime.now().timestamp()), int(date_taken.timestamp())))
os.utime(file_path, (int(datetime.now().timestamp()), int(date_media.timestamp())))
def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False):
# cycle through files
@ -678,8 +679,9 @@ class Collection(object):
subdirs = src_path.relative_to(path).parent
# Process files
src_checksum = self.checksum(src_path)
media = Media(src_path, path, self.album_from_folder, ignore_tags,
self.interactive, self.logger)
media = Media(src_path, path, self.album_from_folder,
ignore_tags, self.interactive, self.logger,
self.use_date_filename, self.use_file_dates)
if media:
metadata = media.get_metadata(loc, self.db, self.cache)
# Get the destination path according to metadata

View File

@ -46,7 +46,7 @@ class Sqlite:
'Checksum': 'text',
'Album': 'text',
'LocationId': 'integer',
'DateTaken': 'text',
'DateMedia': 'text',
'DateOriginal': 'text',
'DateCreated': 'text',
'DateModified': 'text',

View File

@ -34,8 +34,9 @@ class Media():
extensions = PHOTO + AUDIO + VIDEO
def __init__(self, file_path, root, album_from_folder=False, ignore_tags=set(),
interactive=False, logger=logging.getLogger()):
def __init__(self, file_path, root, album_from_folder=False,
ignore_tags=set(), interactive=False, logger=logging.getLogger(),
use_date_filename=False, use_file_dates=False):
"""
:params: Path, Path, bool, set, bool, Logger, bool, bool
"""
@ -46,12 +47,14 @@ class Media():
self.filename = str(file_path.name)
self.album_from_folder = album_from_folder
self.ignore_tags = ignore_tags
self.tags_keys = self.get_tags()
self.exif_metadata = None
self.ignore_tags = ignore_tags
self.interactive = interactive
self.metadata = None
self.logger = logger
self.metadata = None
self.tags_keys = self.get_tags()
self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates
self.theme = request.load_theme()
@ -203,7 +206,27 @@ class Media():
return None
def get_date_taken(self):
def _get_date_media_interactive(self, choices, default):
    """Ask the user which date should be used as the media date.

    A list prompt built from ``choices`` is shown first. When the selected
    value is falsy (for example the ``("custom", None)`` entry the callers
    append), a free-text prompt follows and its answer is parsed with
    ``get_date_from_string``.

    :param choices: list of ``(label, value)`` tuples offered to the user.
    :param default: value preselected in the list prompt.
    :returns: the selected value, the parsed custom date, or None when the
        user cancels one of the prompts.
    """
    print(f"Date conflict for file: {self.file_path}")
    date_question = [
        inquirer.List(
            'date_list',
            message="Choice appropriate original date",
            choices=choices,
            default=default,
        ),
    ]
    answers = inquirer.prompt(date_question, theme=self.theme)
    if answers is None:
        # inquirer.prompt() returns None when the prompt is cancelled
        # (e.g. Ctrl-C); subscripting it would raise TypeError.
        return None
    if answers['date_list']:
        return answers['date_list']

    # Falsy selection means the "custom" entry: ask for a free-form date.
    custom_question = [
        inquirer.Text('date_custom', message="date"),
    ]
    answers = inquirer.prompt(custom_question, theme=self.theme)
    if answers is None:
        return None
    return get_date_from_string(answers['date_custom'])
def get_date_media(self):
'''
Get the date taken from self.metadata or filename
:returns: datetime or None.
@ -213,42 +236,78 @@ class Media():
basename = os.path.splitext(self.metadata['filename'])[0]
date_original = self.metadata['date_original']
if self.metadata['original_name'] is not None:
if self.metadata['original_name']:
date_filename = get_date_from_string(self.metadata['original_name'])
else:
date_filename = get_date_from_string(basename)
date_original = self.metadata['date_original']
date_created = self.metadata['date_created']
if self.metadata['date_original'] is not None:
if (date_filename is not None and
date_filename != date_original):
self.logger.warn(f"{basename} time mark is different from {date_original}")
# TODO ask for keep date taken, filename time, or neither
date_modified = self.metadata['date_modified']
if self.metadata['date_original']:
if (date_filename and date_filename != date_original):
self.logger.warning(f"{basename} time mark is different from {date_original}")
if self.interactive:
# Ask for keep date taken, filename time, or neither
choices = [
(f"date original:'{date_original}'", date_original),
(f"date filename:'{date_filename}'", date_filename),
("custom", None),
]
default = f'{date_original}'
return self._get_date_media_interactive(choices, default)
return self.metadata['date_original']
elif True:
if date_filename is not None:
if date_created is not None and date_filename > date_created:
self.logger.warn(f"{basename} time mark is more recent than {date_created}")
return date_filename
if True:
# TODO warm and ask for confirmation
if date_created is not None:
self.logger.warning(f"could not find original date for {self.file_path}")
if self.use_date_filename and date_filename:
self.logger.info(f"use date from filename:{date_filename} for {self.file_path}")
if date_created and date_filename > date_created:
self.logger.warning(f"{basename} time mark is more recent than {date_created}")
if self.interactive:
choices = [
(f"date filename:'{date_filename}'", date_filename),
(f"date created:'{date_created}'", date_created),
("custom", None),
]
default = date_filename
return self._get_date_media_interactive(choices, default)
return date_filename
elif self.use_file_dates:
if date_created:
self.logger.warning(f"use date created:{date_created} for {self.file_path}")
return date_created
elif self.metadata['date_modified'] is not None:
return self.metadata['date_modified']
elif date_modified:
self.logger.warning(f"use date modified:{date_modified} for {self.file_path}")
return date_modified
elif self.interactive:
choices = []
if date_filename:
choices.append((f"date filename:'{date_filename}'",
date_filename))
if date_created:
choices.append((f"date created:'{date_created}'", date_created))
if date_modified:
choices.append((f"date modified:'{date_modified}'", date_modified))
choices.append(("custom", None))
default = date_filename
return self._get_date_media_interactive(choices, default)
def get_exif_metadata(self):
    """Load the file's metadata from exiftool into ``self.exif_metadata``."""
    exiftool = ExifToolCaching(self.file_path, logger=self.logger)
    self.exif_metadata = exiftool.asdict()
def _set_album(self, album, folder):
print(f"Conflict for file: {self.file_path}")
print(f"Metadata conflict for file: {self.file_path}")
choices_list = [
inquirer.List('album',
message=f"Exif album is already set to {album}, choices",
choices=[
(f"album:'{album}'", f'{album}'),
(f"folder:'{folder}'", f'{folder}'),
(f"album:'{album}'", album),
(f"folder:'{folder}'", folder),
("custom", None),
],
default=f'{album}'
@ -299,7 +358,12 @@ class Media():
self.metadata['src_path'] = self.root
self.metadata['subdirs'] = self.subdirs
self.metadata['filename'] = self.filename
self.metadata['date_taken'] = self.get_date_taken()
original_name = self.metadata['original_name']
if not original_name or original_name == '':
self.set_value('original_name', self.filename)
self.metadata['date_media'] = self.get_date_media()
if self.album_from_folder:
album = self.metadata['album']
@ -307,10 +371,6 @@ class Media():
if album and album != '':
if self.interactive:
answer = self._set_album(album, folder)
# print(f"Conflict for file: {self.file_path}")
# print(f"Exif album is already set to '{album}'', folder='{folder}'")
# i = f"Choice for 'album': (a) '{album}', (f) '{folder}', (c) custom ?\n"
# answer = input(i)
if answer == 'c':
self.metadata['album'] = input('album=')
self.set_value('album', folder)
@ -321,6 +381,7 @@ class Media():
if not album or album == '':
self.metadata['album'] = folder
self.set_value('album', folder)
loc_keys = ('latitude', 'longitude', 'city', 'state', 'country', 'default')
location_id = None
@ -388,7 +449,7 @@ class Media():
"""
return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)
def set_date_taken(self, date_key, time):
def set_date_media(self, date_key, time):
"""Set the date/time a photo was taken.
:param datetime time: datetime object of when the photo was taken

View File

@ -2,6 +2,7 @@
from datetime import datetime
import os
import pytest
import shutil
import sqlite3
from pathlib import Path
import re
@ -15,7 +16,7 @@ from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exif
from ordigi.collection import Collection
from ordigi.geolocation import GeoLocation
from ordigi.media import Media
from ordigi.utils import get_date_from_string, get_date_regex
from ordigi.utils import get_date_regex
class TestCollection:
@ -34,7 +35,8 @@ class TestCollection:
Test all parts
"""
# Item to search for:
collection = Collection(tmp_path, self.path_format)
collection = Collection(tmp_path, self.path_format,
use_date_filename=True, use_file_dates=True)
items = collection.get_items()
masks = [
'{album}',
@ -58,7 +60,8 @@ class TestCollection:
]
for file_path in self.file_paths:
media = Media(file_path, self.src_path)
media = Media(file_path, self.src_path, use_date_filename=True,
use_file_dates=True)
subdirs = file_path.relative_to(self.src_path).parent
exif_tags = {}
for key in ('album', 'camera_make', 'camera_model', 'latitude',
@ -79,6 +82,8 @@ class TestCollection:
if item == 'basename':
assert part == file_path.stem, file_path
elif item == 'date':
if part == '':
media.get_date_media()
assert datetime.strptime(part, mask[1:-1])
elif item == 'folder':
assert part == subdirs.name, file_path
@ -107,32 +112,6 @@ class TestCollection:
else:
assert part == '', file_path
def test_get_date_taken(self, tmp_path):
collection = Collection(tmp_path, self.path_format)
for file_path in self.file_paths:
exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(file_path, self.src_path)
metadata = media.get_metadata()
date_taken = media.get_date_taken()
date_filename = None
for tag in media.tags_keys['original_name']:
if tag in exif_data:
date_filename = get_date_from_string(exif_data[tag])
break
if not date_filename:
date_filename = get_date_from_string(file_path.name)
if media.metadata['date_original']:
assert date_taken == media.metadata['date_original']
elif date_filename:
assert date_taken == date_filename
elif media.metadata['date_created']:
assert date_taken == media.metadata['date_created']
elif media.metadata['date_modified']:
assert date_taken == media.metadata['date_modified']
def test_sort_files(self, tmp_path):
collection = Collection(tmp_path, self.path_format, album_from_folder=True)
loc = GeoLocation()
@ -182,6 +161,7 @@ class TestCollection:
assert src_path.exists()
else:
assert not src_path.exists()
shutil.copyfile(dest_path, src_path)
# TODO check for conflicts
@ -189,7 +169,9 @@ class TestCollection:
# TODO check date
def test__get_files_in_path(self, tmp_path):
collection = Collection(tmp_path, self.path_format, exclude='**/*.dng')
collection = Collection(tmp_path, self.path_format,
exclude={'**/*.dng',},
use_date_filename=True, use_file_dates=True)
paths = [x for x in collection._get_files_in_path(self.src_path,
maxlevel=1, glob='**/photo*')]
assert len(paths) == 6

View File

@ -10,6 +10,7 @@ from ordigi import constants
from ordigi.media import Media
from ordigi.images import Images
from ordigi.exiftool import ExifTool, ExifToolCaching
from ordigi.utils import get_date_from_string
ORDIGI_PATH = Path(__file__).parent.parent
CACHING = True
@ -70,6 +71,33 @@ class TestMetadata:
if has_exif_data == False:
assert not media.has_exif_data()
def test_get_date_media(self):
    """Check which date get_date_media() returns for every sample file.

    Expected precedence: original date, then the date parsed from the
    original-name tag (or the file name), then created, then modified.
    Nothing is asserted for a file where none of these is available.
    """
    for file_path in self.file_paths:
        exif_data = ExifToolCaching(str(file_path)).asdict()
        media = Media(
            file_path,
            self.src_path,
            use_date_filename=True,
            use_file_dates=True,
        )
        # Called for its side effect: it populates media.metadata.
        media.get_metadata()
        date_media = media.get_date_media()

        # Only the first original-name tag present in the EXIF data is
        # parsed; the file name is the fallback when that yields nothing.
        present_tags = [
            tag for tag in media.tags_keys['original_name']
            if tag in exif_data
        ]
        date_filename = None
        if present_tags:
            date_filename = get_date_from_string(exif_data[present_tags[0]])
        if not date_filename:
            date_filename = get_date_from_string(file_path.name)

        expected = (
            media.metadata['date_original']
            or date_filename
            or media.metadata['date_created']
            or media.metadata['date_modified']
        )
        if expected:
            assert date_media == expected
# Will be changed to get_metadata
# check that metadata types are correct