Add options and interactive mode for handling media date

This commit is contained in:
Cédric Leporcq 2021-09-24 18:50:38 +02:00
parent 6203498f20
commit 86d88b72c8
6 changed files with 154 additions and 76 deletions

View File

@ -84,6 +84,10 @@ def _get_exclude(opt, exclude):
and a file hash') and a file hash')
@click.option('--reset-cache', '-r', default=False, is_flag=True, @click.option('--reset-cache', '-r', default=False, is_flag=True,
help='Regenerate the hash.json and location.json database ') help='Regenerate the hash.json and location.json database ')
@click.option('--use-date-filename', '-f', default=False, is_flag=True,
help="Use filename date for media original date.")
@click.option('--use-file-dates', '-F', default=False, is_flag=True,
help="Use file date created or modified for media original date.")
@click.argument('paths', required=True, nargs=-1, type=click.Path()) @click.argument('paths', required=True, nargs=-1, type=click.Path())
def sort(**kwargs): def sort(**kwargs):
"""Sort files or directories by reading their EXIF and organizing them """Sort files or directories by reading their EXIF and organizing them
@ -134,7 +138,8 @@ def sort(**kwargs):
collection = Collection(destination, opt['path_format'], collection = Collection(destination, opt['path_format'],
kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'], kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'],
exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'], exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'],
logger, max_deep, mode) logger, max_deep, mode, kwargs['use_date_filename'],
kwargs['use_file_dates'])
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout']) opt['timeout'])

View File

@ -30,7 +30,8 @@ class Collection(object):
def __init__(self, root, path_format, album_from_folder=False, def __init__(self, root, path_format, album_from_folder=False,
cache=False, day_begins=0, dry_run=False, exclude=set(), cache=False, day_begins=0, dry_run=False, exclude=set(),
filter_by_ext=set(), glob='**/*', interactive=False, filter_by_ext=set(), glob='**/*', interactive=False,
logger=logging.getLogger(), max_deep=None, mode='copy'): logger=logging.getLogger(), max_deep=None, mode='copy',
use_date_filename=False, use_file_dates=False):
# Attributes # Attributes
self.root = Path(root).expanduser().absolute() self.root = Path(root).expanduser().absolute()
@ -60,8 +61,9 @@ class Collection(object):
self.logger = logger self.logger = logger
self.max_deep = max_deep self.max_deep = max_deep
self.mode = mode self.mode = mode
self.summary = Summary() self.summary = Summary()
self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates
self.whitespace_regex = '[ \t\n\r\f\v]+' self.whitespace_regex = '[ \t\n\r\f\v]+'
# Constants # Constants
@ -88,13 +90,12 @@ class Collection(object):
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string 'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string
} }
def check_for_early_morning_photos(self, date): def _check_for_early_morning_photos(self, date):
"""check for early hour photos to be grouped with previous day""" """check for early hour photos to be grouped with previous day"""
if date.hour < self.day_begins: if date.hour < self.day_begins:
self.logger.info('moving this photo to the previous day for\ self.logger.info("moving this photo to the previous day for classification purposes")
classification purposes (day_begins=' + str(self.day_begins) + ')') # push it to the day before for classification purposes
date = date - timedelta(hours=date.hour+1) # push it to the day before for classificiation purposes date = date - timedelta(hours=date.hour+1)
return date return date
@ -162,10 +163,10 @@ class Collection(object):
for i, rx in get_date_regex(basename): for i, rx in get_date_regex(basename):
part = re.sub(rx, '', part) part = re.sub(rx, '', part)
elif item == 'date': elif item == 'date':
date = metadata['date_taken'] date = metadata['date_media']
# early morning photos can be grouped with previous day # early morning photos can be grouped with previous day
date = self.check_for_early_morning_photos(date)
if date is not None: if date is not None:
date = self._check_for_early_morning_photos(date)
part = date.strftime(mask) part = date.strftime(mask)
elif item == 'folder': elif item == 'folder':
part = os.path.basename(subdirs) part = os.path.basename(subdirs)
@ -577,12 +578,12 @@ class Collection(object):
return path return path
def set_utime_from_metadata(self, date_taken, file_path): def set_utime_from_metadata(self, date_media, file_path):
""" Set the modification time on the file based on the file name. """ Set the modification time on the file based on the file name.
""" """
# Initialize date taken to what's returned from the metadata function. # Initialize date taken to what's returned from the metadata function.
os.utime(file_path, (int(datetime.now().timestamp()), int(date_taken.timestamp()))) os.utime(file_path, (int(datetime.now().timestamp()), int(date_media.timestamp())))
def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False): def dedup_regex(self, path, dedup_regex, logger, remove_duplicates=False):
# cycle through files # cycle through files
@ -678,8 +679,9 @@ class Collection(object):
subdirs = src_path.relative_to(path).parent subdirs = src_path.relative_to(path).parent
# Process files # Process files
src_checksum = self.checksum(src_path) src_checksum = self.checksum(src_path)
media = Media(src_path, path, self.album_from_folder, ignore_tags, media = Media(src_path, path, self.album_from_folder,
self.interactive, self.logger) ignore_tags, self.interactive, self.logger,
self.use_date_filename, self.use_file_dates)
if media: if media:
metadata = media.get_metadata(loc, self.db, self.cache) metadata = media.get_metadata(loc, self.db, self.cache)
# Get the destination path according to metadata # Get the destination path according to metadata

View File

@ -46,7 +46,7 @@ class Sqlite:
'Checksum': 'text', 'Checksum': 'text',
'Album': 'text', 'Album': 'text',
'LocationId': 'integer', 'LocationId': 'integer',
'DateTaken': 'text', 'DateMedia': 'text',
'DateOriginal': 'text', 'DateOriginal': 'text',
'DateCreated': 'text', 'DateCreated': 'text',
'DateModified': 'text', 'DateModified': 'text',

View File

@ -34,8 +34,9 @@ class Media():
extensions = PHOTO + AUDIO + VIDEO extensions = PHOTO + AUDIO + VIDEO
def __init__(self, file_path, root, album_from_folder=False, ignore_tags=set(), def __init__(self, file_path, root, album_from_folder=False,
interactive=False, logger=logging.getLogger()): ignore_tags=set(), interactive=False, logger=logging.getLogger(),
use_date_filename=False, use_file_dates=False):
""" """
:params: Path, Path, bool, set, bool, Logger :params: Path, Path, bool, set, bool, Logger
""" """
@ -46,12 +47,14 @@ class Media():
self.filename = str(file_path.name) self.filename = str(file_path.name)
self.album_from_folder = album_from_folder self.album_from_folder = album_from_folder
self.ignore_tags = ignore_tags
self.tags_keys = self.get_tags()
self.exif_metadata = None self.exif_metadata = None
self.ignore_tags = ignore_tags
self.interactive = interactive self.interactive = interactive
self.metadata = None
self.logger = logger self.logger = logger
self.metadata = None
self.tags_keys = self.get_tags()
self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates
self.theme = request.load_theme() self.theme = request.load_theme()
@ -203,7 +206,27 @@ class Media():
return None return None
def get_date_taken(self): def _get_date_media_interactive(self, choices, default):
print(f"Date conflict for file: {self.file_path}")
choices_list = [
inquirer.List('date_list',
message=f"Choice appropriate original date",
choices=choices,
default=default
),
]
prompt = [
inquirer.Text('date_custom', message="date"),
]
answers = inquirer.prompt(choices_list, theme=self.theme)
if not answers['date_list']:
answers = inquirer.prompt(prompt, theme=self.theme)
return get_date_from_string(answers['date_custom'])
else:
return answers['date_list']
def get_date_media(self):
''' '''
Get the date taken from self.metadata or filename Get the date taken from self.metadata or filename
:returns: datetime or None. :returns: datetime or None.
@ -213,42 +236,78 @@ class Media():
basename = os.path.splitext(self.metadata['filename'])[0] basename = os.path.splitext(self.metadata['filename'])[0]
date_original = self.metadata['date_original'] date_original = self.metadata['date_original']
if self.metadata['original_name'] is not None: if self.metadata['original_name']:
date_filename = get_date_from_string(self.metadata['original_name']) date_filename = get_date_from_string(self.metadata['original_name'])
else: else:
date_filename = get_date_from_string(basename) date_filename = get_date_from_string(basename)
date_original = self.metadata['date_original']
date_created = self.metadata['date_created'] date_created = self.metadata['date_created']
if self.metadata['date_original'] is not None: date_modified = self.metadata['date_modified']
if (date_filename is not None and if self.metadata['date_original']:
date_filename != date_original): if (date_filename and date_filename != date_original):
self.logger.warn(f"{basename} time mark is different from {date_original}") self.logger.warning(f"{basename} time mark is different from {date_original}")
# TODO ask for keep date taken, filename time, or neither if self.interactive:
# Ask for keep date taken, filename time, or neither
choices = [
(f"date original:'{date_original}'", date_original),
(f"date filename:'{date_filename}'", date_filename),
("custom", None),
]
default = f'{date_original}'
return self._get_date_media_interactive(choices, default)
return self.metadata['date_original'] return self.metadata['date_original']
elif True:
if date_filename is not None: self.logger.warning(f"could not find original date for {self.file_path}")
if date_created is not None and date_filename > date_created:
self.logger.warn(f"{basename} time mark is more recent than {date_created}") if self.use_date_filename and date_filename:
self.logger.info(f"use date from filename:{date_filename} for {self.file_path}")
if date_created and date_filename > date_created:
self.logger.warning(f"{basename} time mark is more recent than {date_created}")
if self.interactive:
choices = [
(f"date filename:'{date_filename}'", date_filename),
(f"date created:'{date_created}'", date_created),
("custom", None),
]
default = date_filename
return self._get_date_media_interactive(choices, default)
return date_filename return date_filename
if True:
# TODO warm and ask for confirmation elif self.use_file_dates:
if date_created is not None: if date_created:
self.logger.warning(f"use date created:{date_created} for {self.file_path}")
return date_created return date_created
elif self.metadata['date_modified'] is not None: elif date_modified:
return self.metadata['date_modified'] self.logger.warning(f"use date modified:{date_modified} for {self.file_path}")
return date_modified
elif self.interactive:
choices = []
if date_filename:
choices.append((f"date filename:'{date_filename}'",
date_filename))
if date_created:
choices.append((f"date created:'{date_created}'", date_created))
if date_modified:
choices.append((f"date modified:'{date_modified}'", date_modified))
choices.append(("custom", None))
default = date_filename
return self._get_date_media_interactive(choices, default)
def get_exif_metadata(self): def get_exif_metadata(self):
# Get metadata from exiftool. # Get metadata from exiftool.
self.exif_metadata = ExifToolCaching(self.file_path, logger=self.logger).asdict() self.exif_metadata = ExifToolCaching(self.file_path, logger=self.logger).asdict()
def _set_album(self, album, folder): def _set_album(self, album, folder):
print(f"Conflict for file: {self.file_path}") print(f"Metadata conflict for file: {self.file_path}")
choices_list = [ choices_list = [
inquirer.List('album', inquirer.List('album',
message=f"Exif album is already set to {album}, choices", message=f"Exif album is already set to {album}, choices",
choices=[ choices=[
(f"album:'{album}'", f'{album}'), (f"album:'{album}'", album),
(f"folder:'{folder}'", f'{folder}'), (f"folder:'{folder}'", folder),
("custom", None), ("custom", None),
], ],
default=f'{album}' default=f'{album}'
@ -299,7 +358,12 @@ class Media():
self.metadata['src_path'] = self.root self.metadata['src_path'] = self.root
self.metadata['subdirs'] = self.subdirs self.metadata['subdirs'] = self.subdirs
self.metadata['filename'] = self.filename self.metadata['filename'] = self.filename
self.metadata['date_taken'] = self.get_date_taken()
original_name = self.metadata['original_name']
if not original_name or original_name == '':
self.set_value('original_name', self.filename)
self.metadata['date_media'] = self.get_date_media()
if self.album_from_folder: if self.album_from_folder:
album = self.metadata['album'] album = self.metadata['album']
@ -307,10 +371,6 @@ class Media():
if album and album != '': if album and album != '':
if self.interactive: if self.interactive:
answer = self._set_album(album, folder) answer = self._set_album(album, folder)
# print(f"Conflict for file: {self.file_path}")
# print(f"Exif album is already set to '{album}'', folder='{folder}'")
# i = f"Choice for 'album': (a) '{album}', (f) '{folder}', (c) custom ?\n"
# answer = input(i)
if answer == 'c': if answer == 'c':
self.metadata['album'] = input('album=') self.metadata['album'] = input('album=')
self.set_value('album', folder) self.set_value('album', folder)
@ -321,6 +381,7 @@ class Media():
if not album or album == '': if not album or album == '':
self.metadata['album'] = folder self.metadata['album'] = folder
self.set_value('album', folder)
loc_keys = ('latitude', 'longitude', 'city', 'state', 'country', 'default') loc_keys = ('latitude', 'longitude', 'city', 'state', 'country', 'default')
location_id = None location_id = None
@ -388,7 +449,7 @@ class Media():
""" """
return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value) return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)
def set_date_taken(self, date_key, time): def set_date_media(self, date_key, time):
"""Set the date/time a photo was taken. """Set the date/time a photo was taken.
:param datetime time: datetime object of when the photo was taken :param datetime time: datetime object of when the photo was taken

View File

@ -2,6 +2,7 @@
from datetime import datetime from datetime import datetime
import os import os
import pytest import pytest
import shutil
import sqlite3 import sqlite3
from pathlib import Path from pathlib import Path
import re import re
@ -15,7 +16,7 @@ from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exif
from ordigi.collection import Collection from ordigi.collection import Collection
from ordigi.geolocation import GeoLocation from ordigi.geolocation import GeoLocation
from ordigi.media import Media from ordigi.media import Media
from ordigi.utils import get_date_from_string, get_date_regex from ordigi.utils import get_date_regex
class TestCollection: class TestCollection:
@ -34,7 +35,8 @@ class TestCollection:
Test all parts Test all parts
""" """
# Item to search for: # Item to search for:
collection = Collection(tmp_path, self.path_format) collection = Collection(tmp_path, self.path_format,
use_date_filename=True, use_file_dates=True)
items = collection.get_items() items = collection.get_items()
masks = [ masks = [
'{album}', '{album}',
@ -58,7 +60,8 @@ class TestCollection:
] ]
for file_path in self.file_paths: for file_path in self.file_paths:
media = Media(file_path, self.src_path) media = Media(file_path, self.src_path, use_date_filename=True,
use_file_dates=True)
subdirs = file_path.relative_to(self.src_path).parent subdirs = file_path.relative_to(self.src_path).parent
exif_tags = {} exif_tags = {}
for key in ('album', 'camera_make', 'camera_model', 'latitude', for key in ('album', 'camera_make', 'camera_model', 'latitude',
@ -79,6 +82,8 @@ class TestCollection:
if item == 'basename': if item == 'basename':
assert part == file_path.stem, file_path assert part == file_path.stem, file_path
elif item == 'date': elif item == 'date':
if part == '':
media.get_date_media()
assert datetime.strptime(part, mask[1:-1]) assert datetime.strptime(part, mask[1:-1])
elif item == 'folder': elif item == 'folder':
assert part == subdirs.name, file_path assert part == subdirs.name, file_path
@ -107,32 +112,6 @@ class TestCollection:
else: else:
assert part == '', file_path assert part == '', file_path
def test_get_date_taken(self, tmp_path):
collection = Collection(tmp_path, self.path_format)
for file_path in self.file_paths:
exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(file_path, self.src_path)
metadata = media.get_metadata()
date_taken = media.get_date_taken()
date_filename = None
for tag in media.tags_keys['original_name']:
if tag in exif_data:
date_filename = get_date_from_string(exif_data[tag])
break
if not date_filename:
date_filename = get_date_from_string(file_path.name)
if media.metadata['date_original']:
assert date_taken == media.metadata['date_original']
elif date_filename:
assert date_taken == date_filename
elif media.metadata['date_created']:
assert date_taken == media.metadata['date_created']
elif media.metadata['date_modified']:
assert date_taken == media.metadata['date_modified']
def test_sort_files(self, tmp_path): def test_sort_files(self, tmp_path):
collection = Collection(tmp_path, self.path_format, album_from_folder=True) collection = Collection(tmp_path, self.path_format, album_from_folder=True)
loc = GeoLocation() loc = GeoLocation()
@ -182,6 +161,7 @@ class TestCollection:
assert src_path.exists() assert src_path.exists()
else: else:
assert not src_path.exists() assert not src_path.exists()
shutil.copyfile(dest_path, src_path)
# TODO check for conflicts # TODO check for conflicts
@ -189,7 +169,9 @@ class TestCollection:
# TODO check date # TODO check date
def test__get_files_in_path(self, tmp_path): def test__get_files_in_path(self, tmp_path):
collection = Collection(tmp_path, self.path_format, exclude='**/*.dng') collection = Collection(tmp_path, self.path_format,
exclude={'**/*.dng',},
use_date_filename=True, use_file_dates=True)
paths = [x for x in collection._get_files_in_path(self.src_path, paths = [x for x in collection._get_files_in_path(self.src_path,
maxlevel=1, glob='**/photo*')] maxlevel=1, glob='**/photo*')]
assert len(paths) == 6 assert len(paths) == 6

View File

@ -10,6 +10,7 @@ from ordigi import constants
from ordigi.media import Media from ordigi.media import Media
from ordigi.images import Images from ordigi.images import Images
from ordigi.exiftool import ExifTool, ExifToolCaching from ordigi.exiftool import ExifTool, ExifToolCaching
from ordigi.utils import get_date_from_string
ORDIGI_PATH = Path(__file__).parent.parent ORDIGI_PATH = Path(__file__).parent.parent
CACHING = True CACHING = True
@ -70,6 +71,33 @@ class TestMetadata:
if has_exif_data == False: if has_exif_data == False:
assert not media.has_exif_data() assert not media.has_exif_data()
def test_get_date_media(self):
# collection = Collection(tmp_path, self.path_format,
# use_date_filename=True, use_file_dates=True)
for file_path in self.file_paths:
exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(file_path, self.src_path, use_date_filename=True,
use_file_dates=True)
metadata = media.get_metadata()
date_media = media.get_date_media()
date_filename = None
for tag in media.tags_keys['original_name']:
if tag in exif_data:
date_filename = get_date_from_string(exif_data[tag])
break
if not date_filename:
date_filename = get_date_from_string(file_path.name)
if media.metadata['date_original']:
assert date_media == media.metadata['date_original']
elif date_filename:
assert date_media == date_filename
elif media.metadata['date_created']:
assert date_media == media.metadata['date_created']
elif media.metadata['date_modified']:
assert date_media == media.metadata['date_modified']
# Will be changed to get_metadata # Will be changed to get_metadata
# check if metadata types are correct # check if metadata types are correct