ordigi/ordigi/media.py

716 lines
23 KiB
Python
Raw Normal View History

2021-08-14 21:31:37 +02:00
import logging
2021-06-20 19:51:21 +02:00
import mimetypes
import os
import re
import sys
2021-11-07 09:44:43 +01:00
from dateutil import parser
import inquirer
2021-08-13 21:11:24 +02:00
from ordigi.exiftool import ExifTool, ExifToolCaching
from ordigi import utils
from ordigi import request
2021-11-08 07:02:21 +01:00
class ExifMetadata:
    """Map ordigi metadata keys to the exiftool tags that may hold them.

    :param file_path: path of the media file the tags relate to
    :param ignore_tags: iterable of regular expressions; any exiftool tag
        matched (with ``re.match``) by one of them is left out of the table
    """

    def __init__(self, file_path, ignore_tags=None):
        self.file_path = file_path
        if ignore_tags is None:
            ignore_tags = set()
        self.ignore_tags = ignore_tags
        self.tags_keys = self.get_tags()

    def get_tags(self) -> dict:
        """Get exif tags groups in dict

        :returns: dict mapping each metadata key to the ordered list of
            exiftool tags that can provide it, minus the ignored tags
        """
        tags_keys = {
            'date_original': [
                'EXIF:DateTimeOriginal',
                'H264:DateTimeOriginal',
                'QuickTime:ContentCreateDate',
            ],
            'date_created': [
                'EXIF:CreateDate',
                'QuickTime:CreationDate',
                'QuickTime:CreateDate',
                'QuickTime:CreationDate-und-US',
                'QuickTime:MediaCreateDate',
            ],
            'date_modified': ['File:FileModifyDate', 'QuickTime:ModifyDate'],
            'camera_make': ['EXIF:Make', 'QuickTime:Make'],
            'camera_model': ['EXIF:Model', 'QuickTime:Model'],
            'album': ['XMP-xmpDM:Album', 'XMP:Album'],
            'title': ['XMP:Title', 'XMP:DisplayName'],
            'latitude': [
                'EXIF:GPSLatitude',
                'XMP:GPSLatitude',
                # 'QuickTime:GPSLatitude',
                'Composite:GPSLatitude',
            ],
            'longitude': [
                'EXIF:GPSLongitude',
                'XMP:GPSLongitude',
                # 'QuickTime:GPSLongitude',
                'Composite:GPSLongitude',
            ],
            'latitude_ref': ['EXIF:GPSLatitudeRef'],
            'longitude_ref': ['EXIF:GPSLongitudeRef'],
            'original_name': ['XMP:OriginalFileName'],
        }

        # Remove ignored tags. Each list is rebuilt rather than edited in
        # place: deleting items while enumerating the same list skips the
        # element that follows every deletion, so runs of ignored tags
        # would only be partially removed.
        patterns = tuple(self.ignore_tags)
        if patterns:
            tags_keys = {
                key: [
                    tag
                    for tag in tags
                    if not any(re.match(pattern, tag) for pattern in patterns)
                ]
                for key, tags in tags_keys.items()
            }

        return tags_keys
class ReadExif(ExifMetadata):
    """Read exif metadata from file

    :param file_path: path of the media file
    :param exif_metadata: already extracted exif dict; when empty or None
        the metadata is obtained from ``get_exif_metadata``
    :param ignore_tags: regular expressions of exiftool tags to ignore
    :param logger: parent logger; a child named after the class is used
    """

    def __init__(
        self,
        file_path,
        exif_metadata=None,
        ignore_tags=None,
        logger=logging.getLogger(),
    ):
        super().__init__(file_path, ignore_tags)

        # Options
        self.logger = logger.getChild(self.__class__.__name__)

        if exif_metadata:
            self.exif_metadata = exif_metadata
        else:
            self.exif_metadata = self.get_exif_metadata()

    def get_exif_metadata(self):
        """Get metadata from exiftool."""
        return ExifToolCaching(self.file_path, logger=self.logger).asdict()

    def get_key_values(self, key):
        """
        Iterate over the values available for a tag set.

        This is a generator: it yields, in the order of
        ``self.tags_keys[key]``, the value of every tag present in the exif
        metadata, and yields nothing when there is no exif metadata.
        :param str key: metadata key (e.g. 'date_original')
        """
        if self.exif_metadata is None:
            return

        for tag in self.tags_keys[key]:
            if tag in self.exif_metadata:
                yield self.exif_metadata[tag]

    def get_coordinates(self, key, value):
        """Get latitude or longitude value

        :param str key: Type of coordinate to get. Either "latitude" or
            "longitude".
        :param value: raw coordinate read from the exif metadata
        :returns: float or None
        """
        if value is None:
            return None

        if isinstance(value, str) and len(value) == 0:
            # If exiftool GPS output is empty, the data returned will be a str
            # with 0 length.
            # https://github.com/jmathai/elodie/issues/354
            return None

        # Cast coordinate to a float due to a bug in exiftool's
        # -json output format.
        # https://github.com/jmathai/elodie/issues/171
        # http://u88.n24.queensu.ca/exiftool/forum/index.php/topic,7952.0.html # noqa
        this_coordinate = float(value)

        # When the reference tag says South/West the result must be
        # negative. -abs() is used instead of multiplying by -1 so that a
        # value which is already signed is not flipped back to positive.
        if key == 'latitude':
            if self.exif_metadata.get('EXIF:GPSLatitudeRef') == 'S':
                return -abs(this_coordinate)
        elif key == 'longitude':
            if self.exif_metadata.get('EXIF:GPSLongitudeRef') == 'W':
                return -abs(this_coordinate)

        return this_coordinate
class WriteExif(ExifMetadata):
    """Write exif metadata to file

    :param file_path: path of the media file
    :param metadata: metadata dict of the file; ``set_coordinates`` reads
        its 'latitude_ref' and 'longitude_ref' entries
    :param ignore_tags: regular expressions of exiftool tags to ignore
    :param logger: parent logger; a child named after the class is used
    """

    def __init__(
        self,
        file_path,
        metadata,
        ignore_tags=None,
        logger=logging.getLogger(),
    ):
        super().__init__(file_path, ignore_tags)

        self.metadata = metadata

        self.logger = logger.getChild(self.__class__.__name__)

    def set_value(self, tag, value):
        """Set value of a tag.

        :param str tag: exiftool tag name (not a metadata key)
        :returns: whatever ``ExifTool.setvalue`` returns
        """
        # TODO overwrite mode check if fail
        return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)

    def set_key_values(self, key, value):
        """Set tags values for given key

        Every tag listed in ``self.tags_keys[key]`` is written.
        :returns: True only if no write returned a falsy status
        """
        status = True
        for tag in self.tags_keys[key]:
            if not self.set_value(tag, value):
                status = False

        return status

    def set_date_media(self, time):
        """
        Set the date/time a photo was taken.

        :param datetime time: datetime object of when the photo was taken
        :returns: bool
        """
        if time is None:
            return False

        formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
        # 'date_original' and 'date_created' are metadata keys, not exiftool
        # tags, so they must be expanded through set_key_values.
        # NOTE(review): set_key_values reports failure as soon as one tag of
        # the group is rejected - confirm this all-or-nothing status is the
        # one callers expect.
        status = self.set_key_values('date_original', formatted_time)
        if not status:
            # exif attribute date_original doesn't exist
            status = self.set_key_values('date_created', formatted_time)

        return status

    def set_coordinates(self, latitude, longitude):
        """Set latitude and longitude of the file.

        When the metadata already holds a hemisphere reference, the
        reference is rewritten from the sign of the coordinate and the
        absolute value is stored.
        :param latitude: signed latitude (negative is South)
        :param longitude: signed longitude (negative is West)
        :returns: bool, True if every write succeeded
        """
        status = []
        if self.metadata['latitude_ref']:
            # The hemisphere must be read before the sign is dropped.
            hemisphere = 'N' if latitude >= 0 else 'S'
            latitude = abs(latitude)
            status.append(self.set_key_values('latitude_ref', hemisphere))

        status.append(self.set_key_values('latitude', latitude))

        if self.metadata['longitude_ref']:
            hemisphere = 'E' if longitude >= 0 else 'W'
            longitude = abs(longitude)
            status.append(self.set_key_values('longitude_ref', hemisphere))

        status.append(self.set_key_values('longitude', longitude))

        return all(status)

    def set_album_from_folder(self):
        """Set the album attribute based on the leaf folder name

        :returns: bool
        """
        # TODO use tag key
        return self.set_value('Album', self.file_path.parent.name)
2021-11-08 07:02:21 +01:00
class Media(ReadExif):
    """
    Extract metadata from exiftool and sort it into a dict structure

    :param file_path: path of the media file
    :param src_dir: source directory the file is read from
    :param bool album_from_folder: use the parent folder name as album
    :param ignore_tags: regular expressions of exiftool tags to ignore
    :param bool interactive: ask the user when metadata is ambiguous
    :param logger: parent logger; a child named after the class is used
    :param bool use_date_filename: allow the date found in the file name
    :param bool use_file_dates: allow the created/modified dates
    """

    d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'}

    def __init__(
        self,
        file_path,
        src_dir,
        album_from_folder=False,
        ignore_tags=None,
        interactive=False,
        logger=logging.getLogger(),
        use_date_filename=False,
        use_file_dates=False,
    ):
        super().__init__(
            file_path,
            ignore_tags=ignore_tags,
            logger=logger,
        )

        self.src_dir = src_dir

        self.album_from_folder = album_from_folder
        self.interactive = interactive
        self.logger = logger.getChild(self.__class__.__name__)
        # Filled by get_metadata()
        self.metadata = None
        self.use_date_filename = use_date_filename
        self.use_file_dates = use_file_dates

        self.theme = request.load_theme()

    def get_mimetype(self):
        """
        Get the mimetype of the file.

        :returns: str or None
        """
        # TODO add to metadata
        # guess_type() always returns a (type, encoding) tuple; the type is
        # None when it cannot be guessed.
        return mimetypes.guess_type(self.file_path)[0]

    def get_date_format(self, value):
        """
        Formatting date attribute.

        :param value: date string to parse
        :returns: datetime object or None
        """
        # We need to parse a string to datetime format.
        # EXIF DateTimeOriginal and EXIF DateTime are both stored
        # in %Y:%m:%d %H:%M:%S format
        if value is None:
            return None

        try:
            # correct nasty formatted date
            regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
            if re.match(regex, value) is not None:  # noqa
                value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
            return parser.parse(value)
        except Exception as e:
            # Parsing is best effort (file names are parsed too), so any
            # ordinary error means "no date". Exception is caught rather
            # than BaseException so that KeyboardInterrupt and SystemExit
            # still propagate.
            self.logger.warning("%s: %s", e, value)
            return None

    def _get_date_media_interactive(self, choices, default):
        """Ask the user which date to keep.

        :param choices: list of (label, value) tuples; a None value stands
            for a custom date typed by the user
        :param default: choice preselected in the prompt
        :returns: the selected value, or the parsed custom date
        """
        print(f"Date conflict for file: {self.file_path}")
        choices_list = [
            inquirer.List(
                'date_list',
                message="Choice appropriate original date",
                choices=choices,
                default=default,
            ),
        ]
        answers = inquirer.prompt(choices_list, theme=self.theme)

        if not answers['date_list']:
            prompt = [
                inquirer.Text('date_custom', message="date"),
            ]
            answers = inquirer.prompt(prompt, theme=self.theme)
            return self.get_date_format(answers['date_custom'])

        return answers['date_list']

    def get_date_media(self):
        '''
        Get the date taken from self.metadata or filename

        The original date wins when present. Otherwise, depending on the
        options, the date found in the file name, then the created or
        modified date is used, or the user is asked.
        :returns: datetime or None.
        '''
        if self.metadata is None:
            return None

        filename = self.metadata['filename']
        stem = os.path.splitext(filename)[0]
        if self.metadata['original_name']:
            date_filename = self.get_date_format(self.metadata['original_name'])
        else:
            date_filename = self.get_date_format(stem)

        date_original = self.metadata['date_original']
        date_created = self.metadata['date_created']
        date_modified = self.metadata['date_modified']
        if self.metadata['date_original']:
            if date_filename and date_filename != date_original:
                self.logger.warning(
                    f"{filename} time mark is different from {date_original}"
                )
                if self.interactive:
                    # Ask for keep date taken, filename time, or neither
                    choices = [
                        (f"date original:'{date_original}'", date_original),
                        (f"date filename:'{date_filename}'", date_filename),
                        ("custom", None),
                    ]
                    default = f'{date_original}'
                    return self._get_date_media_interactive(choices, default)

            return self.metadata['date_original']

        self.logger.warning(f"could not find original date for {self.file_path}")

        if self.use_date_filename and date_filename:
            self.logger.info(
                f"use date from filename:{date_filename} for {self.file_path}"
            )
            if date_created and date_filename > date_created:
                self.logger.warning(
                    f"{filename} time mark is more recent than {date_created}"
                )
                if self.interactive:
                    choices = [
                        (f"date filename:'{date_filename}'", date_filename),
                        (f"date created:'{date_created}'", date_created),
                        ("custom", None),
                    ]
                    default = date_filename
                    return self._get_date_media_interactive(choices, default)

            return date_filename

        if self.use_file_dates:
            if date_created:
                self.logger.warning(
                    f"use date created:{date_created} for {self.file_path}"
                )
                return date_created

            if date_modified:
                self.logger.warning(
                    f"use date modified:{date_modified} for {self.file_path}"
                )
                return date_modified

        elif self.interactive:
            choices = []
            if date_filename:
                choices.append((f"date filename:'{date_filename}'", date_filename))
            if date_created:
                choices.append((f"date created:'{date_created}'", date_created))
            if date_modified:
                choices.append((f"date modified:'{date_modified}'", date_modified))
            choices.append(("custom", None))
            default = date_filename
            return self._get_date_media_interactive(choices, default)

        # No usable date was found
        return None

    def _set_album(self, album, folder):
        """Ask the user which album name to keep.

        :param album: album currently stored in the metadata
        :param folder: name of the parent folder
        :returns: the chosen album name (album, folder or custom text)
        """
        print(f"Metadata conflict for file: {self.file_path}")
        choices_list = [
            inquirer.List(
                'album',
                message=f"Exif album is already set to {album}, choices",
                choices=[
                    (f"album:'{album}'", album),
                    (f"folder:'{folder}'", folder),
                    ("custom", None),
                ],
                default=f'{album}',
            ),
        ]
        prompt = [
            inquirer.Text('custom', message="album"),
        ]

        answers = inquirer.prompt(choices_list, theme=self.theme)
        if not answers['album']:
            answers = inquirer.prompt(prompt, theme=self.theme)
            return answers['custom']

        return answers['album']

    def _set_metadata_from_exif(self):
        """
        Get selected metadata from exif to dict structure

        Every key of ``self.tags_keys`` is always created in
        ``self.metadata``; it is None when no tag provides a usable value
        (including when there is no exif metadata at all), so later
        lookups such as the ones in get_date_media() cannot fail on a
        missing key.
        """
        for key in self.tags_keys:
            formated_data = None
            for value in self.get_key_values(key):
                if 'date' in key:
                    formated_data = self.get_date_format(value)
                elif key in ('latitude', 'longitude'):
                    formated_data = self.get_coordinates(key, value)
                elif value is not None and value != '':
                    formated_data = value
                else:
                    formated_data = None

                if formated_data:
                    # Use this data and break
                    break

            self.metadata[key] = formated_data

    def _set_metadata_from_db(self, db, relpath):
        """Fill self.metadata from the database record of the file.

        Location related keys are skipped here; they are handled by
        _set_location_metadata().
        :param db: database object providing ``get_metadata_data``
        :param relpath: file path relative to the collection root
        :returns: the 'LocationId' stored for the file
        """
        skipped_keys = (
            'latitude',
            'longitude',
            'latitude_ref',
            'longitude_ref',
            'file_path',
        )
        for key in self.tags_keys:
            if key in skipped_keys:
                continue

            label = utils.snake2camel(key)
            value = db.get_metadata_data(relpath, label)
            if 'date' in key:
                self.metadata[key] = self.get_date_format(value)
            else:
                self.metadata[key] = value

        for key in 'src_dir', 'subdirs', 'filename':
            label = utils.snake2camel(key)
            self.metadata[key] = db.get_metadata_data(relpath, label)

        return db.get_metadata_data(relpath, 'LocationId')

    def _check_file(self, db, root):
        """Check if file_path is a subpath of root

        When it is, the checksum stored in the database is compared with
        the checksum of the file and the program exits with status 1 on a
        mismatch.
        :returns: (relpath, db_checksum), or (None, None) when the file is
            not under root
        """
        path = str(self.file_path)
        base = str(root)
        # Compare against root plus a path separator: a bare string prefix
        # test would also accept siblings such as '/a/bc' for root '/a/b'.
        prefix = base if base.endswith(os.sep) else base + os.sep
        if path != base and not path.startswith(prefix):
            return None, None

        relpath = os.path.relpath(self.file_path, root)
        db_checksum = db.get_checksum(relpath)
        file_checksum = self.metadata['checksum']
        # Check if checksum match
        if db_checksum and db_checksum != file_checksum:
            self.logger.error(f'{self.file_path} checksum has changed')
            self.logger.error('(modified or corrupted file).')
            self.logger.error(
                f'file_checksum={file_checksum},\ndb_checksum={db_checksum}'
            )
            self.logger.info(
                'Use --reset-cache, check database integrity or try to restore the file'
            )
            # We don't want to silently ignore or correct this without
            # resetting the cache as it could be due to file corruption
            sys.exit(1)

        return relpath, db_checksum

    def _set_location_metadata(self, location_id, db, loc=None):
        """Set the location related keys of self.metadata.

        :param location_id: id of the location in the database, if any
        :param db: database object providing ``get_location_data``
        :param loc: geolocation object providing ``place_name``
        """
        self.metadata['location_id'] = location_id

        loc_keys = (
            'latitude',
            'longitude',
            'latitude_ref',
            'longitude_ref',
            'city',
            'state',
            'country',
            'default',
        )

        if location_id:
            for key in loc_keys:
                # use str to convert non string format data like latitude and
                # longitude
                self.metadata[key] = str(
                    db.get_location_data(location_id, utils.snake2camel(key))
                )
        elif loc:
            # Make sure the coordinate keys exist (they are not loaded from
            # the database) without erasing coordinates already read from
            # exif: they are needed for the place lookup just below.
            for key in 'latitude', 'longitude', 'latitude_ref', 'longitude_ref':
                self.metadata.setdefault(key, None)

            place_name = loc.place_name(
                self.metadata['latitude'], self.metadata['longitude']
            )
            for key in ('city', 'state', 'country', 'default'):
                # mask = 'city'
                # place_name = {'default': u'Sunnyvale', 'city-random': u'Sunnyvale'}
                if key in place_name:
                    self.metadata[key] = place_name[key]
                else:
                    self.metadata[key] = None
        else:
            # NOTE(review): this also resets coordinates read from exif -
            # confirm that is intended when no geolocation is requested.
            for key in loc_keys:
                self.metadata[key] = None

    def _set_album_from_folder(self):
        """Set the album in self.metadata from the parent folder name.

        An existing album is kept unless the user picks another value in
        interactive mode; an empty album is replaced by the folder name.
        """
        album = self.metadata['album']
        folder = self.file_path.parent.name
        if album and album != '':
            if self.interactive:
                # _set_album() returns the chosen album name itself (the
                # current album, the folder or a custom text), not a
                # one-letter code.
                self.metadata['album'] = self._set_album(album, folder)
        else:
            self.metadata['album'] = folder

    def get_metadata(self, root, loc=None, db=None, cache=False):
        """
        Get a dictionary of metadata from exif.
        All keys will be present and have a value of None if not obtained.

        The result is stored in ``self.metadata``. With ``cache`` and ``db``
        set, a file whose checksum is known to the database is read from
        the database instead of exif.
        :param root: root directory of the collection
        :param loc: geolocation object used to find place names
        :param db: database object
        :param bool cache: use the metadata stored in the database
        """
        self.metadata = {}
        self.metadata['checksum'] = utils.checksum(self.file_path)

        db_checksum = False
        location_id = None

        if cache and db:
            relpath, db_checksum = self._check_file(db, root)
        if db_checksum:
            location_id = self._set_metadata_from_db(db, relpath)
        else:
            self.metadata['src_dir'] = str(self.src_dir)
            self.metadata['subdirs'] = str(
                self.file_path.relative_to(self.src_dir).parent
            )
            self.metadata['filename'] = self.file_path.name

            self._set_metadata_from_exif()

        self.metadata['date_media'] = self.get_date_media()

        self._set_location_metadata(location_id, db, loc)

        if self.album_from_folder:
            self._set_album_from_folder()

    def has_exif_data(self):
        """Check if file has metadata, date original"""
        if not self.metadata:
            return False

        return bool(self.metadata.get('date_original'))
2021-06-20 19:51:21 +02:00
class Medias:
    """
    Extract metadata from exiftool for the files in paths and sort it into
    a dict structure

    :param paths: object providing ``check`` and ``get_paths_list``
    :param root: root directory of the collection
    :param bool album_from_folder: use the parent folder name as album
    :param bool cache: use the metadata stored in the database
    :param db: database wrapper; its ``sqlite`` attribute is handed to Media
    :param bool interactive: ask the user when metadata is ambiguous
    :param ignore_tags: regular expressions of exiftool tags to ignore
    :param logger: parent logger; a child named after the class is used
    :param bool use_date_filename: allow the date found in the file name
    :param bool use_file_dates: allow the created/modified dates
    """

    PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
    AUDIO = ('m4a',)
    VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')

    extensions = PHOTO + AUDIO + VIDEO

    def __init__(
        self,
        paths,
        root,
        album_from_folder=False,
        cache=False,
        db=None,
        interactive=False,
        ignore_tags=None,
        logger=logging.getLogger(),
        use_date_filename=False,
        use_file_dates=False,
    ):
        # Modules
        self.db = db
        self.paths = paths

        # Arguments
        self.root = root

        # Options
        self.cache = cache
        self.album_from_folder = album_from_folder
        self.ignore_tags = ignore_tags
        self.interactive = interactive
        self.logger = logger.getChild(self.__class__.__name__)
        self.use_date_filename = use_date_filename
        self.use_file_dates = use_file_dates

        # Attributes
        # Dict to store medias datas
        self.datas = {}
        self.theme = request.load_theme()

    def get_media(self, file_path, src_dir):
        """Build the Media object of a file with the options of this object."""
        media = Media(
            file_path,
            src_dir,
            self.album_from_folder,
            self.ignore_tags,
            self.interactive,
            self.logger,
            self.use_date_filename,
            self.use_file_dates,
        )

        return media

    def get_metadata(self, file_path, src_dir, loc=None):
        """Get the metadata dict of a single file.

        :param file_path: path of the media file
        :param src_dir: source directory the file is read from
        :param loc: geolocation object used to find place names
        :returns: the ``metadata`` attribute of the Media object
        """
        media = self.get_media(file_path, src_dir)
        # db is optional (it defaults to None in __init__), so only reach
        # for its sqlite attribute when a database was supplied.
        db = self.db.sqlite if self.db is not None else None
        media.get_metadata(self.root, loc, db, self.cache)

        return media.metadata

    def get_metadatas(self, src_dirs, imp=False, loc=None):
        """Get medias data

        Generator yielding ``(src_path, metadata)`` for every path found in
        the source directories. Unless ``imp`` is set, the program exits
        with status 1 on the first path that is not under ``self.root``.
        """
        for src_dir in src_dirs:
            src_dir = self.paths.check(src_dir)
            paths = self.paths.get_paths_list(src_dir)

            # Get medias and src_dirs
            for src_path in paths:
                if self.root not in src_path.parents:
                    if not imp:
                        self.logger.error(f"""{src_path} not in {self.root}
collection, use `ordigi import`""")
                        sys.exit(1)

                # Get file metadata
                metadata = self.get_metadata(src_path, src_dir, loc)

                yield src_path, metadata

    def update_exif_data(self, metadata):
        """Write the album and the original file name into the file exif.

        :param dict metadata: metadata of the file; 'file_path' is taken
            relative to ``self.root``
        :returns: bool, True if a write was attempted
        """
        file_path = self.root / metadata['file_path']
        exif = WriteExif(
            file_path,
            metadata,
            ignore_tags=self.ignore_tags,
            logger=self.logger
        )

        updated = False
        if self.album_from_folder:
            exif.set_album_from_folder()
            updated = True
        # A missing original name is stored as None in the metadata, so
        # None has to be treated as "not set" as well.
        if metadata['original_name'] in (None, False, ''):
            # 'original_name' is a metadata key, not an exiftool tag: it is
            # expanded to its tags by set_key_values.
            exif.set_key_values('original_name', metadata['filename'])
            updated = True
        if self.album_from_folder:
            album = metadata['album']
            if album and album != '':
                exif.set_key_values('album', album)
                updated = True

        return updated