ordigi/ordigi/media.py

443 lines
15 KiB
Python
Raw Normal View History

2015-10-07 08:47:51 +02:00
"""
Provides the :class:`Media` class, used to read and write the metadata of
media files.
"""
2021-08-14 21:31:37 +02:00
import logging
2021-06-20 19:51:21 +02:00
import mimetypes
import os
2015-10-07 08:47:51 +02:00
# load modules
from dateutil.parser import parse
import re
2021-08-13 21:11:24 +02:00
from ordigi.exiftool import ExifTool, ExifToolCaching
2021-09-12 07:39:37 +02:00
from ordigi.utils import get_date_from_string
2021-06-20 19:51:21 +02:00
class Media():
    """The media class for all media objects.

    Wraps one file on disk and exposes the subset of its exiftool metadata
    listed by :meth:`get_tags`, plus helpers to write some of those values
    back to the file.

    :param file_path: Path of the media file (``Path`` or ``str``).
    :param root: Directory ``file_path`` is located under (``Path`` or ``str``).
    :param bool album_from_folder: Use the parent folder name as album when
        collecting metadata.
    :param set ignore_tags: Regular expressions of exif tags to ignore.
    :param bool interactive: Ask the user to resolve album conflicts.
    :param logger: Logger used for warnings.
    """

    # Maps each coordinate key to the key holding its hemisphere reference.
    d_coordinates = {
        'latitude': 'latitude_ref',
        'longitude': 'longitude_ref'
    }

    PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
    AUDIO = ('m4a',)
    VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')
    extensions = PHOTO + AUDIO + VIDEO

    # Date part as written in exif values: "YYYY:MM:DD".
    _EXIF_DATE_RE = re.compile(r'(\d{4}):(\d{2}):(\d{2})')

    # For each coordinate key: the exif tag holding its reference and the
    # reference value that denotes a negative coordinate.
    _COORDINATE_REFS = {
        'latitude': ('EXIF:GPSLatitudeRef', 'S'),
        'longitude': ('EXIF:GPSLongitudeRef', 'W'),
    }

    def __init__(self, file_path, root, album_from_folder=False, ignore_tags=None,
            interactive=False, logger=logging.getLogger()):
        """
        :params: Path, Path, bool, set, bool, Logger
        """
        # Imported here so that plain string paths are accepted as well as
        # Path objects without touching the module-level import block.
        from pathlib import Path

        file_path = Path(file_path)
        self.file_path = str(file_path)
        self.root = str(root)
        # relative_to() raises ValueError when file_path is not under root.
        self.subdirs = str(file_path.relative_to(root).parent)
        self.folder = str(file_path.parent.name)
        self.filename = str(file_path.name)

        self.album_from_folder = album_from_folder
        # A fresh set per instance instead of a shared mutable default.
        self.ignore_tags = set() if ignore_tags is None else ignore_tags
        self.tags_keys = self.get_tags()
        self.exif_metadata = None
        self.interactive = interactive
        self.metadata = None
        self.logger = logger

    def get_tags(self):
        """Build the mapping of metadata keys to candidate exif tags.

        Candidate tags are listed in order of preference. Tags matching one
        of the ``ignore_tags`` regular expressions (``re.match``, i.e.
        anchored at the start of the tag name) are left out.

        :returns: dict of str -> list of str
        """
        tags_keys = {}
        tags_keys['date_original'] = [
            'EXIF:DateTimeOriginal',
            'H264:DateTimeOriginal',
            'QuickTime:ContentCreateDate'
        ]
        tags_keys['date_created'] = [
            'EXIF:CreateDate',
            'QuickTime:CreationDate',
            'QuickTime:CreateDate',
            'QuickTime:CreationDate-und-US',
            'QuickTime:MediaCreateDate'
        ]
        tags_keys['date_modified'] = [
            'File:FileModifyDate',
            'QuickTime:ModifyDate'
        ]
        tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
        tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
        tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
        tags_keys['title'] = ['XMP:Title', 'XMP:DisplayName']
        tags_keys['latitude'] = [
            'EXIF:GPSLatitude',
            'XMP:GPSLatitude',
            # 'QuickTime:GPSLatitude',
            'Composite:GPSLatitude'
        ]
        tags_keys['longitude'] = [
            'EXIF:GPSLongitude',
            'XMP:GPSLongitude',
            # 'QuickTime:GPSLongitude',
            'Composite:GPSLongitude'
        ]
        tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef']
        tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef']
        tags_keys['original_name'] = ['XMP:OriginalFileName']

        # Remove ignored tags. The lists are rebuilt rather than edited in
        # place: deleting items while enumerating a list skips the element
        # that follows each deletion.
        if self.ignore_tags:
            patterns = [re.compile(tag_regex) for tag_regex in self.ignore_tags]
            tags_keys = {
                key: [
                    tag for tag in tags
                    if not any(pattern.match(tag) for pattern in patterns)
                ]
                for key, tags in tags_keys.items()
            }

        return tags_keys

    def _del_ignored_tags(self, exif_metadata):
        """Remove, in place, every entry of ``exif_metadata`` whose tag name
        matches one of the ``ignore_tags`` regular expressions.

        Unlike :meth:`get_tags` this uses ``re.search``, so a pattern may
        match anywhere in the tag name.
        """
        for tag_regex in self.ignore_tags:
            # Collect first: a dict cannot change size while being iterated.
            ignored_tags = [
                tag for tag in exif_metadata
                if re.search(tag_regex, tag) is not None
            ]
            for ignored_tag in ignored_tags:
                del exif_metadata[ignored_tag]

    def get_mimetype(self):
        """Get the mimetype of the file, guessed from its name.

        :returns: str or None
        """
        # guess_type() always returns a (type, encoding) tuple.
        return mimetypes.guess_type(self.file_path)[0]

    def _get_key_values(self, key):
        """Yield the exif values available for a metadata key.

        Values are yielded in the order of the candidate tags of ``key``;
        nothing is yielded when the exif metadata have not been loaded.

        :returns: generator of exif values
        """
        if self.exif_metadata is None:
            return

        for tag in self.tags_keys[key]:
            if tag in self.exif_metadata:
                yield self.exif_metadata[tag]

    def get_value(self, tag):
        """Get given value from EXIF.

        :returns: str or None
        """
        if self.exif_metadata is None:
            return None
        return self.exif_metadata.get(tag)

    def get_date_format(self, value):
        """Format a date attribute.

        :returns: datetime object or None
        """
        # We need to parse a string to datetime format.
        # EXIF DateTimeOriginal and EXIF DateTime are both stored
        # in %Y:%m:%d %H:%M:%S format
        if value is None:
            return None

        try:
            # Turn the "YYYY:MM:DD" date part into "YYYY-MM-DD" before
            # handing the value to the parser.
            if self._EXIF_DATE_RE.match(value) is not None:
                value = self._EXIF_DATE_RE.sub(r'\g<1>-\g<2>-\g<3>', value)
            return parse(value)
        except (ValueError, OverflowError, TypeError) as e:
            # dateutil's ParserError is a ValueError; TypeError covers
            # values that are not strings.
            self.logger.warning("Could not parse date %r: %s", value, e)
            return None

    def get_coordinates(self, key, value):
        """Get latitude or longitude value

        :param str key: Type of coordinate to get. Either "latitude" or
                         "longitude".
        :param value: Raw coordinate as read from the exif metadata.
        :returns: float or None
        """
        if value is None:
            return None

        if isinstance(value, str) and len(value) == 0:
            # If exiftool GPS output is empty, the data returned will be a str
            # with 0 length.
            # https://github.com/jmathai/elodie/issues/354
            return None

        # Cast coordinate to a float due to a bug in exiftool's
        #   -json output format.
        # https://github.com/jmathai/elodie/issues/171
        # http://u88.n24.queensu.ca/exiftool/forum/index.php/topic,7952.0.html  # noqa
        this_coordinate = float(value)

        exif_metadata = self.exif_metadata or {}
        ref_tag, negative_ref = self._COORDINATE_REFS.get(key, (None, None))
        if ref_tag is not None and exif_metadata.get(ref_tag) == negative_ref:
            # South and west are negative. abs() keeps a value that is
            # already signed from being flipped back to positive.
            return -abs(this_coordinate)

        return this_coordinate

    def get_date_taken(self):
        '''
        Get the date taken from self.metadata or filename

        Order of preference: original date, date found in the original file
        name (or in the current file name), creation date, modification date.

        :returns: datetime or None.
        '''
        if self.metadata is None:
            return None

        basename = os.path.splitext(self.metadata['filename'])[0]
        date_original = self.metadata['date_original']
        date_created = self.metadata['date_created']
        original_name = self.metadata['original_name']

        if original_name is not None:
            date_filename = get_date_from_string(original_name)
        else:
            date_filename = get_date_from_string(basename)

        if date_original is not None:
            if date_filename is not None and date_filename != date_original:
                self.logger.warning(f"{basename} time mark is different from {date_original}")
                # TODO ask for keep date taken, filename time, or neither
            return date_original

        if date_filename is not None:
            if date_created is not None and date_filename > date_created:
                self.logger.warning(f"{basename} time mark is more recent than {date_created}")
            return date_filename

        # TODO warn and ask for confirmation
        if date_created is not None:
            return date_created
        return self.metadata['date_modified']

    def get_exif_metadata(self):
        """Load the exif metadata of the file into ``self.exif_metadata``."""
        # Get metadata from exiftool.
        self.exif_metadata = ExifToolCaching(self.file_path, logger=self.logger).asdict()

    def _collect_tags_metadata(self):
        """Fill ``self.metadata`` with one formatted value per metadata key."""
        for key in self.tags_keys:
            formated_data = None
            for value in self._get_key_values(key):
                if 'date' in key:
                    formated_data = self.get_date_format(value)
                elif key in ('latitude', 'longitude'):
                    formated_data = self.get_coordinates(key, value)
                elif value is not None and value != '':
                    formated_data = value
                else:
                    formated_data = None

                # Compare with None so that a usable 0.0 coordinate is not
                # discarded in favour of a less preferred tag.
                if formated_data is not None:
                    # Use this data and break
                    break

            self.metadata[key] = formated_data

    def _resolve_album_from_folder(self):
        """Pick the album between the exif value and the folder name."""
        album = self.metadata['album']
        folder = self.folder
        if not album:
            self.metadata['album'] = folder
            return

        if not self.interactive:
            # Keep the album read from the file.
            return

        print(f"Conflict for file: {self.file_path}")
        print(f"Exif album is already set to '{album}'', folder='{folder}'")
        i = f"Choice for 'album': (a) '{album}', (f) '{folder}', (c) custom ?\n"
        answer = input(i)
        if answer == 'c':
            self.metadata['album'] = input('album=')
            # NOTE(review): the folder name, not the custom album just
            # entered, is written to the file - confirm this is intended.
            self.set_value('album', folder)
        if answer == 'a':
            self.metadata['album'] = album
        elif answer == 'f':
            self.metadata['album'] = folder

    def _collect_location_metadata(self, loc, db, cache):
        """Fill the location keys of ``self.metadata``.

        The location cached in ``db`` is used when ``cache`` is set and the
        file has a location id there, else ``loc`` is asked for a place name.
        """
        loc_keys = ('latitude', 'longitude', 'city', 'state', 'country', 'default')
        location_id = None
        if cache and db:
            location_id = db.get_metadata_data(self.file_path, 'LocationId')

        if location_id:
            for key in loc_keys:
                # use str to convert non string format data like latitude and
                # longitude
                self.metadata[key] = str(db.get_location(location_id, key.capitalize()))
        elif loc:
            place_name = loc.place_name(
                self.metadata['latitude'],
                self.metadata['longitude'],
                self.logger
            )
            for key in ('city', 'state', 'country', 'default'):
                # mask = 'city'
                # place_name = {'default': u'Sunnyvale', 'city-random': u'Sunnyvale'}
                self.metadata[key] = place_name[key] if key in place_name else None
        else:
            # NOTE(review): this also resets the latitude and longitude read
            # from the file - confirm this is intended.
            for key in loc_keys:
                self.metadata[key] = None

        self.metadata['location_id'] = location_id

    def get_metadata(self, loc=None, db=None, cache=False):
        """Get a dictionary of metadata from exif.

        All keys will be present and have a value of None if not obtained,
        except when the file has no exif metadata at all: the dictionary is
        then empty.

        :param loc: Object providing ``place_name(latitude, longitude, logger)``.
        :param db: Object providing ``get_metadata_data`` and ``get_location``.
        :param bool cache: Look the location up in ``db`` first.
        :returns: dict
        """
        self.get_exif_metadata()

        self.metadata = {}
        # Retrieve selected metadata to dict
        if not self.exif_metadata:
            return self.metadata

        self._collect_tags_metadata()

        self.metadata['src_path'] = self.root
        self.metadata['subdirs'] = self.subdirs
        self.metadata['filename'] = self.filename
        self.metadata['date_taken'] = self.get_date_taken()

        if self.album_from_folder:
            self._resolve_album_from_folder()

        self._collect_location_metadata(loc, db, cache)

        return self.metadata

    def has_exif_data(self):
        """Check if file has metadata, date original"""
        if not self.metadata:
            return False
        return self.metadata.get('date_original') is not None

    @classmethod
    def get_class_by_file(cls, _file, classes, ignore_tags=None,
            logger=logging.getLogger(), root=None):
        """Get a media object by file.

        :param _file: Path of the file.
        :param classes: Candidate classes; the first one listing the file
            extension in its ``extensions`` is instantiated, else
            :class:`Media`.
        :param set ignore_tags: Regular expressions of exif tags to ignore.
        :param logger: Logger handed to the media object.
        :param root: Root directory handed to the media object; defaults to
            the directory containing ``_file``.
        :returns: media object, or None when ``_file`` is not a file
        """
        if not os.path.isfile(_file):
            return None

        if ignore_tags is None:
            ignore_tags = set()
        if root is None:
            # The constructor requires a root the file is located under.
            root = os.path.dirname(_file)

        extension = os.path.splitext(_file)[1][1:].lower()

        if len(extension) > 0:
            for i in classes:
                if extension in i.extensions:
                    return i(_file, root, ignore_tags=ignore_tags, logger=logger)

        return Media(_file, root, ignore_tags=ignore_tags, logger=logger)

    def set_value(self, tag, value):
        """Set value of a tag.

        :returns: result of ``ExifTool.setvalue``
        """
        return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)

    def set_date_taken(self, date_key, time):
        """Set the date/time a photo was taken.

        :param date_key: Not used; 'date_original' is written, then
            'date_created' if that fails.
        :param datetime time: datetime object of when the photo was taken
        :returns: False when ``time`` is None, else the status of the write
        """
        if time is None:
            return False

        formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
        status = self.set_value('date_original', formatted_time)
        # Equality (not truthiness) is kept on purpose: a None status does
        # not trigger the fallback.
        if status == False:  # noqa: E712
            # exif attribute date_original doesn't exist
            status = self.set_value('date_created', formatted_time)

        return status

    def set_coordinates(self, latitude, longitude):
        """Write the GPS coordinates to the file.

        When the metadata already hold a reference for a coordinate, the
        hemisphere is written to that reference and the coordinate itself is
        written unsigned; otherwise the signed value is written.

        :param float latitude: Signed latitude, negative for south.
        :param float longitude: Signed longitude, negative for west.
        :returns: bool, True when every write succeeded
        """
        status = []
        if self.metadata['latitude_ref']:
            # The sign has to be read before it is dropped by abs().
            latitude_ref = 'N' if latitude >= 0 else 'S'
            latitude = abs(latitude)
            status.append(self.set_value('latitude_ref', latitude_ref))

        status.append(self.set_value('latitude', latitude))

        if self.metadata['longitude_ref']:
            longitude_ref = 'E' if longitude >= 0 else 'W'
            longitude = abs(longitude)
            status.append(self.set_value('longitude_ref', longitude_ref))

        status.append(self.set_value('longitude', longitude))

        return all(status)

    def set_album_from_folder(self):
        """Set the album attribute based on the leaf folder name

        :returns: result of :meth:`set_value`
        """
        return self.set_value('album', self.folder)
2021-06-20 19:51:21 +02:00
def get_all_subclasses(cls=None):
    """Collect a class together with all of its direct and indirect
    subclasses.

    :param cls: Class to start from; :class:`Media` when omitted.
    :returns: set of classes, the starting class included
    """
    root_class = Media if cls is None else cls

    found = {root_class}
    # Recurse into each direct child so the whole hierarchy is gathered.
    for child in root_class.__subclasses__():
        found |= get_all_subclasses(child)

    return found
2021-07-16 21:26:42 +02:00
2021-08-08 13:02:15 +02:00
def get_media_class(_file, ignore_tags=set(), logger=logging.getLogger()):
    """Get the media object matching a file.

    Despite its name this returns an instance, built by
    ``Media.get_class_by_file`` from :class:`Media` or one of its
    subclasses.

    :param _file: Path of the file.
    :param set ignore_tags: Regular expressions of exif tags to ignore.
    :param logger: Logger used for messages and handed to the media object.
    :returns: media object, or False when the file does not exist or is not
        supported
    """
    if not os.path.exists(_file):
        logger.warning(f'Could not find {_file}')
        logger.error(f'Could not find {_file}')
        return False

    # Forward the caller's ignore_tags; a fresh empty set used to be passed
    # here, which silently discarded the argument.
    media = Media.get_class_by_file(_file, get_all_subclasses(),
            ignore_tags=ignore_tags, logger=logger)
    if not media:
        logger.warning(f'File{_file} is not supported')
        logger.error(f'File {_file} can\'t be imported')
        return False

    return media