2015-10-07 08:47:51 +02:00
|
|
|
"""
|
2021-07-30 07:41:02 +02:00
|
|
|
Base :class:`Media` class for media objects that are tracked by Dozo.
|
|
|
|
The Media class provides some base functionality used by all the media types.
|
|
|
|
Sub-classes include :class:`~dozo.media.Audio`, :class:`~dozo.media.Photo`, and :class:`~dozo.media.Video`.
|
2015-10-07 08:47:51 +02:00
|
|
|
"""
|
|
|
|
|
2021-06-20 19:51:21 +02:00
|
|
|
import mimetypes
|
2017-01-22 09:19:44 +01:00
|
|
|
import os
|
2020-01-18 18:03:40 +01:00
|
|
|
import six
|
2021-07-16 21:26:42 +02:00
|
|
|
import logging
|
2017-01-22 09:19:44 +01:00
|
|
|
|
2015-10-07 08:47:51 +02:00
|
|
|
# load modules
|
2021-04-17 05:08:58 +02:00
|
|
|
from dateutil.parser import parse
|
|
|
|
import re
|
2021-08-13 19:09:52 +02:00
|
|
|
from dozo.exiftool import ExifTool, ExifToolCaching
|
2015-10-21 08:51:14 +02:00
|
|
|
|
2021-06-20 19:51:21 +02:00
|
|
|
class Media():
    """Base class for all media objects.

    An instance wraps one file path and exposes selected exiftool metadata
    under a small set of normalised keys (see :meth:`get_tags`).

    :param str sources: The fully qualified path to the media file.
    :param set ignore_tags: Regular expressions; exiftool tags matching one
        of them are removed from the tag lookup table.
    :param logging.Logger logger: Logger used to report problems.
    """

    __name__ = 'Media'

    # Coordinate key -> key holding the matching hemisphere reference.
    d_coordinates = {
        'latitude': 'latitude_ref',
        'longitude': 'longitude_ref'
    }

    PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png',
             'rw2')
    AUDIO = ('m4a',)
    VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')

    # File extensions (lower case, no dot) handled by this class.
    extensions = PHOTO + AUDIO + VIDEO

    def __init__(self, sources=None, ignore_tags=set(),
                 logger=logging.getLogger()):
        # NOTE: the ``set()`` default is shared between calls; it is only
        # ever read by this class, never mutated.
        self.source = sources
        self.ignore_tags = ignore_tags
        self.tags_keys = self.get_tags()
        self.exif_metadata = None
        self.metadata = None
        self.logger = logger

    def get_tags(self):
        """Build the table of exiftool tags to look up for each metadata key.

        Tags are listed in lookup order. Every tag matching (``re.match``)
        one of the ``self.ignore_tags`` expressions is left out.

        :returns: dict mapping a metadata key to a list of exiftool tag names
        """
        tags_keys = {}
        tags_keys['date_original'] = [
            'EXIF:DateTimeOriginal',
            'H264:DateTimeOriginal',
            'QuickTime:ContentCreateDate'
        ]
        tags_keys['date_created'] = [
            'EXIF:CreateDate',
            'QuickTime:CreationDate',
            'QuickTime:CreateDate',
            'QuickTime:CreationDate-und-US',
            'QuickTime:MediaCreateDate'
        ]
        tags_keys['date_modified'] = [
            'File:FileModifyDate',
            'QuickTime:ModifyDate'
        ]
        tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
        tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
        tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
        tags_keys['title'] = ['XMP:Title', 'XMP:DisplayName']
        tags_keys['latitude'] = [
            'EXIF:GPSLatitude',
            'XMP:GPSLatitude',
            # 'QuickTime:GPSLatitude',
            'Composite:GPSLatitude'
        ]
        tags_keys['longitude'] = [
            'EXIF:GPSLongitude',
            'XMP:GPSLongitude',
            # 'QuickTime:GPSLongitude',
            'Composite:GPSLongitude'
        ]
        tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef']
        tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef']
        tags_keys['original_name'] = ['XMP:OriginalFileName']

        # Remove ignored tags. Each list is rebuilt instead of deleting
        # entries while enumerating it, which skipped the element following
        # every deletion and left consecutive matches in place.
        for tag_regex in self.ignore_tags:
            for key, tags in tags_keys.items():
                tags_keys[key] = [
                    tag for tag in tags if not re.match(tag_regex, tag)
                ]

        return tags_keys

    def _del_ignored_tags(self, exif_metadata):
        """Delete ignored tags from ``exif_metadata`` in place.

        A tag is ignored when one of the ``self.ignore_tags`` expressions is
        found anywhere in its name (``re.search``).

        :param dict exif_metadata: Mapping keyed by exiftool tag name.
        """
        for tag_regex in self.ignore_tags:
            # Collect first: a dict must not change size while iterated.
            ignored_tags = {
                tag for tag in exif_metadata
                if re.search(tag_regex, tag) is not None
            }
            for ignored_tag in ignored_tags:
                del exif_metadata[ignored_tag]

    def get_mimetype(self):
        """Get the mimetype of the file, guessed from its name.

        :returns: str or None
        """
        if self.source is None:
            return None

        # guess_type() always returns a (type, encoding) tuple; the type
        # element is None when nothing could be guessed.
        return mimetypes.guess_type(self.source)[0]

    def _get_key_values(self, key):
        """Yield the value of every tag of ``key`` present in the exif data.

        Values are yielded in the order the tags are listed in
        ``self.tags_keys[key]``. Nothing is yielded when no exif metadata
        has been loaded.

        :param str key: A key of ``self.tags_keys``.
        :returns: generator of tag values
        """
        if self.exif_metadata is None:
            return

        for tag in self.tags_keys[key]:
            if tag in self.exif_metadata:
                yield self.exif_metadata[tag]

    def get_value(self, tag):
        """Get given value from EXIF.

        :param str tag: Name of the attribute to read.
        :returns: str or None
        """
        # NOTE(review): get_exiftool_attributes() is not defined in this
        # class; presumably provided elsewhere -- confirm.
        exiftool_attributes = self.get_exiftool_attributes()
        if exiftool_attributes is None:
            return None
        if tag not in exiftool_attributes:
            return None

        return exiftool_attributes[tag]

    def get_date_format(self, value):
        """Format a date attribute.

        :param str value: Date string as read from the exif data.
        :returns: datetime object or None
        """
        # We need to parse a string to datetime format.
        # EXIF DateTimeOriginal and EXIF DateTime are both stored
        # in %Y:%m:%d %H:%M:%S format
        if value is None:
            return None

        try:
            # Rewrite a leading YYYY:MM:DD date with dashes before parsing.
            regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
            if re.match(regex, value) is not None:
                value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
            return parse(value)
        except (TypeError, ValueError, OverflowError) as e:
            # dateutil's ParserError is a ValueError subclass. The former
            # ``except BaseException or ...`` clause evaluated to plain
            # BaseException and also swallowed KeyboardInterrupt/SystemExit.
            self.logger.error(e)
            return None

    def get_coordinates(self, key, value):
        """Get latitude or longitude value as a signed float.

        :param str key: Type of coordinate to get. Either "latitude" or
            "longitude".
        :param value: Raw coordinate value (number or numeric string).
        :returns: float or None
        """
        if value is None:
            return None

        if isinstance(value, str) and len(value) == 0:
            # If exiftool GPS output is empty, the data returned will be a str
            # with 0 length.
            # https://github.com/jmathai/elodie/issues/354
            return None

        # Cast coordinate to a float due to a bug in exiftool's
        #   -json output format.
        # https://github.com/jmathai/elodie/issues/171
        # http://u88.n24.queensu.ca/exiftool/forum/index.php/topic,7952.0.html  # noqa
        try:
            this_coordinate = float(value)
        except (TypeError, ValueError) as e:
            self.logger.error(e)
            return None

        # Reference tag and the reference value meaning "negative".
        negative_refs = {
            'latitude': ('EXIF:GPSLatitudeRef', 'S'),
            'longitude': ('EXIF:GPSLongitudeRef', 'W'),
        }
        exif_metadata = self.exif_metadata or {}
        if key in negative_refs:
            ref_tag, negative_ref = negative_refs[key]
            if exif_metadata.get(ref_tag) == negative_ref:
                # -abs() rather than * -1 so that a value which is already
                # signed is not flipped back to the wrong hemisphere.
                return -abs(this_coordinate)

        return this_coordinate

    def get_metadata(self):
        """Get a dictionary of metadata from exif.

        When exif data is available, every key of ``self.tags_keys`` is
        present and has a value of None if not obtained; ``base_name``,
        ``ext`` and ``directory_path`` are added from the file path. When no
        exif data could be read, an empty dict is returned.

        :returns: dict
        """
        # Get metadata from exiftool.
        self.exif_metadata = ExifToolCaching(self.source,
                                             logger=self.logger).asdict()

        # TODO to be removed
        self.metadata = {}
        # Retrieve selected metadata to dict
        if not self.exif_metadata:
            return self.metadata

        for key in self.tags_keys:
            formated_data = None
            for value in self._get_key_values(key):
                if 'date' in key:
                    formated_data = self.get_date_format(value)
                elif key in ('latitude', 'longitude'):
                    formated_data = self.get_coordinates(key, value)
                elif value is not None and value != '':
                    formated_data = value
                else:
                    formated_data = None

                # Use the first usable value. Compared with None so that a
                # legitimate falsy value (e.g. a 0.0 coordinate) is kept.
                if formated_data is not None:
                    break

            self.metadata[key] = formated_data

        base_path, ext = os.path.splitext(self.source)
        self.metadata['base_name'] = os.path.basename(base_path)
        self.metadata['ext'] = ext[1:]
        self.metadata['directory_path'] = os.path.dirname(self.source)

        return self.metadata

    def has_exif_data(self):
        """Check if the loaded metadata holds a ``date_original`` value.

        :returns: bool
        """
        if not self.metadata:
            return False

        return self.metadata.get('date_original') is not None

    @classmethod
    def get_class_by_file(cls, _file, classes, ignore_tags=set(),
                          logger=logging.getLogger()):
        """Get a media object for a file.

        :param str _file: Path of an existing file.
        :param classes: Iterable of classes exposing an ``extensions``
            attribute; the first one listing the file extension is used.
        :param set ignore_tags: Passed on to the created object.
        :param logging.Logger logger: Passed on when falling back to
            :class:`Media`.
        :returns: media object, or None when ``_file`` is not an existing
            file path or is an excluded file name.
        """
        basestring = (bytes, str)
        if not isinstance(_file, basestring) or not os.path.isfile(_file):
            return None

        extension = os.path.splitext(_file)[1][1:].lower()

        if len(extension) > 0:
            for i in classes:
                if extension in i.extensions:
                    # NOTE(review): logger is not forwarded here; the
                    # constructors of the classes are not visible -- confirm
                    # they accept it before passing it on.
                    return i(_file, ignore_tags=ignore_tags)

        # The list was built but only '.DS_Store' was tested; honour every
        # entry.
        exclude_list = ['.DS_Store', '.directory']
        if os.path.basename(_file) in exclude_list:
            return None

        return Media(_file, ignore_tags=ignore_tags, logger=logger)

    def set_date_taken(self, date_key, time):
        """Set the date/time a photo was taken.

        Writes ``date_original`` and falls back to ``date_created`` when
        that fails.

        :param date_key: Currently unused.
        :param datetime time: datetime object of when the photo was taken
        :returns: False when ``time`` is None, otherwise the status returned
            by ``set_value``
        """
        if time is None:
            return False

        formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
        # NOTE(review): set_value() is not defined in this class;
        # presumably provided elsewhere -- confirm.
        status = self.set_value('date_original', formatted_time)
        # Equality kept on purpose: set_value's return contract is not
        # visible here, so only an explicit False triggers the fallback.
        if status == False:  # noqa: E712
            # exif attribute date_original doesn't exist
            status = self.set_value('date_created', formatted_time)

        return status

    def set_coordinates(self, latitude, longitude):
        """Write latitude and longitude to the file metadata.

        When the loaded metadata already holds a hemisphere reference, the
        reference is rewritten from the sign of the coordinate and the
        absolute value is stored.

        :param float latitude: Signed latitude.
        :param float longitude: Signed longitude.
        :returns: bool, True when every write reported success
        """
        status = []
        metadata = self.metadata or {}

        if metadata.get('latitude_ref'):
            # The sign must be read before abs() discards it.
            latitude_ref = 'N' if latitude >= 0 else 'S'
            latitude = abs(latitude)
            status.append(self.set_value('latitude_ref', latitude_ref))

        status.append(self.set_value('latitude', latitude))

        if metadata.get('longitude_ref'):
            longitude_ref = 'E' if longitude >= 0 else 'W'
            longitude = abs(longitude)
            # Was written to 'latitude_ref' for the 'E' case.
            status.append(self.set_value('longitude_ref', longitude_ref))

        status.append(self.set_value('longitude', longitude))

        return all(status)

    def set_album_from_folder(self, path):
        """Set the album attribute based on the leaf folder name.

        The folder is taken from ``self.source``.

        :param path: Currently unused.
        :returns: the status returned by ``set_value``
        """
        folder = os.path.basename(os.path.dirname(self.source))

        return self.set_value('album', folder)
|
2021-06-20 19:51:21 +02:00
|
|
|
|
|
|
|
|
|
|
|
def get_all_subclasses(cls=None):
    """Collect a class together with all of its direct and indirect subclasses.

    :param cls: Class to start from; :class:`Media` when omitted.
    :returns: set holding the starting class and every class derived from it
    """
    root = Media if cls is None else cls

    # The starting class itself is part of the result.
    found = {root}
    for child in root.__subclasses__():
        found |= get_all_subclasses(child)

    return found
|
|
|
|
|
2021-07-16 21:26:42 +02:00
|
|
|
|
2021-08-08 13:02:15 +02:00
|
|
|
def get_media_class(_file, ignore_tags=set(), logger=logging.getLogger()):
    """Get the media object matching a file.

    :param str _file: Path of the file to wrap.
    :param set ignore_tags: Regular expressions of exiftool tags to ignore;
        forwarded to the created media object.
    :param logging.Logger logger: Logger used to report problems.
    :returns: media object, or False when the file does not exist or no
        media object could be created for it
    """
    if not os.path.exists(_file):
        # NOTE(review): the same event is reported at two levels; consider
        # keeping only one.
        logger.warning(f'Could not find {_file}')
        logger.error(f'Could not find {_file}')
        return False

    # Forward the caller's ignore_tags; an empty set() used to be passed
    # here, silently discarding the argument.
    media = Media.get_class_by_file(_file, get_all_subclasses(),
                                    ignore_tags=ignore_tags, logger=logger)
    if not media:
        logger.warning(f'File {_file} is not supported')
        logger.error(f'File {_file} can\'t be imported')
        return False

    return media
|
|
|
|
|