Refactoring media class (2), create ReadExif and WriteExif classes

This commit is contained in:
Cédric Leporcq 2021-11-11 11:54:44 +01:00
parent a693e6018a
commit 8fd65fda34
4 changed files with 132 additions and 280 deletions

View File

@ -15,7 +15,7 @@ from pathlib import Path, PurePath
import inquirer import inquirer
from ordigi.database import Sqlite from ordigi.database import Sqlite
from ordigi.media import Media, Medias from ordigi.media import Medias
from ordigi.images import Image, Images from ordigi.images import Image, Images
from ordigi import request from ordigi import request
from ordigi.summary import Summary from ordigi.summary import Summary
@ -319,7 +319,7 @@ class Paths:
if extensions and '%media' in extensions: if extensions and '%media' in extensions:
extensions.remove('%media') extensions.remove('%media')
self.extensions = extensions.union(Media.extensions) self.extensions = extensions.union(Medias.extensions)
else: else:
self.extensions = extensions self.extensions = extensions
@ -362,8 +362,8 @@ class Paths:
def get_files(self, path): def get_files(self, path):
"""Recursively get files which match a path and extension. """Recursively get files which match a path and extension.
:param str path string: Path to start recursive file listing :param Path path: Path to start recursive file listing
:returns: Path file_path, Path subdirs :returns: Path generator File
""" """
for path0 in path.glob(self.glob): for path0 in path.glob(self.glob):
if path0.is_dir(): if path0.is_dir():

View File

@ -1,7 +1,3 @@
"""
Media :class:`Media` class to get file metadata
"""
import logging import logging
import mimetypes import mimetypes
import os import os
@ -16,20 +12,14 @@ from ordigi import utils
from ordigi import request from ordigi import request
class Metadata:
def __init__(self, ignore_tags=None): class ExifMetadata:
# Options def __init__(self, file_path, ignore_tags=None):
self.file_path = file_path
if ignore_tags is None: if ignore_tags is None:
ignore_tags = set() ignore_tags = set()
self.exif_metadata = []
self.metadata = {}
# self.datas = {}
self.ignore_tags = ignore_tags self.ignore_tags = ignore_tags
# Attributes
self.tags_keys = self.get_tags() self.tags_keys = self.get_tags()
def get_tags(self) -> dict: def get_tags(self) -> dict:
@ -77,33 +67,72 @@ class Metadata:
return tags_keys return tags_keys
def _del_ignored_tags(self):
for tag_regex in self.ignore_tags:
ignored_tags = set()
for tag in self.exif_metadata:
if re.search(tag_regex, tag) is not None:
ignored_tags.add(tag)
for ignored_tag in ignored_tags:
del self.exif_metadata[ignored_tag]
class WriteExif(Metadata): class ReadExif(ExifMetadata):
def __init__(
self,
file_path,
exif_metadata=None,
ignore_tags=None,
logger=logging.getLogger(),
):
super().__init__(file_path, ignore_tags)
# Options
self.logger = logger.getChild(self.__class__.__name__)
if exif_metadata:
self.exif_metadata = exif_metadata
else:
self.exif_metadata = self.get_exif_metadata()
def get_exif_metadata(self):
"""Get metadata from exiftool."""
return ExifToolCaching(self.file_path, logger=self.logger).asdict()
def _get_key_values(self, key):
"""
Get the first value of a tag set
:returns: str or None if no exif tag
"""
if self.exif_metadata is None:
return None
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
yield self.exif_metadata[tag]
def get_value(self, tag):
"""
Get given value from EXIF.
:returns: str or None
"""
if self.exif_metadata is None:
return None
if tag not in self.exif_metadata:
return None
return self.exif_metadata[tag]
class WriteExif(ExifMetadata):
def __init__( def __init__(
self, self,
file_path, file_path,
metadata, metadata,
exif_metadata=None,
ignore_tags=None, ignore_tags=None,
logger=logging.getLogger(), logger=logging.getLogger(),
): ):
super().__init__(ignore_tags)
self.file_path = file_path super().__init__(file_path, ignore_tags)
self.metadata = metadata self.metadata = metadata
if not exif_metadata:
exif_metadata = []
self.exif_metadata = exif_metadata
self.logger = logger.getChild(self.__class__.__name__) self.logger = logger.getChild(self.__class__.__name__)
def set_value(self, tag, value): def set_value(self, tag, value):
@ -116,13 +145,9 @@ class WriteExif(Metadata):
def set_key_values(self, key, value): def set_key_values(self, key, value):
"""Set tags values for given key""" """Set tags values for given key"""
status = True status = True
if self.exif_metadata is None:
return False
for tag in self.tags_keys[key]: for tag in self.tags_keys[key]:
if tag in self.exif_metadata: if not self.set_value(tag, value):
if not self.set_value(tag, value): status = False
status = False
return status return status
@ -137,7 +162,7 @@ class WriteExif(Metadata):
formatted_time = time.strftime('%Y:%m:%d %H:%M:%S') formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
status = self.set_value('date_original', formatted_time) status = self.set_value('date_original', formatted_time)
if status == False: if not status:
# exif attribute date_original d'ont exist # exif attribute date_original d'ont exist
status = self.set_value('date_created', formatted_time) status = self.set_value('date_created', formatted_time)
@ -173,38 +198,16 @@ class WriteExif(Metadata):
:returns: bool :returns: bool
""" """
return self.set_value('album', self.file_path.parent.name) return self.set_value('Album', self.file_path.parent.name)
class ReadExif(Metadata): class Media(ReadExif):
def __init__(
self,
file_path,
src_dir,
album_from_folder=False,
ignore_tags=None,
interactive=False,
logger=logging.getLogger(),
use_date_filename=False,
use_file_dates=False,
):
super().__init__(ignore_tags)
class Media:
""" """
Extract matadatas from exiftool and sort them to dict structure Extract matadatas from exiftool and sort them to dict structure
""" """
d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'} d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'}
PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
AUDIO = ('m4a',)
VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')
extensions = PHOTO + AUDIO + VIDEO
def __init__( def __init__(
self, self,
file_path, file_path,
@ -216,84 +219,28 @@ class Media:
use_date_filename=False, use_date_filename=False,
use_file_dates=False, use_file_dates=False,
): ):
""" super().__init__(
:params: Path, Path, bool, set, bool, Logger file_path,
""" ignore_tags=ignore_tags,
self.file_path = file_path logger=logger,
)
self.src_dir = src_dir self.src_dir = src_dir
self.album_from_folder = album_from_folder self.album_from_folder = album_from_folder
self.exif_metadata = None
if ignore_tags is None:
ignore_tags = set()
self.ignore_tags = ignore_tags
self.interactive = interactive self.interactive = interactive
self.logger = logger.getChild(self.__class__.__name__) self.logger = logger.getChild(self.__class__.__name__)
self.metadata = None
self.tags_keys = self.get_tags()
self.use_date_filename = use_date_filename self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates self.use_file_dates = use_file_dates
self.theme = request.load_theme() self.theme = request.load_theme()
def get_tags(self) -> dict: # get self.metadata
"""Get exif tags groups in dict""" self.get_metadata(self.file_path)
tags_keys = {}
tags_keys['date_original'] = [
'EXIF:DateTimeOriginal',
'H264:DateTimeOriginal',
'QuickTime:ContentCreateDate',
]
tags_keys['date_created'] = [
'EXIF:CreateDate',
'QuickTime:CreationDate',
'QuickTime:CreateDate',
'QuickTime:CreationDate-und-US',
'QuickTime:MediaCreateDate',
]
tags_keys['date_modified'] = ['File:FileModifyDate', 'QuickTime:ModifyDate']
tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
tags_keys['title'] = ['XMP:Title', 'XMP:DisplayName']
tags_keys['latitude'] = [
'EXIF:GPSLatitude',
'XMP:GPSLatitude',
# 'QuickTime:GPSLatitude',
'Composite:GPSLatitude',
]
tags_keys['longitude'] = [
'EXIF:GPSLongitude',
'XMP:GPSLongitude',
# 'QuickTime:GPSLongitude',
'Composite:GPSLongitude',
]
tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef']
tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef']
tags_keys['original_name'] = ['XMP:OriginalFileName']
# Remove ignored tag from list
for tag_regex in self.ignore_tags:
for key, tags in tags_keys.items():
for i, tag in enumerate(tags):
if re.match(tag_regex, tag):
del tags_keys[key][i]
return tags_keys
def _del_ignored_tags(self, exif_metadata):
for tag_regex in self.ignore_tags:
ignored_tags = set()
for tag in exif_metadata:
if re.search(tag_regex, tag) is not None:
ignored_tags.add(tag)
for ignored_tag in ignored_tags:
del exif_metadata[ignored_tag]
def get_mimetype(self): def get_mimetype(self):
"""Get the mimetype of the file. """
Get the mimetype of the file.
:returns: str or None :returns: str or None
""" """
mimetype = mimetypes.guess_type(self.file_path) mimetype = mimetypes.guess_type(self.file_path)
@ -302,50 +249,6 @@ class Media:
return mimetype[0] return mimetype[0]
def _get_key_values(self, key):
"""
Get the first value of a tag set
:returns: str or None if no exif tag
"""
if self.exif_metadata is None:
return None
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
yield self.exif_metadata[tag]
def get_value(self, tag):
"""
Get given value from EXIF.
:returns: str or None
"""
if self.exif_metadata is None:
return None
if tag not in self.exif_metadata:
return None
return self.exif_metadata[tag]
def get_date_format(self, value):
"""Formatting date attribute.
:returns: datetime object or None
"""
# We need to parse a string to datetime format.
# EXIF DateTimeOriginal and EXIF DateTime are both stored
# in %Y:%m:%d %H:%M:%S format
if value is None:
return None
try:
# correct nasty formated date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
if re.match(regex, value) is not None: # noqa
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
return parser.parse(value)
except BaseException or parser._parser.ParserError as e:
self.logger.warning(e.args, value)
return None
def get_coordinates(self, key, value): def get_coordinates(self, key, value):
"""Get latitude or longitude value """Get latitude or longitude value
@ -382,6 +285,27 @@ class Media:
return None return None
def get_date_format(self, value):
"""
Formatting date attribute.
:returns: datetime object or None
"""
# We need to parse a string to datetime format.
# EXIF DateTimeOriginal and EXIF DateTime are both stored
# in %Y:%m:%d %H:%M:%S format
if value is None:
return None
try:
# correct nasty formated date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
if re.match(regex, value) is not None: # noqa
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
return parser.parse(value)
except BaseException or parser._parser.ParserError as e:
self.logger.warning(e.args, value)
return None
def _get_date_media_interactive(self, choices, default): def _get_date_media_interactive(self, choices, default):
print(f"Date conflict for file: {self.file_path}") print(f"Date conflict for file: {self.file_path}")
choices_list = [ choices_list = [
@ -483,12 +407,6 @@ class Media:
default = date_filename default = date_filename
return self._get_date_media_interactive(choices, default) return self._get_date_media_interactive(choices, default)
def get_exif_metadata(self):
# Get metadata from exiftool.
self.exif_metadata = ExifToolCaching(
self.file_path, logger=self.logger
).asdict()
def _set_album(self, album, folder): def _set_album(self, album, folder):
print(f"Metadata conflict for file: {self.file_path}") print(f"Metadata conflict for file: {self.file_path}")
choices_list = [ choices_list = [
@ -511,23 +429,15 @@ class Media:
if not answers['album']: if not answers['album']:
answers = inquirer.prompt(prompt, theme=self.theme) answers = inquirer.prompt(prompt, theme=self.theme)
return answers['custom'] return answers['custom']
else:
return answers['album'] return answers['album']
def _set_metadata_from_exif(self): def _set_metadata_from_exif(self):
"""
self.metadata['src_dir'] = str(self.src_dir) Get selected metadata from exif to dict structure
self.metadata['subdirs'] = str( """
self.file_path.relative_to(self.src_dir).parent
)
self.metadata['filename'] = self.file_path.name
# Get metadata from exif
self.get_exif_metadata()
# Retrieve selected metadata to dict
if not self.exif_metadata: if not self.exif_metadata:
return self.metadata return
for key in self.tags_keys: for key in self.tags_keys:
formated_data = None formated_data = None
@ -655,7 +565,7 @@ class Media:
if not album or album == '': if not album or album == '':
self.metadata['album'] = folder self.metadata['album'] = folder
def get_metadata(self, root, loc=None, db=None, cache=False) -> dict: def get_metadata(self, root, loc=None, db=None, cache=False):
""" """
Get a dictionary of metadata from exif. Get a dictionary of metadata from exif.
All keys will be present and have a value of None if not obtained. All keys will be present and have a value of None if not obtained.
@ -670,6 +580,12 @@ class Media:
if db_checksum: if db_checksum:
location_id = self._set_metadata_from_db(db, relpath) location_id = self._set_metadata_from_db(db, relpath)
else: else:
self.metadata['src_dir'] = str(self.src_dir)
self.metadata['subdirs'] = str(
self.file_path.relative_to(self.src_dir).parent
)
self.metadata['filename'] = self.file_path.name
self._set_metadata_from_exif() self._set_metadata_from_exif()
self.metadata['date_media'] = self.get_date_media() self.metadata['date_media'] = self.get_date_media()
@ -679,8 +595,6 @@ class Media:
if self.album_from_folder: if self.album_from_folder:
self._set_album_from_folder() self._set_album_from_folder()
return self.metadata
def has_exif_data(self): def has_exif_data(self):
"""Check if file has metadata, date original""" """Check if file has metadata, date original"""
if not self.metadata: if not self.metadata:
@ -692,78 +606,17 @@ class Media:
return False return False
def set_value(self, tag, value):
"""Set value of a tag.
:returns: value (str)
"""
return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)
def set_key_values(self, key, value):
"""Set tags values for given key"""
status = True
if self.exif_metadata is None:
return False
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
if not self.set_value(tag, value):
status = False
return status
def set_date_media(self, time):
"""Set the date/time a photo was taken.
:param datetime time: datetime object of when the photo was taken
:returns: bool
"""
if time is None:
return False
formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
status = self.set_value('date_original', formatted_time)
if status == False:
# exif attribute date_original d'ont exist
status = self.set_value('date_created', formatted_time)
return status
def set_coordinates(self, latitude, longitude):
status = []
if self.metadata['latitude_ref']:
latitude = abs(latitude)
if latitude > 0:
status.append(self.set_value('latitude_ref', 'N'))
else:
status.append(self.set_value('latitude_ref', 'S'))
status.append(self.set_value('latitude', latitude))
if self.metadata['longitude_ref']:
longitude = abs(longitude)
if longitude > 0:
status.append(self.set_value('latitude_ref', 'E'))
else:
status.append(self.set_value('longitude_ref', 'W'))
status.append(self.set_value('longitude', longitude))
if all(status):
return True
else:
return False
def set_album_from_folder(self):
"""Set the album attribute based on the leaf folder name
:returns: bool
"""
return self.set_value('album', self.file_path.parent.name)
class Medias: class Medias:
"""Get media data in collection or source path""" """
Extract matadatas from exiftool in paths and sort them to dict structure
"""
PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
AUDIO = ('m4a',)
VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')
extensions = PHOTO + AUDIO + VIDEO
def __init__( def __init__(
self, self,
@ -801,7 +654,7 @@ class Medias:
self.theme = request.load_theme() self.theme = request.load_theme()
def get_media(self, file_path, src_dir, loc=None): def get_media(self, file_path, src_dir, loc=None):
media = Media( return Media(
file_path, file_path,
src_dir, src_dir,
self.album_from_folder, self.album_from_folder,
@ -811,9 +664,6 @@ class Medias:
self.use_date_filename, self.use_date_filename,
self.use_file_dates, self.use_file_dates,
) )
media.get_metadata(self.root, loc, self.db.sqlite, self.cache)
return media
def get_medias(self, src_dirs, imp=False, loc=None): def get_medias(self, src_dirs, imp=False, loc=None):
"""Get medias data""" """Get medias data"""
@ -837,7 +687,12 @@ class Medias:
def update_exif_data(self, metadata): def update_exif_data(self, metadata):
file_path = self.root / metadata['file_path'] file_path = self.root / metadata['file_path']
exif = WriteExif(file_path, metadata, self.ignore_tags) exif = WriteExif(
file_path,
metadata,
ignore_tags=self.ignore_tags,
logger=self.logger
)
updated = False updated = False
if self.album_from_folder: if self.album_from_folder:

View File

@ -65,7 +65,7 @@ class TestFPath:
exif_data = ExifToolCaching(str(file_path)).asdict() exif_data = ExifToolCaching(str(file_path)).asdict()
loc = GeoLocation() loc = GeoLocation()
metadata = media.get_metadata(self.src_path, loc) metadata = media.metadata
for item, regex in items.items(): for item, regex in items.items():
for mask in masks: for mask in masks:
matched = re.search(regex, mask) matched = re.search(regex, mask)
@ -153,10 +153,10 @@ class TestCollection:
assert not summary.errors assert not summary.errors
# check if album value are set # check if album value are set
for file_path in tmp_path.glob('**/*'): paths = Paths(glob='**/*').get_files(tmp_path)
for file_path in paths:
if '.db' not in str(file_path): if '.db' not in str(file_path):
media = Media(file_path, tmp_path, album_from_folder=True) media = Media(file_path, tmp_path, album_from_folder=True)
media.get_exif_metadata()
for value in media._get_key_values('album'): for value in media._get_key_values('album'):
assert value != '' or None assert value != '' or None
@ -206,7 +206,6 @@ class TestCollection:
# copy mode # copy mode
src_path = Path(self.src_path, 'test_exif', 'photo.png') src_path = Path(self.src_path, 'test_exif', 'photo.png')
media = Media(src_path, self.src_path) media = Media(src_path, self.src_path)
media.get_metadata(tmp_path)
name = 'photo_' + str(imp) + '.png' name = 'photo_' + str(imp) + '.png'
media.metadata['file_path'] = name media.metadata['file_path'] = name
dest_path = Path(tmp_path, name) dest_path = Path(tmp_path, name)

View File

@ -14,7 +14,7 @@ from ordigi.utils import get_date_from_string
ORDIGI_PATH = Path(__file__).parent.parent ORDIGI_PATH = Path(__file__).parent.parent
CACHING = True CACHING = True
class TestMetadata: class TestMedia:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_class(cls, sample_files_paths): def setup_class(cls, sample_files_paths):
@ -31,8 +31,7 @@ class TestMetadata:
for file_path, media in self.get_media(): for file_path, media in self.get_media():
# test get metadata from cache or exif # test get metadata from cache or exif
for root in self.src_path, tmp_path: for root in self.src_path, tmp_path:
result = media.get_metadata(root) media.get_metadata(root)
assert result
assert isinstance(media.metadata, dict), media.metadata assert isinstance(media.metadata, dict), media.metadata
#check if all tags key are present #check if all tags key are present
for tags_key, tags in media.tags_keys.items(): for tags_key, tags in media.tags_keys.items():
@ -77,7 +76,6 @@ class TestMetadata:
exif_data = ExifToolCaching(str(file_path)).asdict() exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(file_path, self.src_path, use_date_filename=True, media = Media(file_path, self.src_path, use_date_filename=True,
use_file_dates=True) use_file_dates=True)
metadata = media.get_metadata(self.src_path)
date_media = media.get_date_media() date_media = media.get_date_media()
date_filename = None date_filename = None