From 8fd65fda345dba636827819a815ff05c4adbef1f Mon Sep 17 00:00:00 2001 From: Cedric Leporcq Date: Thu, 11 Nov 2021 11:54:44 +0100 Subject: [PATCH] Refactoring media class (2), create ReadExif and WriteExif class --- ordigi/collection.py | 8 +- ordigi/media.py | 391 ++++++++++++--------------------------- tests/test_collection.py | 7 +- tests/test_media.py | 6 +- 4 files changed, 132 insertions(+), 280 deletions(-) diff --git a/ordigi/collection.py b/ordigi/collection.py index 0f056a7..18aae67 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -15,7 +15,7 @@ from pathlib import Path, PurePath import inquirer from ordigi.database import Sqlite -from ordigi.media import Media, Medias +from ordigi.media import Medias from ordigi.images import Image, Images from ordigi import request from ordigi.summary import Summary @@ -319,7 +319,7 @@ class Paths: if extensions and '%media' in extensions: extensions.remove('%media') - self.extensions = extensions.union(Media.extensions) + self.extensions = extensions.union(Medias.extensions) else: self.extensions = extensions @@ -362,8 +362,8 @@ class Paths: def get_files(self, path): """Recursively get files which match a path and extension. - :param str path string: Path to start recursive file listing - :returns: Path file_path, Path subdirs + :param Path path: Path to start recursive file listing + :returns: Path generator File """ for path0 in path.glob(self.glob): if path0.is_dir(): diff --git a/ordigi/media.py b/ordigi/media.py index 409d59a..149f77a 100644 --- a/ordigi/media.py +++ b/ordigi/media.py @@ -1,7 +1,3 @@ -""" -Media :class:`Media` class to get file metadata -""" - import logging import mimetypes import os @@ -16,20 +12,14 @@ from ordigi import utils from ordigi import request -class Metadata: - def __init__(self, ignore_tags=None): +class ExifMetadata: - # Options + def __init__(self, file_path, ignore_tags=None): + self.file_path = file_path if ignore_tags is None: ignore_tags = set() - - self.exif_metadata = [] - self.metadata = {} - # self.datas = {} self.ignore_tags = ignore_tags - - # Attributes self.tags_keys = self.get_tags() def get_tags(self) -> dict: @@ -77,33 +67,72 @@ class Metadata: return tags_keys - def _del_ignored_tags(self): - for tag_regex in self.ignore_tags: - ignored_tags = set() - for tag in self.exif_metadata: - if re.search(tag_regex, tag) is not None: - ignored_tags.add(tag) - for ignored_tag in ignored_tags: - del self.exif_metadata[ignored_tag] -class WriteExif(Metadata): +class ReadExif(ExifMetadata): + + def __init__( + self, + file_path, + exif_metadata=None, + ignore_tags=None, + logger=logging.getLogger(), + ): + + super().__init__(file_path, ignore_tags) + + # Options + self.logger = logger.getChild(self.__class__.__name__) + + if exif_metadata: + self.exif_metadata = exif_metadata + else: + self.exif_metadata = self.get_exif_metadata() + + + def get_exif_metadata(self): + """Get metadata from exiftool.""" + + return ExifToolCaching(self.file_path, logger=self.logger).asdict() + + def _get_key_values(self, key): + """ + Get the first value of a tag set + :returns: str or None if no exif tag + """ + if self.exif_metadata is None: + return None + + for tag in self.tags_keys[key]: + if tag in self.exif_metadata: + yield self.exif_metadata[tag] + + def get_value(self, tag): + """ + Get given value from EXIF. + :returns: str or None + """ + if self.exif_metadata is None: + return None + if tag not in self.exif_metadata: + return None + + return self.exif_metadata[tag] + + +class WriteExif(ExifMetadata): def __init__( self, file_path, metadata, - exif_metadata=None, ignore_tags=None, logger=logging.getLogger(), ): - super().__init__(ignore_tags) - self.file_path = file_path + + super().__init__(file_path, ignore_tags) + self.metadata = metadata - if not exif_metadata: - exif_metadata = [] - - self.exif_metadata = exif_metadata self.logger = logger.getChild(self.__class__.__name__) def set_value(self, tag, value): @@ -116,13 +145,9 @@ class WriteExif(Metadata): def set_key_values(self, key, value): """Set tags values for given key""" status = True - if self.exif_metadata is None: - return False - for tag in self.tags_keys[key]: - if tag in self.exif_metadata: - if not self.set_value(tag, value): - status = False + if not self.set_value(tag, value): + status = False return status @@ -137,7 +162,7 @@ class WriteExif(Metadata): formatted_time = time.strftime('%Y:%m:%d %H:%M:%S') status = self.set_value('date_original', formatted_time) - if status == False: + if not status: # exif attribute date_original d'ont exist status = self.set_value('date_created', formatted_time) @@ -173,38 +198,16 @@ class WriteExif(Metadata): :returns: bool """ - return self.set_value('album', self.file_path.parent.name) + return self.set_value('Album', self.file_path.parent.name) -class ReadExif(Metadata): - def __init__( - self, - file_path, - src_dir, - album_from_folder=False, - ignore_tags=None, - interactive=False, - logger=logging.getLogger(), - use_date_filename=False, - use_file_dates=False, - ): - super().__init__(ignore_tags) - - - -class Media: +class Media(ReadExif): """ Extract matadatas from exiftool and sort them to dict structure """ d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'} - PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2') - AUDIO = ('m4a',) - VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts') - - extensions = PHOTO + AUDIO + VIDEO - def __init__( self, file_path, @@ -216,84 +219,28 @@ class Media: use_date_filename=False, use_file_dates=False, ): - """ - :params: Path, Path, bool, set, bool, Logger - """ - self.file_path = file_path + super().__init__( + file_path, + ignore_tags=ignore_tags, + logger=logger, + ) + self.src_dir = src_dir self.album_from_folder = album_from_folder - self.exif_metadata = None - - if ignore_tags is None: - ignore_tags = set() - self.ignore_tags = ignore_tags self.interactive = interactive self.logger = logger.getChild(self.__class__.__name__) - self.metadata = None - self.tags_keys = self.get_tags() self.use_date_filename = use_date_filename self.use_file_dates = use_file_dates self.theme = request.load_theme() - def get_tags(self) -> dict: - """Get exif tags groups in dict""" - tags_keys = {} - tags_keys['date_original'] = [ - 'EXIF:DateTimeOriginal', - 'H264:DateTimeOriginal', - 'QuickTime:ContentCreateDate', - ] - tags_keys['date_created'] = [ - 'EXIF:CreateDate', - 'QuickTime:CreationDate', - 'QuickTime:CreateDate', - 'QuickTime:CreationDate-und-US', - 'QuickTime:MediaCreateDate', - ] - tags_keys['date_modified'] = ['File:FileModifyDate', 'QuickTime:ModifyDate'] - tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make'] - tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model'] - tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album'] - tags_keys['title'] = ['XMP:Title', 'XMP:DisplayName'] - tags_keys['latitude'] = [ - 'EXIF:GPSLatitude', - 'XMP:GPSLatitude', - # 'QuickTime:GPSLatitude', - 'Composite:GPSLatitude', - ] - tags_keys['longitude'] = [ - 'EXIF:GPSLongitude', - 'XMP:GPSLongitude', - # 'QuickTime:GPSLongitude', - 'Composite:GPSLongitude', - ] - tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef'] - tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef'] - tags_keys['original_name'] = ['XMP:OriginalFileName'] - - # Remove ignored tag from list - for tag_regex in self.ignore_tags: - for key, tags in tags_keys.items(): - for i, tag in enumerate(tags): - if re.match(tag_regex, tag): - del tags_keys[key][i] - - return tags_keys - - def _del_ignored_tags(self, exif_metadata): - for tag_regex in self.ignore_tags: - ignored_tags = set() - for tag in exif_metadata: - if re.search(tag_regex, tag) is not None: - ignored_tags.add(tag) - for ignored_tag in ignored_tags: - del exif_metadata[ignored_tag] + # get self.metadata + self.get_metadata(self.file_path) def get_mimetype(self): - """Get the mimetype of the file. - + """ + Get the mimetype of the file. :returns: str or None """ mimetype = mimetypes.guess_type(self.file_path) @@ -302,50 +249,6 @@ class Media: return mimetype[0] - def _get_key_values(self, key): - """ - Get the first value of a tag set - :returns: str or None if no exif tag - """ - if self.exif_metadata is None: - return None - - for tag in self.tags_keys[key]: - if tag in self.exif_metadata: - yield self.exif_metadata[tag] - - def get_value(self, tag): - """ - Get given value from EXIF. - :returns: str or None - """ - if self.exif_metadata is None: - return None - if tag not in self.exif_metadata: - return None - - return self.exif_metadata[tag] - - def get_date_format(self, value): - """Formatting date attribute. - :returns: datetime object or None - """ - # We need to parse a string to datetime format. - # EXIF DateTimeOriginal and EXIF DateTime are both stored - # in %Y:%m:%d %H:%M:%S format - if value is None: - return None - - try: - # correct nasty formated date - regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})') - if re.match(regex, value) is not None: # noqa - value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value) - return parser.parse(value) - except BaseException or parser._parser.ParserError as e: - self.logger.warning(e.args, value) - return None - def get_coordinates(self, key, value): """Get latitude or longitude value @@ -382,6 +285,27 @@ class Media: return None + def get_date_format(self, value): + """ + Formatting date attribute. + :returns: datetime object or None + """ + # We need to parse a string to datetime format. + # EXIF DateTimeOriginal and EXIF DateTime are both stored + # in %Y:%m:%d %H:%M:%S format + if value is None: + return None + + try: + # correct nasty formated date + regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})') + if re.match(regex, value) is not None: # noqa + value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value) + return parser.parse(value) + except BaseException or parser._parser.ParserError as e: + self.logger.warning(e.args, value) + return None + def _get_date_media_interactive(self, choices, default): print(f"Date conflict for file: {self.file_path}") choices_list = [ @@ -483,12 +407,6 @@ class Media: default = date_filename return self._get_date_media_interactive(choices, default) - def get_exif_metadata(self): - # Get metadata from exiftool. - self.exif_metadata = ExifToolCaching( - self.file_path, logger=self.logger - ).asdict() - def _set_album(self, album, folder): print(f"Metadata conflict for file: {self.file_path}") choices_list = [ @@ -511,23 +429,15 @@ class Media: if not answers['album']: answers = inquirer.prompt(prompt, theme=self.theme) return answers['custom'] - else: - return answers['album'] + + return answers['album'] def _set_metadata_from_exif(self): - - self.metadata['src_dir'] = str(self.src_dir) - self.metadata['subdirs'] = str( - self.file_path.relative_to(self.src_dir).parent - ) - self.metadata['filename'] = self.file_path.name - # Get metadata from exif - - self.get_exif_metadata() - - # Retrieve selected metadata to dict + """ + Get selected metadata from exif to dict structure + """ if not self.exif_metadata: - return self.metadata + return for key in self.tags_keys: formated_data = None @@ -655,7 +565,7 @@ class Media: if not album or album == '': self.metadata['album'] = folder - def get_metadata(self, root, loc=None, db=None, cache=False) -> dict: + def get_metadata(self, root, loc=None, db=None, cache=False): """ Get a dictionary of metadata from exif. All keys will be present and have a value of None if not obtained. @@ -670,6 +580,12 @@ class Media: if db_checksum: location_id = self._set_metadata_from_db(db, relpath) else: + self.metadata['src_dir'] = str(self.src_dir) + self.metadata['subdirs'] = str( + self.file_path.relative_to(self.src_dir).parent + ) + self.metadata['filename'] = self.file_path.name + self._set_metadata_from_exif() self.metadata['date_media'] = self.get_date_media() @@ -679,8 +595,6 @@ class Media: if self.album_from_folder: self._set_album_from_folder() - return self.metadata - def has_exif_data(self): """Check if file has metadata, date original""" if not self.metadata: @@ -692,78 +606,17 @@ class Media: return False - def set_value(self, tag, value): - """Set value of a tag. - - :returns: value (str) - """ - return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value) - - def set_key_values(self, key, value): - """Set tags values for given key""" - status = True - if self.exif_metadata is None: - return False - - for tag in self.tags_keys[key]: - if tag in self.exif_metadata: - if not self.set_value(tag, value): - status = False - - return status - - def set_date_media(self, time): - """Set the date/time a photo was taken. - - :param datetime time: datetime object of when the photo was taken - :returns: bool - """ - if time is None: - return False - - formatted_time = time.strftime('%Y:%m:%d %H:%M:%S') - status = self.set_value('date_original', formatted_time) - if status == False: - # exif attribute date_original d'ont exist - status = self.set_value('date_created', formatted_time) - - return status - - def set_coordinates(self, latitude, longitude): - status = [] - if self.metadata['latitude_ref']: - latitude = abs(latitude) - if latitude > 0: - status.append(self.set_value('latitude_ref', 'N')) - else: - status.append(self.set_value('latitude_ref', 'S')) - - status.append(self.set_value('latitude', latitude)) - - if self.metadata['longitude_ref']: - longitude = abs(longitude) - if longitude > 0: - status.append(self.set_value('latitude_ref', 'E')) - else: - status.append(self.set_value('longitude_ref', 'W')) - - status.append(self.set_value('longitude', longitude)) - - if all(status): - return True - else: - return False - - def set_album_from_folder(self): - """Set the album attribute based on the leaf folder name - - :returns: bool - """ - return self.set_value('album', self.file_path.parent.name) - class Medias: - """Get media data in collection or source path""" + """ + Extract matadatas from exiftool in paths and sort them to dict structure + """ + + PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2') + AUDIO = ('m4a',) + VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts') + + extensions = PHOTO + AUDIO + VIDEO def __init__( self, @@ -801,7 +654,7 @@ class Medias: self.theme = request.load_theme() def get_media(self, file_path, src_dir, loc=None): - media = Media( + return Media( file_path, src_dir, self.album_from_folder, @@ -811,9 +664,6 @@ class Medias: self.use_date_filename, self.use_file_dates, ) - media.get_metadata(self.root, loc, self.db.sqlite, self.cache) - - return media def get_medias(self, src_dirs, imp=False, loc=None): """Get medias data""" @@ -837,7 +687,12 @@ class Medias: def update_exif_data(self, metadata): file_path = self.root / metadata['file_path'] - exif = WriteExif(file_path, metadata, self.ignore_tags) + exif = WriteExif( + file_path, + metadata, + ignore_tags=self.ignore_tags, + logger=self.logger + ) updated = False if self.album_from_folder: diff --git a/tests/test_collection.py b/tests/test_collection.py index e9f7b0e..1434bdf 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -65,7 +65,7 @@ class TestFPath: exif_data = ExifToolCaching(str(file_path)).asdict() loc = GeoLocation() - metadata = media.get_metadata(self.src_path, loc) + metadata = media.metadata for item, regex in items.items(): for mask in masks: matched = re.search(regex, mask) @@ -153,10 +153,10 @@ class TestCollection: assert not summary.errors # check if album value are set - for file_path in tmp_path.glob('**/*'): + paths = Paths(glob='**/*').get_files(tmp_path) + for file_path in paths: if '.db' not in str(file_path): media = Media(file_path, tmp_path, album_from_folder=True) - media.get_exif_metadata() for value in media._get_key_values('album'): assert value != '' or None @@ -206,7 +206,6 @@ class TestCollection: # copy mode src_path = Path(self.src_path, 'test_exif', 'photo.png') media = Media(src_path, self.src_path) - media.get_metadata(tmp_path) name = 'photo_' + str(imp) + '.png' media.metadata['file_path'] = name dest_path = Path(tmp_path, name) diff --git a/tests/test_media.py b/tests/test_media.py index 30f6269..f4f3b98 100644 --- a/tests/test_media.py +++ b/tests/test_media.py @@ -14,7 +14,7 @@ from ordigi.utils import get_date_from_string ORDIGI_PATH = Path(__file__).parent.parent CACHING = True -class TestMetadata: +class TestMedia: @pytest.fixture(autouse=True) def setup_class(cls, sample_files_paths): @@ -31,8 +31,7 @@ class TestMetadata: for file_path, media in self.get_media(): # test get metadata from cache or exif for root in self.src_path, tmp_path: - result = media.get_metadata(root) - assert result + media.get_metadata(root) assert isinstance(media.metadata, dict), media.metadata #check if all tags key are present for tags_key, tags in media.tags_keys.items(): @@ -77,7 +76,6 @@ class TestMetadata: exif_data = ExifToolCaching(str(file_path)).asdict() media = Media(file_path, self.src_path, use_date_filename=True, use_file_dates=True) - metadata = media.get_metadata(self.src_path) date_media = media.get_date_media() date_filename = None