Refactoring media class (2), create ReadExif and WriteExif class

This commit is contained in:
Cédric Leporcq 2021-11-11 11:54:44 +01:00
parent a693e6018a
commit 8fd65fda34
4 changed files with 132 additions and 280 deletions

View File

@ -15,7 +15,7 @@ from pathlib import Path, PurePath
import inquirer
from ordigi.database import Sqlite
from ordigi.media import Media, Medias
from ordigi.media import Medias
from ordigi.images import Image, Images
from ordigi import request
from ordigi.summary import Summary
@ -319,7 +319,7 @@ class Paths:
if extensions and '%media' in extensions:
extensions.remove('%media')
self.extensions = extensions.union(Media.extensions)
self.extensions = extensions.union(Medias.extensions)
else:
self.extensions = extensions
@ -362,8 +362,8 @@ class Paths:
def get_files(self, path):
"""Recursively get files which match a path and extension.
:param str path string: Path to start recursive file listing
:returns: Path file_path, Path subdirs
:param Path path: Path to start recursive file listing
:returns: Path generator File
"""
for path0 in path.glob(self.glob):
if path0.is_dir():

View File

@ -1,7 +1,3 @@
"""
Media :class:`Media` class to get file metadata
"""
import logging
import mimetypes
import os
@ -16,20 +12,14 @@ from ordigi import utils
from ordigi import request
class Metadata:
def __init__(self, ignore_tags=None):
class ExifMetadata:
# Options
def __init__(self, file_path, ignore_tags=None):
self.file_path = file_path
if ignore_tags is None:
ignore_tags = set()
self.exif_metadata = []
self.metadata = {}
# self.datas = {}
self.ignore_tags = ignore_tags
# Attributes
self.tags_keys = self.get_tags()
def get_tags(self) -> dict:
@ -77,33 +67,72 @@ class Metadata:
return tags_keys
def _del_ignored_tags(self):
for tag_regex in self.ignore_tags:
ignored_tags = set()
for tag in self.exif_metadata:
if re.search(tag_regex, tag) is not None:
ignored_tags.add(tag)
for ignored_tag in ignored_tags:
del self.exif_metadata[ignored_tag]
class WriteExif(Metadata):
class ReadExif(ExifMetadata):
def __init__(
self,
file_path,
exif_metadata=None,
ignore_tags=None,
logger=logging.getLogger(),
):
super().__init__(file_path, ignore_tags)
# Options
self.logger = logger.getChild(self.__class__.__name__)
if exif_metadata:
self.exif_metadata = exif_metadata
else:
self.exif_metadata = self.get_exif_metadata()
def get_exif_metadata(self):
"""Get metadata from exiftool."""
return ExifToolCaching(self.file_path, logger=self.logger).asdict()
def _get_key_values(self, key):
"""
Get the first value of a tag set
:returns: str or None if no exif tag
"""
if self.exif_metadata is None:
return None
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
yield self.exif_metadata[tag]
def get_value(self, tag):
"""
Get given value from EXIF.
:returns: str or None
"""
if self.exif_metadata is None:
return None
if tag not in self.exif_metadata:
return None
return self.exif_metadata[tag]
class WriteExif(ExifMetadata):
def __init__(
self,
file_path,
metadata,
exif_metadata=None,
ignore_tags=None,
logger=logging.getLogger(),
):
super().__init__(ignore_tags)
self.file_path = file_path
super().__init__(file_path, ignore_tags)
self.metadata = metadata
if not exif_metadata:
exif_metadata = []
self.exif_metadata = exif_metadata
self.logger = logger.getChild(self.__class__.__name__)
def set_value(self, tag, value):
@ -116,13 +145,9 @@ class WriteExif(Metadata):
def set_key_values(self, key, value):
"""Set tags values for given key"""
status = True
if self.exif_metadata is None:
return False
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
if not self.set_value(tag, value):
status = False
if not self.set_value(tag, value):
status = False
return status
@ -137,7 +162,7 @@ class WriteExif(Metadata):
formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
status = self.set_value('date_original', formatted_time)
if status == False:
if not status:
# exif attribute date_original d'ont exist
status = self.set_value('date_created', formatted_time)
@ -173,38 +198,16 @@ class WriteExif(Metadata):
:returns: bool
"""
return self.set_value('album', self.file_path.parent.name)
return self.set_value('Album', self.file_path.parent.name)
class ReadExif(Metadata):
def __init__(
self,
file_path,
src_dir,
album_from_folder=False,
ignore_tags=None,
interactive=False,
logger=logging.getLogger(),
use_date_filename=False,
use_file_dates=False,
):
super().__init__(ignore_tags)
class Media:
class Media(ReadExif):
"""
Extract matadatas from exiftool and sort them to dict structure
"""
d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'}
PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
AUDIO = ('m4a',)
VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')
extensions = PHOTO + AUDIO + VIDEO
def __init__(
self,
file_path,
@ -216,84 +219,28 @@ class Media:
use_date_filename=False,
use_file_dates=False,
):
"""
:params: Path, Path, bool, set, bool, Logger
"""
self.file_path = file_path
super().__init__(
file_path,
ignore_tags=ignore_tags,
logger=logger,
)
self.src_dir = src_dir
self.album_from_folder = album_from_folder
self.exif_metadata = None
if ignore_tags is None:
ignore_tags = set()
self.ignore_tags = ignore_tags
self.interactive = interactive
self.logger = logger.getChild(self.__class__.__name__)
self.metadata = None
self.tags_keys = self.get_tags()
self.use_date_filename = use_date_filename
self.use_file_dates = use_file_dates
self.theme = request.load_theme()
def get_tags(self) -> dict:
"""Get exif tags groups in dict"""
tags_keys = {}
tags_keys['date_original'] = [
'EXIF:DateTimeOriginal',
'H264:DateTimeOriginal',
'QuickTime:ContentCreateDate',
]
tags_keys['date_created'] = [
'EXIF:CreateDate',
'QuickTime:CreationDate',
'QuickTime:CreateDate',
'QuickTime:CreationDate-und-US',
'QuickTime:MediaCreateDate',
]
tags_keys['date_modified'] = ['File:FileModifyDate', 'QuickTime:ModifyDate']
tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
tags_keys['title'] = ['XMP:Title', 'XMP:DisplayName']
tags_keys['latitude'] = [
'EXIF:GPSLatitude',
'XMP:GPSLatitude',
# 'QuickTime:GPSLatitude',
'Composite:GPSLatitude',
]
tags_keys['longitude'] = [
'EXIF:GPSLongitude',
'XMP:GPSLongitude',
# 'QuickTime:GPSLongitude',
'Composite:GPSLongitude',
]
tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef']
tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef']
tags_keys['original_name'] = ['XMP:OriginalFileName']
# Remove ignored tag from list
for tag_regex in self.ignore_tags:
for key, tags in tags_keys.items():
for i, tag in enumerate(tags):
if re.match(tag_regex, tag):
del tags_keys[key][i]
return tags_keys
def _del_ignored_tags(self, exif_metadata):
for tag_regex in self.ignore_tags:
ignored_tags = set()
for tag in exif_metadata:
if re.search(tag_regex, tag) is not None:
ignored_tags.add(tag)
for ignored_tag in ignored_tags:
del exif_metadata[ignored_tag]
# get self.metadata
self.get_metadata(self.file_path)
def get_mimetype(self):
"""Get the mimetype of the file.
"""
Get the mimetype of the file.
:returns: str or None
"""
mimetype = mimetypes.guess_type(self.file_path)
@ -302,50 +249,6 @@ class Media:
return mimetype[0]
def _get_key_values(self, key):
"""
Get the first value of a tag set
:returns: str or None if no exif tag
"""
if self.exif_metadata is None:
return None
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
yield self.exif_metadata[tag]
def get_value(self, tag):
"""
Get given value from EXIF.
:returns: str or None
"""
if self.exif_metadata is None:
return None
if tag not in self.exif_metadata:
return None
return self.exif_metadata[tag]
def get_date_format(self, value):
"""Formatting date attribute.
:returns: datetime object or None
"""
# We need to parse a string to datetime format.
# EXIF DateTimeOriginal and EXIF DateTime are both stored
# in %Y:%m:%d %H:%M:%S format
if value is None:
return None
try:
# correct nasty formated date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
if re.match(regex, value) is not None: # noqa
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
return parser.parse(value)
except BaseException or parser._parser.ParserError as e:
self.logger.warning(e.args, value)
return None
def get_coordinates(self, key, value):
"""Get latitude or longitude value
@ -382,6 +285,27 @@ class Media:
return None
def get_date_format(self, value):
"""
Formatting date attribute.
:returns: datetime object or None
"""
# We need to parse a string to datetime format.
# EXIF DateTimeOriginal and EXIF DateTime are both stored
# in %Y:%m:%d %H:%M:%S format
if value is None:
return None
try:
# correct nasty formated date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
if re.match(regex, value) is not None: # noqa
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
return parser.parse(value)
except BaseException or parser._parser.ParserError as e:
self.logger.warning(e.args, value)
return None
def _get_date_media_interactive(self, choices, default):
print(f"Date conflict for file: {self.file_path}")
choices_list = [
@ -483,12 +407,6 @@ class Media:
default = date_filename
return self._get_date_media_interactive(choices, default)
def get_exif_metadata(self):
# Get metadata from exiftool.
self.exif_metadata = ExifToolCaching(
self.file_path, logger=self.logger
).asdict()
def _set_album(self, album, folder):
print(f"Metadata conflict for file: {self.file_path}")
choices_list = [
@ -511,23 +429,15 @@ class Media:
if not answers['album']:
answers = inquirer.prompt(prompt, theme=self.theme)
return answers['custom']
else:
return answers['album']
return answers['album']
def _set_metadata_from_exif(self):
self.metadata['src_dir'] = str(self.src_dir)
self.metadata['subdirs'] = str(
self.file_path.relative_to(self.src_dir).parent
)
self.metadata['filename'] = self.file_path.name
# Get metadata from exif
self.get_exif_metadata()
# Retrieve selected metadata to dict
"""
Get selected metadata from exif to dict structure
"""
if not self.exif_metadata:
return self.metadata
return
for key in self.tags_keys:
formated_data = None
@ -655,7 +565,7 @@ class Media:
if not album or album == '':
self.metadata['album'] = folder
def get_metadata(self, root, loc=None, db=None, cache=False) -> dict:
def get_metadata(self, root, loc=None, db=None, cache=False):
"""
Get a dictionary of metadata from exif.
All keys will be present and have a value of None if not obtained.
@ -670,6 +580,12 @@ class Media:
if db_checksum:
location_id = self._set_metadata_from_db(db, relpath)
else:
self.metadata['src_dir'] = str(self.src_dir)
self.metadata['subdirs'] = str(
self.file_path.relative_to(self.src_dir).parent
)
self.metadata['filename'] = self.file_path.name
self._set_metadata_from_exif()
self.metadata['date_media'] = self.get_date_media()
@ -679,8 +595,6 @@ class Media:
if self.album_from_folder:
self._set_album_from_folder()
return self.metadata
def has_exif_data(self):
"""Check if file has metadata, date original"""
if not self.metadata:
@ -692,78 +606,17 @@ class Media:
return False
def set_value(self, tag, value):
"""Set value of a tag.
:returns: value (str)
"""
return ExifTool(self.file_path, logger=self.logger).setvalue(tag, value)
def set_key_values(self, key, value):
"""Set tags values for given key"""
status = True
if self.exif_metadata is None:
return False
for tag in self.tags_keys[key]:
if tag in self.exif_metadata:
if not self.set_value(tag, value):
status = False
return status
def set_date_media(self, time):
"""Set the date/time a photo was taken.
:param datetime time: datetime object of when the photo was taken
:returns: bool
"""
if time is None:
return False
formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
status = self.set_value('date_original', formatted_time)
if status == False:
# exif attribute date_original d'ont exist
status = self.set_value('date_created', formatted_time)
return status
def set_coordinates(self, latitude, longitude):
status = []
if self.metadata['latitude_ref']:
latitude = abs(latitude)
if latitude > 0:
status.append(self.set_value('latitude_ref', 'N'))
else:
status.append(self.set_value('latitude_ref', 'S'))
status.append(self.set_value('latitude', latitude))
if self.metadata['longitude_ref']:
longitude = abs(longitude)
if longitude > 0:
status.append(self.set_value('latitude_ref', 'E'))
else:
status.append(self.set_value('longitude_ref', 'W'))
status.append(self.set_value('longitude', longitude))
if all(status):
return True
else:
return False
def set_album_from_folder(self):
"""Set the album attribute based on the leaf folder name
:returns: bool
"""
return self.set_value('album', self.file_path.parent.name)
class Medias:
"""Get media data in collection or source path"""
"""
Extract matadatas from exiftool in paths and sort them to dict structure
"""
PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
AUDIO = ('m4a',)
VIDEO = ('avi', 'm4v', 'mov', 'mp4', 'mpg', 'mpeg', '3gp', 'mts')
extensions = PHOTO + AUDIO + VIDEO
def __init__(
self,
@ -801,7 +654,7 @@ class Medias:
self.theme = request.load_theme()
def get_media(self, file_path, src_dir, loc=None):
media = Media(
return Media(
file_path,
src_dir,
self.album_from_folder,
@ -811,9 +664,6 @@ class Medias:
self.use_date_filename,
self.use_file_dates,
)
media.get_metadata(self.root, loc, self.db.sqlite, self.cache)
return media
def get_medias(self, src_dirs, imp=False, loc=None):
"""Get medias data"""
@ -837,7 +687,12 @@ class Medias:
def update_exif_data(self, metadata):
file_path = self.root / metadata['file_path']
exif = WriteExif(file_path, metadata, self.ignore_tags)
exif = WriteExif(
file_path,
metadata,
ignore_tags=self.ignore_tags,
logger=self.logger
)
updated = False
if self.album_from_folder:

View File

@ -65,7 +65,7 @@ class TestFPath:
exif_data = ExifToolCaching(str(file_path)).asdict()
loc = GeoLocation()
metadata = media.get_metadata(self.src_path, loc)
metadata = media.metadata
for item, regex in items.items():
for mask in masks:
matched = re.search(regex, mask)
@ -153,10 +153,10 @@ class TestCollection:
assert not summary.errors
# check if album value are set
for file_path in tmp_path.glob('**/*'):
paths = Paths(glob='**/*').get_files(tmp_path)
for file_path in paths:
if '.db' not in str(file_path):
media = Media(file_path, tmp_path, album_from_folder=True)
media.get_exif_metadata()
for value in media._get_key_values('album'):
assert value != '' or None
@ -206,7 +206,6 @@ class TestCollection:
# copy mode
src_path = Path(self.src_path, 'test_exif', 'photo.png')
media = Media(src_path, self.src_path)
media.get_metadata(tmp_path)
name = 'photo_' + str(imp) + '.png'
media.metadata['file_path'] = name
dest_path = Path(tmp_path, name)

View File

@ -14,7 +14,7 @@ from ordigi.utils import get_date_from_string
ORDIGI_PATH = Path(__file__).parent.parent
CACHING = True
class TestMetadata:
class TestMedia:
@pytest.fixture(autouse=True)
def setup_class(cls, sample_files_paths):
@ -31,8 +31,7 @@ class TestMetadata:
for file_path, media in self.get_media():
# test get metadata from cache or exif
for root in self.src_path, tmp_path:
result = media.get_metadata(root)
assert result
media.get_metadata(root)
assert isinstance(media.metadata, dict), media.metadata
#check if all tags key are present
for tags_key, tags in media.tags_keys.items():
@ -77,7 +76,6 @@ class TestMetadata:
exif_data = ExifToolCaching(str(file_path)).asdict()
media = Media(file_path, self.src_path, use_date_filename=True,
use_file_dates=True)
metadata = media.get_metadata(self.src_path)
date_media = media.get_date_media()
date_filename = None