From 5316261b7dcb2d39a080c3c4b59f1a76b4d54d0a Mon Sep 17 00:00:00 2001 From: Jaisen Mathai Date: Wed, 14 Oct 2015 00:39:30 -0700 Subject: [PATCH] Refactor code to use pyexiv2 for photos and videos and move methods out to Media class --- elodie/filesystem.py | 14 ++-- elodie/media/media.py | 131 ++++++++++++++++++++++++++++++++++++++ elodie/media/photo.py | 92 +++----------------------- elodie/media/video.py | 95 +-------------------------- import.py | 28 +------- store_video.py | 40 ------------ tests/scripts/datetime.py | 0 7 files changed, 150 insertions(+), 250 deletions(-) delete mode 100755 store_video.py mode change 100644 => 100755 tests/scripts/datetime.py diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 9c1f90f..8f5055e 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -99,13 +99,15 @@ class FileSystem: @param, time_obj, time, Time object to be used to determine folder name. @returns, string """ - def get_folder_path(self, **kwargs): + def get_folder_path(self, metadata): path = [] - if('date' in kwargs): - path.append(time.strftime('%Y-%m-%b', kwargs['date'])) + if(metadata['date_taken'] is not None): + path.append(time.strftime('%Y-%m-%b', metadata['date_taken'])) - if('latitude' in kwargs and 'longitude' in kwargs): - place_name = geolocation.place_name(kwargs['latitude'], kwargs['longitude']) + if(metadata['album'] is not None): + path.append(metadata['album']) + elif(metadata['latitude'] is not None and metadata['longitude'] is not None): + place_name = geolocation.place_name(metadata['latitude'], metadata['longitude']) if(place_name is None): path.append('Unknown Location') else: @@ -124,7 +126,7 @@ class FileSystem: metadata = media.get_metadata() - directory_name = self.get_folder_path(date=metadata['date_taken'], latitude=metadata['latitude'], longitude=metadata['longitude']) + directory_name = self.get_folder_path(metadata) dest_directory = '%s/%s' % (destination, directory_name) file_name = self.get_file_name(media) 
diff --git a/elodie/media/media.py b/elodie/media/media.py index d54be1e..c7aa624 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -5,8 +5,11 @@ Media package that handles all video operations # load modules from sys import argv +from fractions import Fraction +import LatLon import mimetypes import os +import pyexiv2 import re import subprocess import time @@ -20,6 +23,28 @@ class Media(object): """ def __init__(self, source=None): self.source = source + self.exif_map = { + 'date_taken': ['Exif.Photo.DateTimeOriginal', 'Exif.Image.DateTime'], #, 'EXIF FileDateTime'], + 'latitude': 'Exif.GPSInfo.GPSLatitude', + 'latitude_ref': 'Exif.GPSInfo.GPSLatitudeRef', + 'longitude': 'Exif.GPSInfo.GPSLongitude', + 'longitude_ref': 'Exif.GPSInfo.GPSLongitudeRef', + 'album': 'Xmp.elodie.album' + } + try: + pyexiv2.xmp.register_namespace('https://github.com/jmathai/elodie/', 'elodie') + except KeyError: + pass + + def get_album(self): + if(not self.is_valid()): + return None + + exif = self.get_exif() + try: + return exif[self.exif_map['album']].value + except KeyError: + return None """ Get the full path to the video. 
@@ -39,6 +64,88 @@ class Media(object): # we can't use self.__get_extension else we'll endlessly recurse return os.path.splitext(source)[1][1:].lower() in self.get_valid_extensions() + """ + Get latitude or longitude of photo from EXIF + + @returns, float or None if not present in EXIF or a non-photo file + """ + def get_coordinate(self, type='latitude'): + if(not self.is_valid()): + return None + + key = self.exif_map['longitude'] if type == 'longitude' else self.exif_map['latitude'] + exif = self.get_exif() + + if(key not in exif): + return None + + try: + # this is a hack to get the proper direction by negating the values for S and W + latdir = 1 + if(key == self.exif_map['latitude'] and str(exif[self.exif_map['latitude_ref']].value) == 'S'): + latdir = -1 + londir = 1 + if(key == self.exif_map['longitude'] and str(exif[self.exif_map['longitude_ref']].value) == 'W'): + londir = -1 + + coords = exif[key].value + if(key == 'latitude'): + return float(str(LatLon.Latitude(degree=coords[0], minute=coords[1], second=coords[2]))) * latdir + else: + return float(str(LatLon.Longitude(degree=coords[0], minute=coords[1], second=coords[2]))) * londir + except KeyError: + return None + + """ + Get the date which the photo was taken. + The date value returned is defined by the min() of mtime and ctime. + + @returns, time object or None for non-photo files or 0 timestamp + """ + def get_date_taken(self): + if(not self.is_valid()): + return None + + source = self.source + seconds_since_epoch = min(os.path.getmtime(source), os.path.getctime(source)) + # We need to parse a string from EXIF into a timestamp. 
+ # EXIF DateTimeOriginal and EXIF DateTime are both stored in %Y:%m:%d %H:%M:%S format + # we use date.strptime -> .timetuple -> time.mktime to do the conversion in the local timezone + # EXIF DateTime is already stored as a timestamp + # Sourced from https://github.com/photo/frontend/blob/master/src/libraries/models/Photo.php#L500 + exif = self.get_exif() + for key in self.exif_map['date_taken']: + try: + if(key in exif): + seconds_since_epoch = time.mktime(datetime.strptime(str(exif[key].value), '%Y:%m:%d %H:%M:%S').timetuple()) + break; + except: + pass + + if(seconds_since_epoch == 0): + return None + + return time.gmtime(seconds_since_epoch) + + """ + Read EXIF from a photo file. + We store the result in a member variable so we can call get_exif() often without performance degradation + + @returns, list or none for a non-photo file + """ + def get_exif(self): + if(not self.is_valid()): + return None + + if(self.exif is not None): + return self.exif + + source = self.source + self.exif = pyexiv2.ImageMetadata(source) + self.exif.read() + + return self.exif + """ Get the file extension as a lowercased string. @@ -50,6 +157,30 @@ class Media(object): source = self.source return os.path.splitext(source)[1][1:].lower() + + """ + Get a dictionary of metadata for a photo. + All keys will be present and have a value of None if not obtained. + + @returns, dictionary or None for non-photo files + """ + def get_metadata(self): + if(not self.is_valid()): + return None + + source = self.source + + metadata = { + 'date_taken': self.get_date_taken(), + 'latitude': self.get_coordinate('latitude'), + 'longitude': self.get_coordinate('longitude'), + 'album': self.get_album(), + 'mime_type': self.get_mimetype(), + 'base_name': os.path.splitext(os.path.basename(source))[0], + 'extension': self.get_extension() + } + + return metadata """ Get the mimetype of the file. 
diff --git a/elodie/media/photo.py b/elodie/media/photo.py index b97c0a9..9fd936d 100644 --- a/elodie/media/photo.py +++ b/elodie/media/photo.py @@ -7,9 +7,6 @@ Photo package that handles all photo operations from sys import argv from datetime import datetime -import exifread -from fractions import Fraction -import LatLon import mimetypes import os import re @@ -22,7 +19,7 @@ Video class for general photo operations """ class Photo(Media): # class / static variable accessible through get_valid_extensions() - __valid_extensions = ('jpg', 'jpeg') + __valid_extensions = ('jpg', 'jpeg', 'nef', 'dng') """ @param, source, string, The fully qualified path to the photo file @@ -51,15 +48,13 @@ class Photo(Media): # EXIF DateTime is already stored as a timestamp # Sourced from https://github.com/photo/frontend/blob/master/src/libraries/models/Photo.php#L500 exif = self.get_exif() - try: - if('EXIF DateTimeOriginal' in exif): - seconds_since_epoch = time.mktime(datetime.strptime(str(exif['EXIF DateTimeOriginal']), '%Y:%m:%d %H:%M:%S').timetuple()) - elif('EXIF DateTime' in exif): - seconds_since_epoch = time.mktime(datetime.strptime(str(exif['EXIF DateTime']), '%Y:%m:%d %H:%M:%S').timetuple()) - elif('EXIF FileDateTime' in exif): - seconds_since_epoch = str(exif['EXIF DateTime']) - except: - pass + for key in self.exif_map['date_taken']: + try: + if(key in exif): + seconds_since_epoch = time.mktime(datetime.strptime(str(exif[key]), '%Y:%m:%d %H:%M:%S').timetuple()) + break; + except: + pass if(seconds_since_epoch == 0): return None @@ -84,77 +79,6 @@ class Photo(Media): return re.search('(\d{2}:\d{2}.\d{2})', key).group(1).replace('.', ':') return None - """ - Read EXIF from a photo file. 
- We store the result in a member variable so we can call get_exif() often without performance degredation - - @returns, list or none for a non-photo file - """ - def get_exif(self): - if(not self.is_valid()): - return None - - if(self.exif is not None): - return self.exif - - source = self.source - with open(source, 'r') as f: - self.exif = exifread.process_file(f, details=False) - - return self.exif - - """ - Get latitude or longitude of photo from EXIF - - @returns, float or None if not present in EXIF or a non-photo file - """ - def get_coordinate(self, type='latitude'): - if(not self.is_valid()): - return None - - key = 'GPS GPSLongitude' if type == 'longitude' else 'GPS GPSLatitude' - exif = self.get_exif() - - if(key not in exif): - return None - - # this is a hack to get the proper direction by negating the values for S and W - latdir = 1 - if(key == 'GPS GPSLatitude' and str(exif['GPS GPSLatitudeRef']) == 'S'): - latdir = -1 - londir = 1 - if(key == 'GPS GPSLongitude' and str(exif['GPS GPSLongitudeRef']) == 'W'): - londir = -1 - - coords = [float(Fraction(ratio.num, ratio.den)) for ratio in exif[key].values] - if(key == 'latitude'): - return float(str(LatLon.Latitude(degree=coords[0], minute=coords[1], second=coords[2]))) * latdir - else: - return float(str(LatLon.Longitude(degree=coords[0], minute=coords[1], second=coords[2]))) * londir - - """ - Get a dictionary of metadata for a photo. - All keys will be present and have a value of None if not obtained. 
- - @returns, dictionary or None for non-photo files - """ - def get_metadata(self): - if(not self.is_valid()): - return None - - source = self.source - - metadata = { - "date_taken": self.get_date_taken(), - "latitude": self.get_coordinate('latitude'), - "longitude": self.get_coordinate('longitude'), - "mime_type": self.get_mimetype(), - "base_name": os.path.splitext(os.path.basename(source))[0], - "extension": self.get_extension() - } - - return metadata - """ Static method to access static __valid_extensions variable. diff --git a/elodie/media/video.py b/elodie/media/video.py index d058a41..0f9cd9c 100644 --- a/elodie/media/video.py +++ b/elodie/media/video.py @@ -29,61 +29,8 @@ class Video(Media): def __init__(self, source=None): super(Video, self).__init__(source) - """ - Get latitude or longitude of photo from EXIF - - @returns, time object or None for non-video files or 0 timestamp - """ - def get_coordinate(self, type='latitude'): - exif_data = self.get_exif() - if(exif_data is None): - return None - - coords = re.findall('(GPS %s +: .+)' % type.capitalize(), exif_data) - if(coords is None or len(coords) == 0): - return None - - coord_string = coords[0] - coordinate = re.findall('([0-9.]+)', coord_string) - direction = re.search('[NSEW]$', coord_string) - if(coordinate is None or direction is None): - return None - - direction = direction.group(0) - - decimal_degrees = float(coordinate[0]) + float(coordinate[1])/60 + float(coordinate[2])/3600 - if(direction == 'S' or direction == 'W'): - decimal_degrees = decimal_degrees * -1 - - return decimal_degrees - - """ - Get the date which the video was taken. - The date value returned is defined by the min() of mtime and ctime. 
- - @returns, time object or None for non-video files or 0 timestamp - """ - def get_date_taken(self): - if(not self.is_valid()): - return None - - source = self.source - seconds_since_epoch = min(os.path.getmtime(source), os.path.getctime(source)) - # We need to parse a string from EXIF into a timestamp. - # we use date.strptime -> .timetuple -> time.mktime to do the conversion in the local timezone - exif_data = self.get_exif() - date = re.search('Media Create Date +: +(.+)', exif_data) - if(date is not None): - date_string = date.group(1) - try: - seconds_since_epoch = time.mktime(datetime.strptime(date_string, '%Y:%m:%d %H:%M:%S').timetuple()) - except: - pass - - if(seconds_since_epoch == 0): - return None - - return time.gmtime(seconds_since_epoch) + # We only want to parse EXIF once so we store it here + self.exif = None """ Get the duration of a video in seconds. @@ -103,44 +50,6 @@ class Video(Media): return re.search('(\d{2}:\d{2}.\d{2})', key).group(1).replace('.', ':') return None - """ - Get exif data from video file. - Not all video files have exif and this currently relies on the CLI exiftool program - - @returns, string or None if exiftool is not found - """ - def get_exif(self): - exiftool = find_executable('exiftool') - if(exiftool is None): - return None - - source = self.source - process_output = subprocess.Popen(['%s %s ' % (exiftool, source)], stdout=subprocess.PIPE, shell=True) - return process_output.stdout.read() - - """ - Get a dictionary of metadata for a video. - All keys will be present and have a value of None if not obtained. 
- - @returns, dictionary or None for non-video files - """ - def get_metadata(self): - if(not self.is_valid()): - return None - - source = self.source - metadata = { - "date_taken": self.get_date_taken(), - "latitude": self.get_coordinate('latitude'), - "longitude": self.get_coordinate('longitude'), - "length": self.get_duration(), - "mime_type": self.get_mimetype(), - "base_name": os.path.splitext(os.path.basename(source))[0], - "extension": self.get_extension() - } - - return metadata - """ Static method to access static __valid_extensions variable. diff --git a/import.py b/import.py index 17ca579..4aa6fcd 100755 --- a/import.py +++ b/import.py @@ -36,32 +36,6 @@ def parse_arguments(args): config.update(args) return config -def process_file(_file, destination, media): - checksum = db.checksum(_file) - if(checksum == None): - print 'Could not get checksum for %s. Skipping...' % _file - return - - if(db.check_hash(checksum) == True): - print '%s already exists at %s. Skipping...' % (_file, db.get_hash(checksum)) - return - - metadata = media.get_metadata() - - directory_name = filesystem.get_folder_path(date=metadata['date_taken'], latitude=metadata['latitude'], longitude=metadata['longitude']) - - dest_directory = '%s/%s' % (destination, directory_name) - # TODO remove the day prefix of the file that was there prior to the crawl - file_name = filesystem.get_file_name(media) - dest_path = '%s/%s' % (dest_directory, file_name) - - filesystem.create_directory(dest_directory) - - print '%s -> %s' % (_file, dest_path) - #shutil.copy2(_file, dest_path) - shutil.move(_file, dest_path) - db.add_hash(checksum, dest_path) - def main(argv): destination = config['destination'] @@ -95,7 +69,7 @@ def main(argv): if(media_type.__name__ == 'Video'): filesystem.set_date_from_path_video(media) - dest_path = process_file(config['file'], destination, media, allowDuplicate=False, move=False) + dest_path = filesystem.process_file(config['file'], destination, media, 
allowDuplicate=False, move=False) print '%s -> %s' % (current_file, dest_path) db.update_hash_db() else: diff --git a/store_video.py b/store_video.py deleted file mode 100755 index 5876ca0..0000000 --- a/store_video.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python - -import os -import shutil -import sys - -from elodie.media.video import Video -from elodie.filesystem import FileSystem - -print 'Running with arguments %r' % sys.argv - -destination = '%s/Dropbox/Videos' % os.path.expanduser('~') - -if __name__ == '__main__': - if(len(sys.argv) < 2): - print "No arguments passed" - sys.exit(0) - - file_path = sys.argv[1] - - filesystem = FileSystem() - video = Video(file_path) - - # check if the file is valid else exit - if(not video.is_valid()): - print "File is not valid" - sys.exit(0) - - metadata = video.get_metadata() - - directory_name = filesystem.get_folder_name_by_date(metadata['date_taken']) - dest_directory = '%s/%s' % (destination, directory_name) - file_name = filesystem.get_file_name_for_video(video) - - dest = '%s/%s' % (dest_directory, file_name) - - if not os.path.exists(dest_directory): - os.makedirs(dest_directory) - - shutil.copy2(file_path, dest) diff --git a/tests/scripts/datetime.py b/tests/scripts/datetime.py old mode 100644 new mode 100755