Fix issue #252 to not modify original source files on import (#272)

This commit is contained in:
Matt Carey 2019-02-04 22:20:39 +00:00 committed by Jaisen Mathai
parent 086c26c198
commit 2e2c103cfb
4 changed files with 135 additions and 44 deletions

View File

@ -472,7 +472,35 @@ class FileSystem(object):
return folder_name return folder_name
def process_checksum(self, _file, allow_duplicate):
    """Compute the checksum of `_file` and decide whether to import it.

    :param str _file: Path of the source file being imported.
    :param bool allow_duplicate: Only the literal value ``False`` turns on
        duplicate detection; any other value lets the file through.
    :returns: The checksum when the import should proceed, or ``None`` when
        the checksum could not be computed or the file is a duplicate that
        still exists at the location recorded in the hash db.
    """
    hash_db = Db()
    file_hash = hash_db.checksum(_file)
    if file_hash is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    # The lookup is always performed, even when duplicates are allowed;
    # its result is only acted upon when duplicates are forbidden.
    known_path = hash_db.get_hash(file_hash)
    is_candidate_duplicate = (
        allow_duplicate is False and known_path is not None
    )
    if not is_candidate_duplicate:
        return file_hash

    match_details = (_file, known_path)
    if os.path.isfile(known_path):
        # Same checksum and the recorded file is still there: skip it.
        log.info('%s already at %s.' % match_details)
        return None

    # The checksum is known but the recorded file has gone missing, so we
    # note it and let the caller import the file again.
    log.info('%s matched checksum but file not found at %s.' % match_details)  # noqa
    return file_hash
def process_file(self, _file, destination, media, **kwargs): def process_file(self, _file, destination, media, **kwargs):
move = False move = False
if('move' in kwargs): if('move' in kwargs):
move = kwargs['move'] move = kwargs['move']
@ -481,10 +509,18 @@ class FileSystem(object):
if('allowDuplicate' in kwargs): if('allowDuplicate' in kwargs):
allow_duplicate = kwargs['allowDuplicate'] allow_duplicate = kwargs['allowDuplicate']
stat_info_original = os.stat(_file)
if(not media.is_valid()): if(not media.is_valid()):
print('%s is not a valid media file. Skipping...' % _file) print('%s is not a valid media file. Skipping...' % _file)
return return
checksum = self.process_checksum(_file, allow_duplicate)
if(checksum is None):
log.info('Original checksum returned None for %s. Skipping...' %
_file)
return
media.set_original_name() media.set_original_name()
metadata = media.get_metadata() metadata = media.get_metadata()
@ -494,31 +530,6 @@ class FileSystem(object):
file_name = self.get_file_name(media) file_name = self.get_file_name(media)
dest_path = os.path.join(dest_directory, file_name) dest_path = os.path.join(dest_directory, file_name)
db = Db()
checksum = db.checksum(_file)
if(checksum is None):
log.info('Could not get checksum for %s. Skipping...' % _file)
return
# If duplicates are not allowed then we check if we've seen this file
# before via checksum. We also check that the file exists at the
# location we believe it to be.
# If we find a checksum match but the file doesn't exist where we
# believe it to be then we write a debug log and proceed to import.
checksum_file = db.get_hash(checksum)
if(allow_duplicate is False and checksum_file is not None):
if(os.path.isfile(checksum_file)):
log.info('%s already exists at %s. Skipping...' % (
_file,
checksum_file
))
return
else:
log.info('%s matched checksum but file not found at %s. Importing again...' % ( # noqa
_file,
checksum_file
))
# If source and destination are identical then # If source and destination are identical then
# we should not write the file. gh-210 # we should not write the file. gh-210
if(_file == dest_path): if(_file == dest_path):
@ -527,14 +538,43 @@ class FileSystem(object):
self.create_directory(dest_directory) self.create_directory(dest_directory)
# exiftool renames the original file by appending '_original' to the
# file name. A new file is written with new tags with the initial file
# name. See exiftool man page for more details.
exif_original_file = _file + '_original'
# Check if the source file was processed by exiftool and an _original
# file was created.
exif_original_file_exists = False
if(os.path.exists(exif_original_file)):
exif_original_file_exists = True
if(move is True): if(move is True):
stat = os.stat(_file) stat = os.stat(_file)
# Move the processed file into the destination directory
shutil.move(_file, dest_path) shutil.move(_file, dest_path)
if(exif_original_file_exists is True):
# We can remove it as we don't need the initial file.
os.remove(exif_original_file)
os.utime(dest_path, (stat.st_atime, stat.st_mtime)) os.utime(dest_path, (stat.st_atime, stat.st_mtime))
else:
if(exif_original_file_exists is True):
# Move the newly processed file with any updated tags to the
# destination directory
shutil.move(_file, dest_path)
# Move the exif _original back to the initial source file
shutil.move(exif_original_file, _file)
else: else:
compatability._copyfile(_file, dest_path) compatability._copyfile(_file, dest_path)
# Set the utime based on what the original file contained
# before we made any changes.
# Then set the utime on the destination file based on metadata.
os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
self.set_utime_from_metadata(media.get_metadata(), dest_path) self.set_utime_from_metadata(media.get_metadata(), dest_path)
db = Db()
db.add_hash(checksum, dest_path) db.add_hash(checksum, dest_path)
db.update_hash_db() db.update_hash_db()

View File

@ -53,7 +53,6 @@ class Media(Base):
self.original_name_key = 'XMP:OriginalFileName' self.original_name_key = 'XMP:OriginalFileName'
self.set_gps_ref = True self.set_gps_ref = True
self.exiftool_addedargs = [ self.exiftool_addedargs = [
'-overwrite_original',
u'-config', u'-config',
u'"{}"'.format(constants.exiftool_config) u'"{}"'.format(constants.exiftool_config)
] ]

View File

@ -7,7 +7,7 @@ are tracked by Elodie.
from json import dumps, loads from json import dumps, loads
import os import os
from shutil import copyfileobj from shutil import copy2, copyfileobj
import time import time
# load modules # load modules
@ -176,6 +176,11 @@ class Text(Base):
metadata_line[name] = kwargs[name] metadata_line[name] = kwargs[name]
metadata_as_json = dumps(metadata_line) metadata_as_json = dumps(metadata_line)
# Create an _original copy just as we do with exiftool
# This is to keep all file processing logic in line with exiftool
copy2(source, source + '_original')
if has_metadata: if has_metadata:
# Update the first line of this file in place # Update the first line of this file in place
# http://stackoverflow.com/a/14947384 # http://stackoverflow.com/a/14947384

View File

@ -639,6 +639,7 @@ def test_process_file_plain():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('plain.jpg'), origin) shutil.copyfile(helper.get_file('plain.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -648,8 +649,10 @@ def test_process_file_plain():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_with_title(): def test_process_file_with_title():
@ -659,6 +662,7 @@ def test_process_file_with_title():
origin = '%s/photo.jpg' % folder origin = '%s/photo.jpg' % folder
shutil.copyfile(helper.get_file('with-title.jpg'), origin) shutil.copyfile(helper.get_file('with-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -668,8 +672,10 @@ def test_process_file_with_title():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_location(): def test_process_file_with_location():
@ -679,6 +685,7 @@ def test_process_file_with_location():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-location.jpg'), origin) shutil.copyfile(helper.get_file('with-location.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -688,10 +695,35 @@ def test_process_file_with_location():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_validate_original_checksum():
    """Check that importing a plain photo leaves the source file unchanged.

    The source file's checksum is taken before and after `process_file`
    runs and the two values must match.
    """
    fs = FileSystem()
    temporary_folder, folder = helper.create_working_folder()

    source_path = os.path.join(folder, 'photo.jpg')
    shutil.copyfile(helper.get_file('plain.jpg'), source_path)

    checksum_before = helper.checksum(source_path)
    photo = Photo(source_path)
    imported_path = fs.process_file(
        source_path, temporary_folder, photo, allowDuplicate=True
    )

    # Gather every checksum before cleaning up, since the files are
    # deleted below and the assertions run afterwards.
    checksum_after = helper.checksum(source_path)
    checksum_imported = helper.checksum(imported_path)

    shutil.rmtree(folder)
    shutil.rmtree(os.path.dirname(os.path.dirname(imported_path)))

    assert checksum_before is not None, checksum_before
    assert checksum_after is not None, checksum_after
    assert checksum_imported is not None, checksum_imported
    assert checksum_before == checksum_after, (checksum_before, checksum_after)
def test_process_file_with_location_and_title(): def test_process_file_with_location_and_title():
filesystem = FileSystem() filesystem = FileSystem()
temporary_folder, folder = helper.create_working_folder() temporary_folder, folder = helper.create_working_folder()
@ -699,6 +731,7 @@ def test_process_file_with_location_and_title():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-location-and-title.jpg'), origin) shutil.copyfile(helper.get_file('with-location-and-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -708,8 +741,10 @@ def test_process_file_with_location_and_title():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_album(): def test_process_file_with_album():
@ -719,6 +754,7 @@ def test_process_file_with_album():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album.jpg'), origin) shutil.copyfile(helper.get_file('with-album.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -728,8 +764,10 @@ def test_process_file_with_album():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_with_album_and_title(): def test_process_file_with_album_and_title():
@ -739,6 +777,7 @@ def test_process_file_with_album_and_title():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album-and-title.jpg'), origin) shutil.copyfile(helper.get_file('with-album-and-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -748,8 +787,10 @@ def test_process_file_with_album_and_title():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_album_and_title_and_location(): def test_process_file_with_album_and_title_and_location():
@ -759,6 +800,7 @@ def test_process_file_with_album_and_title_and_location():
origin = os.path.join(folder,'photo.jpg') origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album-and-title-and-location.jpg'), origin) shutil.copyfile(helper.get_file('with-album-and-title-and-location.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin) media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -768,8 +810,10 @@ def test_process_file_with_album_and_title_and_location():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum == destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
# gh-89 (setting album then title reverts album) # gh-89 (setting album then title reverts album)
@ -782,6 +826,7 @@ def test_process_video_with_album_then_title():
origin_checksum = helper.checksum(origin) origin_checksum = helper.checksum(origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Video(origin) media = Video(origin)
media.set_album('test_album') media.set_album('test_album')
media.set_title('test_title') media.set_title('test_title')
@ -792,8 +837,10 @@ def test_process_video_with_album_then_title():
shutil.rmtree(folder) shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination))) shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum assert origin_checksum_preprocess is not None
assert origin_checksum != destination_checksum, destination_checksum assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-01-Jan','test_album','2015-01-19_12-45-11-movie-test_title.mov')) in destination, destination assert helper.path_tz_fix(os.path.join('2015-01-Jan','test_album','2015-01-19_12-45-11-movie-test_title.mov')) in destination, destination
@mock.patch('elodie.config.config_file', '%s/config.ini-fallback-folder' % gettempdir()) @mock.patch('elodie.config.config_file', '%s/config.ini-fallback-folder' % gettempdir())