Fix issue #252 to not modify original source files on import (#272)

This commit is contained in:
Matt Carey 2019-02-04 22:20:39 +00:00 committed by Jaisen Mathai
parent 086c26c198
commit 2e2c103cfb
4 changed files with 135 additions and 44 deletions

View File

@ -472,7 +472,35 @@ class FileSystem(object):
return folder_name
def process_checksum(self, _file, allow_duplicate):
    """Compute the checksum of ``_file`` and decide whether to import it.

    :param _file: Path of the file to checksum.
    :param allow_duplicate: When exactly ``False``, a file whose checksum
        is already recorded and still present on disk is rejected.
    :returns: The checksum when the import should proceed, otherwise
        ``None`` (no checksum could be computed, or the file is a
        duplicate of one that still exists).
    """
    hash_db = Db()
    file_hash = hash_db.checksum(_file)
    if file_hash is None:
        log.info('Could not get checksum for %s.' % _file)
        return None

    # Look up where a file with this checksum was recorded, if anywhere.
    known_path = hash_db.get_hash(file_hash)

    # Duplicates are only screened when the caller explicitly disallows
    # them and the checksum has been seen before.
    if allow_duplicate is not False or known_path is None:
        return file_hash

    # The checksum is known: reject only if the recorded file is really
    # still there.
    if os.path.isfile(known_path):
        log.info('%s already at %s.' % (_file, known_path))
        return None

    # Stale record: the recorded file is gone, so log it and let the
    # import go ahead.
    log.info(
        '%s matched checksum but file not found at %s.'
        % (_file, known_path)
    )
    return file_hash
def process_file(self, _file, destination, media, **kwargs):
move = False
if('move' in kwargs):
move = kwargs['move']
@ -481,10 +509,18 @@ class FileSystem(object):
if('allowDuplicate' in kwargs):
allow_duplicate = kwargs['allowDuplicate']
stat_info_original = os.stat(_file)
if(not media.is_valid()):
print('%s is not a valid media file. Skipping...' % _file)
return
checksum = self.process_checksum(_file, allow_duplicate)
if(checksum is None):
log.info('Original checksum returned None for %s. Skipping...' %
_file)
return
media.set_original_name()
metadata = media.get_metadata()
@ -494,31 +530,6 @@ class FileSystem(object):
file_name = self.get_file_name(media)
dest_path = os.path.join(dest_directory, file_name)
db = Db()
checksum = db.checksum(_file)
if(checksum is None):
log.info('Could not get checksum for %s. Skipping...' % _file)
return
# If duplicates are not allowed then we check if we've seen this file
# before via checksum. We also check that the file exists at the
# location we believe it to be.
# If we find a checksum match but the file doesn't exist where we
# believe it to be then we write a debug log and proceed to import.
checksum_file = db.get_hash(checksum)
if(allow_duplicate is False and checksum_file is not None):
if(os.path.isfile(checksum_file)):
log.info('%s already exists at %s. Skipping...' % (
_file,
checksum_file
))
return
else:
log.info('%s matched checksum but file not found at %s. Importing again...' % ( # noqa
_file,
checksum_file
))
# If source and destination are identical then
# we should not write the file. gh-210
if(_file == dest_path):
@ -527,14 +538,43 @@ class FileSystem(object):
self.create_directory(dest_directory)
# exiftool renames the original file by appending '_original' to the
# file name. A new file is written with new tags with the initial file
# name. See exiftool man page for more details.
exif_original_file = _file + '_original'
# Check if the source file was processed by exiftool and an _original
# file was created.
exif_original_file_exists = False
if(os.path.exists(exif_original_file)):
exif_original_file_exists = True
if(move is True):
stat = os.stat(_file)
# Move the processed file into the destination directory
shutil.move(_file, dest_path)
if(exif_original_file_exists is True):
# We can remove it as we don't need the initial file.
os.remove(exif_original_file)
os.utime(dest_path, (stat.st_atime, stat.st_mtime))
else:
compatability._copyfile(_file, dest_path)
if(exif_original_file_exists is True):
# Move the newly processed file with any updated tags to the
# destination directory
shutil.move(_file, dest_path)
# Move the exif _original back to the initial source file
shutil.move(exif_original_file, _file)
else:
compatability._copyfile(_file, dest_path)
# Set the utime based on what the original file contained
# before we made any changes.
# Then set the utime on the destination file based on metadata.
os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime))
self.set_utime_from_metadata(media.get_metadata(), dest_path)
db = Db()
db.add_hash(checksum, dest_path)
db.update_hash_db()

View File

@ -53,7 +53,6 @@ class Media(Base):
self.original_name_key = 'XMP:OriginalFileName'
self.set_gps_ref = True
self.exiftool_addedargs = [
'-overwrite_original',
u'-config',
u'"{}"'.format(constants.exiftool_config)
]

View File

@ -7,7 +7,7 @@ are tracked by Elodie.
from json import dumps, loads
import os
from shutil import copyfileobj
from shutil import copy2, copyfileobj
import time
# load modules
@ -176,6 +176,11 @@ class Text(Base):
metadata_line[name] = kwargs[name]
metadata_as_json = dumps(metadata_line)
# Create an _original copy just as we do with exiftool
# This is to keep all file processing logic in line with exiftool
copy2(source, source + '_original')
if has_metadata:
# Update the first line of this file in place
# http://stackoverflow.com/a/14947384

View File

@ -639,6 +639,7 @@ def test_process_file_plain():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('plain.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -648,8 +649,10 @@ def test_process_file_plain():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_with_title():
@ -659,6 +662,7 @@ def test_process_file_with_title():
origin = '%s/photo.jpg' % folder
shutil.copyfile(helper.get_file('with-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -668,8 +672,10 @@ def test_process_file_with_title():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_location():
@ -679,6 +685,7 @@ def test_process_file_with_location():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-location.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -688,10 +695,35 @@ def test_process_file_with_location():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_validate_original_checksum():
    """Importing a file must leave the source file's checksum unchanged."""
    filesystem = FileSystem()
    temporary_folder, folder = helper.create_working_folder()

    # Stage a fresh copy of the fixture so the test never touches the
    # fixture file itself.
    source_path = os.path.join(folder, 'photo.jpg')
    shutil.copyfile(helper.get_file('plain.jpg'), source_path)

    # Checksum of the source before any processing happens.
    checksum_before = helper.checksum(source_path)

    photo = Photo(source_path)
    imported_path = filesystem.process_file(
        source_path, temporary_folder, photo, allowDuplicate=True
    )

    # Checksums after the import: the source again, plus the imported copy.
    checksum_after = helper.checksum(source_path)
    checksum_imported = helper.checksum(imported_path)

    # Clean up before asserting so a failure does not leave files behind.
    shutil.rmtree(folder)
    shutil.rmtree(os.path.dirname(os.path.dirname(imported_path)))

    assert checksum_before is not None, checksum_before
    assert checksum_after is not None, checksum_after
    assert checksum_imported is not None, checksum_imported
    assert checksum_before == checksum_after, (checksum_before, checksum_after)
def test_process_file_with_location_and_title():
filesystem = FileSystem()
temporary_folder, folder = helper.create_working_folder()
@ -699,6 +731,7 @@ def test_process_file_with_location_and_title():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-location-and-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -708,8 +741,10 @@ def test_process_file_with_location_and_title():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_album():
@ -719,6 +754,7 @@ def test_process_file_with_album():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -728,8 +764,10 @@ def test_process_file_with_album():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo.jpg')) in destination, destination
def test_process_file_with_album_and_title():
@ -739,6 +777,7 @@ def test_process_file_with_album_and_title():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album-and-title.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -748,8 +787,10 @@ def test_process_file_with_album_and_title():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
def test_process_file_with_album_and_title_and_location():
@ -759,6 +800,7 @@ def test_process_file_with_album_and_title_and_location():
origin = os.path.join(folder,'photo.jpg')
shutil.copyfile(helper.get_file('with-album-and-title-and-location.jpg'), origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Photo(origin)
destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True)
@ -768,8 +810,10 @@ def test_process_file_with_album_and_title_and_location():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum == destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination
# gh-89 (setting album then title reverts album)
@ -782,6 +826,7 @@ def test_process_video_with_album_then_title():
origin_checksum = helper.checksum(origin)
origin_checksum_preprocess = helper.checksum(origin)
media = Video(origin)
media.set_album('test_album')
media.set_title('test_title')
@ -792,8 +837,10 @@ def test_process_video_with_album_then_title():
shutil.rmtree(folder)
shutil.rmtree(os.path.dirname(os.path.dirname(destination)))
assert origin_checksum is not None, origin_checksum
assert origin_checksum != destination_checksum, destination_checksum
assert origin_checksum_preprocess is not None
assert origin_checksum is not None
assert destination_checksum is not None
assert origin_checksum_preprocess == origin_checksum
assert helper.path_tz_fix(os.path.join('2015-01-Jan','test_album','2015-01-19_12-45-11-movie-test_title.mov')) in destination, destination
@mock.patch('elodie.config.config_file', '%s/config.ini-fallback-folder' % gettempdir())