diff --git a/elodie/filesystem.py b/elodie/filesystem.py index 0a1d6ab..5048b78 100644 --- a/elodie/filesystem.py +++ b/elodie/filesystem.py @@ -472,7 +472,35 @@ class FileSystem(object): return folder_name + def process_checksum(self, _file, allow_duplicate): + db = Db() + checksum = db.checksum(_file) + if(checksum is None): + log.info('Could not get checksum for %s.' % _file) + return None + + # If duplicates are not allowed then we check if we've seen this file + # before via checksum. We also check that the file exists at the + # location we believe it to be. + # If we find a checksum match but the file doesn't exist where we + # believe it to be then we write a debug log and proceed to import. + checksum_file = db.get_hash(checksum) + if(allow_duplicate is False and checksum_file is not None): + if(os.path.isfile(checksum_file)): + log.info('%s already at %s.' % ( + _file, + checksum_file + )) + return None + else: + log.info('%s matched checksum but file not found at %s.' % ( # noqa + _file, + checksum_file + )) + return checksum + def process_file(self, _file, destination, media, **kwargs): + move = False if('move' in kwargs): move = kwargs['move'] @@ -481,10 +509,18 @@ class FileSystem(object): if('allowDuplicate' in kwargs): allow_duplicate = kwargs['allowDuplicate'] + stat_info_original = os.stat(_file) + if(not media.is_valid()): print('%s is not a valid media file. Skipping...' % _file) return + checksum = self.process_checksum(_file, allow_duplicate) + if(checksum is None): + log.info('Original checksum returned None for %s. Skipping...' % + _file) + return + media.set_original_name() metadata = media.get_metadata() @@ -494,31 +530,6 @@ class FileSystem(object): file_name = self.get_file_name(media) dest_path = os.path.join(dest_directory, file_name) - db = Db() - checksum = db.checksum(_file) - if(checksum is None): - log.info('Could not get checksum for %s. Skipping...' % _file) - return - - # If duplicates are not allowed then we check if we've seen this file - # before via checksum. We also check that the file exists at the - # location we believe it to be. - # If we find a checksum match but the file doesn't exist where we - # believe it to be then we write a debug log and proceed to import. - checksum_file = db.get_hash(checksum) - if(allow_duplicate is False and checksum_file is not None): - if(os.path.isfile(checksum_file)): - log.info('%s already exists at %s. Skipping...' % ( - _file, - checksum_file - )) - return - else: - log.info('%s matched checksum but file not found at %s. Importing again...' % ( # noqa - _file, - checksum_file - )) - # If source and destination are identical then # we should not write the file. gh-210 if(_file == dest_path): @@ -527,14 +538,43 @@ class FileSystem(object): self.create_directory(dest_directory) + # exiftool renames the original file by appending '_original' to the + # file name. A new file is written with new tags with the initial file + # name. See exiftool man page for more details. + exif_original_file = _file + '_original' + + # Check if the source file was processed by exiftool and an _original + # file was created. + exif_original_file_exists = False + if(os.path.exists(exif_original_file)): + exif_original_file_exists = True + if(move is True): stat = os.stat(_file) + # Move the processed file into the destination directory shutil.move(_file, dest_path) + + if(exif_original_file_exists is True): + # We can remove it as we don't need the initial file. + os.remove(exif_original_file) os.utime(dest_path, (stat.st_atime, stat.st_mtime)) else: - compatability._copyfile(_file, dest_path) + if(exif_original_file_exists is True): + # Move the newly processed file with any updated tags to the + # destination directory + shutil.move(_file, dest_path) + # Move the exif _original back to the initial source file + shutil.move(exif_original_file, _file) + else: + compatability._copyfile(_file, dest_path) + + # Set the utime based on what the original file contained + # before we made any changes. + # Then set the utime on the destination file based on metadata. + os.utime(_file, (stat_info_original.st_atime, stat_info_original.st_mtime)) self.set_utime_from_metadata(media.get_metadata(), dest_path) + db = Db() db.add_hash(checksum, dest_path) db.update_hash_db() diff --git a/elodie/media/media.py b/elodie/media/media.py index b0c36c6..bb07675 100644 --- a/elodie/media/media.py +++ b/elodie/media/media.py @@ -53,7 +53,6 @@ class Media(Base): self.original_name_key = 'XMP:OriginalFileName' self.set_gps_ref = True self.exiftool_addedargs = [ - '-overwrite_original', u'-config', u'"{}"'.format(constants.exiftool_config) ] diff --git a/elodie/media/text.py b/elodie/media/text.py index 60453b6..4e3c6bb 100644 --- a/elodie/media/text.py +++ b/elodie/media/text.py @@ -7,7 +7,7 @@ are tracked by Elodie. from json import dumps, loads import os -from shutil import copyfileobj +from shutil import copy2, copyfileobj import time # load modules @@ -176,6 +176,11 @@ class Text(Base): metadata_line[name] = kwargs[name] metadata_as_json = dumps(metadata_line) + + # Create an _original copy just as we do with exiftool + # This is to keep all file processing logic in line with exiftool + copy2(source, source + '_original') + if has_metadata: # Update the first line of this file in place # http://stackoverflow.com/a/14947384 diff --git a/elodie/tests/filesystem_test.py b/elodie/tests/filesystem_test.py index 902cb09..c9bc25b 100644 --- a/elodie/tests/filesystem_test.py +++ b/elodie/tests/filesystem_test.py @@ -639,6 +639,7 @@ def test_process_file_plain(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('plain.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -648,8 +649,10 @@ def test_process_file_plain(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo.jpg')) in destination, destination def test_process_file_with_title(): @@ -659,6 +662,7 @@ def test_process_file_with_title(): origin = '%s/photo.jpg' % folder shutil.copyfile(helper.get_file('with-title.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -668,8 +672,10 @@ def test_process_file_with_title(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Unknown Location','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination def test_process_file_with_location(): @@ -679,6 +685,7 @@ def test_process_file_with_location(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('with-location.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -688,10 +695,35 @@ def test_process_file_with_location(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo.jpg')) in destination, destination +def test_process_file_validate_original_checksum(): + filesystem = FileSystem() + temporary_folder, folder = helper.create_working_folder() + + origin = os.path.join(folder,'photo.jpg') + shutil.copyfile(helper.get_file('plain.jpg'), origin) + + origin_checksum_preprocess = helper.checksum(origin) + media = Photo(origin) + destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) + + origin_checksum = helper.checksum(origin) + destination_checksum = helper.checksum(destination) + + shutil.rmtree(folder) + shutil.rmtree(os.path.dirname(os.path.dirname(destination))) + + assert origin_checksum_preprocess is not None, origin_checksum_preprocess + assert origin_checksum is not None, origin_checksum + assert destination_checksum is not None, destination_checksum + assert origin_checksum_preprocess == origin_checksum, (origin_checksum_preprocess, origin_checksum) + + def test_process_file_with_location_and_title(): filesystem = FileSystem() temporary_folder, folder = helper.create_working_folder() @@ -699,6 +731,7 @@ def test_process_file_with_location_and_title(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('with-location-and-title.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -708,8 +741,10 @@ def test_process_file_with_location_and_title(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Sunnyvale','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination def test_process_file_with_album(): @@ -719,6 +754,7 @@ def test_process_file_with_album(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('with-album.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -728,8 +764,10 @@ def test_process_file_with_album(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo.jpg')) in destination, destination def test_process_file_with_album_and_title(): @@ -739,6 +777,7 @@ def test_process_file_with_album_and_title(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('with-album-and-title.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -748,8 +787,10 @@ def test_process_file_with_album_and_title(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination def test_process_file_with_album_and_title_and_location(): @@ -759,6 +800,7 @@ def test_process_file_with_album_and_title_and_location(): origin = os.path.join(folder,'photo.jpg') shutil.copyfile(helper.get_file('with-album-and-title-and-location.jpg'), origin) + origin_checksum_preprocess = helper.checksum(origin) media = Photo(origin) destination = filesystem.process_file(origin, temporary_folder, media, allowDuplicate=True) @@ -768,8 +810,10 @@ def test_process_file_with_album_and_title_and_location(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum == destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-12-Dec','Test Album','2015-12-05_00-59-26-photo-some-title.jpg')) in destination, destination # gh-89 (setting album then title reverts album) @@ -782,6 +826,7 @@ def test_process_video_with_album_then_title(): origin_checksum = helper.checksum(origin) + origin_checksum_preprocess = helper.checksum(origin) media = Video(origin) media.set_album('test_album') media.set_title('test_title') @@ -792,8 +837,10 @@ def test_process_video_with_album_then_title(): shutil.rmtree(folder) shutil.rmtree(os.path.dirname(os.path.dirname(destination))) - assert origin_checksum is not None, origin_checksum - assert origin_checksum != destination_checksum, destination_checksum + assert origin_checksum_preprocess is not None + assert origin_checksum is not None + assert destination_checksum is not None + assert origin_checksum_preprocess == origin_checksum assert helper.path_tz_fix(os.path.join('2015-01-Jan','test_album','2015-01-19_12-45-11-movie-test_title.mov')) in destination, destination @mock.patch('elodie.config.config_file', '%s/config.ini-fallback-folder' % gettempdir())