diff --git a/Readme.md b/Readme.md index aeafa5b..6228512 100644 --- a/Readme.md +++ b/Readme.md @@ -52,7 +52,7 @@ You'll notice that the photo was organized into an *Unknown Location* folder. Th ### Usage Instructions -You can view these instructions on the command line by typing `./elodie.py import --help` or `./elodie.py update --help`. +You can view these instructions on the command line by typing `./elodie.py import --help`, `./elodie.py update --help` or `./elodie.py generate-db --help`. ``` Usage: elodie.py import [OPTIONS] [PATHS]... @@ -88,6 +88,17 @@ Options: --help Show this message and exit. ``` +``` +Usage: elodie.py generate-db [OPTIONS] + + Regenerate the hash.json database which contains all of the sha1 + signatures of media files. + +Options: + --source DIRECTORY Source of your photo library. [required] + --help Show this message and exit. +``` + Now you're ready to learn more about Elodie.

diff --git a/elodie.py b/elodie.py index baa9974..14e677a 100755 --- a/elodie.py +++ b/elodie.py @@ -20,7 +20,7 @@ from elodie import geolocation from elodie import log from elodie.filesystem import FileSystem from elodie.localstorage import Db -from elodie.media.base import Base +from elodie.media.base import Base, get_all_subclasses from elodie.media.media import Media from elodie.media.text import Text from elodie.media.audio import Audio @@ -29,7 +29,6 @@ from elodie.media.video import Video from elodie.result import Result -DB = Db() FILESYSTEM = FileSystem() RESULT = Result() @@ -107,6 +106,42 @@ def _import(destination, source, file, album_from_folder, trash, paths, allow_du RESULT.write() +@click.command('generate-db') +@click.option('--source', type=click.Path(file_okay=False), + required=True, help='Source of your photo library.') +def _generate_db(source): + """Regenerate the hash.json database which contains all of the sha1 signatures of media files. + """ + source = os.path.abspath(os.path.expanduser(source)) + + extensions = set() + all_files = set() + valid_files = set() + + if not os.path.isdir(source): + log.error('Source is not a valid directory %s' % source) + sys.exit(1) + + subclasses = get_all_subclasses(Base) + for cls in subclasses: + extensions.update(cls.extensions) + + all_files.update(FILESYSTEM.get_all_files(source, None)) + + db = Db() + db.backup_hash_db() + db.reset_hash_db() + + for current_file in all_files: + if os.path.splitext(current_file)[1][1:].lower() not in extensions: + log.info('Skipping invalid file %s' % current_file) + continue + + db.add_hash(db.checksum(current_file), current_file) + + db.update_hash_db() + + def update_location(media, file_path, location_name): """Update location exif metadata of media. """ @@ -237,6 +272,7 @@ def main(): main.add_command(_import) main.add_command(_update) +main.add_command(_generate_db) if __name__ == '__main__': diff --git a/elodie/localstorage.py b/elodie/localstorage.py index 3ce5f51..cd37a44 100644 --- a/elodie/localstorage.py +++ b/elodie/localstorage.py @@ -6,10 +6,13 @@ from builtins import object import hashlib import json -from math import radians, cos, sqrt import os import sys +from math import radians, cos, sqrt +from shutil import copyfile +from time import strftime + from elodie import constants @@ -66,6 +69,38 @@ class Db(object): if(write is True): self.update_hash_db() + # Location database + # Currently quite simple just a list of long/lat pairs with a name + # If it gets many entries a lookup might take too long and a better + # structure might be needed. Some speed up ideas: + # - Sort it and inter-half method can be used + # - Use integer part of long or lat as key to get a lower search list + # - Cache a small number of lookups, photos are likely to be taken in + # clusters around a spot during import. + def add_location(self, latitude, longitude, place, write=False): + """Add a location to the database. + + :param float latitude: Latitude of the location. + :param float longitude: Longitude of the location. + :param str place: Name for the location. + :param bool write: If true, write the location db to disk. + """ + data = {} + data['lat'] = latitude + data['long'] = longitude + data['name'] = place + self.location_db.append(data) + if(write is True): + self.update_location_db() + + def backup_hash_db(self): + """Backs up the hash db.""" + if os.path.isfile(constants.hash_db): + mask = strftime('%Y-%m-%d_%H-%M-%S') + backup_file_name = '%s-%s' % (constants.hash_db, mask) + copyfile(constants.hash_db, backup_file_name) + return backup_file_name + def check_hash(self, key): """Check whether a hash is present for the given key. @@ -74,21 +109,6 @@ class Db(object): """ return key in self.hash_db - def get_hash(self, key): - """Get the hash value for a given key. - - :param str key: - :returns: str or None - """ - if(self.check_hash(key) is True): - return self.hash_db[key] - return None - - def update_hash_db(self): - """Write the hash db to disk.""" - with open(constants.hash_db, 'w') as f: - json.dump(self.hash_db, f) - def checksum(self, file_path, blocksize=65536): """Create a hash value for the given file. @@ -109,30 +129,15 @@ class Db(object): return hasher.hexdigest() return None - # Location database - # Currently quite simple just a list of long/lat pairs with a name - # If it gets many entries a lookup might take too long and a better - # structure might be needed. Some speed up ideas: - # - Sort it and inter-half method can be used - # - Use integer part of long or lat as key to get a lower search list - # - Cache a small number of lookups, photos are likely to be taken in - # clusters around a spot during import. + def get_hash(self, key): + """Get the hash value for a given key. - def add_location(self, latitude, longitude, place, write=False): - """Add a location to the database. - - :param float latitude: Latitude of the location. - :param float longitude: Longitude of the location. - :param str place: Name for the location. - :param bool write: If true, write the location db to disk. + :param str key: + :returns: str or None """ - data = {} - data['lat'] = latitude - data['long'] = longitude - data['name'] = place - self.location_db.append(data) - if(write is True): - self.update_location_db() + if(self.check_hash(key) is True): + return self.hash_db[key] + return None def get_location_name(self, latitude, longitude, threshold_m): """Find a name for a location in the database. @@ -178,6 +183,14 @@ class Db(object): return None + def reset_hash_db(self): + self.hash_db = {} + + def update_hash_db(self): + """Write the hash db to disk.""" + with open(constants.hash_db, 'w') as f: + json.dump(self.hash_db, f) + def update_location_db(self): """Write the location db to disk.""" with open(constants.location_db, 'w') as f: diff --git a/elodie/media/base.py b/elodie/media/base.py index 2d1410b..d0dee94 100644 --- a/elodie/media/base.py +++ b/elodie/media/base.py @@ -187,6 +187,8 @@ class Base(object): @classmethod def get_class_by_file(cls, _file, classes): + """Static method to get a media object by file. + """ if not isinstance(_file, basestring) or not os.path.isfile(_file): return None @@ -206,3 +208,21 @@ class Base(object): :returns: tuple(str) """ return cls.extensions + + +def get_all_subclasses(cls=None): + """Module method to get all subclasses of Base. + """ + subclasses = set() + + this_class = Base + if cls is not None: + this_class = cls + + subclasses.add(this_class) + + this_class_subclasses = this_class.__subclasses__() + for child_class in this_class_subclasses: + subclasses.update(get_all_subclasses(child_class)) + + return subclasses diff --git a/elodie/media/text.py b/elodie/media/text.py index 559817e..c7e54c7 100644 --- a/elodie/media/text.py +++ b/elodie/media/text.py @@ -25,7 +25,7 @@ class Text(Base): __name__ = 'Text' #: Valid extensions for text files. - extensions = ('txt') + extensions = ('txt',) def __init__(self, source=None): super(Text, self).__init__(source) diff --git a/elodie/tests/elodie_test.py b/elodie/tests/elodie_test.py index 27635f1..a7620fb 100644 --- a/elodie/tests/elodie_test.py +++ b/elodie/tests/elodie_test.py @@ -4,7 +4,9 @@ import os import sys import shutil +from click.testing import CliRunner from nose.plugins.skip import SkipTest +from nose.tools import assert_raises sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))) sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))) @@ -13,6 +15,7 @@ import helper elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__))))) from elodie import constants +from elodie.localstorage import Db from elodie.media.audio import Audio from elodie.media.photo import Photo from elodie.media.text import Text @@ -334,6 +337,48 @@ def test_update_time_on_video(): assert metadata['date_taken'] != metadata_processed['date_taken'] assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken'] +def test_regenerate_db_invalid_source(): + runner = CliRunner() + result = runner.invoke(elodie._generate_db, ['--source', '/invalid/path']) + assert result.exit_code == 1, result.exit_code + +def test_regenerate_valid_source(): + temporary_folder, folder = helper.create_working_folder() + + origin = '%s/valid.txt' % folder + shutil.copyfile(helper.get_file('valid.txt'), origin) + + reset_hash_db() + runner = CliRunner() + result = runner.invoke(elodie._generate_db, ['--source', folder]) + db = Db() + restore_hash_db() + + shutil.rmtree(folder) + + assert result.exit_code == 0, result.exit_code + assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db + +def test_regenerate_valid_source_with_invalid_files(): + temporary_folder, folder = helper.create_working_folder() + + origin_valid = '%s/valid.txt' % folder + shutil.copyfile(helper.get_file('valid.txt'), origin_valid) + origin_invalid = '%s/invalid.invalid' % folder + shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid) + + reset_hash_db() + runner = CliRunner() + result = runner.invoke(elodie._generate_db, ['--source', folder]) + db = Db() + restore_hash_db() + + shutil.rmtree(folder) + + assert result.exit_code == 0, result.exit_code + assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db + assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db + def reset_hash_db(): hash_db = constants.hash_db if os.path.isfile(hash_db): diff --git a/elodie/tests/files/invalid.invalid b/elodie/tests/files/invalid.invalid new file mode 100644 index 0000000..e69de29 diff --git a/elodie/tests/localstorage_test.py b/elodie/tests/localstorage_test.py index fc58e11..abf609c 100644 --- a/elodie/tests/localstorage_test.py +++ b/elodie/tests/localstorage_test.py @@ -62,6 +62,14 @@ def test_add_hash_explicit_write(): # Instnatiate new db class to confirm random_key exists db2 = Db() assert db2.check_hash(random_key) == True + +def test_backup_hash_db(): + db = Db() + backup_file_name = db.backup_hash_db() + file_exists = os.path.isfile(backup_file_name) + os.remove(backup_file_name) + + assert file_exists, backup_file_name def test_check_hash_exists(): db = Db() diff --git a/elodie/tests/media/base_test.py b/elodie/tests/media/base_test.py index 365fbb0..f9abe98 100644 --- a/elodie/tests/media/base_test.py +++ b/elodie/tests/media/base_test.py @@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirna sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))) import helper -from elodie.media.base import Base +from elodie.media.base import Base, get_all_subclasses from elodie.media.media import Media from elodie.media.audio import Audio from elodie.media.text import Text @@ -106,3 +106,8 @@ def test_set_metadata_basename(): new_metadata = photo.get_metadata() assert new_metadata['base_name'] == new_basename, new_metadata['base_name'] + +def test_get_all_subclasses(): + subclasses = get_all_subclasses(Base) + expected = {Media, Base, Text, Photo, Video, Audio} + assert subclasses == expected, subclasses