Merge pull request #155 from jmathai/regenerate-db-gh-154

Add command to regenerate the hash database
This commit is contained in:
Jaisen Mathai 2016-12-13 00:18:59 -08:00 committed by GitHub
commit a82114818f
9 changed files with 181 additions and 43 deletions

View File

@ -52,7 +52,7 @@ You'll notice that the photo was organized into an *Unknown Location* folder. Th
### Usage Instructions
You can view these instructions on the command line by typing `./elodie.py import --help` or `./elodie.py update --help`.
You can view these instructions on the command line by typing `./elodie.py import --help`, `./elodie.py update --help` or `./elodie.py generate-db --help`.
```
Usage: elodie.py import [OPTIONS] [PATHS]...
@ -88,6 +88,17 @@ Options:
--help Show this message and exit.
```
```
Usage: elodie.py generate-db [OPTIONS]
Regenerate the hash.json database which contains all of the sha1
signatures of media files.
Options:
--source DIRECTORY Source of your photo library. [required]
--help Show this message and exit.
```
Now you're ready to learn more about Elodie.
<p align="center"><img src ="creative/logo@300x.png" /></p>

View File

@ -20,7 +20,7 @@ from elodie import geolocation
from elodie import log
from elodie.filesystem import FileSystem
from elodie.localstorage import Db
from elodie.media.base import Base
from elodie.media.base import Base, get_all_subclasses
from elodie.media.media import Media
from elodie.media.text import Text
from elodie.media.audio import Audio
@ -29,7 +29,6 @@ from elodie.media.video import Video
from elodie.result import Result
DB = Db()
FILESYSTEM = FileSystem()
RESULT = Result()
@ -107,6 +106,42 @@ def _import(destination, source, file, album_from_folder, trash, paths, allow_du
RESULT.write()
@click.command('generate-db')
@click.option('--source', type=click.Path(file_okay=False),
              required=True, help='Source of your photo library.')
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1 signatures of media files.
    """
    # NOTE: the docstring above is kept verbatim because click renders it as
    # the `generate-db --help` text.
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Union of the `extensions` declared by Base and every class derived
    # from it; only files with one of these extensions get hashed.
    extensions = set()
    for cls in get_all_subclasses(Base):
        extensions.update(cls.extensions)

    all_files = set(FILESYSTEM.get_all_files(source, None))

    # Keep a copy of the current db on disk before starting from an empty
    # in-memory one.
    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        # Extension without the leading dot, compared case-insensitively.
        extension = os.path.splitext(current_file)[1][1:].lower()
        if extension not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            continue

        # checksum() can return None; storing that as a key would end up
        # as a bogus "null" entry in the JSON file, so skip such files.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.info('Skipping file without checksum %s' % current_file)
            continue

        db.add_hash(checksum, current_file)

    # Single write at the end instead of one write per file.
    db.update_hash_db()
def update_location(media, file_path, location_name):
"""Update location exif metadata of media.
"""
@ -237,6 +272,7 @@ def main():
main.add_command(_import)
main.add_command(_update)
main.add_command(_generate_db)
if __name__ == '__main__':

View File

@ -6,10 +6,13 @@ from builtins import object
import hashlib
import json
from math import radians, cos, sqrt
import os
import sys
from math import radians, cos, sqrt
from shutil import copyfile
from time import strftime
from elodie import constants
@ -66,6 +69,38 @@ class Db(object):
if(write is True):
self.update_hash_db()
# Location database
# Currently quite simple just a list of long/lat pairs with a name
# If it gets many entries a lookup might take too long and a better
# structure might be needed. Some speed up ideas:
# - Sort it so that interval halving (binary search) can be used
# - Use integer part of long or lat as key to get a lower search list
# - Cache a small number of lookups, photos are likely to be taken in
# clusters around a spot during import.
def add_location(self, latitude, longitude, place, write=False):
    """Append a named coordinate pair to the in-memory location database.

    :param float latitude: Latitude of the location.
    :param float longitude: Longitude of the location.
    :param str place: Name for the location.
    :param bool write: If true, write the location db to disk.
    """
    entry = {'lat': latitude, 'long': longitude, 'name': place}
    self.location_db.append(entry)

    # Only the literal True triggers a write; other truthy values do not.
    if write is not True:
        return
    self.update_location_db()
def backup_hash_db(self):
    """Copy the hash db file to a timestamped file next to it.

    :returns: str name of the backup file, or None if there is no hash db
        file to copy.
    """
    if not os.path.isfile(constants.hash_db):
        return None

    # The suffix has one-second resolution, e.g. 2016-12-13_00-18-59.
    timestamp = strftime('%Y-%m-%d_%H-%M-%S')
    backup_path = '%s-%s' % (constants.hash_db, timestamp)
    copyfile(constants.hash_db, backup_path)
    return backup_path
def check_hash(self, key):
"""Check whether a hash is present for the given key.
@ -74,21 +109,6 @@ class Db(object):
"""
return key in self.hash_db
def get_hash(self, key):
    """Look up the value stored in the hash db for a key.

    :param str key:
    :returns: str or None
    """
    # Unknown keys yield None instead of raising KeyError.
    if self.check_hash(key) is not True:
        return None
    return self.hash_db[key]
def update_hash_db(self):
    """Serialize the in-memory hash db as JSON into the hash db file.

    The file at ``constants.hash_db`` is opened in write mode, so any
    previous content is replaced.
    """
    target = constants.hash_db
    with open(target, 'w') as handle:
        json.dump(self.hash_db, handle)
def checksum(self, file_path, blocksize=65536):
"""Create a hash value for the given file.
@ -109,30 +129,15 @@ class Db(object):
return hasher.hexdigest()
return None
# Location database
# Currently quite simple just a list of long/lat pairs with a name
# If it gets many entries a lookup might take too long and a better
# structure might be needed. Some speed up ideas:
# - Sort it and inter-half method can be used
# - Use integer part of long or lat as key to get a lower search list
# - Cache a small number of lookups, photos are likely to be taken in
# clusters around a spot during import.
def get_hash(self, key):
"""Get the hash value for a given key.
def add_location(self, latitude, longitude, place, write=False):
"""Add a location to the database.
:param float latitude: Latitude of the location.
:param float longitude: Longitude of the location.
:param str place: Name for the location.
:param bool write: If true, write the location db to disk.
:param str key:
:returns: str or None
"""
data = {}
data['lat'] = latitude
data['long'] = longitude
data['name'] = place
self.location_db.append(data)
if(write is True):
self.update_location_db()
if(self.check_hash(key) is True):
return self.hash_db[key]
return None
def get_location_name(self, latitude, longitude, threshold_m):
"""Find a name for a location in the database.
@ -178,6 +183,14 @@ class Db(object):
return None
def reset_hash_db(self):
    """Replace the in-memory hash db with a new empty dict.

    Nothing is written to disk by this method.
    """
    self.hash_db = dict()
def update_hash_db(self):
    """Serialize the in-memory hash db as JSON into the hash db file.

    The file at ``constants.hash_db`` is opened in write mode, so any
    previous content is replaced.
    """
    target = constants.hash_db
    with open(target, 'w') as handle:
        json.dump(self.hash_db, handle)
def update_location_db(self):
"""Write the location db to disk."""
with open(constants.location_db, 'w') as f:

View File

@ -187,6 +187,8 @@ class Base(object):
@classmethod
def get_class_by_file(cls, _file, classes):
"""Static method to get a media object by file.
"""
if not isinstance(_file, basestring) or not os.path.isfile(_file):
return None
@ -206,3 +208,21 @@ class Base(object):
:returns: tuple(str)
"""
return cls.extensions
def get_all_subclasses(cls=None):
    """Module method to collect a class together with all of its descendants.

    :param cls: Class to start from. When None, Base is used.
    :returns: set holding the starting class and every direct or indirect
        subclass of it.
    """
    root = Base
    if cls is not None:
        root = cls

    # Explicit worklist instead of recursion; `found` doubles as the
    # visited set so each class is expanded once.
    found = set()
    pending = [root]
    while pending:
        current = pending.pop()
        if current in found:
            continue
        found.add(current)
        pending.extend(current.__subclasses__())

    return found

View File

@ -25,7 +25,7 @@ class Text(Base):
__name__ = 'Text'
#: Valid extensions for text files.
extensions = ('txt')
extensions = ('txt',)
def __init__(self, source=None):
super(Text, self).__init__(source)

View File

@ -4,7 +4,9 @@ import os
import sys
import shutil
from click.testing import CliRunner
from nose.plugins.skip import SkipTest
from nose.tools import assert_raises
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))))
@ -13,6 +15,7 @@ import helper
elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__)))))
from elodie import constants
from elodie.localstorage import Db
from elodie.media.audio import Audio
from elodie.media.photo import Photo
from elodie.media.text import Text
@ -334,6 +337,48 @@ def test_update_time_on_video():
assert metadata['date_taken'] != metadata_processed['date_taken']
assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken']
def test_regenerate_db_invalid_source():
    # A source path that is not a directory must make the command exit
    # with status 1.
    outcome = CliRunner().invoke(
        elodie._generate_db, ['--source', '/invalid/path'])

    assert outcome.exit_code == 1, outcome.exit_code
def test_regenerate_valid_source():
    # generate-db on a folder with a single valid text file must record
    # that file's checksum in the hash db.
    _, folder = helper.create_working_folder()

    try:
        origin = '%s/valid.txt' % folder
        shutil.copyfile(helper.get_file('valid.txt'), origin)

        reset_hash_db()
        try:
            runner = CliRunner()
            result = runner.invoke(elodie._generate_db, ['--source', folder])
            # Read the db back from disk before the original is restored.
            db = Db()
        finally:
            # Always pair reset_hash_db() with restore_hash_db(), even when
            # the body above raises, so the pre-test hash db is put back.
            restore_hash_db()
    finally:
        # Remove the working folder regardless of the test outcome.
        shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
def test_regenerate_valid_source_with_invalid_files():
    # generate-db must hash the valid text file and leave out the file
    # whose extension is not a known media extension.
    _, folder = helper.create_working_folder()

    try:
        origin_valid = '%s/valid.txt' % folder
        shutil.copyfile(helper.get_file('valid.txt'), origin_valid)
        origin_invalid = '%s/invalid.invalid' % folder
        shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid)

        reset_hash_db()
        try:
            runner = CliRunner()
            result = runner.invoke(elodie._generate_db, ['--source', folder])
            # Read the db back from disk before the original is restored.
            db = Db()
        finally:
            # Always pair reset_hash_db() with restore_hash_db(), even when
            # the body above raises, so the pre-test hash db is put back.
            restore_hash_db()
    finally:
        # Remove the working folder regardless of the test outcome.
        shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db
def reset_hash_db():
hash_db = constants.hash_db
if os.path.isfile(hash_db):

View File

View File

@ -62,6 +62,14 @@ def test_add_hash_explicit_write():
# Instantiate a new Db object to confirm random_key exists
db2 = Db()
assert db2.check_hash(random_key) == True
def test_backup_hash_db():
    # backup_hash_db() must create a backup file and return its name.
    db = Db()
    backup_file_name = db.backup_hash_db()

    # No name is returned when there was no hash db file to copy; report
    # that as a test failure instead of a TypeError from os.path.isfile.
    assert backup_file_name is not None, 'backup_hash_db() returned no file name'

    file_exists = os.path.isfile(backup_file_name)
    # Clean up before asserting, but only if there is something to remove,
    # so a missing file fails the assertion rather than raising OSError.
    if file_exists:
        os.remove(backup_file_name)

    assert file_exists, backup_file_name
def test_check_hash_exists():
db = Db()

View File

@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirna
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
import helper
from elodie.media.base import Base
from elodie.media.base import Base, get_all_subclasses
from elodie.media.media import Media
from elodie.media.audio import Audio
from elodie.media.text import Text
@ -106,3 +106,8 @@ def test_set_metadata_basename():
new_metadata = photo.get_metadata()
assert new_metadata['base_name'] == new_basename, new_metadata['base_name']
def test_get_all_subclasses():
    # Starting from Base, the result must be exactly Base plus the five
    # media classes listed below.
    expected = set([Media, Base, Text, Photo, Video, Audio])
    found = get_all_subclasses(Base)

    assert found == expected, found