gh-154 Add command to regenerate the hash database
This commit is contained in:
parent
9950e8099a
commit
bf24f9689a
13
Readme.md
13
Readme.md
|
@ -52,7 +52,7 @@ You'll notice that the photo was organized into an *Unknown Location* folder. Th
|
|||
|
||||
### Usage Instructions
|
||||
|
||||
You can view these instructions on the command line by typing `./elodie.py import --help` or `./elodie.py update --help`.
|
||||
You can view these instructions on the command line by typing `./elodie.py import --help`, `./elodie.py update --help` or `./elodie.py generate-db --help`.
|
||||
|
||||
```
|
||||
Usage: elodie.py import [OPTIONS] [PATHS]...
|
||||
|
@ -88,6 +88,17 @@ Options:
|
|||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
```
|
||||
Usage: elodie.py generate-db [OPTIONS]
|
||||
|
||||
Regenerate the hash.json database which contains all of the sha1
|
||||
signatures of media files.
|
||||
|
||||
Options:
|
||||
--source DIRECTORY Source of your photo library. [required]
|
||||
--help Show this message and exit.
|
||||
```
|
||||
|
||||
Now you're ready to learn more about Elodie.
|
||||
|
||||
<p align="center"><img src ="creative/logo@300x.png" /></p>
|
||||
|
|
40
elodie.py
40
elodie.py
|
@ -20,7 +20,7 @@ from elodie import geolocation
|
|||
from elodie import log
|
||||
from elodie.filesystem import FileSystem
|
||||
from elodie.localstorage import Db
|
||||
from elodie.media.base import Base
|
||||
from elodie.media.base import Base, get_all_subclasses
|
||||
from elodie.media.media import Media
|
||||
from elodie.media.text import Text
|
||||
from elodie.media.audio import Audio
|
||||
|
@ -29,7 +29,6 @@ from elodie.media.video import Video
|
|||
from elodie.result import Result
|
||||
|
||||
|
||||
DB = Db()
|
||||
FILESYSTEM = FileSystem()
|
||||
RESULT = Result()
|
||||
|
||||
|
@ -107,6 +106,42 @@ def _import(destination, source, file, album_from_folder, trash, paths, allow_du
|
|||
RESULT.write()
|
||||
|
||||
|
||||
@click.command('generate-db')
@click.option('--source', type=click.Path(file_okay=False),
              required=True, help='Source of your photo library.')
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1 signatures of media files.
    """
    # NOTE: click renders the docstring above as the command's --help text,
    # so implementation notes are kept in comments instead.
    #
    # Flow: validate the source directory, collect the extensions declared
    # by Base and all of its subclasses, back up and reset the hash db, then
    # re-add one entry per file with a recognised extension and write the
    # db back to disk.
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Union of the extensions handled by every media class.
    extensions = set()
    for cls in get_all_subclasses(Base):
        extensions.update(cls.extensions)

    # De-duplicate the paths returned by the filesystem walk.
    all_files = set(FILESYSTEM.get_all_files(source, None))

    # A fresh Db instance is used; the existing db file is backed up before
    # the in-memory copy is emptied.
    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        # Extension without the leading dot, compared case-insensitively.
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            continue

        # checksum() appears to fall back to returning None; storing None as
        # a key would put a bogus entry into the db, so such files are
        # skipped instead.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.info('Skipping file without checksum %s' % current_file)
            continue

        db.add_hash(checksum, current_file)

    db.update_hash_db()
|
||||
|
||||
|
||||
def update_location(media, file_path, location_name):
|
||||
"""Update location exif metadata of media.
|
||||
"""
|
||||
|
@ -237,6 +272,7 @@ def main():
|
|||
|
||||
main.add_command(_import)
|
||||
main.add_command(_update)
|
||||
main.add_command(_generate_db)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -6,10 +6,13 @@ from builtins import object
|
|||
|
||||
import hashlib
|
||||
import json
|
||||
from math import radians, cos, sqrt
|
||||
import os
|
||||
import sys
|
||||
|
||||
from math import radians, cos, sqrt
|
||||
from shutil import copyfile
|
||||
from time import strftime
|
||||
|
||||
from elodie import constants
|
||||
|
||||
|
||||
|
@ -66,6 +69,38 @@ class Db(object):
|
|||
if(write is True):
|
||||
self.update_hash_db()
|
||||
|
||||
# Location database
|
||||
# Currently quite simple just a list of long/lat pairs with a name
|
||||
# If it gets many entries a lookup might take too long and a better
|
||||
# structure might be needed. Some speed up ideas:
|
||||
# - Sort it so that interval halving (binary search) can be used
|
||||
# - Use integer part of long or lat as key to get a lower search list
|
||||
# - Cache a small number of lookups, photos are likely to be taken in
|
||||
# clusters around a spot during import.
|
||||
def add_location(self, latitude, longitude, place, write=False):
    """Add a location to the database.

    :param float latitude: Latitude of the location.
    :param float longitude: Longitude of the location.
    :param str place: Name for the location.
    :param bool write: If true, write the location db to disk.
    """
    # Each entry is a plain dict appended to the in-memory location list.
    self.location_db.append({
        'lat': latitude,
        'long': longitude,
        'name': place,
    })

    # Only the literal True triggers a write; other truthy values do not.
    if write is True:
        self.update_location_db()
|
||||
|
||||
def backup_hash_db(self):
    """Backs up the hash db.

    Copies the hash db file to a sibling file whose name is the hash db
    path followed by ``-`` and a ``%Y-%m-%d_%H-%M-%S`` timestamp.

    :returns: str path of the backup file, or None when there is no hash
        db file to back up.
    """
    # Nothing to copy: report that explicitly rather than falling through.
    if not os.path.isfile(constants.hash_db):
        return None

    mask = strftime('%Y-%m-%d_%H-%M-%S')
    backup_file_name = '%s-%s' % (constants.hash_db, mask)
    copyfile(constants.hash_db, backup_file_name)
    return backup_file_name
|
||||
|
||||
def check_hash(self, key):
|
||||
"""Check whether a hash is present for the given key.
|
||||
|
||||
|
@ -74,21 +109,6 @@ class Db(object):
|
|||
"""
|
||||
return key in self.hash_db
|
||||
|
||||
def get_hash(self, key):
    """Get the hash value for a given key.

    :param str key:
    :returns: str or None
    """
    # Unknown keys yield None instead of raising KeyError.
    if self.check_hash(key) is not True:
        return None
    return self.hash_db[key]
|
||||
|
||||
def update_hash_db(self):
    """Write the hash db to disk.

    Serializes ``self.hash_db`` as JSON into the file at
    ``constants.hash_db``, replacing its previous contents.
    """
    with open(constants.hash_db, 'w') as f:
        json.dump(self.hash_db, f)
|
||||
|
||||
def checksum(self, file_path, blocksize=65536):
|
||||
"""Create a hash value for the given file.
|
||||
|
||||
|
@ -109,30 +129,15 @@ class Db(object):
|
|||
return hasher.hexdigest()
|
||||
return None
|
||||
|
||||
# Location database
|
||||
# Currently quite simple just a list of long/lat pairs with a name
|
||||
# If it gets many entries a lookup might take too long and a better
|
||||
# structure might be needed. Some speed up ideas:
|
||||
# - Sort it so that interval halving (binary search) can be used
|
||||
# - Use integer part of long or lat as key to get a lower search list
|
||||
# - Cache a small number of lookups, photos are likely to be taken in
|
||||
# clusters around a spot during import.
|
||||
def get_hash(self, key):
|
||||
"""Get the hash value for a given key.
|
||||
|
||||
def add_location(self, latitude, longitude, place, write=False):
|
||||
"""Add a location to the database.
|
||||
|
||||
:param float latitude: Latitude of the location.
|
||||
:param float longitude: Longitude of the location.
|
||||
:param str place: Name for the location.
|
||||
:param bool write: If true, write the location db to disk.
|
||||
:param str key:
|
||||
:returns: str or None
|
||||
"""
|
||||
data = {}
|
||||
data['lat'] = latitude
|
||||
data['long'] = longitude
|
||||
data['name'] = place
|
||||
self.location_db.append(data)
|
||||
if(write is True):
|
||||
self.update_location_db()
|
||||
if(self.check_hash(key) is True):
|
||||
return self.hash_db[key]
|
||||
return None
|
||||
|
||||
def get_location_name(self, latitude, longitude, threshold_m):
|
||||
"""Find a name for a location in the database.
|
||||
|
@ -178,6 +183,14 @@ class Db(object):
|
|||
|
||||
return None
|
||||
|
||||
def reset_hash_db(self):
    """Reset the in-memory hash db to an empty dict.

    Nothing is written to disk by this method itself.
    """
    self.hash_db = {}
|
||||
|
||||
def update_hash_db(self):
    """Write the hash db to disk.

    Serializes ``self.hash_db`` as JSON into the file at
    ``constants.hash_db``, replacing its previous contents.
    """
    with open(constants.hash_db, 'w') as f:
        json.dump(self.hash_db, f)
|
||||
|
||||
def update_location_db(self):
|
||||
"""Write the location db to disk."""
|
||||
with open(constants.location_db, 'w') as f:
|
||||
|
|
|
@ -187,6 +187,8 @@ class Base(object):
|
|||
|
||||
@classmethod
|
||||
def get_class_by_file(cls, _file, classes):
|
||||
"""Static method to get a media object by file.
|
||||
"""
|
||||
if not isinstance(_file, basestring) or not os.path.isfile(_file):
|
||||
return None
|
||||
|
||||
|
@ -206,3 +208,21 @@ class Base(object):
|
|||
:returns: tuple(str)
|
||||
"""
|
||||
return cls.extensions
|
||||
|
||||
|
||||
def get_all_subclasses(cls=None):
    """Module-level function to get a class and all of its subclasses.

    :param type cls: Class to start from. Defaults to Base when None.
    :returns: set containing the starting class itself plus every direct
        and indirect subclass of it.
    """
    this_class = Base if cls is None else cls

    # The starting class is always part of the result.
    subclasses = {this_class}

    # Recurse so that grandchildren and deeper descendants are included.
    for child_class in this_class.__subclasses__():
        subclasses.update(get_all_subclasses(child_class))

    return subclasses
|
||||
|
|
|
@ -25,7 +25,7 @@ class Text(Base):
|
|||
__name__ = 'Text'
|
||||
|
||||
#: Valid extensions for text files.
|
||||
extensions = ('txt')
|
||||
extensions = ('txt',)
|
||||
|
||||
def __init__(self, source=None):
|
||||
super(Text, self).__init__(source)
|
||||
|
|
|
@ -4,7 +4,9 @@ import os
|
|||
import sys
|
||||
import shutil
|
||||
|
||||
from click.testing import CliRunner
|
||||
from nose.plugins.skip import SkipTest
|
||||
from nose.tools import assert_raises
|
||||
|
||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))
|
||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))))
|
||||
|
@ -13,6 +15,7 @@ import helper
|
|||
elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__)))))
|
||||
|
||||
from elodie import constants
|
||||
from elodie.localstorage import Db
|
||||
from elodie.media.audio import Audio
|
||||
from elodie.media.photo import Photo
|
||||
from elodie.media.text import Text
|
||||
|
@ -334,6 +337,48 @@ def test_update_time_on_video():
|
|||
assert metadata['date_taken'] != metadata_processed['date_taken']
|
||||
assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken']
|
||||
|
||||
def test_regenerate_db_invalid_source():
    # A source path that is not a directory must make the command exit
    # with status 1.
    runner = CliRunner()
    result = runner.invoke(elodie._generate_db, ['--source', '/invalid/path'])
    assert result.exit_code == 1, result.exit_code
|
||||
|
||||
def test_regenerate_valid_source():
    # Regenerating from a folder holding one valid text file must record
    # that file's checksum in the hash db.
    temporary_folder, folder = helper.create_working_folder()

    origin = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin)

    reset_hash_db()
    try:
        runner = CliRunner()
        result = runner.invoke(elodie._generate_db, ['--source', folder])
        # Load the db the command just wrote, before it is restored.
        db = Db()
    finally:
        # Always put the original hash db back and clean up, even when the
        # steps above raise.
        restore_hash_db()
        shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
|
||||
|
||||
def test_regenerate_valid_source_with_invalid_files():
    # Files with an unsupported extension must be left out of the
    # regenerated hash db while valid files are still recorded.
    temporary_folder, folder = helper.create_working_folder()

    origin_valid = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), origin_valid)
    origin_invalid = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), origin_invalid)

    reset_hash_db()
    try:
        runner = CliRunner()
        result = runner.invoke(elodie._generate_db, ['--source', folder])
        # Load the db the command just wrote, before it is restored.
        db = Db()
    finally:
        # Always put the original hash db back and clean up, even when the
        # steps above raise.
        restore_hash_db()
        shutil.rmtree(folder)

    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db
|
||||
|
||||
def reset_hash_db():
|
||||
hash_db = constants.hash_db
|
||||
if os.path.isfile(hash_db):
|
||||
|
|
|
@ -63,6 +63,14 @@ def test_add_hash_explicit_write():
|
|||
db2 = Db()
|
||||
assert db2.check_hash(random_key) == True
|
||||
|
||||
def test_backup_hash_db():
    # backup_hash_db() must return the path of a backup file that exists.
    db = Db()
    backup_file_name = db.backup_hash_db()

    # Guard against a missing backup so the test fails on the assertion
    # below rather than inside os.path.isfile() or os.remove().
    file_exists = (backup_file_name is not None and
                   os.path.isfile(backup_file_name))
    if file_exists:
        os.remove(backup_file_name)

    assert file_exists, backup_file_name
|
||||
|
||||
def test_check_hash_exists():
|
||||
db = Db()
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirna
|
|||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
|
||||
|
||||
import helper
|
||||
from elodie.media.base import Base
|
||||
from elodie.media.base import Base, get_all_subclasses
|
||||
from elodie.media.media import Media
|
||||
from elodie.media.audio import Audio
|
||||
from elodie.media.text import Text
|
||||
|
@ -106,3 +106,8 @@ def test_set_metadata_basename():
|
|||
new_metadata = photo.get_metadata()
|
||||
|
||||
assert new_metadata['base_name'] == new_basename, new_metadata['base_name']
|
||||
|
||||
def test_get_all_subclasses():
    # The result includes Base itself as well as every media class
    # derived from it, directly or indirectly.
    subclasses = get_all_subclasses(Base)
    expected = {Media, Base, Text, Photo, Video, Audio}
    assert subclasses == expected, subclasses
|
||||
|
|
Loading…
Reference in New Issue