Merge pull request #155 from jmathai/regenerate-db-gh-154
Add command to regenerate the hash database
This commit is contained in:
commit
a82114818f
13
Readme.md
13
Readme.md
|
@ -52,7 +52,7 @@ You'll notice that the photo was organized into an *Unknown Location* folder. Th
|
||||||
|
|
||||||
### Usage Instructions
|
### Usage Instructions
|
||||||
|
|
||||||
You can view these instructions on the command line by typing `./elodie.py import --help` or `./elodie.py update --help`.
|
You can view these instructions on the command line by typing `./elodie.py import --help`, `./elodie.py update --help` or `./elodie.py generate-db --help`.
|
||||||
|
|
||||||
```
|
```
|
||||||
Usage: elodie.py import [OPTIONS] [PATHS]...
|
Usage: elodie.py import [OPTIONS] [PATHS]...
|
||||||
|
@ -88,6 +88,17 @@ Options:
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
Usage: elodie.py generate-db [OPTIONS]
|
||||||
|
|
||||||
|
Regenerate the hash.json database which contains all of the sha256
|
||||||
|
signatures of media files.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--source DIRECTORY Source of your photo library. [required]
|
||||||
|
--help Show this message and exit.
|
||||||
|
```
|
||||||
|
|
||||||
Now you're ready to learn more about Elodie.
|
Now you're ready to learn more about Elodie.
|
||||||
|
|
||||||
<p align="center"><img src ="creative/logo@300x.png" /></p>
|
<p align="center"><img src ="creative/logo@300x.png" /></p>
|
||||||
|
|
40
elodie.py
40
elodie.py
|
@ -20,7 +20,7 @@ from elodie import geolocation
|
||||||
from elodie import log
|
from elodie import log
|
||||||
from elodie.filesystem import FileSystem
|
from elodie.filesystem import FileSystem
|
||||||
from elodie.localstorage import Db
|
from elodie.localstorage import Db
|
||||||
from elodie.media.base import Base
|
from elodie.media.base import Base, get_all_subclasses
|
||||||
from elodie.media.media import Media
|
from elodie.media.media import Media
|
||||||
from elodie.media.text import Text
|
from elodie.media.text import Text
|
||||||
from elodie.media.audio import Audio
|
from elodie.media.audio import Audio
|
||||||
|
@ -29,7 +29,6 @@ from elodie.media.video import Video
|
||||||
from elodie.result import Result
|
from elodie.result import Result
|
||||||
|
|
||||||
|
|
||||||
DB = Db()
|
|
||||||
FILESYSTEM = FileSystem()
|
FILESYSTEM = FileSystem()
|
||||||
RESULT = Result()
|
RESULT = Result()
|
||||||
|
|
||||||
|
@ -107,6 +106,42 @@ def _import(destination, source, file, album_from_folder, trash, paths, allow_du
|
||||||
RESULT.write()
|
RESULT.write()
|
||||||
|
|
||||||
|
|
||||||
|
@click.command('generate-db')
@click.option('--source', type=click.Path(file_okay=False),
              required=True, help='Source of your photo library.')
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha256
    signatures of media files.
    """
    # NOTE: click renders the docstring above as the command's --help text,
    # so implementation details are kept in comments instead.
    #
    # :param str source: Directory to scan; ``~`` is expanded and the path is
    #     made absolute before use.
    # Exits the process with status 1 when ``source`` is not a directory.
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Collect the extensions declared by Base and every class derived from
    # it. Each ``extensions`` attribute must be a real tuple: set.update()
    # on a bare string would add its individual characters instead.
    extensions = set()
    for cls in get_all_subclasses(Base):
        extensions.update(cls.extensions)

    all_files = set(FILESYSTEM.get_all_files(source, None))

    # Keep a copy of the current database, then start from an empty one.
    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            continue

        # checksum() can return None; storing None as a key would corrupt
        # the regenerated database, so such files are skipped.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.error('Could not checksum file %s' % current_file)
            continue

        db.add_hash(checksum, current_file)

    # Persist once, after every file has been processed.
    db.update_hash_db()
|
||||||
|
|
||||||
|
|
||||||
def update_location(media, file_path, location_name):
|
def update_location(media, file_path, location_name):
|
||||||
"""Update location exif metadata of media.
|
"""Update location exif metadata of media.
|
||||||
"""
|
"""
|
||||||
|
@ -237,6 +272,7 @@ def main():
|
||||||
|
|
||||||
main.add_command(_import)
|
main.add_command(_import)
|
||||||
main.add_command(_update)
|
main.add_command(_update)
|
||||||
|
main.add_command(_generate_db)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -6,10 +6,13 @@ from builtins import object
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
import json
|
import json
|
||||||
from math import radians, cos, sqrt
|
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
from math import radians, cos, sqrt
|
||||||
|
from shutil import copyfile
|
||||||
|
from time import strftime
|
||||||
|
|
||||||
from elodie import constants
|
from elodie import constants
|
||||||
|
|
||||||
|
|
||||||
|
@ -66,6 +69,38 @@ class Db(object):
|
||||||
if(write is True):
|
if(write is True):
|
||||||
self.update_hash_db()
|
self.update_hash_db()
|
||||||
|
|
||||||
|
# Location database
|
||||||
|
# Currently quite simple just a list of long/lat pairs with a name
|
||||||
|
# If it gets many entries a lookup might take too long and a better
|
||||||
|
# structure might be needed. Some speed up ideas:
|
||||||
|
# - Sort it and inter-half method can be used
|
||||||
|
# - Use integer part of long or lat as key to get a lower search list
|
||||||
|
# - Cache a small number of lookups, photos are likely to be taken in
|
||||||
|
# clusters around a spot during import.
|
||||||
|
def add_location(self, latitude, longitude, place, write=False):
    """Record a named coordinate pair in the in-memory location database.

    :param float latitude: Latitude of the location.
    :param float longitude: Longitude of the location.
    :param str place: Name for the location.
    :param bool write: When exactly ``True``, also write the location db
        to disk through ``update_location_db``.
    """
    entry = {'lat': latitude, 'long': longitude, 'name': place}
    self.location_db.append(entry)

    # Identity check on purpose: only the literal True triggers a write.
    if write is not True:
        return
    self.update_location_db()
|
||||||
|
|
||||||
|
def backup_hash_db(self):
    """Copy the hash db to a timestamped file next to it.

    The backup is named ``<hash_db>-<YYYY-MM-DD_HH-MM-SS>`` using the
    current local time.

    :returns: str path of the backup file, or None when no hash db file
        exists to back up.
    """
    if not os.path.isfile(constants.hash_db):
        return None

    timestamp = strftime('%Y-%m-%d_%H-%M-%S')
    backup_file_name = '%s-%s' % (constants.hash_db, timestamp)
    copyfile(constants.hash_db, backup_file_name)
    return backup_file_name
|
||||||
|
|
||||||
def check_hash(self, key):
|
def check_hash(self, key):
|
||||||
"""Check whether a hash is present for the given key.
|
"""Check whether a hash is present for the given key.
|
||||||
|
|
||||||
|
@ -74,21 +109,6 @@ class Db(object):
|
||||||
"""
|
"""
|
||||||
return key in self.hash_db
|
return key in self.hash_db
|
||||||
|
|
||||||
def get_hash(self, key):
|
|
||||||
"""Get the hash value for a given key.
|
|
||||||
|
|
||||||
:param str key:
|
|
||||||
:returns: str or None
|
|
||||||
"""
|
|
||||||
if(self.check_hash(key) is True):
|
|
||||||
return self.hash_db[key]
|
|
||||||
return None
|
|
||||||
|
|
||||||
def update_hash_db(self):
|
|
||||||
"""Write the hash db to disk."""
|
|
||||||
with open(constants.hash_db, 'w') as f:
|
|
||||||
json.dump(self.hash_db, f)
|
|
||||||
|
|
||||||
def checksum(self, file_path, blocksize=65536):
|
def checksum(self, file_path, blocksize=65536):
|
||||||
"""Create a hash value for the given file.
|
"""Create a hash value for the given file.
|
||||||
|
|
||||||
|
@ -109,30 +129,15 @@ class Db(object):
|
||||||
return hasher.hexdigest()
|
return hasher.hexdigest()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Location database
|
def get_hash(self, key):
|
||||||
# Currently quite simple just a list of long/lat pairs with a name
|
"""Get the hash value for a given key.
|
||||||
# If it gets many entries a lookup might take too long and a better
|
|
||||||
# structure might be needed. Some speed up ideas:
|
|
||||||
# - Sort it and inter-half method can be used
|
|
||||||
# - Use integer part of long or lat as key to get a lower search list
|
|
||||||
# - Cache a small number of lookups, photos are likely to be taken in
|
|
||||||
# clusters around a spot during import.
|
|
||||||
|
|
||||||
def add_location(self, latitude, longitude, place, write=False):
|
:param str key:
|
||||||
"""Add a location to the database.
|
:returns: str or None
|
||||||
|
|
||||||
:param float latitude: Latitude of the location.
|
|
||||||
:param float longitude: Longitude of the location.
|
|
||||||
:param str place: Name for the location.
|
|
||||||
:param bool write: If true, write the location db to disk.
|
|
||||||
"""
|
"""
|
||||||
data = {}
|
if(self.check_hash(key) is True):
|
||||||
data['lat'] = latitude
|
return self.hash_db[key]
|
||||||
data['long'] = longitude
|
return None
|
||||||
data['name'] = place
|
|
||||||
self.location_db.append(data)
|
|
||||||
if(write is True):
|
|
||||||
self.update_location_db()
|
|
||||||
|
|
||||||
def get_location_name(self, latitude, longitude, threshold_m):
|
def get_location_name(self, latitude, longitude, threshold_m):
|
||||||
"""Find a name for a location in the database.
|
"""Find a name for a location in the database.
|
||||||
|
@ -178,6 +183,14 @@ class Db(object):
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def reset_hash_db(self):
    """Drop every in-memory hash entry by rebinding ``hash_db`` to a new
    empty dict.

    Nothing is written to disk here.
    """
    self.hash_db = dict()
|
||||||
|
|
||||||
|
def update_hash_db(self):
    """Serialize the in-memory hash db as JSON into ``constants.hash_db``.

    The file is opened in ``'w'`` mode, so any previous contents are
    replaced.
    """
    with open(constants.hash_db, 'w') as hash_file:
        json.dump(self.hash_db, hash_file)
|
||||||
|
|
||||||
def update_location_db(self):
|
def update_location_db(self):
|
||||||
"""Write the location db to disk."""
|
"""Write the location db to disk."""
|
||||||
with open(constants.location_db, 'w') as f:
|
with open(constants.location_db, 'w') as f:
|
||||||
|
|
|
@ -187,6 +187,8 @@ class Base(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_class_by_file(cls, _file, classes):
|
def get_class_by_file(cls, _file, classes):
|
||||||
|
"""Static method to get a media object by file.
|
||||||
|
"""
|
||||||
if not isinstance(_file, basestring) or not os.path.isfile(_file):
|
if not isinstance(_file, basestring) or not os.path.isfile(_file):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -206,3 +208,21 @@ class Base(object):
|
||||||
:returns: tuple(str)
|
:returns: tuple(str)
|
||||||
"""
|
"""
|
||||||
return cls.extensions
|
return cls.extensions
|
||||||
|
|
||||||
|
|
||||||
|
def get_all_subclasses(cls=None):
    """Collect a class together with all of its direct and indirect
    subclasses.

    :param type cls: Root of the hierarchy to walk. Defaults to ``Base``
        when omitted or ``None``.
    :returns: set of classes, always including the root itself.
    """
    root = Base if cls is None else cls

    found = set()
    pending = [root]
    while pending:
        current = pending.pop()
        # A class reachable through several parents (multiple inheritance)
        # is expanded only once.
        if current in found:
            continue
        found.add(current)
        pending.extend(current.__subclasses__())

    return found
|
||||||
|
|
|
@ -25,7 +25,7 @@ class Text(Base):
|
||||||
__name__ = 'Text'
|
__name__ = 'Text'
|
||||||
|
|
||||||
#: Valid extensions for text files.
|
#: Valid extensions for text files.
|
||||||
extensions = ('txt')
|
extensions = ('txt',)
|
||||||
|
|
||||||
def __init__(self, source=None):
|
def __init__(self, source=None):
|
||||||
super(Text, self).__init__(source)
|
super(Text, self).__init__(source)
|
||||||
|
|
|
@ -4,7 +4,9 @@ import os
|
||||||
import sys
|
import sys
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
|
from click.testing import CliRunner
|
||||||
from nose.plugins.skip import SkipTest
|
from nose.plugins.skip import SkipTest
|
||||||
|
from nose.tools import assert_raises
|
||||||
|
|
||||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))
|
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))
|
||||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))))
|
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))))
|
||||||
|
@ -13,6 +15,7 @@ import helper
|
||||||
elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__)))))
|
elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__)))))
|
||||||
|
|
||||||
from elodie import constants
|
from elodie import constants
|
||||||
|
from elodie.localstorage import Db
|
||||||
from elodie.media.audio import Audio
|
from elodie.media.audio import Audio
|
||||||
from elodie.media.photo import Photo
|
from elodie.media.photo import Photo
|
||||||
from elodie.media.text import Text
|
from elodie.media.text import Text
|
||||||
|
@ -334,6 +337,48 @@ def test_update_time_on_video():
|
||||||
assert metadata['date_taken'] != metadata_processed['date_taken']
|
assert metadata['date_taken'] != metadata_processed['date_taken']
|
||||||
assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken']
|
assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken']
|
||||||
|
|
||||||
|
def test_regenerate_db_invalid_source():
    """generate-db exits with status 1 when --source is not a directory."""
    cli_args = ['--source', '/invalid/path']
    result = CliRunner().invoke(elodie._generate_db, cli_args)

    assert result.exit_code == 1, result.exit_code
|
||||||
|
|
||||||
|
def test_regenerate_valid_source():
    """generate-db stores the checksum of a valid file found under --source."""
    _, folder = helper.create_working_folder()

    media_path = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), media_path)

    reset_hash_db()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    # Capture the database state before restore_hash_db() runs.
    db = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    # Assertions come last so the cleanup above runs even when they fail.
    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
|
||||||
|
|
||||||
|
def test_regenerate_valid_source_with_invalid_files():
    """generate-db hashes the valid file and leaves out the one whose
    extension is not recognised.
    """
    _, folder = helper.create_working_folder()

    # One file with a supported extension and one without.
    valid_path = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), valid_path)
    invalid_path = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), invalid_path)

    reset_hash_db()
    result = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    # Capture the database state before restore_hash_db() runs.
    db = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    # Assertions come last so the cleanup above runs even when they fail.
    assert result.exit_code == 0, result.exit_code
    assert 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3' in db.hash_db, db.hash_db
    assert 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' not in db.hash_db, db.hash_db
|
||||||
|
|
||||||
def reset_hash_db():
|
def reset_hash_db():
|
||||||
hash_db = constants.hash_db
|
hash_db = constants.hash_db
|
||||||
if os.path.isfile(hash_db):
|
if os.path.isfile(hash_db):
|
||||||
|
|
|
@ -63,6 +63,14 @@ def test_add_hash_explicit_write():
|
||||||
db2 = Db()
|
db2 = Db()
|
||||||
assert db2.check_hash(random_key) == True
|
assert db2.check_hash(random_key) == True
|
||||||
|
|
||||||
|
def test_backup_hash_db():
    """backup_hash_db leaves a file on disk at the path it returns."""
    backup_path = Db().backup_hash_db()
    backup_created = os.path.isfile(backup_path)
    # Remove the backup before asserting so a failure does not leave it behind.
    os.remove(backup_path)

    assert backup_created, backup_path
|
||||||
|
|
||||||
def test_check_hash_exists():
|
def test_check_hash_exists():
|
||||||
db = Db()
|
db = Db()
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirna
|
||||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
|
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
|
||||||
|
|
||||||
import helper
|
import helper
|
||||||
from elodie.media.base import Base
|
from elodie.media.base import Base, get_all_subclasses
|
||||||
from elodie.media.media import Media
|
from elodie.media.media import Media
|
||||||
from elodie.media.audio import Audio
|
from elodie.media.audio import Audio
|
||||||
from elodie.media.text import Text
|
from elodie.media.text import Text
|
||||||
|
@ -106,3 +106,8 @@ def test_set_metadata_basename():
|
||||||
new_metadata = photo.get_metadata()
|
new_metadata = photo.get_metadata()
|
||||||
|
|
||||||
assert new_metadata['base_name'] == new_basename, new_metadata['base_name']
|
assert new_metadata['base_name'] == new_basename, new_metadata['base_name']
|
||||||
|
|
||||||
|
def test_get_all_subclasses():
    """get_all_subclasses(Base) returns Base plus every media class."""
    expected = set([Media, Base, Text, Photo, Video, Audio])
    found = get_all_subclasses(Base)

    assert found == expected, found
|
||||||
|
|
Loading…
Reference in New Issue