gh-154 Add command to regenerate the hash database
This commit is contained in:
		
							parent
							
								
									9950e8099a
								
							
						
					
					
						commit
						bf24f9689a
					
				
							
								
								
									
										13
									
								
								Readme.md
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								Readme.md
									
									
									
									
									
								
							@ -52,7 +52,7 @@ You'll notice that the photo was organized into an *Unknown Location* folder. Th
 | 
			
		||||
 | 
			
		||||
### Usage Instructions
 | 
			
		||||
 | 
			
		||||
You can view these instructions on the command line by typing `./elodie.py import --help` or `./elodie.py update --help`.
 | 
			
		||||
You can view these instructions on the command line by typing `./elodie.py import --help`, `./elodie.py update --help` or `./elodie.py generate-db --help`.
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
Usage: elodie.py import [OPTIONS] [PATHS]...
 | 
			
		||||
@ -88,6 +88,17 @@ Options:
 | 
			
		||||
  --help           Show this message and exit.
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
```
 | 
			
		||||
Usage: elodie.py generate-db [OPTIONS]
 | 
			
		||||
 | 
			
		||||
  Regenerate the hash.json database which contains all of the sha1
 | 
			
		||||
  signatures of media files.
 | 
			
		||||
 | 
			
		||||
Options:
 | 
			
		||||
  --source DIRECTORY  Source of your photo library.  [required]
 | 
			
		||||
  --help              Show this message and exit.
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Now you're ready to learn more about Elodie.
 | 
			
		||||
 | 
			
		||||
<p align="center"><img src ="creative/logo@300x.png" /></p>
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										40
									
								
								elodie.py
									
									
									
									
									
								
							
							
						
						
									
										40
									
								
								elodie.py
									
									
									
									
									
								
							@ -20,7 +20,7 @@ from elodie import geolocation
 | 
			
		||||
from elodie import log
 | 
			
		||||
from elodie.filesystem import FileSystem
 | 
			
		||||
from elodie.localstorage import Db
 | 
			
		||||
from elodie.media.base import Base
 | 
			
		||||
from elodie.media.base import Base, get_all_subclasses
 | 
			
		||||
from elodie.media.media import Media
 | 
			
		||||
from elodie.media.text import Text
 | 
			
		||||
from elodie.media.audio import Audio
 | 
			
		||||
@ -29,7 +29,6 @@ from elodie.media.video import Video
 | 
			
		||||
from elodie.result import Result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
DB = Db()
 | 
			
		||||
FILESYSTEM = FileSystem()
 | 
			
		||||
RESULT = Result()
 | 
			
		||||
 | 
			
		||||
@ -107,6 +106,42 @@ def _import(destination, source, file, album_from_folder, trash, paths, allow_du
 | 
			
		||||
    RESULT.write()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@click.command('generate-db')
@click.option('--source', type=click.Path(file_okay=False),
              required=True, help='Source of your photo library.')
def _generate_db(source):
    """Regenerate the hash.json database which contains all of the sha1 signatures of media files.
    """
    # NOTE: click shows the docstring above as this command's --help text,
    # so its wording is intentionally left unchanged.
    source = os.path.abspath(os.path.expanduser(source))

    if not os.path.isdir(source):
        log.error('Source is not a valid directory %s' % source)
        sys.exit(1)

    # Union of the extensions declared by Base and each of its subclasses.
    extensions = set()
    for cls in get_all_subclasses(Base):
        extensions.update(cls.extensions)

    # A set, so that a path reported more than once is only hashed once.
    all_files = set(FILESYSTEM.get_all_files(source, None))

    # Keep a copy of the current database before starting from scratch.
    db = Db()
    db.backup_hash_db()
    db.reset_hash_db()

    for current_file in all_files:
        # Compare the extension without its leading dot, case-insensitively.
        if os.path.splitext(current_file)[1][1:].lower() not in extensions:
            log.info('Skipping invalid file %s' % current_file)
            continue

        # checksum() has a `return None` path; never store None as a key.
        checksum = db.checksum(current_file)
        if checksum is None:
            log.info('Skipping unreadable file %s' % current_file)
            continue

        db.add_hash(checksum, current_file)

    # Write once at the end instead of after every file.
    db.update_hash_db()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def update_location(media, file_path, location_name):
 | 
			
		||||
    """Update location exif metadata of media.
 | 
			
		||||
    """
 | 
			
		||||
@ -237,6 +272,7 @@ def main():
 | 
			
		||||
 | 
			
		||||
main.add_command(_import)
 | 
			
		||||
main.add_command(_update)
 | 
			
		||||
main.add_command(_generate_db)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
 | 
			
		||||
@ -6,10 +6,13 @@ from builtins import object
 | 
			
		||||
 | 
			
		||||
import hashlib
 | 
			
		||||
import json
 | 
			
		||||
from math import radians, cos, sqrt
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
from math import radians, cos, sqrt
 | 
			
		||||
from shutil import copyfile
 | 
			
		||||
from time import strftime
 | 
			
		||||
 | 
			
		||||
from elodie import constants
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -66,6 +69,38 @@ class Db(object):
 | 
			
		||||
        if(write is True):
 | 
			
		||||
            self.update_hash_db()
 | 
			
		||||
 | 
			
		||||
    # Location database
 | 
			
		||||
    # Currently quite simple just a list of long/lat pairs with a name
 | 
			
		||||
    # If it gets many entries a lookup might take too long and a better
 | 
			
		||||
    # structure might be needed. Some speed up ideas:
 | 
			
		||||
    # - Sort it and inter-half method can be used
 | 
			
		||||
    # - Use integer part of long or lat as key to get a lower search list
 | 
			
		||||
    # - Cache a small number of lookups, photos are likely to be taken in
 | 
			
		||||
    #   clusters around a spot during import.
 | 
			
		||||
    def add_location(self, latitude, longitude, place, write=False):
        """Append a named coordinate pair to the in-memory location db.

        :param float latitude: Latitude of the location.
        :param float longitude: Longitude of the location.
        :param str place: Name for the location.
        :param bool write: When exactly ``True``, the location db is also
            written to disk via ``update_location_db``.
        """
        entry = {'lat': latitude, 'long': longitude, 'name': place}
        self.location_db.append(entry)

        # Identity check on purpose: only the literal True triggers a write.
        if write is True:
            self.update_location_db()
 | 
			
		||||
 | 
			
		||||
    def backup_hash_db(self):
        """Copy the hash db file to a timestamped sibling file.

        :returns: str path of the backup copy, or None when the hash db
            file does not exist and nothing was copied.
        """
        if not os.path.isfile(constants.hash_db):
            return None

        # Second-resolution timestamp is appended to the original file name.
        timestamp = strftime('%Y-%m-%d_%H-%M-%S')
        destination = '%s-%s' % (constants.hash_db, timestamp)
        copyfile(constants.hash_db, destination)
        return destination
 | 
			
		||||
 | 
			
		||||
    def check_hash(self, key):
 | 
			
		||||
        """Check whether a hash is present for the given key.
 | 
			
		||||
 | 
			
		||||
@ -74,21 +109,6 @@ class Db(object):
 | 
			
		||||
        """
 | 
			
		||||
        return key in self.hash_db
 | 
			
		||||
 | 
			
		||||
    def get_hash(self, key):
        """Return the value stored in the hash db under ``key``.

        :param str key: Key to look up.
        :returns: str or None (None when ``check_hash`` does not report the
            key as present).
        """
        if self.check_hash(key) is not True:
            return None

        return self.hash_db[key]
 | 
			
		||||
 | 
			
		||||
    def update_hash_db(self):
        """Serialize the in-memory hash db as JSON to ``constants.hash_db``."""
        # Mode 'w' replaces any previous content of the file.
        with open(constants.hash_db, 'w') as hash_db_file:
            json.dump(self.hash_db, hash_db_file)
 | 
			
		||||
 | 
			
		||||
    def checksum(self, file_path, blocksize=65536):
 | 
			
		||||
        """Create a hash value for the given file.
 | 
			
		||||
 | 
			
		||||
@ -109,30 +129,15 @@ class Db(object):
 | 
			
		||||
            return hasher.hexdigest()
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    # Location database
 | 
			
		||||
    # Currently quite simple just a list of long/lat pairs with a name
 | 
			
		||||
    # If it gets many entries a lookup might take too long and a better
 | 
			
		||||
    # structure might be needed. Some speed up ideas:
 | 
			
		||||
    # - Sort it and inter-half method can be used
 | 
			
		||||
    # - Use integer part of long or lat as key to get a lower search list
 | 
			
		||||
    # - Cache a small number of lookups, photos are likely to be taken in
 | 
			
		||||
    #   clusters around a spot during import.
 | 
			
		||||
    def get_hash(self, key):
 | 
			
		||||
        """Get the hash value for a given key.
 | 
			
		||||
 | 
			
		||||
    def add_location(self, latitude, longitude, place, write=False):
 | 
			
		||||
        """Add a location to the database.
 | 
			
		||||
 | 
			
		||||
        :param float latitude: Latitude of the location.
 | 
			
		||||
        :param float longitude: Longitude of the location.
 | 
			
		||||
        :param str place: Name for the location.
 | 
			
		||||
        :param bool write: If true, write the location db to disk.
 | 
			
		||||
        :param str key:
 | 
			
		||||
        :returns: str or None
 | 
			
		||||
        """
 | 
			
		||||
        data = {}
 | 
			
		||||
        data['lat'] = latitude
 | 
			
		||||
        data['long'] = longitude
 | 
			
		||||
        data['name'] = place
 | 
			
		||||
        self.location_db.append(data)
 | 
			
		||||
        if(write is True):
 | 
			
		||||
            self.update_location_db()
 | 
			
		||||
        if(self.check_hash(key) is True):
 | 
			
		||||
            return self.hash_db[key]
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def get_location_name(self, latitude, longitude, threshold_m):
 | 
			
		||||
        """Find a name for a location in the database.
 | 
			
		||||
@ -178,6 +183,14 @@ class Db(object):
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def reset_hash_db(self):
        """Replace the in-memory hash db with a new empty dict.

        Only the attribute is rebound; nothing is written to disk here.
        """
        self.hash_db = dict()
 | 
			
		||||
 | 
			
		||||
    def update_hash_db(self):
        """Serialize the in-memory hash db as JSON to ``constants.hash_db``."""
        # Mode 'w' replaces any previous content of the file.
        with open(constants.hash_db, 'w') as hash_db_file:
            json.dump(self.hash_db, hash_db_file)
 | 
			
		||||
 | 
			
		||||
    def update_location_db(self):
 | 
			
		||||
        """Write the location db to disk."""
 | 
			
		||||
        with open(constants.location_db, 'w') as f:
 | 
			
		||||
 | 
			
		||||
@ -187,6 +187,8 @@ class Base(object):
 | 
			
		||||
 | 
			
		||||
    @classmethod
 | 
			
		||||
    def get_class_by_file(cls, _file, classes):
 | 
			
		||||
        """Static method to get a media object by file.
 | 
			
		||||
        """
 | 
			
		||||
        if not isinstance(_file, basestring) or not os.path.isfile(_file):
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
@ -206,3 +208,21 @@ class Base(object):
 | 
			
		||||
        :returns: tuple(str)
 | 
			
		||||
        """
 | 
			
		||||
        return cls.extensions
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def get_all_subclasses(cls=None):
    """Collect a class together with all of its direct and indirect subclasses.

    :param cls: Class to start from; ``Base`` is used when omitted or None.
    :returns: set holding the starting class itself and every class reachable
        from it through ``__subclasses__()``.
    """
    root = cls if cls is not None else Base

    collected = {root}
    # Recurse into each direct child so that grandchildren are included too.
    for child in root.__subclasses__():
        collected |= get_all_subclasses(child)

    return collected
 | 
			
		||||
 | 
			
		||||
@ -25,7 +25,7 @@ class Text(Base):
 | 
			
		||||
    __name__ = 'Text'
 | 
			
		||||
 | 
			
		||||
    #: Valid extensions for text files.
 | 
			
		||||
    extensions = ('txt')
 | 
			
		||||
    extensions = ('txt',)
 | 
			
		||||
 | 
			
		||||
    def __init__(self, source=None):
 | 
			
		||||
        super(Text, self).__init__(source)
 | 
			
		||||
 | 
			
		||||
@ -4,7 +4,9 @@ import os
 | 
			
		||||
import sys
 | 
			
		||||
import shutil
 | 
			
		||||
 | 
			
		||||
from click.testing import CliRunner
 | 
			
		||||
from nose.plugins.skip import SkipTest
 | 
			
		||||
from nose.tools import assert_raises
 | 
			
		||||
 | 
			
		||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))))
 | 
			
		||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))))
 | 
			
		||||
@ -13,6 +15,7 @@ import helper
 | 
			
		||||
elodie = load_source('elodie', os.path.abspath('{}/../../elodie.py'.format(os.path.dirname(os.path.realpath(__file__)))))
 | 
			
		||||
 | 
			
		||||
from elodie import constants
 | 
			
		||||
from elodie.localstorage import Db
 | 
			
		||||
from elodie.media.audio import Audio
 | 
			
		||||
from elodie.media.photo import Photo
 | 
			
		||||
from elodie.media.text import Text
 | 
			
		||||
@ -334,6 +337,48 @@ def test_update_time_on_video():
 | 
			
		||||
    assert metadata['date_taken'] != metadata_processed['date_taken']
 | 
			
		||||
    assert metadata_processed['date_taken'] == helper.time_convert((2000, 1, 1, 12, 0, 0, 5, 1, 0)), metadata_processed['date_taken']
 | 
			
		||||
 | 
			
		||||
def test_regenerate_db_invalid_source():
    """generate-db exits with status 1 when --source is not a directory."""
    outcome = CliRunner().invoke(
        elodie._generate_db, ['--source', '/invalid/path'])

    assert outcome.exit_code == 1, outcome.exit_code
 | 
			
		||||
 | 
			
		||||
def test_regenerate_valid_source():
    """generate-db records the checksum of a valid text file in the source."""
    expected_key = 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3'
    temporary_folder, folder = helper.create_working_folder()

    target = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), target)

    reset_hash_db()
    outcome = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    # Instantiate Db before restore_hash_db() runs so the regenerated state
    # is what gets inspected below.
    regenerated = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    assert outcome.exit_code == 0, outcome.exit_code
    assert expected_key in regenerated.hash_db, regenerated.hash_db
 | 
			
		||||
 | 
			
		||||
def test_regenerate_valid_source_with_invalid_files():
    """generate-db hashes the valid file and skips the unsupported one."""
    valid_key = 'bde2dc0b839a5d20b0b4c1f57605f84e0e2a4562aaebc1c362de6cb7cc02eeb3'
    # This value is the SHA-256 digest of empty input.
    invalid_key = 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
    temporary_folder, folder = helper.create_working_folder()

    valid_target = '%s/valid.txt' % folder
    shutil.copyfile(helper.get_file('valid.txt'), valid_target)
    invalid_target = '%s/invalid.invalid' % folder
    shutil.copyfile(helper.get_file('invalid.invalid'), invalid_target)

    reset_hash_db()
    outcome = CliRunner().invoke(elodie._generate_db, ['--source', folder])
    # Instantiate Db before restore_hash_db() runs so the regenerated state
    # is what gets inspected below.
    regenerated = Db()
    restore_hash_db()

    shutil.rmtree(folder)

    assert outcome.exit_code == 0, outcome.exit_code
    assert valid_key in regenerated.hash_db, regenerated.hash_db
    assert invalid_key not in regenerated.hash_db, regenerated.hash_db
 | 
			
		||||
 | 
			
		||||
def reset_hash_db():
 | 
			
		||||
    hash_db = constants.hash_db
 | 
			
		||||
    if os.path.isfile(hash_db):
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										0
									
								
								elodie/tests/files/invalid.invalid
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								elodie/tests/files/invalid.invalid
									
									
									
									
									
										Normal file
									
								
							@ -62,6 +62,14 @@ def test_add_hash_explicit_write():
 | 
			
		||||
    # Instantiate new db class to confirm random_key exists
 | 
			
		||||
    db2 = Db()
 | 
			
		||||
    assert db2.check_hash(random_key) == True
 | 
			
		||||
 | 
			
		||||
def test_backup_hash_db():
    """backup_hash_db returns the path of a file that exists on disk."""
    backup_path = Db().backup_hash_db()
    backup_created = os.path.isfile(backup_path)
    # The backup is deleted before the assertion is evaluated.
    os.remove(backup_path)

    assert backup_created, backup_path
 | 
			
		||||
    
 | 
			
		||||
def test_check_hash_exists():
 | 
			
		||||
    db = Db()
 | 
			
		||||
 | 
			
		||||
@ -14,7 +14,7 @@ sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirna
 | 
			
		||||
sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(os.path.realpath(__file__)))))
 | 
			
		||||
 | 
			
		||||
import helper
 | 
			
		||||
from elodie.media.base import Base
 | 
			
		||||
from elodie.media.base import Base, get_all_subclasses
 | 
			
		||||
from elodie.media.media import Media
 | 
			
		||||
from elodie.media.audio import Audio
 | 
			
		||||
from elodie.media.text import Text
 | 
			
		||||
@ -106,3 +106,8 @@ def test_set_metadata_basename():
 | 
			
		||||
    new_metadata = photo.get_metadata()
 | 
			
		||||
 | 
			
		||||
    assert new_metadata['base_name'] == new_basename, new_metadata['base_name']
 | 
			
		||||
 | 
			
		||||
def test_get_all_subclasses():
    """get_all_subclasses(Base) returns Base plus every media class."""
    expected = {Media, Base, Text, Photo, Video, Audio}

    found = get_all_subclasses(Base)

    assert found == expected, found
 | 
			
		||||
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user