Add init, check and update commands

This commit is contained in:
Cédric Leporcq 2021-10-15 06:41:22 +02:00
parent 2ac8ca3b67
commit 1cade46307
4 changed files with 225 additions and 33 deletions

View File

@ -237,25 +237,63 @@ def clean(**kwargs):
sys.exit(1) sys.exit(1)
@click.command('generate-db') @click.command('init')
@add_options(_logger_options) @add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False), @click.argument('path', required=True, nargs=1, type=click.Path())
required=True, help='Path of your photo library.') def init(**kwargs):
def generate_db(**kwargs):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files. """Regenerate the hash.json database which contains all of the sha256 signatures of media files.
""" """
# TODO config = Config(constants.CONFIG_FILE)
pass opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
summary = collection.init(loc)
if verbose or debug:
summary.print()
@click.command('verify') @click.command('update')
@add_options(_logger_options) @add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False), @click.argument('path', required=True, nargs=1, type=click.Path())
required=True, help='Path of your photo library.') def update(**kwargs):
def verify(**kwargs): """Regenerate the hash.json database which contains all of the sha256 signatures of media files.
"""Verify hashes""" """
# TODO config = Config(constants.CONFIG_FILE)
pass opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
summary = collection.update(loc)
if verbose or debug:
summary.print()
@click.command('check')
@add_options(_logger_options)
@click.argument('path', required=True, nargs=1, type=click.Path())
def check(**kwargs):
    """Check the db and verify file hashes.

    Exits with status 1 when the db does not match the collection or when
    at least one file fails the hash verification.
    """
    debug = kwargs['debug']
    verbose = kwargs['verbose']
    logger = log.get_logger(debug, verbose)
    collection = Collection(kwargs['path'], None, mode='move', logger=logger)

    if not collection.check_db():
        # This is a module-level command, not a method: there is no `self`
        # here, so the message must go through the local logger.
        logger.error('Db data is not accurate run `ordigi init`')
        sys.exit(1)

    # Hashes are only verified once the db is known to match the filesystem.
    summary, result = collection.check_files()
    if verbose or debug:
        summary.print()
    if not result:
        sys.exit(1)
@click.command('compare') @click.command('compare')
@ -317,10 +355,11 @@ def main(**kwargs):
main.add_command(clean) main.add_command(clean)
main.add_command(check)
main.add_command(compare) main.add_command(compare)
main.add_command(init)
main.add_command(sort) main.add_command(sort)
main.add_command(generate_db) main.add_command(update)
main.add_command(verify)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -291,7 +291,7 @@ class Collection(object):
return row_data return row_data
def _add_db_data(self, dest_path, metadata): def _add_db_data(self, metadata):
loc_values = self._format_row_data('location', metadata) loc_values = self._format_row_data('location', metadata)
metadata['location_id'] = self.db.add_row('location', loc_values) metadata['location_id'] = self.db.add_row('location', loc_values)
@ -334,7 +334,7 @@ class Collection(object):
media.metadata['file_path'] = os.path.relpath(dest_path, media.metadata['file_path'] = os.path.relpath(dest_path,
self.root) self.root)
self._add_db_data(dest_path, media.metadata) self._add_db_data(media.metadata)
if self.mode == 'move': if self.mode == 'move':
# Delete file path entry in db when file is moved inside collection # Delete file path entry in db when file is moved inside collection
if str(self.root) in str(src_path): if str(self.root) in str(src_path):
@ -655,6 +655,32 @@ class Collection(object):
] ]
return inquirer.prompt(questions, theme=self.theme)['selection'] return inquirer.prompt(questions, theme=self.theme)['selection']
def _get_all_files(self):
    """Return, as a list, the paths yielded by `_get_files_in_path` for
    the collection root (`self.root`)."""
    return list(self._get_files_in_path(self.root))
def check_db(self):
    """
    Check if db FilePath match to collection filesystem

    Every path returned by `_get_all_files`, made relative to `self.root`,
    must be present in the `FilePath` column of the `metadata` table, and
    the number of rows must equal the number of files.

    :returns: bool
    """
    file_paths = list(self._get_all_files())
    db_rows = [row['FilePath'] for row in self.db.get_rows('metadata')]
    # Set for O(1) membership tests; the list is kept for the row count so
    # that duplicated rows are still counted individually.
    known_paths = set(db_rows)

    for file_path in file_paths:
        relpath = os.path.relpath(file_path, self.root)
        # If file not in database
        if relpath not in known_paths:
            self.logger.error('Db data is not accurate')
            self.logger.info(f'{file_path} not in db')
            return False

    # All files are registered; a differing count means the db holds rows
    # that do not correspond to a file of the collection.
    if len(db_rows) != len(file_paths):
        self.logger.error('Db data is not accurate')
        return False

    return True
def _check_processed(self): def _check_processed(self):
# Finally check if are files are successfully processed # Finally check if are files are successfully processed
n_fail = len(self.src_list) - len(self.dest_list) n_fail = len(self.src_list) - len(self.dest_list)
@ -662,13 +688,86 @@ class Collection(object):
self.logger.error("{n_fail} files have not be processed") self.logger.error("{n_fail} files have not be processed")
return False return False
return True return self.check_db()
def init(self, loc, ignore_tags=None):
    """
    Register every file of the collection in the db

    :params loc: location object forwarded to `Media.get_metadata`
    :params ignore_tags: tags forwarded to `Media`; defaults to an empty set
    :returns: the collection summary, with one entry appended per file
    """
    # Build the default per call instead of sharing one set between calls.
    if ignore_tags is None:
        ignore_tags = set()

    for file_path in self._get_all_files():
        media = Media(file_path, self.root, ignore_tags=ignore_tags,
                logger=self.logger, use_date_filename=self.use_date_filename,
                use_file_dates=self.use_file_dates)
        # Return value unused; presumably this populates media.metadata,
        # which is read just below -- confirm against Media.
        media.get_metadata(self.root, loc, self.db, self.cache)
        media.metadata['file_path'] = os.path.relpath(file_path, self.root)
        self._add_db_data(media.metadata)
        # Files are registered in place: source and destination are the same.
        self.summary.append((file_path, file_path))

    return self.summary
def check_files(self):
    """
    Compare the checksum of each collection file with the db value

    :returns: tuple (summary, bool); the bool is False as soon as one file
        checksum differs from the value returned by the db
    """
    result = True
    for file_path in self._get_all_files():
        checksum = utils.checksum(file_path)
        relpath = file_path.relative_to(self.root)
        if checksum == self.db.get_checksum(relpath):
            self.summary.append((file_path, file_path))
        else:
            # f-string: the path must be interpolated into the message
            self.logger.error(f'{file_path} is corrupted')
            self.summary.append((file_path, False))
            result = False

    return self.summary, result
def update(self, loc, ignore_tags=None):
    """
    Synchronize the db with the files present in the collection

    Files that have no `FilePath` row are added to the db, and rows whose
    file is no longer in the collection are deleted. When a new file has the
    same checksum as a stale row, it is treated as that file moved without
    the db being updated and some values of the stale row are reused.

    :params loc: location object forwarded to `Media.get_metadata`
    :params ignore_tags: tags forwarded to `Media`; defaults to an empty set
    :returns: the collection summary, with one entry appended per added file
    """
    # Build the default per call instead of sharing one set between calls.
    if ignore_tags is None:
        ignore_tags = set()

    file_paths = list(self._get_all_files())
    db_rows = list(self.db.get_rows('metadata'))

    # Sets for O(1) membership tests inside the loops below.
    existing_paths = set(file_paths)
    db_paths = {db_row['FilePath'] for db_row in db_rows}

    # Rows pointing to a file that is not in the collection anymore
    invalid_db_rows = [
        db_row for db_row in db_rows
        if self.root / db_row['FilePath'] not in existing_paths
    ]

    for file_path in file_paths:
        relpath = os.path.relpath(file_path, self.root)
        # Compare with the FilePath values, not with the row objects: a
        # string never equals a row, which made every file look new.
        if relpath in db_paths:
            continue

        # File not in database
        media = Media(file_path, self.root, ignore_tags=ignore_tags,
                logger=self.logger, use_date_filename=self.use_date_filename,
                use_file_dates=self.use_file_dates)
        # Return value unused; presumably this populates media.metadata,
        # which is read just below -- confirm against Media.
        media.get_metadata(self.root, loc, self.db, self.cache)
        media.metadata['file_path'] = relpath

        # Check if file checksum is in invalid rows
        # NOTE(review): nesting of this loop body is inferred -- confirm.
        for row in invalid_db_rows:
            if row['Checksum'] == media.metadata['checksum']:
                # file have been moved without registering to db
                media.metadata['src_path'] = row['SrcPath']
                # Check if row FilePath is a subpath of relpath
                if relpath.startswith(row['FilePath']):
                    d = os.path.relpath(relpath, row['FilePath'])
                    # NOTE(review): joined without a path separator -- confirm
                    media.metadata['subdirs'] = row['Subdirs'] + d
                # NOTE(review): key is capitalized unlike the others -- confirm
                media.metadata['Filename'] = row['Filename']
                break

        # set row attribute to the file
        self._add_db_data(media.metadata)
        self.summary.append((file_path, file_path))

    # Finally delete invalid rows
    for row in invalid_db_rows:
        self.db.delete_filepath(row['FilePath'])

    return self.summary
def sort_files(self, paths, loc, remove_duplicates=False, def sort_files(self, paths, loc, remove_duplicates=False,
ignore_tags=set()): ignore_tags=set()):
""" """
Sort files into appropriate folder Sort files into appropriate folder
""" """
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = False result = False
files_data = [] files_data = []
for path in paths: for path in paths:
@ -751,10 +850,16 @@ class Collection(object):
def sort_similar_images(self, path, similarity=80): def sort_similar_images(self, path, similarity=80):
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = True result = True
path = self._check_path(path) path = self._check_path(path)
images = set([ x for x in self._get_images(path) ]) images = set([ x for x in self._get_images(path) ])
i = Images(images, logger=self.logger) i = Images(images, logger=self.logger)
nb_row_ini = self.db.len('metadata')
for image in images: for image in images:
if not image.img_path.is_file(): if not image.img_path.is_file():
continue continue
@ -794,14 +899,27 @@ class Collection(object):
self.summary.append((img_path, False)) self.summary.append((img_path, False))
result = False result = False
nb_row_end = self.db.len('metadata')
if nb_row_ini and nb_row_ini != nb_row_end:
self.logger.error('Nb of row have changed unexpectedly')
result = False
if result:
result = self.check_db()
return self.summary, result return self.summary, result
def revert_compare(self, path): def revert_compare(self, path):
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = True result = True
path = self._check_path(path) path = self._check_path(path)
dirnames = set() dirnames = set()
moved_files = set() moved_files = set()
nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path(path, glob=self.glob, for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext): extensions=self.filter_by_ext):
dirname = src_path.parent.name dirname = src_path.parent.name
@ -827,6 +945,14 @@ class Collection(object):
except OSError as error: except OSError as error:
self.logger.error(error) self.logger.error(error)
nb_row_end = self.db.len('metadata')
if nb_row_ini and nb_row_ini != nb_row_end:
self.logger.error('Nb of row have changed unexpectedly')
result = False
if result:
result = self.check_db()
return self.summary, result return self.summary, result

View File

@ -211,26 +211,26 @@ class Sqlite:
return self.add_row(table, row_data) return self.add_row(table, row_data)
def get_checksum(self, FilePath): def get_checksum(self, file_path):
query = f"select Checksum from metadata where FilePath='{FilePath}'" query = f"select Checksum from metadata where FilePath='{file_path}'"
return self._run(query) return self._run(query)
def get_metadata_data(self, FilePath, data): def get_metadata_data(self, file_path, data):
query = f"select {data} from metadata where FilePath='{FilePath}'" query = f"select {data} from metadata where FilePath='{file_path}'"
return self._run(query) return self._run(query)
def match_location(self, Latitude, Longitude): def match_location(self, latitude, longitude):
query = f"""select 1 from location where Latitude='{Latitude}' query = f"""select 1 from location where Latitude='{latitude}'
and Longitude='{Longitude}'""" and Longitude='{longitude}'"""
return self._run(query) return self._run(query)
def get_location_data(self, LocationId, data): def get_location_data(self, location_id, data):
query = f"select '{data}' from location where ROWID='{LocationId}'" query = f"select '{data}' from location where ROWID='{location_id}'"
return self._run(query) return self._run(query)
def get_location(self, Latitude, Longitude, column): def get_location(self, latitude, longitude, column):
query = f"""select {column} from location where Latitude='{Latitude}' query = f"""select {column} from location where Latitude='{latitude}'
and Longitude='{Longitude}'""" and Longitude='{longitude}'"""
return self._run(query) return self._run(query)
def _get_table(self, table): def _get_table(self, table):
@ -283,4 +283,15 @@ class Sqlite:
self.cur.execute(sql) self.cur.execute(sql)
self.con.commit() self.con.commit()
def len(self, table):
    """Run a `count()` query on `table` and return what `_run` gives back.

    :params table: str, name of the table to count rows of
    """
    return self._run(f'select count() from {table}')
def get_rows(self, table):
    """Cycle through rows in table

    :params table: str, name of the table to read
    :return: iter over the rows of the table
    """
    self.cur.execute(f'select * from {table}')
    # Fetch everything before yielding: `self.cur` is shared, so another
    # query run on it while the caller is still iterating would replace
    # the pending result set.
    yield from self.cur.fetchall()

View File

@ -26,6 +26,7 @@ class TestCollection:
def setup_class(cls, sample_files_paths): def setup_class(cls, sample_files_paths):
cls.src_path, cls.file_paths = sample_files_paths cls.src_path, cls.file_paths = sample_files_paths
cls.path_format = constants.default_path + '/' + constants.default_name cls.path_format = constants.default_path + '/' + constants.default_name
cls.logger = log.get_logger(True, True)
def teardown_class(self): def teardown_class(self):
terminate_exiftool() terminate_exiftool()
@ -113,7 +114,8 @@ class TestCollection:
assert part == '', file_path assert part == '', file_path
def test_sort_files(self, tmp_path): def test_sort_files(self, tmp_path):
collection = Collection(tmp_path, self.path_format, album_from_folder=True) collection = Collection(tmp_path, self.path_format,
album_from_folder=True, logger=self.logger)
loc = GeoLocation() loc = GeoLocation()
summary, result = collection.sort_files([self.src_path], loc) summary, result = collection.sort_files([self.src_path], loc)
@ -121,6 +123,10 @@ class TestCollection:
assert summary, summary assert summary, summary
assert result, result assert result, result
summary, result = collection.check_files()
assert summary, summary
assert result, result
for file_path in tmp_path.glob('**/*'): for file_path in tmp_path.glob('**/*'):
if '.db' not in str(file_path): if '.db' not in str(file_path):
media = Media(file_path, tmp_path, album_from_folder=True) media = Media(file_path, tmp_path, album_from_folder=True)
@ -130,12 +136,21 @@ class TestCollection:
# test with populated dest dir # test with populated dest dir
randomize_files(tmp_path) randomize_files(tmp_path)
summary, result = collection.check_files()
assert summary, summary
assert not result, result
collection = Collection(tmp_path, None, mode='move', logger=self.logger)
summary = collection.update(loc)
assert summary, summary
collection = Collection(tmp_path, self.path_format, album_from_folder=True) collection = Collection(tmp_path, self.path_format, album_from_folder=True)
loc = GeoLocation() loc = GeoLocation()
summary, result = collection.sort_files([self.src_path], loc) summary, result = collection.sort_files([self.src_path], loc)
assert summary, summary assert summary, summary
assert result, result assert result, result
# TODO check if path follow path_format # TODO check if path follow path_format
def test_sort_files_invalid_db(self, tmp_path): def test_sort_files_invalid_db(self, tmp_path):
@ -183,8 +198,9 @@ class TestCollection:
def test_sort_similar_images(self, tmp_path): def test_sort_similar_images(self, tmp_path):
path = tmp_path / 'collection' path = tmp_path / 'collection'
shutil.copytree(self.src_path, path) shutil.copytree(self.src_path, path)
logger = log.get_logger(True, True) collection = Collection(path, None, mode='move', logger=self.logger)
collection = Collection(path, None, mode='move', logger=logger) loc = GeoLocation()
summary = collection.init(loc)
summary, result = collection.sort_similar_images(path, similarity=60) summary, result = collection.sort_similar_images(path, similarity=60)
# Summary is created and there is no errors # Summary is created and there is no errors