Add init, check and update commands

This commit is contained in:
Cédric Leporcq 2021-10-15 06:41:22 +02:00
parent 2ac8ca3b67
commit 1cade46307
4 changed files with 225 additions and 33 deletions

View File

@ -237,25 +237,63 @@ def clean(**kwargs):
sys.exit(1)
@click.command('generate-db')
@click.command('init')
@add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False),
required=True, help='Path of your photo library.')
def generate_db(**kwargs):
@click.argument('path', required=True, nargs=1, type=click.Path())
def init(**kwargs):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files.
"""
# TODO
pass
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
summary = collection.init(loc)
if verbose or debug:
summary.print()
@click.command('verify')
@click.command('update')
@add_options(_logger_options)
@click.option('--path', type=click.Path(file_okay=False),
required=True, help='Path of your photo library.')
def verify(**kwargs):
"""Verify hashes"""
# TODO
pass
@click.argument('path', required=True, nargs=1, type=click.Path())
def update(**kwargs):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files.
"""
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
summary = collection.update(loc)
if verbose or debug:
summary.print()
@click.command('check')
@add_options(_logger_options)
@click.argument('path', required=True, nargs=1, type=click.Path())
def check(**kwargs):
"""check db and verify hashes"""
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
collection = Collection(kwargs['path'], None, mode='move', logger=logger)
result = collection.check_db()
if result:
summary, result = collection.check_files()
if verbose or debug:
summary.print()
if not result:
sys.exit(1)
else:
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
@click.command('compare')
@ -317,10 +355,11 @@ def main(**kwargs):
main.add_command(clean)
main.add_command(check)
main.add_command(compare)
main.add_command(init)
main.add_command(sort)
main.add_command(generate_db)
main.add_command(verify)
main.add_command(update)
if __name__ == '__main__':

View File

@ -291,7 +291,7 @@ class Collection(object):
return row_data
def _add_db_data(self, dest_path, metadata):
def _add_db_data(self, metadata):
loc_values = self._format_row_data('location', metadata)
metadata['location_id'] = self.db.add_row('location', loc_values)
@ -334,7 +334,7 @@ class Collection(object):
media.metadata['file_path'] = os.path.relpath(dest_path,
self.root)
self._add_db_data(dest_path, media.metadata)
self._add_db_data(media.metadata)
if self.mode == 'move':
# Delete file path entry in db when file is moved inside collection
if str(self.root) in str(src_path):
@ -655,6 +655,32 @@ class Collection(object):
]
return inquirer.prompt(questions, theme=self.theme)['selection']
def _get_all_files(self):
return [x for x in self._get_files_in_path(self.root)]
def check_db(self):
"""
Check if db FilePath match to collection filesystem
:returns: bool
"""
file_paths = [x for x in self._get_all_files()]
db_rows = [row['FilePath'] for row in self.db.get_rows('metadata')]
for file_path in file_paths:
relpath = os.path.relpath(file_path, self.root)
# If file not in database
if relpath not in db_rows:
self.logger.error('Db data is not accurate')
self.logger.info(f'{file_path} not in db')
return False
nb_files = len(file_paths)
nb_row = len(db_rows)
if nb_row != nb_files:
self.logger.error('Db data is not accurate')
return False
return True
def _check_processed(self):
# Finally check if are files are successfully processed
n_fail = len(self.src_list) - len(self.dest_list)
@ -662,13 +688,86 @@ class Collection(object):
self.logger.error("{n_fail} files have not be processed")
return False
return True
return self.check_db()
def init(self, loc, ignore_tags=set()):
record = True
for file_path in self._get_all_files():
media = Media(file_path, self.root, ignore_tags=ignore_tags,
logger=self.logger, use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
media.metadata['file_path'] = os.path.relpath(file_path,
self.root)
self._add_db_data(media.metadata)
self.summary.append((file_path, file_path))
return self.summary
def check_files(self):
result = True
for file_path in self._get_all_files():
checksum = utils.checksum(file_path)
relpath = file_path.relative_to(self.root)
if checksum == self.db.get_checksum(relpath):
self.summary.append((file_path, file_path))
else:
self.logger.error('{file_path} is corrupted')
self.summary.append((file_path, False))
result = False
return self.summary, result
def update(self, loc, ignore_tags=set()):
file_paths = [x for x in self._get_all_files()]
db_rows = [row for row in self.db.get_rows('metadata')]
invalid_db_rows = set()
for db_row in db_rows:
abspath = self.root / db_row['FilePath']
if abspath not in file_paths:
invalid_db_rows.add(db_row)
for file_path in file_paths:
relpath = os.path.relpath(file_path, self.root)
# If file not in database
if relpath not in db_rows:
media = Media(file_path, self.root, ignore_tags=ignore_tags,
logger=self.logger, use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
media.metadata['file_path'] = relpath
# Check if file checksum is in invalid rows
row = []
for row in invalid_db_rows:
if row['Checksum'] == media.metadata['checksum']:
# file have been moved without registering to db
media.metadata['src_path'] = row['SrcPath']
# Check if row FilePath is a subpath of relpath
if relpath.startswith(row['FilePath']):
d = os.path.relpath(relpath, row['FilePath'])
media.metadata['subdirs'] = row['Subdirs'] + d
media.metadata['Filename'] = row['Filename']
break
# set row attribute to the file
self._add_db_data(media.metadata)
self.summary.append((file_path, file_path))
# Finally delete invalid rows
for row in invalid_db_rows:
self.db.delete_filepath(row['FilePath'])
return self.summary
def sort_files(self, paths, loc, remove_duplicates=False,
ignore_tags=set()):
"""
Sort files into appropriate folder
"""
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = False
files_data = []
for path in paths:
@ -751,10 +850,16 @@ class Collection(object):
def sort_similar_images(self, path, similarity=80):
# Check db
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = True
path = self._check_path(path)
images = set([ x for x in self._get_images(path) ])
i = Images(images, logger=self.logger)
nb_row_ini = self.db.len('metadata')
for image in images:
if not image.img_path.is_file():
continue
@ -794,14 +899,27 @@ class Collection(object):
self.summary.append((img_path, False))
result = False
nb_row_end = self.db.len('metadata')
if nb_row_ini and nb_row_ini != nb_row_end:
self.logger.error('Nb of row have changed unexpectedly')
result = False
if result:
result = self.check_db()
return self.summary, result
def revert_compare(self, path):
if not self.check_db():
self.logger.error('Db data is not accurate run `ordigi init`')
sys.exit(1)
result = True
path = self._check_path(path)
dirnames = set()
moved_files = set()
nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext):
dirname = src_path.parent.name
@ -827,6 +945,14 @@ class Collection(object):
except OSError as error:
self.logger.error(error)
nb_row_end = self.db.len('metadata')
if nb_row_ini and nb_row_ini != nb_row_end:
self.logger.error('Nb of row have changed unexpectedly')
result = False
if result:
result = self.check_db()
return self.summary, result

View File

@ -211,26 +211,26 @@ class Sqlite:
return self.add_row(table, row_data)
def get_checksum(self, FilePath):
query = f"select Checksum from metadata where FilePath='{FilePath}'"
def get_checksum(self, file_path):
query = f"select Checksum from metadata where FilePath='{file_path}'"
return self._run(query)
def get_metadata_data(self, FilePath, data):
query = f"select {data} from metadata where FilePath='{FilePath}'"
def get_metadata_data(self, file_path, data):
query = f"select {data} from metadata where FilePath='{file_path}'"
return self._run(query)
def match_location(self, Latitude, Longitude):
query = f"""select 1 from location where Latitude='{Latitude}'
and Longitude='{Longitude}'"""
def match_location(self, latitude, longitude):
query = f"""select 1 from location where Latitude='{latitude}'
and Longitude='{longitude}'"""
return self._run(query)
def get_location_data(self, LocationId, data):
query = f"select '{data}' from location where ROWID='{LocationId}'"
def get_location_data(self, location_id, data):
query = f"select '{data}' from location where ROWID='{location_id}'"
return self._run(query)
def get_location(self, Latitude, Longitude, column):
query = f"""select {column} from location where Latitude='{Latitude}'
and Longitude='{Longitude}'"""
def get_location(self, latitude, longitude, column):
query = f"""select {column} from location where Latitude='{latitude}'
and Longitude='{longitude}'"""
return self._run(query)
def _get_table(self, table):
@ -283,4 +283,15 @@ class Sqlite:
self.cur.execute(sql)
self.con.commit()
def len(self, table):
sql = f'select count() from {table}'
return self._run(sql)
def get_rows(self, table):
"""Cycle through rows in table
:params: str
:return: iter
"""
self.cur.execute(f'select * from {table}')
for row in self.cur:
yield row

View File

@ -26,6 +26,7 @@ class TestCollection:
def setup_class(cls, sample_files_paths):
cls.src_path, cls.file_paths = sample_files_paths
cls.path_format = constants.default_path + '/' + constants.default_name
cls.logger = log.get_logger(True, True)
def teardown_class(self):
terminate_exiftool()
@ -113,7 +114,8 @@ class TestCollection:
assert part == '', file_path
def test_sort_files(self, tmp_path):
collection = Collection(tmp_path, self.path_format, album_from_folder=True)
collection = Collection(tmp_path, self.path_format,
album_from_folder=True, logger=self.logger)
loc = GeoLocation()
summary, result = collection.sort_files([self.src_path], loc)
@ -121,6 +123,10 @@ class TestCollection:
assert summary, summary
assert result, result
summary, result = collection.check_files()
assert summary, summary
assert result, result
for file_path in tmp_path.glob('**/*'):
if '.db' not in str(file_path):
media = Media(file_path, tmp_path, album_from_folder=True)
@ -130,12 +136,21 @@ class TestCollection:
# test with populated dest dir
randomize_files(tmp_path)
summary, result = collection.check_files()
assert summary, summary
assert not result, result
collection = Collection(tmp_path, None, mode='move', logger=self.logger)
summary = collection.update(loc)
assert summary, summary
collection = Collection(tmp_path, self.path_format, album_from_folder=True)
loc = GeoLocation()
summary, result = collection.sort_files([self.src_path], loc)
assert summary, summary
assert result, result
# TODO check if path follow path_format
def test_sort_files_invalid_db(self, tmp_path):
@ -183,8 +198,9 @@ class TestCollection:
def test_sort_similar_images(self, tmp_path):
path = tmp_path / 'collection'
shutil.copytree(self.src_path, path)
logger = log.get_logger(True, True)
collection = Collection(path, None, mode='move', logger=logger)
collection = Collection(path, None, mode='move', logger=self.logger)
loc = GeoLocation()
summary = collection.init(loc)
summary, result = collection.sort_similar_images(path, similarity=60)
# Summary is created and there is no errors