diff --git a/ordigi.py b/ordigi.py index 327b505..a2aa7bb 100755 --- a/ordigi.py +++ b/ordigi.py @@ -237,25 +237,63 @@ def clean(**kwargs): sys.exit(1) -@click.command('generate-db') +@click.command('init') @add_options(_logger_options) -@click.option('--path', type=click.Path(file_okay=False), - required=True, help='Path of your photo library.') -def generate_db(**kwargs): +@click.argument('path', required=True, nargs=1, type=click.Path()) +def init(**kwargs): """Regenerate the hash.json database which contains all of the sha256 signatures of media files. """ - # TODO - pass + config = Config(constants.CONFIG_FILE) + opt = config.get_options() + loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], + opt['timeout']) + debug = kwargs['debug'] + verbose = kwargs['verbose'] + logger = log.get_logger(debug, verbose) + collection = Collection(kwargs['path'], None, mode='move', logger=logger) + summary = collection.init(loc) + if verbose or debug: + summary.print() -@click.command('verify') +@click.command('update') @add_options(_logger_options) -@click.option('--path', type=click.Path(file_okay=False), - required=True, help='Path of your photo library.') -def verify(**kwargs): - """Verify hashes""" - # TODO - pass +@click.argument('path', required=True, nargs=1, type=click.Path()) +def update(**kwargs): + """Regenerate the hash.json database which contains all of the sha256 signatures of media files. + """ + config = Config(constants.CONFIG_FILE) + opt = config.get_options() + loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], + opt['timeout']) + debug = kwargs['debug'] + verbose = kwargs['verbose'] + logger = log.get_logger(debug, verbose) + collection = Collection(kwargs['path'], None, mode='move', logger=logger) + summary = collection.update(loc) + if verbose or debug: + summary.print() + + +@click.command('check') +@add_options(_logger_options) +@click.argument('path', required=True, nargs=1, type=click.Path()) +def check(**kwargs): + """check db and verify hashes""" + debug = kwargs['debug'] + verbose = kwargs['verbose'] + logger = log.get_logger(debug, verbose) + collection = Collection(kwargs['path'], None, mode='move', logger=logger) + result = collection.check_db() + if result: + summary, result = collection.check_files() + if verbose or debug: + summary.print() + if not result: + sys.exit(1) + else: + self.logger.error('Db data is not accurate run `ordigi init`') + sys.exit(1) @click.command('compare') @@ -317,10 +355,11 @@ def main(**kwargs): main.add_command(clean) +main.add_command(check) main.add_command(compare) +main.add_command(init) main.add_command(sort) -main.add_command(generate_db) -main.add_command(verify) +main.add_command(update) if __name__ == '__main__': diff --git a/ordigi/collection.py b/ordigi/collection.py index 44d7ebb..ccd52cc 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -291,7 +291,7 @@ class Collection(object): return row_data - def _add_db_data(self, dest_path, metadata): + def _add_db_data(self, metadata): loc_values = self._format_row_data('location', metadata) metadata['location_id'] = self.db.add_row('location', loc_values) @@ -334,7 +334,7 @@ class Collection(object): media.metadata['file_path'] = os.path.relpath(dest_path, self.root) - self._add_db_data(dest_path, media.metadata) + self._add_db_data(media.metadata) if self.mode == 'move': # Delete file path entry in db when file is moved inside collection if str(self.root) in str(src_path): @@ -655,6 +655,32 @@ class Collection(object): ] return inquirer.prompt(questions, theme=self.theme)['selection'] + def _get_all_files(self): + return [x for x in self._get_files_in_path(self.root)] + + def check_db(self): + """ + Check if db FilePath match to collection filesystem + :returns: bool + """ + file_paths = [x for x in self._get_all_files()] + db_rows = [row['FilePath'] for row in self.db.get_rows('metadata')] + for file_path in file_paths: + relpath = os.path.relpath(file_path, self.root) + # If file not in database + if relpath not in db_rows: + self.logger.error('Db data is not accurate') + self.logger.info(f'{file_path} not in db') + return False + + nb_files = len(file_paths) + nb_row = len(db_rows) + if nb_row != nb_files: + self.logger.error('Db data is not accurate') + return False + + return True + def _check_processed(self): # Finally check if are files are successfully processed n_fail = len(self.src_list) - len(self.dest_list) @@ -662,13 +688,86 @@ class Collection(object): self.logger.error("{n_fail} files have not be processed") return False - return True + return self.check_db() + + def init(self, loc, ignore_tags=set()): + record = True + for file_path in self._get_all_files(): + media = Media(file_path, self.root, ignore_tags=ignore_tags, + logger=self.logger, use_date_filename=self.use_date_filename, + use_file_dates=self.use_file_dates) + metadata = media.get_metadata(self.root, loc, self.db, self.cache) + media.metadata['file_path'] = os.path.relpath(file_path, + self.root) + self._add_db_data(media.metadata) + self.summary.append((file_path, file_path)) + + return self.summary + + def check_files(self): + result = True + for file_path in self._get_all_files(): + checksum = utils.checksum(file_path) + relpath = file_path.relative_to(self.root) + if checksum == self.db.get_checksum(relpath): + self.summary.append((file_path, file_path)) + else: + self.logger.error('{file_path} is corrupted') + self.summary.append((file_path, False)) + result = False + + return self.summary, result + + def update(self, loc, ignore_tags=set()): + file_paths = [x for x in self._get_all_files()] + db_rows = [row for row in self.db.get_rows('metadata')] + invalid_db_rows = set() + for db_row in db_rows: + abspath = self.root / db_row['FilePath'] + if abspath not in file_paths: + invalid_db_rows.add(db_row) + + for file_path in file_paths: + relpath = os.path.relpath(file_path, self.root) + # If file not in database + if relpath not in db_rows: + media = Media(file_path, self.root, ignore_tags=ignore_tags, + logger=self.logger, use_date_filename=self.use_date_filename, + use_file_dates=self.use_file_dates) + metadata = media.get_metadata(self.root, loc, self.db, self.cache) + media.metadata['file_path'] = relpath + # Check if file checksum is in invalid rows + row = [] + for row in invalid_db_rows: + if row['Checksum'] == media.metadata['checksum']: + # file have been moved without registering to db + media.metadata['src_path'] = row['SrcPath'] + # Check if row FilePath is a subpath of relpath + if relpath.startswith(row['FilePath']): + d = os.path.relpath(relpath, row['FilePath']) + media.metadata['subdirs'] = row['Subdirs'] + d + media.metadata['Filename'] = row['Filename'] + break + # set row attribute to the file + self._add_db_data(media.metadata) + self.summary.append((file_path, file_path)) + + # Finally delete invalid rows + for row in invalid_db_rows: + self.db.delete_filepath(row['FilePath']) + + return self.summary def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()): """ Sort files into appropriate folder """ + # Check db + if not self.check_db(): + self.logger.error('Db data is not accurate run `ordigi init`') + sys.exit(1) + result = False files_data = [] for path in paths: @@ -751,10 +850,16 @@ class Collection(object): def sort_similar_images(self, path, similarity=80): + # Check db + if not self.check_db(): + self.logger.error('Db data is not accurate run `ordigi init`') + sys.exit(1) + result = True path = self._check_path(path) images = set([ x for x in self._get_images(path) ]) i = Images(images, logger=self.logger) + nb_row_ini = self.db.len('metadata') for image in images: if not image.img_path.is_file(): continue @@ -794,14 +899,27 @@ class Collection(object): self.summary.append((img_path, False)) result = False + nb_row_end = self.db.len('metadata') + if nb_row_ini and nb_row_ini != nb_row_end: + self.logger.error('Nb of row have changed unexpectedly') + result = False + + if result: + result = self.check_db() + return self.summary, result def revert_compare(self, path): + if not self.check_db(): + self.logger.error('Db data is not accurate run `ordigi init`') + sys.exit(1) + result = True path = self._check_path(path) dirnames = set() moved_files = set() + nb_row_ini = self.db.len('metadata') for src_path in self._get_files_in_path(path, glob=self.glob, extensions=self.filter_by_ext): dirname = src_path.parent.name @@ -827,6 +945,14 @@ class Collection(object): except OSError as error: self.logger.error(error) + nb_row_end = self.db.len('metadata') + if nb_row_ini and nb_row_ini != nb_row_end: + self.logger.error('Nb of row have changed unexpectedly') + result = False + + if result: + result = self.check_db() + return self.summary, result diff --git a/ordigi/database.py b/ordigi/database.py index 414d1b6..066c01f 100644 --- a/ordigi/database.py +++ b/ordigi/database.py @@ -211,26 +211,26 @@ class Sqlite: return self.add_row(table, row_data) - def get_checksum(self, FilePath): - query = f"select Checksum from metadata where FilePath='{FilePath}'" + def get_checksum(self, file_path): + query = f"select Checksum from metadata where FilePath='{file_path}'" return self._run(query) - def get_metadata_data(self, FilePath, data): - query = f"select {data} from metadata where FilePath='{FilePath}'" + def get_metadata_data(self, file_path, data): + query = f"select {data} from metadata where FilePath='{file_path}'" return self._run(query) - def match_location(self, Latitude, Longitude): - query = f"""select 1 from location where Latitude='{Latitude}' - and Longitude='{Longitude}'""" + def match_location(self, latitude, longitude): + query = f"""select 1 from location where Latitude='{latitude}' + and Longitude='{longitude}'""" return self._run(query) - def get_location_data(self, LocationId, data): - query = f"select '{data}' from location where ROWID='{LocationId}'" + def get_location_data(self, location_id, data): + query = f"select '{data}' from location where ROWID='{location_id}'" return self._run(query) - def get_location(self, Latitude, Longitude, column): - query = f"""select {column} from location where Latitude='{Latitude}' - and Longitude='{Longitude}'""" + def get_location(self, latitude, longitude, column): + query = f"""select {column} from location where Latitude='{latitude}' + and Longitude='{longitude}'""" return self._run(query) def _get_table(self, table): @@ -283,4 +283,15 @@ class Sqlite: self.cur.execute(sql) self.con.commit() + def len(self, table): + sql = f'select count() from {table}' + return self._run(sql) + def get_rows(self, table): + """Cycle through rows in table + :params: str + :return: iter + """ + self.cur.execute(f'select * from {table}') + for row in self.cur: + yield row diff --git a/tests/test_collection.py b/tests/test_collection.py index 6062990..3bc1737 100644 --- a/tests/test_collection.py +++ b/tests/test_collection.py @@ -26,6 +26,7 @@ class TestCollection: def setup_class(cls, sample_files_paths): cls.src_path, cls.file_paths = sample_files_paths cls.path_format = constants.default_path + '/' + constants.default_name + cls.logger = log.get_logger(True, True) def teardown_class(self): terminate_exiftool() @@ -113,7 +114,8 @@ class TestCollection: assert part == '', file_path def test_sort_files(self, tmp_path): - collection = Collection(tmp_path, self.path_format, album_from_folder=True) + collection = Collection(tmp_path, self.path_format, + album_from_folder=True, logger=self.logger) loc = GeoLocation() summary, result = collection.sort_files([self.src_path], loc) @@ -121,6 +123,10 @@ class TestCollection: assert summary, summary assert result, result + summary, result = collection.check_files() + assert summary, summary + assert result, result + for file_path in tmp_path.glob('**/*'): if '.db' not in str(file_path): media = Media(file_path, tmp_path, album_from_folder=True) @@ -130,12 +136,21 @@ class TestCollection: # test with populated dest dir randomize_files(tmp_path) + summary, result = collection.check_files() + assert summary, summary + assert not result, result + + collection = Collection(tmp_path, None, mode='move', logger=self.logger) + summary = collection.update(loc) + assert summary, summary + collection = Collection(tmp_path, self.path_format, album_from_folder=True) loc = GeoLocation() summary, result = collection.sort_files([self.src_path], loc) assert summary, summary assert result, result + # TODO check if path follow path_format def test_sort_files_invalid_db(self, tmp_path): @@ -183,8 +198,9 @@ class TestCollection: def test_sort_similar_images(self, tmp_path): path = tmp_path / 'collection' shutil.copytree(self.src_path, path) - logger = log.get_logger(True, True) - collection = Collection(path, None, mode='move', logger=logger) + collection = Collection(path, None, mode='move', logger=self.logger) + loc = GeoLocation() + summary = collection.init(loc) summary, result = collection.sort_similar_images(path, similarity=60) # Summary is created and there is no errors