diff --git a/ordigi/collection.py b/ordigi/collection.py index a852e92..a2a3734 100644 --- a/ordigi/collection.py +++ b/ordigi/collection.py @@ -819,7 +819,7 @@ class Collection(SortMedias): def init(self, loc): """Init collection db""" for file_path in self.get_collection_files(): - metadata = self.medias.get_metadata(file_path, self.root, loc) + metadata = self.medias.get_metadata(file_path, self.root, loc=loc) metadata['file_path'] = os.path.relpath(file_path, self.root) self.db.add_file_data(metadata) @@ -827,6 +827,46 @@ class Collection(SortMedias): return self.summary + def check_files(self): + """Check file integrity.""" + for file_path in self.paths.get_files(self.root): + checksum = utils.checksum(file_path) + relpath = file_path.relative_to(self.root) + if checksum == self.db.sqlite.get_checksum(relpath): + self.summary.append('check', True, file_path) + else: + self.log.error(f'{file_path} is corrupted') + self.summary.append('check', False, file_path) + + return self.summary + + def file_in_db(self, file_path, db_rows): + # Assuming file_path is inside the collection root dir + relpath = os.path.relpath(file_path, self.root) + + # If file not in database + if relpath not in db_rows: + return False + + return True + + def _check_file(self, file_path, file_checksum): + """Check if file checksum has changed""" + relpath = os.path.relpath(file_path, self.root) + db_checksum = self.db.sqlite.get_checksum(relpath) + # Check if checksums match + if not db_checksum: + return None + + if db_checksum != file_checksum: + self.log.warning(f'{file_path} checksum as changed') + self.log.info( + f'file_checksum={file_checksum},\ndb_checksum={db_checksum}' + ) + return False + + return True + def check_db(self): """ Check if db FilePath match to collection filesystem @@ -835,14 +875,20 @@ class Collection(SortMedias): file_paths = list(self.get_collection_files()) db_rows = [row['FilePath'] for row in self.db.sqlite.get_rows('metadata')] for file_path in file_paths: - # 
Assuming file_path are inside collection root dir - relpath = os.path.relpath(file_path, self.root) - - # If file not in database - if relpath not in db_rows: + result = self.file_in_db(file_path, db_rows) + checksum = utils.checksum(file_path) + if not result: self.log.error('Db data is not accurate') self.log.info(f'{file_path} not in db') return False + elif not self._check_file(file_path, checksum): + # We don't want to silently ignore or correct this without + # resetting the cache as it could be due to file corruption + self.log.error(f'modified or corrupted file.') + self.log.info( + 'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file' + ) + return False nb_files = len(file_paths) nb_row = len(db_rows) @@ -886,7 +932,7 @@ class Collection(SortMedias): return self.summary - def update(self, loc): + def update(self, loc, update_checksum=False): """Update collection db""" file_paths = list(self.get_collection_files()) db_rows = list(self.db.sqlite.get_rows('metadata')) @@ -901,9 +947,22 @@ class Collection(SortMedias): for file_path in file_paths: relpath = os.path.relpath(file_path, self.root) + metadata = {} + + checksum = utils.checksum(file_path) + if not self._check_file(file_path, checksum) and update_checksum: + # metadata will fill checksum from file + metadata = self.medias.get_metadata( + file_path, self.root, checksum, loc=loc + ) + metadata['file_path'] = relpath + # set row attribute to the file + self.db.add_file_data(metadata) + self.summary.append('update', file_path) + # If file not in database if relpath not in db_paths: - metadata = self.medias.get_metadata(file_path, self.root, loc) + metadata = self.medias.get_metadata(file_path, self.root, loc=loc) metadata['file_path'] = relpath # Check if file checksum is in invalid rows row = [] @@ -927,19 +986,6 @@ class Collection(SortMedias): return self.summary - def check_files(self): - """Check file integrity.""" - for file_path in 
self.paths.get_files(self.root): - checksum = utils.checksum(file_path) - relpath = file_path.relative_to(self.root) - if checksum == self.db.sqlite.get_checksum(relpath): - self.summary.append('check', True, file_path) - else: - self.log.error(f'{file_path} is corrupted') - self.summary.append('check', False, file_path) - - return self.summary - def set_utime_from_metadata(self, date_media, file_path): """Set the modification time on the file based on the file name.""" diff --git a/ordigi/media.py b/ordigi/media.py index 363f553..9860a3e 100644 --- a/ordigi/media.py +++ b/ordigi/media.py @@ -279,6 +279,7 @@ class Media(ReadExif): ignore_tags=None, interactive=False, cache=True, + checksum=None, use_date_filename=False, use_file_dates=False, ): @@ -292,6 +293,11 @@ class Media(ReadExif): self.album_from_folder = album_from_folder self.cache = cache + if checksum: + self.checksum = checksum + else: + self.checksum = utils.checksum(file_path) + self.interactive = interactive self.log = LOG.getChild(self.__class__.__name__) self.metadata = None @@ -527,30 +533,6 @@ class Media(ReadExif): return db.get_metadata(relpath, 'LocationId') - def _check_file(self, db, root, check=True): - """Check if file_path is a subpath of root""" - - if str(self.file_path).startswith(str(root)): - relpath = os.path.relpath(self.file_path, root) - db_checksum = db.get_checksum(relpath) - file_checksum = self.metadata['checksum'] - # Check if checksum match - if check and db_checksum and db_checksum != file_checksum: - self.log.error(f'{self.file_path} checksum has changed, modified or corrupted file.') - self.log.error( - f'file_checksum={file_checksum},\ndb_checksum={db_checksum}' - ) - self.log.info( - 'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file' - ) - # We d'ont want to silently ignore or correct this without - # resetting the cache as is could be due to file corruption - sys.exit(1) - - return relpath, db_checksum - - return None, 
None - def set_location_from_db(self, location_id, db): self.metadata['location_id'] = location_id @@ -604,13 +586,13 @@ class Media(ReadExif): if not album or album == '': self.metadata['album'] = folder - def get_metadata(self, root, loc=None, db=None, cache=False, check=True): + def get_metadata(self, root, loc=None, db=None, cache=False): """ Get a dictionary of metadata from exif. All keys will be present and have a value of None if not obtained. """ self.metadata = {} - self.metadata['checksum'] = utils.checksum(self.file_path) + self.metadata['checksum'] = self.checksum db_checksum = False location_id = None @@ -621,7 +603,6 @@ class Media(ReadExif): location_id = self._set_metadata_from_db(db, relpath) self.set_location_from_db(location_id, db) else: - # file not in db self.metadata['src_dir'] = str(self.src_dir) self.metadata['subdirs'] = str( self.file_path.relative_to(self.src_dir).parent @@ -688,7 +669,7 @@ class Medias: self.datas = {} self.theme = request.load_theme() - def get_media(self, file_path, src_dir): + def get_media(self, file_path, src_dir, checksum=None): media = Media( file_path, src_dir, @@ -696,23 +677,24 @@ class Medias: self.exif_opt['ignore_tags'], self.interactive, self.exif_opt['cache'], + checksum, self.exif_opt['use_date_filename'], self.exif_opt['use_file_dates'], ) return media - def get_media_data(self, file_path, src_dir, loc=None, check=True): - media = self.get_media(file_path, src_dir) + def get_media_data(self, file_path, src_dir, checksum=None, loc=None): + media = self.get_media(file_path, src_dir, checksum) media.get_metadata( - self.root, loc, self.db.sqlite, self.exif_opt['cache'], check + self.root, loc, self.db.sqlite, self.exif_opt['cache'] ) return media - def get_metadata(self, src_path, src_dir, loc=None, check=True): + def get_metadata(self, src_path, src_dir, checksum=None, loc=None): """Get metadata""" - return self.get_media_data(src_path, src_dir, loc, check).metadata + return self.get_media_data(src_path, 
src_dir, checksum, loc).metadata def get_paths(self, src_dirs, imp=False): """Get paths""" @@ -739,7 +721,7 @@ class Medias: """Get medias datas""" for src_dir, src_path in self.get_paths(src_dirs, imp=imp): # Get file metadata - media = self.get_media_data(src_path, src_dir, loc) + media = self.get_media_data(src_path, src_dir, loc=loc) yield src_path, media @@ -747,7 +729,7 @@ class Medias: """Get medias data""" for src_dir, src_path in self.get_paths(src_dirs, imp=imp): # Get file metadata - metadata = self.get_metadata(src_path, src_dir, loc) + metadata = self.get_metadata(src_path, src_dir, loc=loc) yield src_path, metadata