Verify checksum in check_db
This commit is contained in:
parent
ed58383ea0
commit
9e32052ce3
|
@ -819,7 +819,7 @@ class Collection(SortMedias):
|
|||
def init(self, loc):
|
||||
"""Init collection db"""
|
||||
for file_path in self.get_collection_files():
|
||||
metadata = self.medias.get_metadata(file_path, self.root, loc)
|
||||
metadata = self.medias.get_metadata(file_path, self.root, loc=loc)
|
||||
metadata['file_path'] = os.path.relpath(file_path, self.root)
|
||||
|
||||
self.db.add_file_data(metadata)
|
||||
|
@ -827,6 +827,46 @@ class Collection(SortMedias):
|
|||
|
||||
return self.summary
|
||||
|
||||
def check_files(self):
    """Verify each collection file against the checksum stored in the db.

    Every path yielded by ``self.paths.get_files(self.root)`` is hashed with
    ``utils.checksum`` and compared with the value returned by
    ``self.db.sqlite.get_checksum`` for its path relative to ``self.root``.
    The outcome is recorded in ``self.summary`` under the ``'check'`` action;
    a mismatch is additionally logged as an error.

    :returns: ``self.summary`` after all files have been processed.
    """
    for media_path in self.paths.get_files(self.root):
        current = utils.checksum(media_path)
        stored = self.db.sqlite.get_checksum(media_path.relative_to(self.root))
        is_intact = current == stored
        if not is_intact:
            self.log.error(f'{media_path} is corrupted')
        self.summary.append('check', is_intact, media_path)

    return self.summary
|
||||
|
||||
def file_in_db(self, file_path, db_rows):
    """Tell whether a file is listed among the given database paths.

    ``file_path`` is assumed to live inside the collection root; it is made
    relative to ``self.root`` before being looked up.

    :param file_path: path of the file to look up.
    :param db_rows: container of root-relative paths to search in.
    :returns: ``True`` if the relative path is in ``db_rows``, else ``False``.
    """
    return os.path.relpath(file_path, self.root) in db_rows
|
||||
|
||||
def _check_file(self, file_path, file_checksum):
    """Compare a file checksum with the one stored in the database.

    The stored value is fetched with ``self.db.sqlite.get_checksum`` using
    the path of ``file_path`` relative to ``self.root``.

    :param file_path: path of the file being checked.
    :param file_checksum: checksum computed from the file on disk.
    :returns: ``None`` when the db has no (or an empty) checksum for the
        file, ``False`` when the checksums differ (a warning and the two
        values are logged), ``True`` when they match.
    """
    stored = self.db.sqlite.get_checksum(os.path.relpath(file_path, self.root))

    # Nothing recorded for this file: the comparison cannot be made
    if not stored:
        return None

    if stored == file_checksum:
        return True

    self.log.warning(f'{file_path} checksum as changed')
    self.log.info(f'file_checksum={file_checksum},\ndb_checksum={stored}')
    return False
|
||||
|
||||
def check_db(self):
|
||||
"""
|
||||
Check if db FilePath match to collection filesystem
|
||||
|
@ -835,14 +875,20 @@ class Collection(SortMedias):
|
|||
file_paths = list(self.get_collection_files())
|
||||
db_rows = [row['FilePath'] for row in self.db.sqlite.get_rows('metadata')]
|
||||
for file_path in file_paths:
|
||||
# Assuming file_path are inside collection root dir
|
||||
relpath = os.path.relpath(file_path, self.root)
|
||||
|
||||
# If file not in database
|
||||
if relpath not in db_rows:
|
||||
result = self.file_in_db(file_path, db_rows)
|
||||
checksum = utils.checksum(file_path)
|
||||
if not result:
|
||||
self.log.error('Db data is not accurate')
|
||||
self.log.info(f'{file_path} not in db')
|
||||
return False
|
||||
elif not self._check_file(file_path, checksum):
|
||||
# We don't want to silently ignore or correct this without
|
||||
# resetting the cache as it could be due to file corruption
|
||||
self.log.error(f'modified or corrupted file.')
|
||||
self.log.info(
|
||||
'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file'
|
||||
)
|
||||
return False
|
||||
|
||||
nb_files = len(file_paths)
|
||||
nb_row = len(db_rows)
|
||||
|
@ -886,7 +932,7 @@ class Collection(SortMedias):
|
|||
|
||||
return self.summary
|
||||
|
||||
def update(self, loc):
|
||||
def update(self, loc, update_checksum=False):
|
||||
"""Update collection db"""
|
||||
file_paths = list(self.get_collection_files())
|
||||
db_rows = list(self.db.sqlite.get_rows('metadata'))
|
||||
|
@ -901,9 +947,22 @@ class Collection(SortMedias):
|
|||
|
||||
for file_path in file_paths:
|
||||
relpath = os.path.relpath(file_path, self.root)
|
||||
metadata = {}
|
||||
|
||||
checksum = utils.checksum(file_path)
|
||||
if not self._check_file(file_path, checksum) and update_checksum:
|
||||
# metadata will fill checksum from file
|
||||
metadata = self.medias.get_metadata(
|
||||
file_path, self.root, checksum, loc=loc
|
||||
)
|
||||
metadata['file_path'] = relpath
|
||||
# set row attribute to the file
|
||||
self.db.add_file_data(metadata)
|
||||
self.summary.append('update', file_path)
|
||||
|
||||
# If file not in database
|
||||
if relpath not in db_paths:
|
||||
metadata = self.medias.get_metadata(file_path, self.root, loc)
|
||||
metadata = self.medias.get_metadata(file_path, self.root, loc=loc)
|
||||
metadata['file_path'] = relpath
|
||||
# Check if file checksum is in invalid rows
|
||||
row = []
|
||||
|
@ -927,19 +986,6 @@ class Collection(SortMedias):
|
|||
|
||||
return self.summary
|
||||
|
||||
def check_files(self):
    """Check the integrity of every file in the collection.

    For each path returned by ``self.paths.get_files(self.root)`` the file
    checksum (``utils.checksum``) is compared with the checksum the db holds
    for the root-relative path. Each result is appended to ``self.summary``
    as a ``'check'`` entry, and mismatching files are logged as errors.

    :returns: ``self.summary``.
    """
    summary = self.summary
    for path in self.paths.get_files(self.root):
        file_checksum = utils.checksum(path)
        db_checksum = self.db.sqlite.get_checksum(path.relative_to(self.root))
        if file_checksum != db_checksum:
            self.log.error(f'{path} is corrupted')
            summary.append('check', False, path)
            continue
        summary.append('check', True, path)

    return self.summary
|
||||
|
||||
def set_utime_from_metadata(self, date_media, file_path):
|
||||
"""Set the modification time on the file based on the file name."""
|
||||
|
||||
|
|
|
@ -279,6 +279,7 @@ class Media(ReadExif):
|
|||
ignore_tags=None,
|
||||
interactive=False,
|
||||
cache=True,
|
||||
checksum=None,
|
||||
use_date_filename=False,
|
||||
use_file_dates=False,
|
||||
):
|
||||
|
@ -292,6 +293,11 @@ class Media(ReadExif):
|
|||
|
||||
self.album_from_folder = album_from_folder
|
||||
self.cache = cache
|
||||
if checksum:
|
||||
self.checksum = checksum
|
||||
else:
|
||||
self.checksum = utils.checksum(file_path)
|
||||
|
||||
self.interactive = interactive
|
||||
self.log = LOG.getChild(self.__class__.__name__)
|
||||
self.metadata = None
|
||||
|
@ -527,30 +533,6 @@ class Media(ReadExif):
|
|||
|
||||
return db.get_metadata(relpath, 'LocationId')
|
||||
|
||||
def _check_file(self, db, root, check=True):
    """Resolve the file against ``root`` and validate its stored checksum.

    The file is considered part of the collection only when ``root`` is a
    real ancestor directory of ``self.file_path``. The comparison is done on
    path components, so a sibling directory that merely shares a textual
    prefix (``/col2/x`` versus root ``/col``) is not mistaken for a subpath.

    :param db: object exposing ``get_checksum(relpath)``.
    :param root: collection root directory.
    :param check: when true, a stored checksum that differs from
        ``self.metadata['checksum']`` is logged and the process exits with
        status 1.
    :returns: ``(relpath, db_checksum)`` when the file is under ``root``,
        ``(None, None)`` otherwise.
    """
    import os
    import sys

    file_str = str(self.file_path)
    root_str = str(root)
    try:
        common = os.path.commonpath([root_str, file_str])
    except ValueError:
        # Mixed absolute/relative paths or different drives: not a subpath
        return None, None

    if os.path.normpath(common) != os.path.normpath(root_str):
        return None, None

    relpath = os.path.relpath(self.file_path, root)
    db_checksum = db.get_checksum(relpath)
    file_checksum = self.metadata['checksum']
    # Check if checksum match
    if check and db_checksum and db_checksum != file_checksum:
        self.log.error(f'{self.file_path} checksum has changed, modified or corrupted file.')
        self.log.error(
            f'file_checksum={file_checksum},\ndb_checksum={db_checksum}'
        )
        self.log.info(
            'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file'
        )
        # We don't want to silently ignore or correct this without
        # resetting the cache as it could be due to file corruption
        sys.exit(1)

    return relpath, db_checksum
|
||||
|
||||
def set_location_from_db(self, location_id, db):
|
||||
|
||||
self.metadata['location_id'] = location_id
|
||||
|
@ -604,13 +586,13 @@ class Media(ReadExif):
|
|||
if not album or album == '':
|
||||
self.metadata['album'] = folder
|
||||
|
||||
def get_metadata(self, root, loc=None, db=None, cache=False, check=True):
|
||||
def get_metadata(self, root, loc=None, db=None, cache=False):
|
||||
"""
|
||||
Get a dictionary of metadata from exif.
|
||||
All keys will be present and have a value of None if not obtained.
|
||||
"""
|
||||
self.metadata = {}
|
||||
self.metadata['checksum'] = utils.checksum(self.file_path)
|
||||
self.metadata['checksum'] = self.checksum
|
||||
|
||||
db_checksum = False
|
||||
location_id = None
|
||||
|
@ -621,7 +603,6 @@ class Media(ReadExif):
|
|||
location_id = self._set_metadata_from_db(db, relpath)
|
||||
self.set_location_from_db(location_id, db)
|
||||
else:
|
||||
# file not in db
|
||||
self.metadata['src_dir'] = str(self.src_dir)
|
||||
self.metadata['subdirs'] = str(
|
||||
self.file_path.relative_to(self.src_dir).parent
|
||||
|
@ -688,7 +669,7 @@ class Medias:
|
|||
self.datas = {}
|
||||
self.theme = request.load_theme()
|
||||
|
||||
def get_media(self, file_path, src_dir):
|
||||
def get_media(self, file_path, src_dir, checksum=None):
|
||||
media = Media(
|
||||
file_path,
|
||||
src_dir,
|
||||
|
@ -696,23 +677,24 @@ class Medias:
|
|||
self.exif_opt['ignore_tags'],
|
||||
self.interactive,
|
||||
self.exif_opt['cache'],
|
||||
checksum,
|
||||
self.exif_opt['use_date_filename'],
|
||||
self.exif_opt['use_file_dates'],
|
||||
)
|
||||
|
||||
return media
|
||||
|
||||
def get_media_data(self, file_path, src_dir, loc=None, check=True):
|
||||
media = self.get_media(file_path, src_dir)
|
||||
def get_media_data(self, file_path, src_dir, checksum=None, loc=None):
|
||||
media = self.get_media(file_path, src_dir, checksum)
|
||||
media.get_metadata(
|
||||
self.root, loc, self.db.sqlite, self.exif_opt['cache'], check
|
||||
self.root, loc, self.db.sqlite, self.exif_opt['cache']
|
||||
)
|
||||
|
||||
return media
|
||||
|
||||
def get_metadata(self, src_path, src_dir, loc=None, check=True):
|
||||
def get_metadata(self, src_path, src_dir, checksum=None, loc=None):
|
||||
"""Get metadata"""
|
||||
return self.get_media_data(src_path, src_dir, loc, check).metadata
|
||||
return self.get_media_data(src_path, src_dir, checksum, loc).metadata
|
||||
|
||||
def get_paths(self, src_dirs, imp=False):
|
||||
"""Get paths"""
|
||||
|
@ -739,7 +721,7 @@ class Medias:
|
|||
"""Get medias datas"""
|
||||
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
||||
# Get file metadata
|
||||
media = self.get_media_data(src_path, src_dir, loc)
|
||||
media = self.get_media_data(src_path, src_dir, loc=loc)
|
||||
|
||||
yield src_path, media
|
||||
|
||||
|
@ -747,7 +729,7 @@ class Medias:
|
|||
"""Get medias data"""
|
||||
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
||||
# Get file metadata
|
||||
metadata = self.get_metadata(src_path, src_dir, loc)
|
||||
metadata = self.get_metadata(src_path, src_dir, loc=loc)
|
||||
|
||||
yield src_path, metadata
|
||||
|
||||
|
|
Loading…
Reference in New Issue