Verify checksum in check_db
This commit is contained in:
parent
ed58383ea0
commit
9e32052ce3
|
@ -819,7 +819,7 @@ class Collection(SortMedias):
|
||||||
def init(self, loc):
|
def init(self, loc):
|
||||||
"""Init collection db"""
|
"""Init collection db"""
|
||||||
for file_path in self.get_collection_files():
|
for file_path in self.get_collection_files():
|
||||||
metadata = self.medias.get_metadata(file_path, self.root, loc)
|
metadata = self.medias.get_metadata(file_path, self.root, loc=loc)
|
||||||
metadata['file_path'] = os.path.relpath(file_path, self.root)
|
metadata['file_path'] = os.path.relpath(file_path, self.root)
|
||||||
|
|
||||||
self.db.add_file_data(metadata)
|
self.db.add_file_data(metadata)
|
||||||
|
@ -827,6 +827,46 @@ class Collection(SortMedias):
|
||||||
|
|
||||||
return self.summary
|
return self.summary
|
||||||
|
|
||||||
|
def check_files(self):
|
||||||
|
"""Check file integrity."""
|
||||||
|
for file_path in self.paths.get_files(self.root):
|
||||||
|
checksum = utils.checksum(file_path)
|
||||||
|
relpath = file_path.relative_to(self.root)
|
||||||
|
if checksum == self.db.sqlite.get_checksum(relpath):
|
||||||
|
self.summary.append('check', True, file_path)
|
||||||
|
else:
|
||||||
|
self.log.error(f'{file_path} is corrupted')
|
||||||
|
self.summary.append('check', False, file_path)
|
||||||
|
|
||||||
|
return self.summary
|
||||||
|
|
||||||
|
def file_in_db(self, file_path, db_rows):
|
||||||
|
# Assuming file_path are inside collection root dir
|
||||||
|
relpath = os.path.relpath(file_path, self.root)
|
||||||
|
|
||||||
|
# If file not in database
|
||||||
|
if relpath not in db_rows:
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _check_file(self, file_path, file_checksum):
|
||||||
|
"""Check if file checksum as changed"""
|
||||||
|
relpath = os.path.relpath(file_path, self.root)
|
||||||
|
db_checksum = self.db.sqlite.get_checksum(relpath)
|
||||||
|
# Check if checksum match
|
||||||
|
if not db_checksum:
|
||||||
|
return None
|
||||||
|
|
||||||
|
if db_checksum != file_checksum:
|
||||||
|
self.log.warning(f'{file_path} checksum as changed')
|
||||||
|
self.log.info(
|
||||||
|
f'file_checksum={file_checksum},\ndb_checksum={db_checksum}'
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
def check_db(self):
|
def check_db(self):
|
||||||
"""
|
"""
|
||||||
Check if db FilePath match to collection filesystem
|
Check if db FilePath match to collection filesystem
|
||||||
|
@ -835,14 +875,20 @@ class Collection(SortMedias):
|
||||||
file_paths = list(self.get_collection_files())
|
file_paths = list(self.get_collection_files())
|
||||||
db_rows = [row['FilePath'] for row in self.db.sqlite.get_rows('metadata')]
|
db_rows = [row['FilePath'] for row in self.db.sqlite.get_rows('metadata')]
|
||||||
for file_path in file_paths:
|
for file_path in file_paths:
|
||||||
# Assuming file_path are inside collection root dir
|
result = self.file_in_db(file_path, db_rows)
|
||||||
relpath = os.path.relpath(file_path, self.root)
|
checksum = utils.checksum(file_path)
|
||||||
|
if not result:
|
||||||
# If file not in database
|
|
||||||
if relpath not in db_rows:
|
|
||||||
self.log.error('Db data is not accurate')
|
self.log.error('Db data is not accurate')
|
||||||
self.log.info(f'{file_path} not in db')
|
self.log.info(f'{file_path} not in db')
|
||||||
return False
|
return False
|
||||||
|
elif not self._check_file(file_path, checksum):
|
||||||
|
# We don't want to silently ignore or correct this without
|
||||||
|
# resetting the cache as it could be due to file corruption
|
||||||
|
self.log.error(f'modified or corrupted file.')
|
||||||
|
self.log.info(
|
||||||
|
'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file'
|
||||||
|
)
|
||||||
|
return False
|
||||||
|
|
||||||
nb_files = len(file_paths)
|
nb_files = len(file_paths)
|
||||||
nb_row = len(db_rows)
|
nb_row = len(db_rows)
|
||||||
|
@ -886,7 +932,7 @@ class Collection(SortMedias):
|
||||||
|
|
||||||
return self.summary
|
return self.summary
|
||||||
|
|
||||||
def update(self, loc):
|
def update(self, loc, update_checksum=False):
|
||||||
"""Update collection db"""
|
"""Update collection db"""
|
||||||
file_paths = list(self.get_collection_files())
|
file_paths = list(self.get_collection_files())
|
||||||
db_rows = list(self.db.sqlite.get_rows('metadata'))
|
db_rows = list(self.db.sqlite.get_rows('metadata'))
|
||||||
|
@ -901,9 +947,22 @@ class Collection(SortMedias):
|
||||||
|
|
||||||
for file_path in file_paths:
|
for file_path in file_paths:
|
||||||
relpath = os.path.relpath(file_path, self.root)
|
relpath = os.path.relpath(file_path, self.root)
|
||||||
|
metadata = {}
|
||||||
|
|
||||||
|
checksum = utils.checksum(file_path)
|
||||||
|
if not self._check_file(file_path, checksum) and update_checksum:
|
||||||
|
# metadata will fill checksum from file
|
||||||
|
metadata = self.medias.get_metadata(
|
||||||
|
file_path, self.root, checksum, loc=loc
|
||||||
|
)
|
||||||
|
metadata['file_path'] = relpath
|
||||||
|
# set row attribute to the file
|
||||||
|
self.db.add_file_data(metadata)
|
||||||
|
self.summary.append('update', file_path)
|
||||||
|
|
||||||
# If file not in database
|
# If file not in database
|
||||||
if relpath not in db_paths:
|
if relpath not in db_paths:
|
||||||
metadata = self.medias.get_metadata(file_path, self.root, loc)
|
metadata = self.medias.get_metadata(file_path, self.root, loc=loc)
|
||||||
metadata['file_path'] = relpath
|
metadata['file_path'] = relpath
|
||||||
# Check if file checksum is in invalid rows
|
# Check if file checksum is in invalid rows
|
||||||
row = []
|
row = []
|
||||||
|
@ -927,19 +986,6 @@ class Collection(SortMedias):
|
||||||
|
|
||||||
return self.summary
|
return self.summary
|
||||||
|
|
||||||
def check_files(self):
|
|
||||||
"""Check file integrity."""
|
|
||||||
for file_path in self.paths.get_files(self.root):
|
|
||||||
checksum = utils.checksum(file_path)
|
|
||||||
relpath = file_path.relative_to(self.root)
|
|
||||||
if checksum == self.db.sqlite.get_checksum(relpath):
|
|
||||||
self.summary.append('check', True, file_path)
|
|
||||||
else:
|
|
||||||
self.log.error(f'{file_path} is corrupted')
|
|
||||||
self.summary.append('check', False, file_path)
|
|
||||||
|
|
||||||
return self.summary
|
|
||||||
|
|
||||||
def set_utime_from_metadata(self, date_media, file_path):
|
def set_utime_from_metadata(self, date_media, file_path):
|
||||||
"""Set the modification time on the file based on the file name."""
|
"""Set the modification time on the file based on the file name."""
|
||||||
|
|
||||||
|
|
|
@ -279,6 +279,7 @@ class Media(ReadExif):
|
||||||
ignore_tags=None,
|
ignore_tags=None,
|
||||||
interactive=False,
|
interactive=False,
|
||||||
cache=True,
|
cache=True,
|
||||||
|
checksum=None,
|
||||||
use_date_filename=False,
|
use_date_filename=False,
|
||||||
use_file_dates=False,
|
use_file_dates=False,
|
||||||
):
|
):
|
||||||
|
@ -292,6 +293,11 @@ class Media(ReadExif):
|
||||||
|
|
||||||
self.album_from_folder = album_from_folder
|
self.album_from_folder = album_from_folder
|
||||||
self.cache = cache
|
self.cache = cache
|
||||||
|
if checksum:
|
||||||
|
self.checksum = checksum
|
||||||
|
else:
|
||||||
|
self.checksum = utils.checksum(file_path)
|
||||||
|
|
||||||
self.interactive = interactive
|
self.interactive = interactive
|
||||||
self.log = LOG.getChild(self.__class__.__name__)
|
self.log = LOG.getChild(self.__class__.__name__)
|
||||||
self.metadata = None
|
self.metadata = None
|
||||||
|
@ -527,30 +533,6 @@ class Media(ReadExif):
|
||||||
|
|
||||||
return db.get_metadata(relpath, 'LocationId')
|
return db.get_metadata(relpath, 'LocationId')
|
||||||
|
|
||||||
def _check_file(self, db, root, check=True):
|
|
||||||
"""Check if file_path is a subpath of root"""
|
|
||||||
|
|
||||||
if str(self.file_path).startswith(str(root)):
|
|
||||||
relpath = os.path.relpath(self.file_path, root)
|
|
||||||
db_checksum = db.get_checksum(relpath)
|
|
||||||
file_checksum = self.metadata['checksum']
|
|
||||||
# Check if checksum match
|
|
||||||
if check and db_checksum and db_checksum != file_checksum:
|
|
||||||
self.log.error(f'{self.file_path} checksum has changed, modified or corrupted file.')
|
|
||||||
self.log.error(
|
|
||||||
f'file_checksum={file_checksum},\ndb_checksum={db_checksum}'
|
|
||||||
)
|
|
||||||
self.log.info(
|
|
||||||
'Use ordigi update --checksum or --reset-cache, check database integrity or try to restore the file'
|
|
||||||
)
|
|
||||||
# We don't want to silently ignore or correct this without
|
|
||||||
# resetting the cache as it could be due to file corruption
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
return relpath, db_checksum
|
|
||||||
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
def set_location_from_db(self, location_id, db):
|
def set_location_from_db(self, location_id, db):
|
||||||
|
|
||||||
self.metadata['location_id'] = location_id
|
self.metadata['location_id'] = location_id
|
||||||
|
@ -604,13 +586,13 @@ class Media(ReadExif):
|
||||||
if not album or album == '':
|
if not album or album == '':
|
||||||
self.metadata['album'] = folder
|
self.metadata['album'] = folder
|
||||||
|
|
||||||
def get_metadata(self, root, loc=None, db=None, cache=False, check=True):
|
def get_metadata(self, root, loc=None, db=None, cache=False):
|
||||||
"""
|
"""
|
||||||
Get a dictionary of metadata from exif.
|
Get a dictionary of metadata from exif.
|
||||||
All keys will be present and have a value of None if not obtained.
|
All keys will be present and have a value of None if not obtained.
|
||||||
"""
|
"""
|
||||||
self.metadata = {}
|
self.metadata = {}
|
||||||
self.metadata['checksum'] = utils.checksum(self.file_path)
|
self.metadata['checksum'] = self.checksum
|
||||||
|
|
||||||
db_checksum = False
|
db_checksum = False
|
||||||
location_id = None
|
location_id = None
|
||||||
|
@ -621,7 +603,6 @@ class Media(ReadExif):
|
||||||
location_id = self._set_metadata_from_db(db, relpath)
|
location_id = self._set_metadata_from_db(db, relpath)
|
||||||
self.set_location_from_db(location_id, db)
|
self.set_location_from_db(location_id, db)
|
||||||
else:
|
else:
|
||||||
# file not in db
|
|
||||||
self.metadata['src_dir'] = str(self.src_dir)
|
self.metadata['src_dir'] = str(self.src_dir)
|
||||||
self.metadata['subdirs'] = str(
|
self.metadata['subdirs'] = str(
|
||||||
self.file_path.relative_to(self.src_dir).parent
|
self.file_path.relative_to(self.src_dir).parent
|
||||||
|
@ -688,7 +669,7 @@ class Medias:
|
||||||
self.datas = {}
|
self.datas = {}
|
||||||
self.theme = request.load_theme()
|
self.theme = request.load_theme()
|
||||||
|
|
||||||
def get_media(self, file_path, src_dir):
|
def get_media(self, file_path, src_dir, checksum=None):
|
||||||
media = Media(
|
media = Media(
|
||||||
file_path,
|
file_path,
|
||||||
src_dir,
|
src_dir,
|
||||||
|
@ -696,23 +677,24 @@ class Medias:
|
||||||
self.exif_opt['ignore_tags'],
|
self.exif_opt['ignore_tags'],
|
||||||
self.interactive,
|
self.interactive,
|
||||||
self.exif_opt['cache'],
|
self.exif_opt['cache'],
|
||||||
|
checksum,
|
||||||
self.exif_opt['use_date_filename'],
|
self.exif_opt['use_date_filename'],
|
||||||
self.exif_opt['use_file_dates'],
|
self.exif_opt['use_file_dates'],
|
||||||
)
|
)
|
||||||
|
|
||||||
return media
|
return media
|
||||||
|
|
||||||
def get_media_data(self, file_path, src_dir, loc=None, check=True):
|
def get_media_data(self, file_path, src_dir, checksum=None, loc=None):
|
||||||
media = self.get_media(file_path, src_dir)
|
media = self.get_media(file_path, src_dir, checksum)
|
||||||
media.get_metadata(
|
media.get_metadata(
|
||||||
self.root, loc, self.db.sqlite, self.exif_opt['cache'], check
|
self.root, loc, self.db.sqlite, self.exif_opt['cache']
|
||||||
)
|
)
|
||||||
|
|
||||||
return media
|
return media
|
||||||
|
|
||||||
def get_metadata(self, src_path, src_dir, loc=None, check=True):
|
def get_metadata(self, src_path, src_dir, checksum=None, loc=None):
|
||||||
"""Get metadata"""
|
"""Get metadata"""
|
||||||
return self.get_media_data(src_path, src_dir, loc, check).metadata
|
return self.get_media_data(src_path, src_dir, checksum, loc).metadata
|
||||||
|
|
||||||
def get_paths(self, src_dirs, imp=False):
|
def get_paths(self, src_dirs, imp=False):
|
||||||
"""Get paths"""
|
"""Get paths"""
|
||||||
|
@ -739,7 +721,7 @@ class Medias:
|
||||||
"""Get medias datas"""
|
"""Get medias datas"""
|
||||||
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
||||||
# Get file metadata
|
# Get file metadata
|
||||||
media = self.get_media_data(src_path, src_dir, loc)
|
media = self.get_media_data(src_path, src_dir, loc=loc)
|
||||||
|
|
||||||
yield src_path, media
|
yield src_path, media
|
||||||
|
|
||||||
|
@ -747,7 +729,7 @@ class Medias:
|
||||||
"""Get medias data"""
|
"""Get medias data"""
|
||||||
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
for src_dir, src_path in self.get_paths(src_dirs, imp=imp):
|
||||||
# Get file metadata
|
# Get file metadata
|
||||||
metadata = self.get_metadata(src_path, src_dir, loc)
|
metadata = self.get_metadata(src_path, src_dir, loc=loc)
|
||||||
|
|
||||||
yield src_path, metadata
|
yield src_path, metadata
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue