format python code with black --skip-string-normalization

This commit is contained in:
Cédric Leporcq 2021-10-15 19:56:50 +02:00
parent 1cade46307
commit a93e7accc0
12 changed files with 612 additions and 365 deletions

297
ordigi.py
View File

@ -16,27 +16,49 @@ from ordigi.summary import Summary
_logger_options = [
click.option('--debug', default=False, is_flag=True,
help='Override the value in constants.py with True.'),
click.option('--verbose', '-v', default=False, is_flag=True,
help='True if you want to see details of file processing')
click.option(
'--debug',
default=False,
is_flag=True,
help='Override the value in constants.py with True.',
),
click.option(
'--verbose',
'-v',
default=False,
is_flag=True,
help='True if you want to see details of file processing',
),
]
_dry_run_options = [
click.option('--dry-run', default=False, is_flag=True,
help='Dry run only, no change made to the filesystem.')
click.option(
'--dry-run',
default=False,
is_flag=True,
help='Dry run only, no change made to the filesystem.',
)
]
_filter_option = [
click.option('--exclude', '-e', default=set(), multiple=True,
help='Directories or files to exclude.'),
click.option('--filter-by-ext', '-f', default=set(), multiple=True,
click.option(
'--exclude',
'-e',
default=set(),
multiple=True,
help='Directories or files to exclude.',
),
click.option(
'--filter-by-ext',
'-f',
default=set(),
multiple=True,
help="""Use filename
extension to filter files for sorting. If value is '*', use
common media file extension for filtering. Ignored files remain in
the same directory structure""" ),
click.option('--glob', '-g', default='**/*',
help='Glob file selection')
the same directory structure""",
),
click.option('--glob', '-g', default='**/*', help='Glob file selection'),
]
@ -49,6 +71,7 @@ def add_options(options):
for option in reversed(options):
func = option(func)
return func
return _add_options
@ -63,31 +86,74 @@ def _get_exclude(opt, exclude):
@add_options(_logger_options)
@add_options(_dry_run_options)
@add_options(_filter_option)
@click.option('--album-from-folder', default=False, is_flag=True,
help="Use images' folders as their album names.")
@click.option('--destination', '-d', type=click.Path(file_okay=False),
default=None, help='Sort files into this directory.')
@click.option('--clean', '-C', default=False, is_flag=True,
help='Clean empty folders')
@click.option('--copy', '-c', default=False, is_flag=True,
@click.option(
'--album-from-folder',
default=False,
is_flag=True,
help="Use images' folders as their album names.",
)
@click.option(
'--destination',
'-d',
type=click.Path(file_okay=False),
default=None,
help='Sort files into this directory.',
)
@click.option('--clean', '-C', default=False, is_flag=True, help='Clean empty folders')
@click.option(
'--copy',
'-c',
default=False,
is_flag=True,
help='True if you want files to be copied over from src_dir to\
dest_dir rather than moved')
@click.option('--ignore-tags', '-I', default=set(), multiple=True,
dest_dir rather than moved',
)
@click.option(
'--ignore-tags',
'-I',
default=set(),
multiple=True,
help='Specific tags or group that will be ignored when\
searching for file data. Example \'File:FileModifyDate\' or \'Filename\'' )
@click.option('--interactive', '-i', default=False, is_flag=True,
help="Interactive mode")
@click.option('--max-deep', '-m', default=None,
help='Maximum level to proceed. Number from 0 to desired level.')
@click.option('--remove-duplicates', '-R', default=False, is_flag=True,
searching for file data. Example \'File:FileModifyDate\' or \'Filename\'',
)
@click.option(
'--interactive', '-i', default=False, is_flag=True, help="Interactive mode"
)
@click.option(
'--max-deep',
'-m',
default=None,
help='Maximum level to proceed. Number from 0 to desired level.',
)
@click.option(
'--remove-duplicates',
'-R',
default=False,
is_flag=True,
help='True to remove files that are exactly the same in name\
and a file hash')
@click.option('--reset-cache', '-r', default=False, is_flag=True,
help='Regenerate the hash.json and location.json database ')
@click.option('--use-date-filename', '-f', default=False, is_flag=True,
help="Use filename date for media original date.")
@click.option('--use-file-dates', '-F', default=False, is_flag=True,
help="Use file date created or modified for media original date.")
and a file hash',
)
@click.option(
'--reset-cache',
'-r',
default=False,
is_flag=True,
help='Regenerate the hash.json and location.json database ',
)
@click.option(
'--use-date-filename',
'-f',
default=False,
is_flag=True,
help="Use filename date for media original date.",
)
@click.option(
'--use-file-dates',
'-F',
default=False,
is_flag=True,
help="Use file date created or modified for media original date.",
)
@click.argument('paths', required=True, nargs=-1, type=click.Path())
def sort(**kwargs):
"""Sort files or directories by reading their EXIF and organizing them
@ -135,17 +201,29 @@ def sort(**kwargs):
exclude = _get_exclude(opt, kwargs['exclude'])
filter_by_ext = set(kwargs['filter_by_ext'])
collection = Collection(destination, opt['path_format'],
kwargs['album_from_folder'], cache, opt['day_begins'], kwargs['dry_run'],
exclude, filter_by_ext, kwargs['glob'], kwargs['interactive'],
logger, max_deep, mode, kwargs['use_date_filename'],
kwargs['use_file_dates'])
collection = Collection(
destination,
opt['path_format'],
kwargs['album_from_folder'],
cache,
opt['day_begins'],
kwargs['dry_run'],
exclude,
filter_by_ext,
kwargs['glob'],
kwargs['interactive'],
logger,
max_deep,
mode,
kwargs['use_date_filename'],
kwargs['use_file_dates'],
)
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout'])
summary, result = collection.sort_files(paths, loc,
kwargs['remove_duplicates'], kwargs['ignore_tags'])
summary, result = collection.sort_files(
paths, loc, kwargs['remove_duplicates'], kwargs['ignore_tags']
)
if kwargs['clean']:
remove_empty_folders(destination, logger)
@ -181,19 +259,39 @@ def remove_empty_folders(path, logger, remove_root=True):
@add_options(_logger_options)
@add_options(_dry_run_options)
@add_options(_filter_option)
@click.option('--dedup-regex', '-d', default=set(), multiple=True,
help='Regex to match duplicate strings parts')
@click.option('--folders', '-f', default=False, is_flag=True,
help='Remove empty folders')
@click.option('--max-deep', '-m', default=None,
help='Maximum level to proceed. Number from 0 to desired level.')
@click.option('--path-string', '-p', default=False, is_flag=True,
help='Deduplicate path string')
@click.option('--remove-duplicates', '-R', default=False, is_flag=True,
help='True to remove files that are exactly the same in name\
and a file hash')
@click.option('--root', '-r', type=click.Path(file_okay=False),
default=None, help='Root dir of media collection. If not set, use path')
@click.option(
'--dedup-regex',
'-d',
default=set(),
multiple=True,
help='Regex to match duplicate strings parts',
)
@click.option(
'--folders', '-f', default=False, is_flag=True, help='Remove empty folders'
)
@click.option(
'--max-deep',
'-m',
default=None,
help='Maximum level to proceed. Number from 0 to desired level.',
)
@click.option(
'--path-string', '-p', default=False, is_flag=True, help='Deduplicate path string'
)
@click.option(
'--remove-duplicates',
'-R',
default=False,
is_flag=True,
help='True to remove files that are exactly the same in name and a file hash',
)
@click.option(
'--root',
'-r',
type=click.Path(file_okay=False),
default=None,
help='Root dir of media collection. If not set, use path',
)
@click.argument('path', required=True, nargs=1, type=click.Path())
def clean(**kwargs):
"""Remove empty folders
@ -221,11 +319,21 @@ def clean(**kwargs):
filter_by_ext = set(kwargs['filter_by_ext'])
if kwargs['path_string']:
collection = Collection(root, opt['path_format'], dry_run=dry_run,
exclude=exclude, filter_by_ext=filter_by_ext, glob=kwargs['glob'],
logger=logger, max_deep=kwargs['max_deep'], mode='move')
collection = Collection(
root,
opt['path_format'],
dry_run=dry_run,
exclude=exclude,
filter_by_ext=filter_by_ext,
glob=kwargs['glob'],
logger=logger,
max_deep=kwargs['max_deep'],
mode='move',
)
dedup_regex = list(kwargs['dedup_regex'])
summary, result = collection.dedup_regex(path, dedup_regex, kwargs['remove_duplicates'])
summary, result = collection.dedup_regex(
path, dedup_regex, kwargs['remove_duplicates']
)
if clean_all or folders:
remove_empty_folders(path, logger)
@ -241,12 +349,10 @@ def clean(**kwargs):
@add_options(_logger_options)
@click.argument('path', required=True, nargs=1, type=click.Path())
def init(**kwargs):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files.
"""
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files."""
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
@ -260,12 +366,10 @@ def init(**kwargs):
@add_options(_logger_options)
@click.argument('path', required=True, nargs=1, type=click.Path())
def update(**kwargs):
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files.
"""
"""Regenerate the hash.json database which contains all of the sha256 signatures of media files."""
config = Config(constants.CONFIG_FILE)
opt = config.get_options()
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'],
opt['timeout'])
loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout'])
debug = kwargs['debug']
verbose = kwargs['verbose']
logger = log.get_logger(debug, verbose)
@ -301,17 +405,40 @@ def check(**kwargs):
@add_options(_dry_run_options)
@add_options(_filter_option)
@click.option('--find-duplicates', '-f', default=False, is_flag=True)
@click.option('--output-dir', '-o', default=False, is_flag=True, help='output\
dir')
@click.option(
'--output-dir',
'-o',
default=False,
is_flag=True,
help='output dir',
)
@click.option('--remove-duplicates', '-r', default=False, is_flag=True)
@click.option('--revert-compare', '-R', default=False, is_flag=True, help='Revert\
compare')
@click.option('--root', '-r', type=click.Path(file_okay=False),
default=None, help='Root dir of media collection. If not set, use path')
@click.option('--similar-to', '-s', default=False, help='Similar to given\
image')
@click.option('--similarity', '-S', default=80, help='Similarity level for\
images')
@click.option(
'--revert-compare',
'-R',
default=False,
is_flag=True,
help='Revert compare',
)
@click.option(
'--root',
'-r',
type=click.Path(file_okay=False),
default=None,
help='Root dir of media collection. If not set, use path',
)
@click.option(
'--similar-to',
'-s',
default=False,
help='Similar to given image',
)
@click.option(
'--similarity',
'-S',
default=80,
help='Similarity level for images',
)
@click.argument('path', nargs=1, required=True)
def compare(**kwargs):
'''Compare files in directories'''
@ -333,9 +460,16 @@ def compare(**kwargs):
exclude = _get_exclude(opt, kwargs['exclude'])
filter_by_ext = set(kwargs['filter_by_ext'])
collection = Collection(root, None, exclude=exclude,
filter_by_ext=filter_by_ext, glob=kwargs['glob'],
mode='move', dry_run=dry_run, logger=logger)
collection = Collection(
root,
None,
exclude=exclude,
filter_by_ext=filter_by_ext,
glob=kwargs['glob'],
mode='move',
dry_run=dry_run,
logger=logger,
)
if kwargs['revert_compare']:
summary, result = collection.revert_compare(path)
@ -364,4 +498,3 @@ main.add_command(update)
if __name__ == '__main__':
main()

View File

@ -24,14 +24,27 @@ from ordigi.summary import Summary
from ordigi import utils
class Collection(object):
class Collection:
"""Class of the media collection."""
def __init__(self, root, path_format, album_from_folder=False,
cache=False, day_begins=0, dry_run=False, exclude=set(),
filter_by_ext=set(), glob='**/*', interactive=False,
logger=logging.getLogger(), max_deep=None, mode='copy',
use_date_filename=False, use_file_dates=False):
def __init__(
self,
root,
path_format,
album_from_folder=False,
cache=False,
day_begins=0,
dry_run=False,
exclude=set(),
filter_by_ext=set(),
glob='**/*',
interactive=False,
logger=logging.getLogger(),
max_deep=None,
mode='copy',
use_date_filename=False,
use_file_dates=False,
):
# Attributes
self.root = Path(root).expanduser().absolute()
@ -92,13 +105,15 @@ class Collection(object):
'original_name': '{original_name}',
'state': '{state}',
'title': '{title}',
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}' # search for date format string
'date': '{(%[a-zA-Z][^a-zA-Z]*){1,8}}', # search for date format string
}
def _check_for_early_morning_photos(self, date):
"""check for early hour photos to be grouped with previous day"""
if date.hour < self.day_begins:
self.logger.info("moving this photo to the previous day for classification purposes")
self.logger.info(
"moving this photo to the previous day for classification purposes"
)
# push it to the day before for classification purposes
date = date - timedelta(hours=date.hour + 1)
@ -181,8 +196,17 @@ class Collection(object):
folders = self._get_folders(folders, mask)
part = os.path.join(*folders)
elif item in ('album','camera_make', 'camera_model', 'city', 'country',
'location', 'original_name', 'state', 'title'):
elif item in (
'album',
'camera_make',
'camera_model',
'city',
'country',
'location',
'original_name',
'state',
'title',
):
if item == 'location':
mask = 'default'
@ -245,8 +269,10 @@ class Collection(object):
if this_part:
# Check if all masks are substituted
if True in [c in this_part for c in '{}']:
self.logger.error(f'Format path part invalid: \
{this_part}')
self.logger.error(
f'Format path part invalid: \
{this_part}'
)
sys.exit(1)
path.append(this_part.strip())
@ -270,15 +296,16 @@ class Collection(object):
return None
def _checkcomp(self, dest_path, src_checksum):
"""Check file.
"""
"""Check file."""
if self.dry_run:
return True
dest_checksum = utils.checksum(dest_path)
if dest_checksum != src_checksum:
self.logger.info(f'Source checksum and destination checksum are not the same')
self.logger.info(
f'Source checksum and destination checksum are not the same'
)
return False
return True
@ -332,8 +359,7 @@ class Collection(object):
checksum = utils.checksum(dest_path)
media.metadata['checksum'] = checksum
media.metadata['file_path'] = os.path.relpath(dest_path,
self.root)
media.metadata['file_path'] = os.path.relpath(dest_path, self.root)
self._add_db_data(media.metadata)
if self.mode == 'move':
# Delete file path entry in db when file is moved inside collection
@ -367,7 +393,7 @@ class Collection(object):
dry_run = self.dry_run
# check for collisions
if(src_path == dest_path):
if src_path == dest_path:
self.logger.info(f'File {dest_path} already sorted')
return None
elif dest_path.is_dir():
@ -377,17 +403,21 @@ class Collection(object):
self.logger.warning(f'File {dest_path} already exist')
if remove_duplicates:
if filecmp.cmp(src_path, dest_path):
self.logger.info(f'File in source and destination are identical. Duplicate will be ignored.')
if(mode == 'move'):
self.logger.info(
f'File in source and destination are identical. Duplicate will be ignored.'
)
if mode == 'move':
self.remove(src_path)
return None
else: # name is same, but file is different
self.logger.warning(f'File in source and destination are different.')
self.logger.warning(
f'File in source and destination are different.'
)
return False
else:
return False
else:
if(mode == 'move'):
if mode == 'move':
if not dry_run:
# Move the processed file into the destination directory
shutil.move(src_path, dest_path)
@ -490,7 +520,8 @@ class Collection(object):
:returns: Path file_path, Path subdirs
"""
for path0 in path.glob(glob):
if path0.is_dir(): continue
if path0.is_dir():
continue
else:
file_path = path0
parts = file_path.parts
@ -501,10 +532,12 @@ class Collection(object):
level = len(subdirs.parts)
if subdirs.parts != ():
if subdirs.parts[0] == '.ordigi': continue
if subdirs.parts[0] == '.ordigi':
continue
if maxlevel is not None:
if level > maxlevel: continue
if level > maxlevel:
continue
matched = False
for exclude in self.exclude:
@ -512,7 +545,8 @@ class Collection(object):
matched = True
break
if matched: continue
if matched:
continue
if (
extensions == set()
@ -535,8 +569,10 @@ class Collection(object):
# Rename the src_file
if self.interactive:
prompt = [
inquirer.Text('file_path', message="New name for"\
f"'{dir_path.name}' file"),
inquirer.Text(
'file_path',
message="New name for" f"'{dir_path.name}' file",
),
]
answers = inquirer.prompt(prompt, theme=self.theme)
file_path = dir_path.parent / answers['file_path']
@ -569,11 +605,12 @@ class Collection(object):
return path
def set_utime_from_metadata(self, date_media, file_path):
""" Set the modification time on the file based on the file name.
"""
"""Set the modification time on the file based on the file name."""
# Initialize date taken to what's returned from the metadata function.
os.utime(file_path, (int(datetime.now().timestamp()), int(date_media.timestamp())))
os.utime(
file_path, (int(datetime.now().timestamp()), int(date_media.timestamp()))
)
def dedup_regex(self, path, dedup_regex, remove_duplicates=False):
# cycle through files
@ -586,14 +623,14 @@ class Collection(object):
# Numeric date regex
if len(dedup_regex) == 0:
date_num2 = re.compile(fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])')
date_num3 = re.compile(fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])')
date_num2 = re.compile(
fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
)
date_num3 = re.compile(
fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
)
default = re.compile(r'([^-_ .]+[-_ .])')
dedup_regex = [
date_num3,
date_num2,
default
]
dedup_regex = [date_num3, date_num2, default]
conflict_file_list = []
self.src_list = [x for x in self._get_files_in_path(path, glob=self.glob)]
@ -647,7 +684,8 @@ class Collection(object):
"""
message = "Bellow the file selection list, modify selection if needed"
questions = [
inquirer.Checkbox('selection',
inquirer.Checkbox(
'selection',
message=message,
choices=self.src_list,
default=self.src_list,
@ -693,12 +731,16 @@ class Collection(object):
def init(self, loc, ignore_tags=set()):
record = True
for file_path in self._get_all_files():
media = Media(file_path, self.root, ignore_tags=ignore_tags,
logger=self.logger, use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates)
media = Media(
file_path,
self.root,
ignore_tags=ignore_tags,
logger=self.logger,
use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates,
)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
media.metadata['file_path'] = os.path.relpath(file_path,
self.root)
media.metadata['file_path'] = os.path.relpath(file_path, self.root)
self._add_db_data(media.metadata)
self.summary.append((file_path, file_path))
@ -731,9 +773,14 @@ class Collection(object):
relpath = os.path.relpath(file_path, self.root)
# If file not in database
if relpath not in db_rows:
media = Media(file_path, self.root, ignore_tags=ignore_tags,
logger=self.logger, use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates)
media = Media(
file_path,
self.root,
ignore_tags=ignore_tags,
logger=self.logger,
use_date_filename=self.use_date_filename,
use_file_dates=self.use_file_dates,
)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
media.metadata['file_path'] = relpath
# Check if file checksum is in invalid rows
@ -758,8 +805,7 @@ class Collection(object):
return self.summary
def sort_files(self, paths, loc, remove_duplicates=False,
ignore_tags=set()):
def sort_files(self, paths, loc, remove_duplicates=False, ignore_tags=set()):
"""
Sort files into appropriate folder
"""
@ -774,8 +820,12 @@ class Collection(object):
self.dest_list = []
path = self._check_path(path)
conflict_file_list = []
self.src_list = [x for x in self._get_files_in_path(path,
glob=self.glob, extensions=self.filter_by_ext)]
self.src_list = [
x
for x in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
)
]
if self.interactive:
self.src_list = self._modify_selection()
print('Processing...')
@ -783,9 +833,16 @@ class Collection(object):
# Get medias and paths
for src_path in self.src_list:
# Process files
media = Media(src_path, path, self.album_from_folder,
ignore_tags, self.interactive, self.logger,
self.use_date_filename, self.use_file_dates)
media = Media(
src_path,
path,
self.album_from_folder,
ignore_tags,
self.interactive,
self.logger,
self.use_date_filename,
self.use_file_dates,
)
metadata = media.get_metadata(self.root, loc, self.db, self.cache)
# Get the destination path according to metadata
relpath = Path(self.get_path(metadata))
@ -805,7 +862,6 @@ class Collection(object):
result = self.sort_file(src_path, dest_path, remove_duplicates)
record = False
if result is True:
record = self._record_file(src_path, dest_path, media)
@ -836,8 +892,9 @@ class Collection(object):
"""
:returns: iter
"""
for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext):
for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
):
dirname = src_path.parent.name
if dirname.find('similar_to') == 0:
@ -920,8 +977,9 @@ class Collection(object):
dirnames = set()
moved_files = set()
nb_row_ini = self.db.len('metadata')
for src_path in self._get_files_in_path(path, glob=self.glob,
extensions=self.filter_by_ext):
for src_path in self._get_files_in_path(
path, glob=self.glob, extensions=self.filter_by_ext
):
dirname = src_path.parent.name
if dirname.find('similar_to') == 0:
dirnames.add(src_path.parent)
@ -954,5 +1012,3 @@ class Collection(object):
result = self.check_db()
return self.summary, result

View File

@ -89,4 +89,3 @@ class Config:
options['exclude'] = [value for key, value in self.conf.items('Exclusions')]
return options

View File

@ -1,4 +1,3 @@
from datetime import datetime
import json
import os
@ -29,11 +28,7 @@ class Sqlite:
pass
self.db_type = 'SQLite format 3'
self.types = {
'text': (str, datetime),
'integer': (int,),
'real': (float,)
}
self.types = {'text': (str, datetime), 'integer': (int,), 'real': (float,)}
self.filename = Path(db_dir, target_dir.name + '.db')
self.con = sqlite3.connect(self.filename)
@ -56,7 +51,7 @@ class Sqlite:
'OriginalName': 'text',
'SrcPath': 'text',
'Subdirs': 'text',
'Filename': 'text'
'Filename': 'text',
}
location_header = {
@ -67,18 +62,15 @@ class Sqlite:
'City': 'text',
'State': 'text',
'Country': 'text',
'Default': 'text'
'Default': 'text',
}
self.tables = {
'metadata': {
'header': metadata_header,
'primary_keys': ('FilePath',)
},
'metadata': {'header': metadata_header, 'primary_keys': ('FilePath',)},
'location': {
'header': location_header,
'primary_keys': ('Latitude', 'Longitude')
}
'primary_keys': ('Latitude', 'Longitude'),
},
}
self.primary_metadata_keys = self.tables['metadata']['primary_keys']
@ -104,7 +96,9 @@ class Sqlite:
try:
# get the count of tables with the name
self.cur.execute(f"select count(name) from sqlite_master where type='table' and name='{table}'")
self.cur.execute(
f"select count(name) from sqlite_master where type='table' and name='{table}'"
)
except sqlite3.DatabaseError as e:
# raise type(e)(e.message + ' :{self.filename} %s' % arg1)
raise sqlite3.DatabaseError(f"{self.filename} is not valid database")
@ -156,8 +150,10 @@ class Sqlite:
"""
header = self.tables[table]['header']
if len(row_data) != len(header):
raise ValueError(f'''Table {table} length mismatch: row_data
{row_data}, header {header}''')
raise ValueError(
f'''Table {table} length mismatch: row_data
{row_data}, header {header}'''
)
columns = ', '.join(row_data.keys())
placeholders = ', '.join('?' * len(row_data))
@ -204,8 +200,9 @@ class Sqlite:
:returns: bool
"""
if not self.tables[table]['header']:
result = self.build_table(table, row_data,
self.tables[table]['primary_keys'])
result = self.build_table(
table, row_data, self.tables[table]['primary_keys']
)
if not result:
return False
@ -236,8 +233,7 @@ class Sqlite:
def _get_table(self, table):
self.cur.execute(f'SELECT * FROM {table}').fetchall()
def get_location_nearby(self, latitude, longitude, Column,
threshold_m=3000):
def get_location_nearby(self, latitude, longitude, Column, threshold_m=3000):
"""Find a name for a location in the database.
:param float latitude: Latitude of the location.
@ -250,10 +246,9 @@ class Sqlite:
value = None
self.cur.execute('SELECT * FROM location')
for row in self.cur:
distance = distance_between_two_points(latitude, longitude,
row[0], row[1])
distance = distance_between_two_points(latitude, longitude, row[0], row[1])
# Reuse the cached lookup if it is closer than any previous match and within threshold_m
if(distance < shorter_distance and distance <= threshold_m):
if distance < shorter_distance and distance <= threshold_m:
shorter_distance = distance
value = row[Column]

View File

@ -148,7 +148,14 @@ class _ExifToolProc:
class ExifTool:
"""Basic exiftool interface for reading and writing EXIF tags"""
def __init__(self, filepath, exiftool=None, overwrite=True, flags=None, logger=logging.getLogger()):
def __init__(
self,
filepath,
exiftool=None,
overwrite=True,
flags=None,
logger=logging.getLogger(),
):
"""Create ExifTool object
Args:
@ -397,8 +404,9 @@ class ExifToolCaching(ExifTool):
def __new__(cls, filepath, exiftool=None, logger=logging.getLogger()):
"""create new object or return instance of already created singleton"""
if filepath not in cls._singletons:
cls._singletons[filepath] = _ExifToolCaching(filepath,
exiftool=exiftool, logger=logger)
cls._singletons[filepath] = _ExifToolCaching(
filepath, exiftool=exiftool, logger=logger
)
return cls._singletons[filepath]
@ -415,8 +423,9 @@ class _ExifToolCaching(ExifTool):
"""
self._json_cache = None
self._asdict_cache = {}
super().__init__(filepath, exiftool=exiftool, overwrite=False,
flags=None, logger=logger)
super().__init__(
filepath, exiftool=exiftool, overwrite=False, flags=None, logger=logger
)
def run_commands(self, *commands, no_file=False):
if commands[0] not in ["-json", "-ver"]:
@ -456,4 +465,3 @@ class _ExifToolCaching(ExifTool):
"""Clear cached data so that calls to json or asdict return fresh data"""
self._json_cache = None
self._asdict_cache = {}

View File

@ -1,4 +1,3 @@
from os import path
import geopy
@ -13,7 +12,12 @@ __KEY__ = None
class GeoLocation:
"""Look up geolocation information for media objects."""
def __init__(self, geocoder='Nominatim', prefer_english_names=False, timeout=options.default_timeout):
def __init__(
self,
geocoder='Nominatim',
prefer_english_names=False,
timeout=options.default_timeout,
):
self.geocoder = geocoder
self.prefer_english_names = prefer_english_names
self.timeout = timeout
@ -21,10 +25,10 @@ class GeoLocation:
def coordinates_by_name(self, name, db, timeout=options.default_timeout):
# Try to get cached location first
cached_coordinates = db.get_location_coordinates(name)
if(cached_coordinates is not None):
if cached_coordinates is not None:
return {
'latitude': cached_coordinates[0],
'longitude': cached_coordinates[1]
'longitude': cached_coordinates[1],
}
# If the name is not cached then we go ahead with an API lookup
@ -35,22 +39,24 @@ class GeoLocation:
if geolocation_info is not None:
return {
'latitude': geolocation_info.latitude,
'longitude': geolocation_info.longitude
'longitude': geolocation_info.longitude,
}
else:
raise NameError(geocoder)
return None
def place_name(self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout):
def place_name(
self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout
):
lookup_place_name_default = {'default': None}
if(lat is None or lon is None):
if lat is None or lon is None:
return lookup_place_name_default
# Convert lat/lon to floats
if(not isinstance(lat, float)):
if not isinstance(lat, float):
lat = float(lat)
if(not isinstance(lon, float)):
if not isinstance(lon, float):
lon = float(lon)
lookup_place_name = {}
@ -60,30 +66,31 @@ class GeoLocation:
else:
raise NameError(geocoder)
if(geolocation_info is not None and 'address' in geolocation_info):
if geolocation_info is not None and 'address' in geolocation_info:
address = geolocation_info['address']
# gh-386 adds support for town
# taking precedence after city for backwards compatibility
for loc in ['city', 'town', 'village', 'state', 'country']:
if(loc in address):
if loc in address:
lookup_place_name[loc] = address[loc]
# In many cases the desired key is not available so we
# set the most specific as the default.
if('default' not in lookup_place_name):
if 'default' not in lookup_place_name:
lookup_place_name['default'] = address[loc]
if('default' not in lookup_place_name):
if 'default' not in lookup_place_name:
lookup_place_name = lookup_place_name_default
return lookup_place_name
def lookup_osm(self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout):
def lookup_osm(
self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout
):
try:
locator = Nominatim(user_agent='myGeocoder', timeout=timeout)
coords = (lat, lon)
if(self.prefer_english_names):
if self.prefer_english_names:
lang = 'en'
else:
lang = 'local'
@ -99,5 +106,3 @@ class GeoLocation:
except (TypeError, ValueError) as e:
logger.error(e)
return None

View File

@ -18,6 +18,7 @@ import time
PYHEIF = False
try:
from pyheif_pillow_opener import register_heif_opener
PYHEIF = True
# Allow to open HEIF/HEIC image from pillow
register_heif_opener()
@ -25,8 +26,7 @@ except ImportError as e:
logging.info(e)
class Image():
class Image:
def __init__(self, img_path, hash_size=8):
self.img_path = img_path
@ -55,7 +55,7 @@ class Image():
except (IOError, UnidentifiedImageError):
return False
if(im.format is None):
if im.format is None:
return False
return True
@ -68,7 +68,7 @@ class Image():
return None
class Images():
class Images:
"""A image object.
@ -76,7 +76,18 @@ class Images():
"""
#: Valid extensions for image files.
extensions = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
extensions = (
'arw',
'cr2',
'dng',
'gif',
'heic',
'jpeg',
'jpg',
'nef',
'png',
'rw2',
)
def __init__(self, images=set(), hash_size=8, logger=logging.getLogger()):
@ -104,7 +115,11 @@ class Images():
duplicates = []
for temp_hash in get_images_hashes():
if temp_hash in hashes:
self.logger.info("Duplicate {} \nfound for image {}\n".format(img_path, hashes[temp_hash]))
self.logger.info(
"Duplicate {} \nfound for image {}\n".format(
img_path, hashes[temp_hash]
)
)
duplicates.append(img_path)
else:
hashes[temp_hash] = img_path
@ -121,7 +136,7 @@ class Images():
def remove_duplicates_interactive(self, duplicates):
if len(duplicates) != 0:
answer = input(f"Do you want to delete these {duplicates} images? Y/n: ")
if(answer.strip().lower() == 'y'):
if answer.strip().lower() == 'y':
self.remove_duplicates(duplicates)
self.logger.info(f'{duplicate} deleted successfully!')
else:
@ -164,7 +179,7 @@ class Images():
img_diff = self.diff(hash1, hash2)
if img_diff <= diff_limit:
similarity_img = self.similarity(img_diff)
self.logger.info(f'{img.img_path} image found {similarity_img}% similar to {image}')
self.logger.info(
f'{img.img_path} image found {similarity_img}% similar to {image}'
)
yield img.img_path

View File

@ -1,5 +1,6 @@
import logging
def get_logger(verbose, debug):
if debug:
level = logging.DEBUG
@ -13,4 +14,3 @@ def get_logger(verbose, debug):
logger = logging.getLogger('ordigi')
logger.level = level
return logger

View File

@ -8,6 +8,7 @@ import mimetypes
import os
import re
import sys
# import pprint
# load modules
@ -17,17 +18,14 @@ from ordigi import utils
from ordigi import request
class Media():
class Media:
"""The media class for all media objects.
:param str file_path: The fully qualified path to the media file.
"""
d_coordinates = {
'latitude': 'latitude_ref',
'longitude': 'longitude_ref'
}
d_coordinates = {'latitude': 'latitude_ref', 'longitude': 'longitude_ref'}
PHOTO = ('arw', 'cr2', 'dng', 'gif', 'heic', 'jpeg', 'jpg', 'nef', 'png', 'rw2')
AUDIO = ('m4a',)
@ -35,9 +33,17 @@ class Media():
extensions = PHOTO + AUDIO + VIDEO
def __init__(self, file_path, src_path, album_from_folder=False,
ignore_tags=set(), interactive=False, logger=logging.getLogger(),
use_date_filename=False, use_file_dates=False):
def __init__(
self,
file_path,
src_path,
album_from_folder=False,
ignore_tags=set(),
interactive=False,
logger=logging.getLogger(),
use_date_filename=False,
use_file_dates=False,
):
"""
:params: Path, Path, bool, set, bool, Logger
"""
@ -61,19 +67,16 @@ class Media():
tags_keys['date_original'] = [
'EXIF:DateTimeOriginal',
'H264:DateTimeOriginal',
'QuickTime:ContentCreateDate'
'QuickTime:ContentCreateDate',
]
tags_keys['date_created'] = [
'EXIF:CreateDate',
'QuickTime:CreationDate',
'QuickTime:CreateDate',
'QuickTime:CreationDate-und-US',
'QuickTime:MediaCreateDate'
]
tags_keys['date_modified'] = [
'File:FileModifyDate',
'QuickTime:ModifyDate'
'QuickTime:MediaCreateDate',
]
tags_keys['date_modified'] = ['File:FileModifyDate', 'QuickTime:ModifyDate']
tags_keys['camera_make'] = ['EXIF:Make', 'QuickTime:Make']
tags_keys['camera_model'] = ['EXIF:Model', 'QuickTime:Model']
tags_keys['album'] = ['XMP-xmpDM:Album', 'XMP:Album']
@ -82,13 +85,13 @@ class Media():
'EXIF:GPSLatitude',
'XMP:GPSLatitude',
# 'QuickTime:GPSLatitude',
'Composite:GPSLatitude'
'Composite:GPSLatitude',
]
tags_keys['longitude'] = [
'EXIF:GPSLongitude',
'XMP:GPSLongitude',
# 'QuickTime:GPSLongitude',
'Composite:GPSLongitude'
'Composite:GPSLongitude',
]
tags_keys['latitude_ref'] = ['EXIF:GPSLatitudeRef']
tags_keys['longitude_ref'] = ['EXIF:GPSLongitudeRef']
@ -100,7 +103,7 @@ class Media():
for key, tags in tags_keys.items():
for n, tag in enumerate(tags):
if re.match(tag_regex, tag):
del(tags_keys[key][n])
del tags_keys[key][n]
return tags_keys
@ -119,7 +122,7 @@ class Media():
:returns: str or None
"""
mimetype = mimetypes.guess_type(self.file_path)
if(mimetype is None):
if mimetype is None:
return None
return mimetype[0]
@ -143,7 +146,7 @@ class Media():
"""
if self.exif_metadata is None:
return None
if(tag not in self.exif_metadata):
if tag not in self.exif_metadata:
return None
return self.exif_metadata[tag]
@ -161,7 +164,7 @@ class Media():
try:
# correct badly formatted date
regex = re.compile(r'(\d{4}):(\d{2}):(\d{2})')
if(re.match(regex , value) is not None): # noqa
if re.match(regex, value) is not None: # noqa
value = re.sub(regex, r'\g<1>-\g<2>-\g<3>', value)
return parse(value)
except BaseException or dateutil.parser._parser.ParserError as e:
@ -207,10 +210,11 @@ class Media():
def _get_date_media_interactive(self, choices, default):
print(f"Date conflict for file: {self.file_path}")
choices_list = [
inquirer.List('date_list',
inquirer.List(
'date_list',
message=f"Choice appropriate original date",
choices=choices,
default=default
default=default,
),
]
prompt = [
@ -243,8 +247,10 @@ class Media():
date_created = self.metadata['date_created']
date_modified = self.metadata['date_modified']
if self.metadata['date_original']:
if (date_filename and date_filename != date_original):
self.logger.warning(f"{basename} time mark is different from {date_original}")
if date_filename and date_filename != date_original:
self.logger.warning(
f"{basename} time mark is different from {date_original}"
)
if self.interactive:
# Ask whether to keep the date taken, the filename time, or neither
choices = [
@ -260,9 +266,13 @@ class Media():
self.logger.warning(f"could not find original date for {self.file_path}")
if self.use_date_filename and date_filename:
self.logger.info(f"use date from filename:{date_filename} for {self.file_path}")
self.logger.info(
f"use date from filename:{date_filename} for {self.file_path}"
)
if date_created and date_filename > date_created:
self.logger.warning(f"{basename} time mark is more recent than {date_created}")
self.logger.warning(
f"{basename} time mark is more recent than {date_created}"
)
if self.interactive:
choices = [
(f"date filename:'{date_filename}'", date_filename),
@ -276,16 +286,19 @@ class Media():
elif self.use_file_dates:
if date_created:
self.logger.warning(f"use date created:{date_created} for {self.file_path}")
self.logger.warning(
f"use date created:{date_created} for {self.file_path}"
)
return date_created
elif date_modified:
self.logger.warning(f"use date modified:{date_modified} for {self.file_path}")
self.logger.warning(
f"use date modified:{date_modified} for {self.file_path}"
)
return date_modified
elif self.interactive:
choices = []
if date_filename:
choices.append((f"date filename:'{date_filename}'",
date_filename))
choices.append((f"date filename:'{date_filename}'", date_filename))
if date_created:
choices.append((f"date created:'{date_created}'", date_created))
if date_modified:
@ -296,19 +309,22 @@ class Media():
def get_exif_metadata(self):
# Get metadata from exiftool.
self.exif_metadata = ExifToolCaching(self.file_path, logger=self.logger).asdict()
self.exif_metadata = ExifToolCaching(
self.file_path, logger=self.logger
).asdict()
def _set_album(self, album, folder):
print(f"Metadata conflict for file: {self.file_path}")
choices_list = [
inquirer.List('album',
inquirer.List(
'album',
message=f"Exif album is already set to {album}, choices",
choices=[
(f"album:'{album}'", album),
(f"folder:'{folder}'", folder),
("custom", None),
],
default=f'{album}'
default=f'{album}',
),
]
prompt = [
@ -344,8 +360,12 @@ class Media():
if db_checksum and db_checksum != file_checksum:
self.logger.error(f'{self.file_path} checksum has changed')
self.logger.error('(modified or corrupted file).')
self.logger.error(f'file_checksum={file_checksum},\ndb_checksum={db_checksum}')
self.logger.info('Use --reset-cache, check database integrity or try to restore the file')
self.logger.error(
f'file_checksum={file_checksum},\ndb_checksum={db_checksum}'
)
self.logger.info(
'Use --reset-cache, check database integrity or try to restore the file'
)
# We don't want to silently ignore or correct this without
# resetting the cache, as it could be due to file corruption
sys.exit(1)
@ -354,8 +374,13 @@ class Media():
# Get metadata from db
formated_data = None
for key in self.tags_keys:
if key in ('latitude', 'longitude', 'latitude_ref',
'longitude_ref', 'file_path'):
if key in (
'latitude',
'longitude',
'latitude_ref',
'longitude_ref',
'file_path',
):
continue
label = utils.snake2camel(key)
value = db.get_metadata_data(relpath, label)
@ -372,7 +397,9 @@ class Media():
location_id = db.get_metadata_data(relpath, 'LocationId')
else:
self.metadata['src_path'] = str(self.src_path)
self.metadata['subdirs'] = str(self.file_path.relative_to(self.src_path).parent)
self.metadata['subdirs'] = str(
self.file_path.relative_to(self.src_path).parent
)
self.metadata['filename'] = self.file_path.name
# Get metadata from exif
@ -403,27 +430,35 @@ class Media():
self.metadata['date_media'] = self.get_date_media()
self.metadata['location_id'] = location_id
loc_keys = ('latitude', 'longitude', 'latitude_ref', 'longitude_ref', 'city', 'state', 'country', 'default')
loc_keys = (
'latitude',
'longitude',
'latitude_ref',
'longitude_ref',
'city',
'state',
'country',
'default',
)
if location_id:
for key in loc_keys:
# use str to convert non string format data like latitude and
# longitude
self.metadata[key] = str(db.get_location_data(location_id,
utils.snake2camel(key)))
self.metadata[key] = str(
db.get_location_data(location_id, utils.snake2camel(key))
)
elif loc:
for key in 'latitude', 'longitude', 'latitude_ref', 'longitude_ref':
self.metadata[key] = None
place_name = loc.place_name(
self.metadata['latitude'],
self.metadata['longitude'],
self.logger
self.metadata['latitude'], self.metadata['longitude'], self.logger
)
for key in ('city', 'state', 'country', 'default'):
# mask = 'city'
# place_name = {'default': u'Sunnyvale', 'city-random': u'Sunnyvale'}
if(key in place_name):
if key in place_name:
self.metadata[key] = place_name[key]
else:
self.metadata[key] = None
@ -432,7 +467,6 @@ class Media():
for key in loc_keys:
self.metadata[key] = None
if self.album_from_folder:
album = self.metadata['album']
folder = self.file_path.parent.name
@ -463,9 +497,10 @@ class Media():
return False
@classmethod
def get_class_by_file(cls, _file, classes, ignore_tags=set(), logger=logging.getLogger()):
"""Static method to get a media object by file.
"""
def get_class_by_file(
cls, _file, classes, ignore_tags=set(), logger=logging.getLogger()
):
"""Static method to get a media object by file."""
if not os.path.isfile(_file):
return None
@ -473,7 +508,7 @@ class Media():
if len(extension) > 0:
for i in classes:
if(extension in i.extensions):
if extension in i.extensions:
return i(_file, ignore_tags=ignore_tags, logger=logger)
return Media(_file, logger, ignore_tags=ignore_tags, logger=logger)
@ -491,7 +526,7 @@ class Media():
:param datetime time: datetime object of when the photo was taken
:returns: bool
"""
if(time is None):
if time is None:
return False
formatted_time = time.strftime('%Y:%m:%d %H:%M:%S')
@ -536,8 +571,7 @@ class Media():
def get_all_subclasses(cls=None):
"""Module method to get all subclasses of Media.
"""
"""Module method to get all subclasses of Media."""
subclasses = set()
this_class = Media
@ -559,12 +593,12 @@ def get_media_class(_file, ignore_tags=set(), logger=logging.getLogger()):
logger.error(f'Could not find {_file}')
return False
media = Media.get_class_by_file(_file, get_all_subclasses(),
ignore_tags=set(), logger=logger)
media = Media.get_class_by_file(
_file, get_all_subclasses(), ignore_tags=set(), logger=logger
)
if not media:
logger.warning(f'File{_file} is not supported')
logger.error(f'File {_file} can\'t be imported')
return False
return media

View File

@ -2,7 +2,6 @@ from tabulate import tabulate
class Summary(object):
def __init__(self):
self.records = []
self.success = 0

View File

@ -1,4 +1,3 @@
from math import radians, cos, sqrt
from datetime import datetime
import hashlib
@ -30,16 +29,14 @@ def distance_between_two_points(lat1, lon1, lat2, lon2):
# As threshold is quite small use simple math
# From http://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points # noqa
# convert decimal degrees to radians
lat1, lon1, lat2, lon2 = list(map(
radians,
[lat1, lon1, lat2, lon2]
))
lat1, lon1, lat2, lon2 = list(map(radians, [lat1, lon1, lat2, lon2]))
r = 6371000 # radius of the earth in m
x = (lon2 - lon1) * cos(0.5 * (lat2 + lat1))
y = lat2 - lat1
return r * sqrt(x * x + y * y)
def get_date_regex(string, user_regex=None):
if user_regex is not None:
matches = re.findall(user_regex, string)
@ -48,14 +45,18 @@ def get_date_regex(string, user_regex=None):
# regex to match date format type %Y%m%d, %y%m%d, %d%m%Y,
# etc...
'a': re.compile(
r'.*[_-]?(?P<year>\d{4})[_-]?(?P<month>\d{2})[_-]?(?P<day>\d{2})[_-]?(?P<hour>\d{2})[_-]?(?P<minute>\d{2})[_-]?(?P<second>\d{2})'),
r'.*[_-]?(?P<year>\d{4})[_-]?(?P<month>\d{2})[_-]?(?P<day>\d{2})[_-]?(?P<hour>\d{2})[_-]?(?P<minute>\d{2})[_-]?(?P<second>\d{2})'
),
'b': re.compile(
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'),
r'[-_./](?P<year>\d{4})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'
),
# not very accurate
'c': re.compile(
r'[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'),
r'[-_./](?P<year>\d{2})[-_.]?(?P<month>\d{2})[-_.]?(?P<day>\d{2})[-_./]'
),
'd': re.compile(
r'[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]')
r'[-_./](?P<day>\d{2})[-_.](?P<month>\d{2})[-_.](?P<year>\d{4})[-_./]'
),
}
for i, rx in regex.items():
@ -104,10 +105,12 @@ def get_date_from_string(string, user_regex=None):
# Conversion functions
# source:https://rodic.fr/blog/camelcase-and-snake_case-strings-conversion-with-python/
def snake2camel(name):
    """Convert a snake_case identifier to CamelCase.

    A lowercase ASCII letter located at the start of the string or directly
    after an underscore is upper-cased, and that underscore is removed
    (``'date_original'`` -> ``'DateOriginal'``). Underscores that are not
    followed by ``[a-z]`` are left untouched.

    :param str name: snake_case string
    :returns: str
    """
    return re.sub(r'(?:^|_)([a-z])', lambda x: x.group(1).upper(), name)
def camel2snake(name):
    """Convert a CamelCase identifier to snake_case.

    The first character is lower-cased; every following uppercase ASCII
    letter is replaced by an underscore plus its lowercase form
    (``'DateOriginal'`` -> ``'date_original'``).

    :param str name: CamelCase string
    :returns: str
    """
    if not name:
        # name[0] would raise IndexError on an empty string
        return name
    # The first character is handled separately, so every uppercase letter in
    # the remainder gets a separator. No start-of-string lookahead is used
    # here: applied to the sliced tail it would skip an uppercase letter in
    # second position ('AB' -> 'aB').
    return name[0].lower() + re.sub(
        r'[A-Z]', lambda x: '_' + x.group(0).lower(), name[1:]
    )