ordigi/ordigi/collection.py

1184 lines
39 KiB
Python
Raw Normal View History

"""
2021-10-27 00:06:38 +02:00
Collection methods.
"""
from copy import copy
from datetime import datetime, timedelta
2021-12-05 18:27:04 +01:00
from distutils.dir_util import copy_tree
2021-07-16 21:26:42 +02:00
import filecmp
2021-09-18 22:06:34 +02:00
from fnmatch import fnmatch
2015-10-02 09:20:27 +02:00
import os
import re
import shutil
2021-10-27 00:06:38 +02:00
import sys
from pathlib import Path, PurePath
import inquirer
2015-10-02 09:20:27 +02:00
2021-11-12 09:04:53 +01:00
from ordigi import LOG
2021-11-13 18:20:08 +01:00
from ordigi.config import Config
2021-08-31 16:18:41 +02:00
from ordigi.database import Sqlite
2022-01-29 07:48:33 +01:00
from ordigi.media import Medias, WriteExif
2021-09-18 22:06:34 +02:00
from ordigi.images import Image, Images
from ordigi import request
2021-08-13 21:11:24 +02:00
from ordigi.summary import Summary
from ordigi import utils
2021-07-16 21:26:42 +02:00
2015-10-08 11:22:30 +02:00
2021-10-17 12:33:14 +02:00
class FPath:
    """Featured path object.

    Builds a destination path from a format string whose ``<item>`` masks
    are replaced by values read from a media metadata dictionary.
    """

    def __init__(self, path_format, day_begins=0):
        self.day_begins = day_begins
        self.items = self.get_items()
        self.log = LOG.getChild(self.__class__.__name__)
        self.path_format = path_format
        self.whitespace_regex = '[ \t\n\r\f\v]+'
        self.whitespace_sub = '_'

    def get_items(self):
        """Return features items of Fpath class.

        :returns: dict mapping each item name to the regex matching its mask
        """
        return {
            'album': '<album>',
            'stem': '<stem>',
            'camera_make': '<camera_make>',
            'camera_model': '<camera_model>',
            'city': '<city>',
            'custom': r'<".*">',
            'country': '<country>',
            'date': r'<(%[a-zA-Z][^a-zA-Z]*){1,8}>',  # search for date format string
            'ext': '<ext>',
            'folder': '<folder>',
            'folders': r'<folders(\[[0-9:]{0,3}\])?>',
            'location': '<location>',
            'name': '<name>',
            'original_name': '<original_name>',
            'state': '<state>',
            'title': '<title>',
        }

    def get_early_morning_photos_date(self, date, mask):
        """Check for early hour photos to be grouped with previous day.

        :param date: datetime of the media
        :param mask: strftime format string
        :returns: str, the formatted date
        """
        # Don't change the date if the format string contains hours,
        # minutes or seconds.
        if any(code in mask for code in ('%H', '%M', '%S', '%I', '%p', '%f')):
            return date.strftime(mask)

        if date.hour < self.day_begins:
            self.log.info(
                "moving this photo to the previous day for classification purposes"
            )
            # push it to the day before for classification purposes
            date = date - timedelta(hours=date.hour + 1)

        return date.strftime(mask)

    def _get_folders(self, folders, mask):
        """
        Get folders part

        :param folders: sequence of folder names
        :param mask: ``folders``, ``folders[i]`` or ``folders[begin:end]``
            (either bound of the range may be omitted)
        :returns: sequence of the selected folder names, ``['']`` if none match
        """
        count = len(folders)
        if ':' not in mask:
            match = re.search(r'[0-9]', mask)
            if not match:
                # all folders example: folders
                return folders
            # single folder example: folders[1]
            index = int(match[0])
            if index >= count:
                # index is out of range, use ''
                return ['']
            # Return a list: the caller unpacks the result into os.path.join,
            # which would split a bare string into single characters.
            return [folders[index]]

        # multiple folder selection: example folders[1:3]
        begin_match = re.search(r'([0-9]):', mask)
        end_match = re.search(r':([0-9])', mask)
        # A missing bound means "from the first" / "up to the last" folder.
        begin = int(begin_match[1]) if begin_match else 0
        end = min(int(end_match[1]), count) if end_match else count
        if begin >= end:
            # no matched folders
            return ['']

        # select matched folders
        return folders[begin:end]

    def get_part(self, item, mask, metadata):
        """
        Parse a specific folder's name given a mask and metadata.

        :param item: Name of the item as defined in the path (i.e. date from %date)
        :param mask: Mask representing the template for the path (i.e. %city %state)
        :param metadata: Metadata dictionary.
        :returns: str
        """
        # Each item has its own custom logic and we evaluate a single item and
        # return the evaluated string.
        part = ''
        filename = metadata['filename']
        stem = os.path.splitext(filename)[0]
        if item == 'stem':
            part = stem
        elif item == 'ext':
            part = os.path.splitext(filename)[1][1:]
        elif item == 'name':
            # Remove date prefix added to the name.
            part = stem
            for regex in utils.get_date_regex().values():
                part = re.sub(regex, '', part)
            # Delete separator
            if re.search('^[-_ .]', part):
                part = part[1:]
        elif item == 'date':
            date = metadata['date_media']
            # early morning photos can be grouped with previous day
            if date is not None:
                part = str(self.get_early_morning_photos_date(date, mask))
        elif item in ('folder', 'folders'):
            folders = Path(metadata['subdirs']).parts
            if folders:
                if item == 'folder':
                    part = folders[-1]
                else:
                    part = os.path.join(*self._get_folders(folders, mask))
        elif item in (
            'album',
            'camera_make',
            'camera_model',
            'city',
            'country',
            'location',
            'original_name',
            'state',
            'title',
        ):
            if item == 'location':
                mask = 'default'
            if metadata[mask]:
                part = str(metadata[mask])
        elif item == 'custom':
            # Fallback string: strip the surrounding quotes
            part = mask[1:-1]

        return part

    def _substitute(self, regex, part, this_part):
        """Replace the mask matched by *regex* in *this_part* with *part*.

        A ``%u`` or ``%l`` prefix before the mask upper- or lower-cases the
        value. The value is inserted literally: a callable replacement keeps
        ``re.sub`` from interpreting backslashes in metadata as escapes.
        """
        # Capitalization
        u_regex = '%u' + regex
        l_regex = '%l' + regex
        if re.search(u_regex, this_part):
            return re.sub(u_regex, lambda _match: part.upper(), this_part)
        if re.search(l_regex, this_part):
            return re.sub(l_regex, lambda _match: part.lower(), this_part)
        return re.sub(regex, lambda _match: part, this_part)

    def get_path_part(self, this_part, metadata):
        """Build path part

        :param this_part: one '/'-separated element of the path format
        :param metadata: Metadata dictionary.
        :returns: part (string)"""
        for item, regex in self.items.items():
            matched = re.search(regex, this_part)
            if not matched:
                continue

            mask = matched.group()[1:-1]
            self.log.debug(f'item: {item}, mask: {mask}')
            part = self.get_part(item, mask, metadata)
            self.log.debug(f'part: {part}')

            part = part.strip()
            if part == '':
                # delete separator if any
                regex = '[-_ .]?(%[ul])?' + regex
                this_part = re.sub(regex, part, this_part)
            else:
                if self.whitespace_sub != ' ':
                    # Lastly we want to sanitize the name
                    this_part = re.sub(
                        self.whitespace_regex, self.whitespace_sub, this_part
                    )
                this_part = self._substitute(regex, part, this_part)

        # remove alternate parts inside bracket separated by |
        regex = r'[-_ .]?\<\|\>'
        if re.search(regex, this_part):
            # Delete substitute part and separator if empty
            this_part = re.sub(regex, '', this_part)
        elif re.search(r'\<.*\>', this_part):
            this_part = re.sub(r'\<\|', '', this_part)
            this_part = re.sub(r'\|.*\>', '', this_part)
            this_part = re.sub(r'\>', '', this_part)

        # Delete separator char at the beginning of the string if any:
        if this_part and re.match('[-_ .]', this_part[0]):
            this_part = this_part[1:]

        # Remove unwanted chars in filename
        return utils.filename_filter(this_part)

    def get_path(self, metadata: dict) -> str:
        """
        path_format: <%Y-%d-%m>/%u<city>/<album>
        Returns file path.

        Exits the program with status 1 when a part still contains an
        unsubstituted ``<`` or ``>``.
        """
        # Each element in the list represents a folder.
        path = []
        for path_part in self.path_format.split('/'):
            part = self.get_path_part(path_part, metadata)
            if part != '':
                # Check if all masks are substituted
                if any(char in part for char in '<>'):
                    self.log.error(f"Format path part invalid: {part}")
                    sys.exit(1)
                path.append(part)

        # If last path is empty or start with dot
        if part == '' or re.match(r'^\..*', part):
            path.append(utils.filename_filter(metadata['filename']))

        return os.path.join(*path)
2021-10-17 12:33:14 +02:00
2021-11-03 21:29:06 +01:00
class CollectionDb:
    """Write media metadata into the collection's Sqlite database."""

    def __init__(self, root):
        self.sqlite = Sqlite(root)

    def _set_row_data(self, table, metadata):
        """Build the row for *table* from *metadata*.

        Each column title listed in the table header is looked up in
        *metadata* under its snake_case form.

        :returns: dict keyed by column title
        """
        columns = self.sqlite.tables[table]['header']
        return {column: metadata[utils.camel2snake(column)] for column in columns}

    def add_file_data(self, metadata):
        """Save metadata informations to db"""
        # The location row goes in first: its id is stored in the metadata
        # before the metadata row is built.
        location_row = self._set_row_data('location', metadata)
        metadata['location_id'] = self.sqlite.add_row('location', location_row)

        metadata_row = self._set_row_data('metadata', metadata)
        self.sqlite.add_row('metadata', metadata_row)
class FileIO:
    """File Input/Ouput operations for collection"""

    def __init__(self, dry_run=False):
        # Options
        self.dry_run = dry_run
        self.log = LOG.getChild(self.__class__.__name__)

    def _apply(self, operation):
        """Call *operation* unless dry-run mode is enabled."""
        if not self.dry_run:
            operation()

    def copy(self, src_path, dest_path):
        """Copy src_path to dest_path (skipped in dry-run) and log it."""
        self._apply(lambda: shutil.copy2(src_path, dest_path))
        self.log.info(f'copy: {src_path} -> {dest_path}')

    def move(self, src_path, dest_path):
        """Move src_path to dest_path (skipped in dry-run) and log it."""
        # Move the file into the destination directory
        self._apply(lambda: shutil.move(src_path, dest_path))
        self.log.info(f'move: {src_path} -> {dest_path}')

    def remove(self, path):
        """Delete the file at path (skipped in dry-run) and log it."""
        self._apply(lambda: os.remove(path))
        self.log.info(f'remove: {path}')

    def mkdir(self, directory):
        """Create directory, tolerating an existing one (skipped in dry-run)."""
        self._apply(lambda: directory.mkdir(exist_ok=True))
        self.log.info(f'create dir: {directory}')

    def rmdir(self, directory):
        """Remove directory (skipped in dry-run) and log it."""
        self._apply(lambda: directory.rmdir())
        self.log.info(f'remove dir: {directory}')
2021-11-03 21:29:06 +01:00
2021-11-06 16:35:35 +01:00
class Paths:
    """Get filtered files paths"""

    def __init__(self, filters, interactive=False):
        """
        :param filters: dict with the keys 'exclude', 'extensions', 'glob'
            and 'max_deep'
        :param interactive: let the user edit the file selection
        """
        self.filters = filters

        extensions = self.filters['extensions']
        if not extensions:
            self.extensions = set()
        elif '%media' in extensions:
            # Expand the '%media' placeholder on a copy so the caller's
            # filters are left untouched.
            self.extensions = (set(extensions) - {'%media'}).union(
                Medias.extensions
            )
        else:
            self.extensions = extensions

        self.glob = self.filters['glob']

        self.interactive = interactive
        self.log = LOG.getChild(self.__class__.__name__)
        self.paths_list = []

        # Attributes
        self.theme = request.load_theme()

    def check(self, path):
        """
        Check if path exist

        Exits the program with status 1 when it does not.

        :param: Path path
        :return: Path path
        """
        if not path.exists():
            self.log.error(f'Directory {path} does not exist')
            sys.exit(1)

        return path

    def get_images(self, path):
        """
        Yield an Image for every file under path that is an image, skipping
        files whose parent directory name starts with 'similar_to'.

        :returns: iter
        """
        for src_path in self.get_files(path):
            if src_path.parent.name.startswith('similar_to'):
                continue

            image = Image(src_path)
            if image.is_image():
                yield image

    def get_files(self, path):
        """Recursively get files which match a path and extension.

        Skips directories, anything below ``path/.ordigi``, files deeper
        than the 'max_deep' filter and files matching an 'exclude' pattern.

        :param Path path: Path to start recursive file listing
        :returns: Path generator File
        """
        self.exclude = self.filters['exclude']

        for file_path in path.glob(self.glob):
            if file_path.is_dir():
                continue

            subdirs = file_path.relative_to(path).parent
            level = 0 if self.glob == '*' else len(subdirs.parts)

            if path / '.ordigi' in file_path.parents:
                continue

            max_deep = self.filters['max_deep']
            if max_deep is not None and level > max_deep:
                continue

            if self.exclude and any(
                fnmatch(file_path, pattern) for pattern in self.exclude
            ):
                continue

            if (
                not self.extensions
                or PurePath(file_path).suffix.lower()[1:] in self.extensions
            ):
                yield file_path

    def walklevel(self, src_dir, maxlevel=None):
        """
        Walk into input directory recursively until desired maxlevel
        source: https://stackoverflow.com/questions/229186/os-walk-without-digging-into-directories-below

        :returns: generator of (root, dirs, files, level) tuples
        """
        src_dir = str(src_dir)
        if not os.path.isdir(src_dir):
            return None

        num_sep = src_dir.count(os.path.sep)
        for root, dirs, files in os.walk(src_dir):
            level = root.count(os.path.sep) - num_sep
            yield root, dirs, files, level
            if maxlevel is not None and level >= maxlevel:
                # Emptying dirs in place stops os.walk from descending.
                del dirs[:]

    def _modify_selection(self):
        """
        Ask the user to edit the current file selection.

        Exits the program when the prompt returns nothing.

        :return: list
        """
        message = "Bellow the file selection list, modify selection if needed"
        questions = [
            inquirer.Checkbox(
                'selection',
                message=message,
                choices=self.paths_list,
                default=self.paths_list,
            ),
        ]
        prompt = inquirer.prompt(questions, theme=self.theme)
        if prompt:
            return prompt['selection']

        sys.exit()

    def get_paths_list(self, path):
        """Collect the matching files under path, edited by the user when
        interactive.

        :returns: list
        """
        self.paths_list = list(self.get_files(path))
        if self.interactive:
            self.paths_list = self._modify_selection()
            print('Processing...')

        return self.paths_list
class SortMedias:
    """Sort medias in collection"""

    def __init__(
        self,
        fileio,
        medias,
        root,
        db=None,
        dry_run=False,
        interactive=False,
    ):
        # Arguments
        self.fileio = fileio
        self.medias = medias
        self.root = root

        # Options
        self.db = db
        self.dry_run = dry_run
        self.interactive = interactive
        self.log = LOG.getChild(self.__class__.__name__)
        self.summary = Summary(self.root)

        # Attributes
        self.theme = request.load_theme()

    def _checkcomp(self, dest_path, src_checksum):
        """Check that the checksum of dest_path equals src_checksum.

        Always succeeds in dry-run mode.

        :returns: bool
        """
        if self.dry_run:
            return True

        if utils.checksum(dest_path) != src_checksum:
            self.log.info(
                "Source checksum and destination checksum are not the same"
            )
            return False

        return True

    def _record_file(self, src_path, dest_path, metadata, imp=False):
        """Check file and record the file to db

        :returns: bool, False when the destination checksum does not match
        """
        # Check if file remain the same
        if not self._checkcomp(dest_path, metadata['checksum']):
            self.log.error(f'Files {src_path} and {dest_path} are not identical')
            self.summary.append('check', False, src_path, dest_path)
            return False

        if not self.dry_run:
            # Writing exif data changes the file content, so the stored
            # checksum is refreshed.
            if self.medias.update_exif_data(metadata):
                metadata['checksum'] = utils.checksum(dest_path)

            self.db.add_file_data(metadata)
            # A file moved from inside the collection leaves a stale db row.
            if imp != 'copy' and self.root in src_path.parents:
                self.db.sqlite.delete_filepath(str(src_path.relative_to(self.root)))

        return True

    def _set_summary(self, result, src_path, dest_path, imp=False):
        """Append the outcome to the summary as 'import' or 'sort'."""
        action = 'import' if imp else 'sort'
        self.summary.append(action, bool(result), src_path, dest_path)

    def sort_file(self, src_path, dest_path, metadata, imp=False):
        """Sort file and register it to db

        :param imp: 'copy' copies the file, any other value moves it
        :returns: the summary
        """
        if imp == 'copy':
            self.fileio.copy(src_path, dest_path)
        else:
            self.fileio.move(src_path, dest_path)

        if self.db:
            result = self._record_file(src_path, dest_path, metadata, imp=imp)
        else:
            result = True

        self._set_summary(result, src_path, dest_path, imp)

        return self.summary

    def _create_directories(self):
        """Create the destination directory of every media.

        A file sitting where a directory is needed is renamed out of the
        way: to a name asked from the user when interactive, otherwise to
        its own name suffixed with '_file'.
        """
        # Iterate over a snapshot: renaming a blocking file re-keys
        # self.medias.datas inside the loop.
        for metadata in list(self.medias.datas.values()):
            relpath = os.path.dirname(metadata['file_path'])
            directory_path = self.root / relpath
            parts = directory_path.relative_to(self.root).parts
            for i, _ in enumerate(parts):
                dir_path = self.root / Path(*parts[0 : i + 1])
                if not dir_path.is_file():
                    continue

                self.log.warning(f'Target directory {dir_path} is a file')
                # Rename the src_file
                new_path = dir_path.parent / (dir_path.name + '_file')
                if self.interactive:
                    prompt = [
                        inquirer.Text(
                            'file_path',
                            message="New name for" f"'{dir_path.name}' file",
                        ),
                    ]
                    answers = inquirer.prompt(prompt, theme=self.theme)
                    # Keep the default name when the prompt is cancelled.
                    if answers and answers['file_path']:
                        new_path = dir_path.parent / answers['file_path']

                self.log.warning(f'Renaming {dir_path} to {new_path}')
                if not self.dry_run:
                    shutil.move(dir_path, new_path)
                # The blocking file is only tracked when it is itself one of
                # the medias being sorted.
                if dir_path in self.medias.datas:
                    self.medias.datas[new_path] = self.medias.datas.pop(dir_path)

            if not self.dry_run:
                directory_path.mkdir(parents=True, exist_ok=True)
            self.log.info(f'Create {directory_path}')

    def check_conflicts(self, src_path, dest_path, remove_duplicates=False):
        """
        Check if file can be copied or moved file to dest_path.

        :returns: int
            0: no conflict
            1: dest_path is a directory or an existing file
            2: src_path and dest_path are the same path
            3: identical file already at dest_path (only reported when
               remove_duplicates is set)
        """
        # check for collisions
        if src_path == dest_path:
            self.log.info(f"File {dest_path} already sorted")
            return 2

        if dest_path.is_dir():
            self.log.info(f"File {dest_path} is a existing directory")
            return 1

        if dest_path.is_file():
            self.log.info(f"File {dest_path} already exist")
            if remove_duplicates:
                if filecmp.cmp(src_path, dest_path):
                    self.log.info(
                        "File in source and destination are identical. Duplicate will be ignored."
                    )
                    return 3

                # name is same, but file is different
                self.log.info(f"File {src_path} and {dest_path} are different.")
            return 1

        return 0

    def _solve_conflicts(self, conflicts, remove_duplicates):
        """Try to resolve name conflicts by appending '_1' ... '_99'.

        Consumes *conflicts* and stores the chosen destination, relative to
        the collection root, in ``metadata['file_path']``.

        :param conflicts: list of (src_path, dest_path, metadata)
        :returns: generator of ((src_path, dest_path, metadata), conflict)
        """
        while conflicts:
            src_path, dest_path, metadata = conflicts.pop()
            # Check for conflict status again in case is has changed
            conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)

            # Number every candidate from the original stem: stripping the
            # previous counter from the candidate mangles names that already
            # contain '_<n>'.
            parent = dest_path.parent
            stem = dest_path.stem
            suffix = dest_path.suffix
            for i in range(1, 100):
                if conflict != 1:
                    break

                # Add appendix to the name
                dest_path = parent / f'{stem}_{i}{suffix}'
                conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)

            if conflict == 1:
                # every numbered name is taken as well
                self.log.error(f"Too many appends for {dest_path}")

            metadata['file_path'] = os.path.relpath(dest_path, self.root)

            yield (src_path, dest_path, metadata), conflict

    def sort_medias(self, imp=False, remove_duplicates=False):
        """
        sort files and solve conflicts

        :param imp: 'copy', 'move' or False
        :returns: the summary
        """
        # Create directories
        self._create_directories()

        conflicts = []
        for src_path, metadata in self.medias.datas.items():
            dest_path = self.root / metadata['file_path']

            conflict = self.check_conflicts(src_path, dest_path, remove_duplicates)

            if not conflict:
                self.sort_file(src_path, dest_path, metadata, imp=imp)
            elif conflict == 1:
                # There is conflict and file are different
                conflicts.append((src_path, dest_path, metadata))
            elif conflict == 3:
                # Same file checksum
                if imp == 'move':
                    self.fileio.remove(src_path)
            # conflict == 2: file already sorted, nothing to do

        if conflicts:
            for files_data, conflict in self._solve_conflicts(
                conflicts, remove_duplicates
            ):
                src_path, dest_path, metadata = files_data

                if not conflict:
                    self.sort_file(src_path, dest_path, metadata, imp=imp)
                elif conflict == 1:
                    # There is unresolved conflict
                    self._set_summary(False, src_path, dest_path, imp)
                elif conflict == 3:
                    # Same file checksum
                    if imp == 'move':
                        self.fileio.remove(src_path)
                # conflict == 2: file already sorted, nothing to do

        return self.summary
class Collection(SortMedias):
"""Class of the media collection."""
2021-11-08 07:02:21 +01:00
2021-11-19 18:24:35 +01:00
def __init__(self, root, cli_options=None):
    """Load the collection config and build the helper objects.

    :param root: Path of the collection root directory
    :param cli_options: dict of command line options; every value other
        than None or () is written into each config section
    """
    cli_options = cli_options or {}

    self.root = root
    self.log = LOG.getChild(self.__class__.__name__)

    # Get config options
    self.opt = self.get_config_options()

    # Set client options
    for name, setting in cli_options.items():
        if setting in (None, ()):
            continue
        for section in self.opt:
            self.opt[section][name] = setting

    self.exclude = self.opt['Filters']['exclude']
    if not self.exclude:
        self.exclude = set()

    terminal = self.opt['Terminal']

    self.fileio = FileIO(terminal['dry_run'])

    # The root is validated before the database is opened on it.
    self.root_is_valid()

    self.db = CollectionDb(root)
    self.paths = Paths(
        self.opt['Filters'],
        interactive=terminal['interactive'],
    )

    self.medias = Medias(
        self.paths,
        root,
        self.opt['Exif'],
        self.db,
        terminal['interactive'],
    )

    # Features
    super().__init__(
        self.fileio,
        self.medias,
        root,
        self.db,
        terminal['dry_run'],
        terminal['interactive'],
    )

    # Attributes
    self.summary = Summary(self.root)
    self.theme = request.load_theme()
2021-12-05 18:27:04 +01:00
def root_is_valid(self):
    """Check if collection path is valid.

    Logs an error and exits with status 1 when the root does not exist or
    is not a directory.
    """
    if not self.root.exists():
        self.log.error(f'Collection path {self.root} does not exist')
        sys.exit(1)

    if not self.root.is_dir():
        self.log.error(f'Collection path {self.root} is not a directory')
        sys.exit(1)
2021-11-19 18:24:35 +01:00
def get_config_options(self):
    """Get collection config

    Reads ``.ordigi/ordigi.conf`` below the collection root.
    """
    config_file = self.root.joinpath('.ordigi').joinpath('ordigi.conf')
    return Config(config_file).get_config_options()
2021-11-13 18:20:08 +01:00
2021-11-19 18:24:35 +01:00
def _set_option(self, section, option, cli_option):
2021-11-13 18:20:08 +01:00
"""if client option is set overwrite collection option value"""
if cli_option:
2021-11-19 18:24:35 +01:00
self.opt[section][option] = cli_option
2021-11-13 18:20:08 +01:00
2021-11-06 16:35:35 +01:00
def get_collection_files(self, exclude=True):
    """Yield every file below the collection root.

    :param exclude: when true the collection's exclude patterns are
        applied, otherwise the given value is used as the exclude filter
    :returns: Path generator
    """
    filters = {
        'exclude': self.exclude if exclude else exclude,
        'extensions': None,
        'glob': '**/*',
        'max_deep': None,
    }
    paths = Paths(
        filters=filters,
        interactive=self.opt['Terminal']['interactive'],
    )
    yield from paths.get_files(self.root)
2021-11-03 21:29:06 +01:00
2021-11-06 16:35:35 +01:00
def init(self, loc):
    """Init collection db

    Stores the metadata of every collection file and records an 'update'
    summary entry for each of them.

    :returns: the summary
    """
    root = self.root
    for media_path in self.get_collection_files():
        metadata = self.medias.get_metadata(media_path, root, loc)
        # Paths are stored relative to the collection root.
        metadata['file_path'] = os.path.relpath(media_path, root)

        self.db.add_file_data(metadata)
        self.summary.append('update', True, media_path)

    return self.summary
2021-10-15 06:41:22 +02:00
def check_db(self):
    """
    Check if db FilePath match to collection filesystem

    Every collection file must have a row, and the number of rows must
    equal the number of files.

    :returns: bool
    """
    file_paths = list(self.get_collection_files())
    db_rows = [row['FilePath'] for row in self.db.sqlite.get_rows('metadata')]
    # Set lookup keeps the check linear in the number of files.
    known_paths = set(db_rows)
    for file_path in file_paths:
        relpath = os.path.relpath(file_path, self.root)
        # If file not in database
        if relpath not in known_paths:
            self.log.error('Db data is not accurate')
            self.log.info(f'{file_path} not in db')
            return False

    # The list is counted, not the set, so duplicate rows are still detected.
    if len(db_rows) != len(file_paths):
        self.log.error('Db data is not accurate')
        return False

    return True
2021-12-05 18:27:04 +01:00
def check(self):
    """Exit with status 1 unless the db exists and matches the collection."""
    if self.db.sqlite.is_empty('metadata'):
        self.log.error('Db data does not exist run `ordigi init`')
        sys.exit(1)

    if not self.check_db():
        self.log.error('Db data is not accurate run `ordigi update`')
        sys.exit(1)
2021-11-03 21:29:06 +01:00
def _init_check_db(self, loc=None):
    """Initialise an empty db, otherwise require it to match the collection.

    Exits with status 1 when a non-empty db is not accurate.
    """
    if self.db.sqlite.is_empty('metadata'):
        self.init(loc)
        return

    if not self.check_db():
        self.log.error('Db data is not accurate run `ordigi update`')
        sys.exit(1)
2021-12-05 18:27:04 +01:00
def clone(self, dest_path):
    """Clone collection in another location

    In dry-run mode the copy is only logged.

    :returns: the summary
    """
    self.check()

    dry_run = self.dry_run
    if not dry_run:
        # NOTE(review): copy_tree comes from distutils, which is removed
        # from the standard library in Python 3.12 (PEP 632).
        copy_tree(str(self.root), str(dest_path))

    self.log.info(f'copy: {self.root} -> {dest_path}')

    if dry_run:
        return self.summary

    # Verify that the db of the copy matches its files.
    dest_collection = Collection(dest_path, {'cache': True, 'dry_run': dry_run})
    if not dest_collection.check_db():
        self.summary.append('check', False)

    return self.summary
2021-11-03 21:29:06 +01:00
def update(self, loc):
    """Update collection db

    Adds a row for every file missing from the db and deletes the rows
    whose file no longer exists. A new file with the same checksum as a
    stale row inherits that row's source path.

    :returns: the summary
    """
    file_paths = list(self.get_collection_files())
    # Set lookup keeps the stale-row scan linear.
    existing_paths = set(file_paths)
    db_rows = list(self.db.sqlite.get_rows('metadata'))
    invalid_db_rows = set()
    db_paths = set()
    for db_row in db_rows:
        abspath = self.root / db_row['FilePath']
        if abspath not in existing_paths:
            invalid_db_rows.add(db_row)

        db_paths.add(db_row['FilePath'])

    for file_path in file_paths:
        relpath = os.path.relpath(file_path, self.root)
        if relpath in db_paths:
            continue

        # File not in database
        metadata = self.medias.get_metadata(file_path, self.root, loc)
        metadata['file_path'] = relpath
        # Check if file checksum is in invalid rows
        for row in invalid_db_rows:
            if row['Checksum'] == metadata['checksum']:
                # file have been moved without registering to db
                metadata['src_path'] = row['SrcPath']
                # Check if row FilePath is a subpath of relpath
                if relpath.startswith(row['FilePath']):
                    path = os.path.relpath(relpath, row['FilePath'])
                    metadata['subdirs'] = row['Subdirs'] + path
                # NOTE(review): the other metadata keys written here are
                # snake_case; confirm whether 'Filename' should be 'filename'.
                metadata['Filename'] = row['Filename']
                break

        # set row attribute to the file
        self.db.add_file_data(metadata)
        # The second argument is the success flag, as in every other
        # summary.append call in this module.
        self.summary.append('update', True, file_path)

    # Finally delete invalid rows
    for row in invalid_db_rows:
        self.db.sqlite.delete_filepath(row['FilePath'])

    return self.summary
2021-09-29 07:36:47 +02:00
def check_files(self):
    """Check file integrity.

    Compares the checksum of every file with the one stored in the db and
    records a 'check' summary entry for each.

    :returns: the summary
    """
    for media_path in self.paths.get_files(self.root):
        current = utils.checksum(media_path)
        stored = self.db.sqlite.get_checksum(media_path.relative_to(self.root))
        if current == stored:
            self.summary.append('check', True, media_path)
            continue

        self.log.error(f'{media_path} is corrupted')
        self.summary.append('check', False, media_path)

    return self.summary
2021-11-06 16:35:35 +01:00
def set_utime_from_metadata(self, date_media, file_path):
    """Set the file times from the media date.

    The modification time becomes date_media and the access time becomes
    the current time, both truncated to whole seconds.
    """
    accessed = int(datetime.now().timestamp())
    modified = int(date_media.timestamp())
    os.utime(file_path, (accessed, modified))
def remove_excluded_files(self):
    """Remove excluded files in collection

    :returns: the summary
    """
    # Walk all files, without applying the exclude filter.
    for candidate in self.get_collection_files(exclude=False):
        if any(fnmatch(candidate, pattern) for pattern in self.exclude):
            self.fileio.remove(candidate)
            self.summary.append('remove', True, candidate)

    return self.summary
def remove_empty_subdirs(self, directories, src_dirs):
    """Remove empty subdir after moving files.

    Each existing directory that is not listed in ``src_dirs`` is removed
    through ``self.fileio`` when it has no entries. Parent directories
    located below the collection root are then processed the same way,
    level by level, until no candidate is left.

    :param directories: iterable of path objects to inspect
    :param src_dirs: directories (compared as strings) that must be kept
    """
    next_level = set()
    for directory in directories:
        if not directory.is_dir() or str(directory) in src_dirs:
            continue

        # An empty listing means the folder can go
        if not os.listdir(directory):
            self.fileio.rmdir(directory)
            self.log.info(f"remove empty subdir: {directory}")

        # Only climb while the parent is still strictly inside the root
        parent = directory.parent
        if self.root in parent.parents:
            next_level.add(parent)

    if next_level:
        self.remove_empty_subdirs(next_level, src_dirs)
def remove_empty_folders(self, directory, remove_root=True):
    """Remove empty sub-folders in collection.

    Sub-folders are visited first, so a folder that only contained empty
    folders ends up empty and is removed as well. Every removal is
    recorded in the summary under 'remove'; when the dry_run terminal
    option is set the folder is reported but left on disk.

    :param directory: folder to clean up
    :param remove_root: also remove ``directory`` itself when it is empty
    :returns: the collection summary
    """
    if not os.path.isdir(directory):
        self.summary.append('remove', False, directory)
        return self.summary

    # Depth first: clean the children before judging this folder
    for entry in os.listdir(directory):
        child = os.path.join(directory, entry)
        if os.path.isdir(child):
            self.remove_empty_folders(child)

    # List again, children may have been removed in the meantime
    is_empty = not os.listdir(directory)
    if is_empty and remove_root:
        self.log.info(f"Removing empty folder: {directory}")
        if not self.opt['Terminal']['dry_run']:
            os.rmdir(directory)
        self.summary.append('remove', True, directory)

    return self.summary
def sort_files(
    self, src_dirs, path_format, loc, imp=False, remove_duplicates=False
):
    """
    Sort files into appropriate folder

    The destination of every media found in ``src_dirs`` is computed from
    ``path_format`` and its metadata, the medias are then sorted with
    ``self.sort_medias`` and, unless ``imp`` is 'copy', the source
    sub-directories that held medias are cleaned up when empty. A failed
    final database check is recorded in the summary.

    :returns: the collection summary
    """
    # Check db
    self._init_check_db(loc)

    # A path format given by the client overrides the configured one
    self._set_option('Path', 'path_format', path_format)

    # Stage each media with its destination path
    visited_dirs = set()
    metadatas = self.medias.get_metadatas(src_dirs, imp=imp, loc=loc)
    for src_path, metadata in metadatas:
        self.log.info(f'src_path: {src_path}')
        day_begins = self.opt['Path']['day_begins']
        metadata['file_path'] = FPath(path_format, day_begins).get_path(metadata)
        visited_dirs.add(src_path.parent)
        self.medias.datas[src_path] = copy(metadata)

    # Sort files and solve conflicts
    self.summary = self.sort_medias(imp, remove_duplicates)

    if imp != 'copy':
        self.remove_empty_subdirs(visited_dirs, src_dirs)

    if not self.check_db():
        self.summary.append('check', False)

    return self.summary
2021-08-27 12:45:25 +02:00
2021-11-12 09:04:53 +01:00
def dedup_path(self, paths, dedup_regex=None, remove_duplicates=False):
    """Deduplicate file path parts.

    Each part of a media path (relative to the collection root) is split
    with ``utils.split_part`` using ``dedup_regex``; repeated items inside
    a part are dropped, keeping the first occurrence. The cleaned path
    becomes the media destination and the medias are then sorted.

    :param paths: paths handed to ``self.medias.get_metadatas``
    :param dedup_regex: list of compiled regexes used for splitting;
        when empty, numeric date patterns and a delimiter pattern are used
    :param remove_duplicates: forwarded to ``self.sort_medias``
    :returns: the collection summary
    """
    # Check db
    self._init_check_db()

    if not dedup_regex:
        # Delimiter regex
        delim = r'[-_ .]'
        # Numeric date item regex
        d = r'\d{2}'
        # Tried from the most specific pattern to the most generic one
        dedup_regex = [
            re.compile(
                fr'([^0-9]{d}{delim}{d}{delim}{d}{delim}|{delim}{d}{delim}{d}{delim}{d}[^0-9])'
            ),
            re.compile(
                fr'([^0-9]{d}{delim}{d}{delim}|{delim}{d}{delim}{d}[^0-9])'
            ),
            re.compile(r'([^-_ .]+[-_ .])'),
        ]

    # Get medias data
    for src_path, metadata in self.medias.get_metadatas(paths):
        cleaned_parts = []
        for path_part in src_path.relative_to(self.root).parts:
            items = utils.split_part(dedup_regex.copy(), path_part)
            # dict.fromkeys keeps only the first occurrence, in order
            cleaned_parts.append(''.join(dict.fromkeys(items)))

        metadata['file_path'] = os.path.join(*cleaned_parts)
        self.medias.datas[src_path] = copy(metadata)

    # Sort files and solve conflicts
    self.sort_medias(remove_duplicates=remove_duplicates)

    if not self.check_db():
        self.summary.append('check', False)

    return self.summary
2021-07-26 20:50:51 +02:00
def _find_similar_images(self, image, images, path, dest_dir, similarity=80):
    """Stage the images similar to ``image`` for sorting.

    Every image reported by ``images.find_similar`` gets a destination of
    the form ``<dest_dir>/<stem of image, dots replaced by '_'>/<name>``
    and is stored in ``self.medias.datas``; its path is also appended to
    ``self.paths.paths_list``. When ``self.medias.datas`` is not empty
    afterwards, ``image`` itself is staged in the same folder.

    :param image: reference image, exposing ``img_path``
    :param images: object providing ``find_similar(image, similarity)``
    :param path: forwarded to ``self.medias.get_metadata``
    :param dest_dir: base folder for the similar images
    :param similarity: forwarded to ``images.find_similar``
    :returns: True when medias were staged, False otherwise
    """
    if not image.img_path.is_file():
        return False

    directory_name = os.path.join(
        dest_dir, image.img_path.stem.replace('.', '_')
    )

    def stage(img_path):
        # Register the file and give it its destination inside the folder
        self.paths.paths_list.append(img_path)
        metadata = self.medias.get_metadata(img_path, path)
        metadata['file_path'] = os.path.join(directory_name, img_path.name)
        self.medias.datas[img_path] = copy(metadata)

    for img_path in images.find_similar(image, similarity):
        stage(img_path)

    if not self.medias.datas:
        # Always answer with a boolean instead of falling through to None
        return False

    # Found similar images: the reference image joins them
    stage(image.img_path)
    return True
2021-10-15 06:41:22 +02:00
def sort_similar_images(self, path, similarity=80, remove_duplicates=False):
    """Sort similar images using imagehash library.

    For each image found under ``path``, the similar images are staged
    with a destination below 'similar_images' and sorted straight away.
    An error is logged when the number of rows of the 'metadata' table
    differs before and after, and a failed final database check is
    recorded in the summary.

    :param path: folder to search for images
    :param similarity: forwarded to ``self._find_similar_images``
    :param remove_duplicates: forwarded to ``self.sort_medias``
    :returns: the collection summary
    """
    # Check db
    self._init_check_db()

    path = self.paths.check(path)
    images_paths = set(self.paths.get_images(path))
    images = Images(images_paths)
    rows_before = self.db.sqlite.len('metadata')

    for image in images_paths:
        # Each image starts from an empty staging area
        self.medias.datas = {}
        found = self._find_similar_images(
            image, images, path, 'similar_images', similarity
        )
        if found:
            # Move the similar files into the destination directory
            self.sort_medias(remove_duplicates=remove_duplicates)

    rows_after = self.db.sqlite.len('metadata')
    if rows_before and rows_before != rows_after:
        self.log.error('Nb of row have changed unexpectedly')

    if not self.check_db():
        self.summary.append('check', False)

    return self.summary
2021-10-17 12:33:14 +02:00
2022-04-17 22:05:13 +02:00
def edit_metadata(self, paths, keys, loc=None, overwrite=False):
    """Edit metadata and exif data for given keys.

    For every media returned by ``self.medias.get_medias_datas`` and every
    key of ``keys``, the user is prompted for a value when the current one
    is empty or when ``overwrite`` is set. An accepted value is stored in
    the media metadata, saved to the database and written to the exif
    data of the file. Each prompted key is recorded in the summary under
    'update' with the outcome; a cancelled or invalid entry counts as a
    failed update.

    :param paths: paths handed to ``self.medias.get_medias_datas``
    :param keys: metadata keys to edit
    :param loc: forwarded to ``self.medias.get_medias_datas``
    :param overwrite: prompt even when the key already holds a value
    :returns: the collection summary
    """
    date_keys = ('date_original', 'date_created', 'date_modified')

    self._init_check_db()

    for file_path, media in self.medias.get_medias_datas(paths, loc=loc):
        media.metadata['file_path'] = os.path.relpath(file_path, self.root)
        for key in keys:
            print()
            value = media.metadata[key]
            if value and not overwrite:
                # The key already holds a value and must be left alone
                continue

            print(f"FILE: '{file_path}'")
            if overwrite:
                print(f"{key}: '{value}'")

            # Prompt value for given key for file_path. The answers dict
            # is keyed by the question name ('value'), not by the key.
            prompt = [
                inquirer.Text('value', message=key),
            ]
            answer = inquirer.prompt(prompt, theme=self.theme)
            # answer = {'value': '03-12-2021 08:12:35'}
            # inquirer.prompt gives None back when the user cancels
            entry = answer['value'] if answer else None

            # Validate value
            if entry is None:
                value = None
            elif key in date_keys:
                # Check date format
                value = media.get_date_format(entry)
            else:
                if not entry.isalnum():
                    print("Invalid entry, use alphanumeric chars")
                    # One more try; keep the typed text, not the dict
                    answer = inquirer.prompt(prompt, theme=self.theme)
                    entry = answer['value'] if answer else None
                value = entry if entry and entry.isalnum() else None

            result = False
            if value:
                media.metadata[key] = value
                # Update database
                self.db.add_file_data(media.metadata)
                # Update exif data
                exif = WriteExif(
                    file_path,
                    media.metadata,
                    ignore_tags=self.opt['Exif']['ignore_tags'],
                )
                result = exif.set_key_values(key, value)

            self.summary.append('update', bool(result), file_path)

    return self.summary