Refactoring config and geolocation methods

This commit is contained in:
Cédric Leporcq 2021-08-24 17:23:51 +02:00
parent 67f3cd471a
commit 016329e044
8 changed files with 214 additions and 200 deletions

View File

@ -15,3 +15,4 @@ name2=.DS_Store
[Geolocation] [Geolocation]
geocoder=Nominatim geocoder=Nominatim
prefer_english_names=False prefer_english_names=False
# timeout=1

View File

@ -7,11 +7,12 @@ from datetime import datetime
import click import click
from ordigi import config from ordigi.config import Config
from ordigi import constants from ordigi import constants
from ordigi import log from ordigi import log
from ordigi.database import Db from ordigi.database import Db
from ordigi.filesystem import FileSystem from ordigi.filesystem import FileSystem
from ordigi.geolocation import GeoLocation
from ordigi.media import Media, get_all_subclasses from ordigi.media import Media, get_all_subclasses
from ordigi.summary import Summary from ordigi.summary import Summary
@ -98,29 +99,24 @@ def _sort(debug, dry_run, destination, clean, copy, exclude_regex, filter_by_ext
if not os.path.exists(destination): if not os.path.exists(destination):
logger.error(f'Directory {destination} does not exist') logger.error(f'Directory {destination} does not exist')
conf = config.load_config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
path_format = config.get_path_definition(conf) opt = config.get_options()
# if no exclude list was passed in we check if there's a config # if no exclude list was passed in we check if there's a config
if len(exclude_regex) == 0: if len(exclude_regex) == 0:
if 'Exclusions' in conf: exclude_regex = opt['exclude_regex']
exclude_regex = [value for key, value in conf.items('Exclusions')]
exclude_regex_list = set(exclude_regex) exclude_regex_list = set(exclude_regex)
# Initialize Db # Initialize Db
db = Db(destination) db = Db(destination)
if 'Path' in conf and 'day_begins' in conf['Path']: filesystem = FileSystem(cache, opt['day_begins'], dry_run, exclude_regex_list,
config_directory = conf['Path'] filter_by_ext, logger, max_deep, mode, opt['path_format'])
day_begins = int(config_directory['day_begins'])
else: loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout'])
day_begins = 0
filesystem = FileSystem(cache, day_begins, dry_run, exclude_regex_list,
filter_by_ext, logger, max_deep, mode, path_format)
summary, has_errors = filesystem.sort_files(paths, destination, db, summary, has_errors = filesystem.sort_files(paths, destination, db,
remove_duplicates, ignore_tags) loc, remove_duplicates, ignore_tags)
if clean: if clean:
remove_empty_folders(destination, logger) remove_empty_folders(destination, logger)

View File

@ -1,25 +1,42 @@
"""Load config file as a singleton."""
from configparser import RawConfigParser from configparser import RawConfigParser
from os import path from os import path
from ordigi import constants from ordigi import constants
from geopy.geocoders import options as gopt
def write(conf_file, config): class Config:
with open(conf_file, 'w') as conf_file: """Manage config file"""
config.write(conf_file)
def __init__(self, conf_path=None, conf={}):
self.conf_path = conf_path
if conf_path == None:
self.conf = conf
else:
self.conf = self.load_config()
def write(self, conf):
with open(self.conf_path, 'w') as conf_path:
conf.write(conf_path)
return True return True
return False return False
def load_config(file): def load_config(self):
if not path.exists(file): if not path.exists(self.conf_path):
return {} return {}
config = RawConfigParser() conf = RawConfigParser()
config.read(file) conf.read(self.conf_path)
return config return conf
def get_path_definition(config): def get_option(self, option, section):
if section in self.conf and option in self.conf[section]:
return self.conf[section][option]
return False
def get_path_definition(self):
"""Returns a list of folder definitions. """Returns a list of folder definitions.
Each element in the list represents a folder. Each element in the list represents a folder.
@ -28,20 +45,48 @@ def get_path_definition(config):
:returns: string :returns: string
""" """
if 'Path' in config: if 'Path' in self.conf:
if 'format' in config['Path']: if 'format' in self.conf['Path']:
return config['Path']['format'] return self.conf['Path']['format']
elif 'dirs_path' and 'name' in config['Path']: elif 'dirs_path' and 'name' in self.conf['Path']:
return config['Path']['dirs_path'] + '/' + config['Path']['name'] return self.conf['Path']['dirs_path'] + '/' + self.conf['Path']['name']
return constants.default_path + '/' + constants.default_name return constants.default_path + '/' + constants.default_name
def get_geocoder(): def get_options(self):
config = load_config(constants.CONFIG_FILE) """Get config options
if 'Geolocation' in config and 'geocoder' in config['Geolocation']: :returns: dict
geocoder = config['Geolocation']['geocoder'] """
if geocoder in ('Nominatim', ):
return geocoder
return constants.default_geocoder options = {}
geocoder = self.get_option('geocoder', 'Geolocation')
if geocoder and geocoder in ('Nominatim', ):
options['geocoder'] = geocoder
else:
options['geocoder'] = constants.default_geocoder
prefer_english_names = self.get_option('prefer_english_names', 'Geolocation')
if prefer_english_names:
options['prefer_english_names'] = bool(prefer_english_names)
else:
options['prefer_english_names'] = False
timeout = self.get_option('timeout', 'Geolocation')
if timeout:
options['timeout'] = timeout
else:
options['timeout'] = gopt.default_timeout
options['path_format'] = self.get_path_definition()
if 'Path' in self.conf and 'day_begins' in self.conf['Path']:
config_directory = self.conf['Path']
options['day_begins'] = int(config_directory['day_begins'])
else:
options['day_begins'] = 0
if 'Exclusions' in self.conf:
options['exclude_regex'] = [value for key, value in self.conf.items('Exclusions')]
return options

View File

@ -15,7 +15,6 @@ import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ordigi import constants from ordigi import constants
from ordigi import geolocation
from ordigi import media from ordigi import media
from ordigi.media import Media, get_all_subclasses from ordigi.media import Media, get_all_subclasses
@ -189,7 +188,7 @@ class FileSystem(object):
return folder_name return folder_name
def get_part(self, item, mask, metadata, db, subdirs): def get_part(self, item, mask, metadata, db, subdirs, loc):
"""Parse a specific folder's name given a mask and metadata. """Parse a specific folder's name given a mask and metadata.
:param item: Name of the item as defined in the path (i.e. date from %date) :param item: Name of the item as defined in the path (i.e. date from %date)
@ -215,7 +214,7 @@ class FileSystem(object):
if date is not None: if date is not None:
part = date.strftime(mask) part = date.strftime(mask)
elif item in ('location', 'city', 'state', 'country'): elif item in ('location', 'city', 'state', 'country'):
place_name = geolocation.place_name( place_name = loc.place_name(
metadata['latitude'], metadata['latitude'],
metadata['longitude'], metadata['longitude'],
db, db,
@ -251,7 +250,7 @@ class FileSystem(object):
return part return part
def get_path(self, metadata, db, subdirs='', whitespace_sub='_'): def get_path(self, metadata, db, loc, subdirs='', whitespace_sub='_'):
"""path_format: {%Y-%d-%m}/%u{city}/{album} """path_format: {%Y-%d-%m}/%u{city}/{album}
Returns file path. Returns file path.
@ -272,7 +271,7 @@ class FileSystem(object):
# parts = re.split(mask, this_part) # parts = re.split(mask, this_part)
# parts = this_part.split('%')[1:] # parts = this_part.split('%')[1:]
part = self.get_part(item, matched.group()[1:-1], metadata, db, part = self.get_part(item, matched.group()[1:-1], metadata, db,
subdirs) subdirs, loc)
part = part.strip() part = part.strip()
@ -570,7 +569,7 @@ class FileSystem(object):
return result return result
def sort_files(self, paths, destination, db, remove_duplicates=False, def sort_files(self, paths, destination, db, loc, remove_duplicates=False,
ignore_tags=set()): ignore_tags=set()):
""" """
Sort files into appropriate folder Sort files into appropriate folder
@ -592,7 +591,7 @@ class FileSystem(object):
if media: if media:
metadata = media.get_metadata() metadata = media.get_metadata()
# Get the destination path according to metadata # Get the destination path according to metadata
file_path = self.get_path(metadata, db, subdirs=subdirs) file_path = self.get_path(metadata, db, loc, subdirs=subdirs)
else: else:
# Keep same directory structure # Keep same directory structure
file_path = os.path.relpath(src_path, path) file_path = os.path.relpath(src_path, path)

View File

@ -1,20 +1,25 @@
"""Look up geolocation information for media objects."""
from os import path from os import path
import geopy import geopy
from geopy.geocoders import Nominatim from geopy.geocoders import Nominatim, options
import logging import logging
from ordigi import constants from ordigi import config
from ordigi.config import load_config, get_geocoder
__KEY__ = None __KEY__ = None
__DEFAULT_LOCATION__ = 'Unknown Location' __DEFAULT_LOCATION__ = 'Unknown Location'
__PREFER_ENGLISH_NAMES__ = None
def coordinates_by_name(name, db): class GeoLocation:
"""Look up geolocation information for media objects."""
def __init__(self, geocoder='Nominatim', prefer_english_names=False, timeout=options.default_timeout):
self.geocoder = geocoder
self.prefer_english_names = prefer_english_names
self.timeout = timeout
def coordinates_by_name(self, name, db, timeout=options.default_timeout):
# Try to get cached location first # Try to get cached location first
cached_coordinates = db.get_location_coordinates(name) cached_coordinates = db.get_location_coordinates(name)
if(cached_coordinates is not None): if(cached_coordinates is not None):
@ -24,9 +29,9 @@ def coordinates_by_name(name, db):
} }
# If the name is not cached then we go ahead with an API lookup # If the name is not cached then we go ahead with an API lookup
geocoder = get_geocoder() geocoder = self.geocoder
if geocoder == 'Nominatim': if geocoder == 'Nominatim':
locator = Nominatim(user_agent='myGeocoder') locator = Nominatim(user_agent='myGeocoder', timeout=timeout)
geolocation_info = locator.geocode(name) geolocation_info = locator.geocode(name)
if geolocation_info is not None: if geolocation_info is not None:
return { return {
@ -38,49 +43,7 @@ def coordinates_by_name(name, db):
return None return None
def place_name(self, lat, lon, db, cache=True, logger=logging.getLogger(), timeout=options.default_timeout):
def decimal_to_dms(decimal):
decimal = float(decimal)
decimal_abs = abs(decimal)
minutes, seconds = divmod(decimal_abs*3600, 60)
degrees, minutes = divmod(minutes, 60)
degrees = degrees
sign = 1 if decimal >= 0 else -1
return (degrees, minutes, seconds, sign)
def dms_to_decimal(degrees, minutes, seconds, direction=' '):
sign = 1
if direction[0] in 'WSws':
sign = -1
return (degrees + minutes / 60 + seconds / 3600) * sign
def dms_string(decimal, type='latitude'):
# Example string -> 38 deg 14' 27.82" S
dms = decimal_to_dms(decimal)
if type == 'latitude':
direction = 'N' if decimal >= 0 else 'S'
elif type == 'longitude':
direction = 'E' if decimal >= 0 else 'W'
return '{} deg {}\' {}" {}'.format(dms[0], dms[1], dms[2], direction)
def get_prefer_english_names():
global __PREFER_ENGLISH_NAMES__
if __PREFER_ENGLISH_NAMES__ is not None:
return __PREFER_ENGLISH_NAMES__
config = load_config(constants.CONFIG_FILE)
if('prefer_english_names' not in config['Geolocation']):
return False
__PREFER_ENGLISH_NAMES__ = bool(config['Geolocation']['prefer_english_names'])
return __PREFER_ENGLISH_NAMES__
def place_name(lat, lon, db, cache=True, logger=logging.getLogger()):
lookup_place_name_default = {'default': __DEFAULT_LOCATION__} lookup_place_name_default = {'default': __DEFAULT_LOCATION__}
if(lat is None or lon is None): if(lat is None or lon is None):
return lookup_place_name_default return lookup_place_name_default
@ -102,9 +65,9 @@ def place_name(lat, lon, db, cache=True, logger=logging.getLogger()):
return cached_place_name return cached_place_name
lookup_place_name = {} lookup_place_name = {}
geocoder = get_geocoder() geocoder = self.geocoder
if geocoder == 'Nominatim': if geocoder == 'Nominatim':
geolocation_info = lookup_osm(lat, lon, logger) geolocation_info = self.lookup_osm(lat, lon, logger, timeout)
else: else:
raise NameError(geocoder) raise NameError(geocoder)
@ -131,18 +94,21 @@ def place_name(lat, lon, db, cache=True, logger=logging.getLogger()):
return lookup_place_name return lookup_place_name
def lookup_osm(lat, lon, logger=logging.getLogger()): def lookup_osm(self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout):
prefer_english_names = get_prefer_english_names()
try: try:
locator = Nominatim(user_agent='myGeocoder') locator = Nominatim(user_agent='myGeocoder', timeout=timeout)
coords = (lat, lon) coords = (lat, lon)
if(prefer_english_names): if(self.prefer_english_names):
lang='en' lang='en'
else: else:
lang='local' lang='local'
return locator.reverse(coords, language=lang).raw locator_reverse = locator.reverse(coords, language=lang)
except geopy.exc.GeocoderUnavailable as e: if locator_reverse is not None:
return locator_reverse.raw
else:
return None
except geopy.exc.GeocoderUnavailable or geopy.exc.GeocoderServiceError as e:
logger.error(e) logger.error(e)
return None return None
# Fix *** TypeError: `address` must not be None # Fix *** TypeError: `address` must not be None

View File

@ -6,7 +6,7 @@ from pathlib import Path
import shutil import shutil
import tempfile import tempfile
from ordigi import config from ordigi.config import Config
from ordigi.exiftool import _ExifToolProc from ordigi.exiftool import _ExifToolProc
ORDIGI_PATH = Path(__file__).parent.parent ORDIGI_PATH = Path(__file__).parent.parent
@ -41,7 +41,8 @@ def conf_path():
'geocoder': 'Nominatium' 'geocoder': 'Nominatium'
} }
conf_path = Path(tmp_path, "ordigi.conf") conf_path = Path(tmp_path, "ordigi.conf")
config.write(conf_path, conf) config = Config(conf_path)
config.write(conf)
yield conf_path yield conf_path

View File

@ -4,7 +4,7 @@ import shutil
import tempfile import tempfile
from unittest import mock from unittest import mock
from ordigi import config from ordigi.config import Config
# Helpers # Helpers
import random import random
@ -21,7 +21,8 @@ class TestConfig:
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def conf(self, conf_path): def conf(self, conf_path):
return config.load_config(conf_path) config = Config(conf_path)
return config.load_config()
def test_write(self, conf_path): def test_write(self, conf_path):
assert conf_path.is_file() assert conf_path.is_file()
@ -38,20 +39,21 @@ class TestConfig:
def test_load_config_no_exist(self): def test_load_config_no_exist(self):
# test file not exist # test file not exist
conf = config.load_config('filename') config = Config('filename')
assert conf == {} assert config.conf == {}
def test_load_config_invalid(self, conf_path): def test_load_config_invalid(self, conf_path):
# test invalid config # test invalid config
write_random_file(conf_path) write_random_file(conf_path)
with pytest.raises(Exception) as e: with pytest.raises(Exception) as e:
config.load_config(conf_path) config = Config(conf_path)
assert e.typename == 'MissingSectionHeaderError' assert e.typename == 'MissingSectionHeaderError'
def test_get_path_definition(self, conf): def test_get_path_definition(self, conf):
""" """
Get path definition from config Get path definition from config
""" """
path = config.get_path_definition(conf) config = Config(conf=conf)
path = config.get_path_definition()
assert path == '%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}/{%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}' assert path == '%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}/{%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}'

View File

@ -10,9 +10,10 @@ from time import sleep
from .conftest import copy_sample_files from .conftest import copy_sample_files
from ordigi import constants from ordigi import constants
from ordigi.database import Db from ordigi.database import Db
from ordigi.filesystem import FileSystem
from ordigi.media import Media
from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exiftool from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exiftool
from ordigi.filesystem import FileSystem
from ordigi.geolocation import GeoLocation
from ordigi.media import Media
@pytest.mark.skip() @pytest.mark.skip()
@ -67,12 +68,13 @@ class TestFilesystem:
exif_data = ExifToolCaching(str(file_path)).asdict() exif_data = ExifToolCaching(str(file_path)).asdict()
metadata = media.get_metadata() metadata = media.get_metadata()
loc = GeoLocation()
for item, regex in items.items(): for item, regex in items.items():
for mask in masks: for mask in masks:
matched = re.search(regex, mask) matched = re.search(regex, mask)
if matched: if matched:
part = filesystem.get_part(item, mask[1:-1], part = filesystem.get_part(item, mask[1:-1],
metadata, Db(tmp_path), subdirs) metadata, Db(tmp_path), subdirs, loc)
# check if part is correct # check if part is correct
assert isinstance(part, str), file_path assert isinstance(part, str), file_path
if item == 'basename': if item == 'basename':
@ -138,8 +140,9 @@ class TestFilesystem:
def test_sort_files(self, tmp_path): def test_sort_files(self, tmp_path):
db = Db(tmp_path) db = Db(tmp_path)
filesystem = FileSystem(path_format=self.path_format) filesystem = FileSystem(path_format=self.path_format)
loc = GeoLocation()
summary, has_errors = filesystem.sort_files([self.src_paths], tmp_path, db) summary, has_errors = filesystem.sort_files([self.src_paths],
tmp_path, db, loc)
# Summary is created and there is no errors # Summary is created and there is no errors
assert summary, summary assert summary, summary
@ -154,7 +157,8 @@ class TestFilesystem:
filesystem = FileSystem(path_format=self.path_format, mode=mode) filesystem = FileSystem(path_format=self.path_format, mode=mode)
# copy mode # copy mode
src_path = Path(self.src_paths, 'photo.png') src_path = Path(self.src_paths, 'photo.png')
dest_path = Path(tmp_path,'photo_copy.png') name = 'photo_' + mode + '.png'
dest_path = Path(tmp_path, name)
src_checksum = filesystem.checksum(src_path) src_checksum = filesystem.checksum(src_path)
result_copy = filesystem.sort_file(src_path, dest_path) result_copy = filesystem.sort_file(src_path, dest_path)
assert result_copy assert result_copy