Refactoring config and geolocation methods

This commit is contained in:
Cédric Leporcq 2021-08-24 17:23:51 +02:00
parent 67f3cd471a
commit 016329e044
8 changed files with 214 additions and 200 deletions

View File

@ -15,3 +15,4 @@ name2=.DS_Store
[Geolocation] [Geolocation]
geocoder=Nominatim geocoder=Nominatim
prefer_english_names=False prefer_english_names=False
# timeout=1

View File

@ -7,11 +7,12 @@ from datetime import datetime
import click import click
from ordigi import config from ordigi.config import Config
from ordigi import constants from ordigi import constants
from ordigi import log from ordigi import log
from ordigi.database import Db from ordigi.database import Db
from ordigi.filesystem import FileSystem from ordigi.filesystem import FileSystem
from ordigi.geolocation import GeoLocation
from ordigi.media import Media, get_all_subclasses from ordigi.media import Media, get_all_subclasses
from ordigi.summary import Summary from ordigi.summary import Summary
@ -98,29 +99,24 @@ def _sort(debug, dry_run, destination, clean, copy, exclude_regex, filter_by_ext
if not os.path.exists(destination): if not os.path.exists(destination):
logger.error(f'Directory {destination} does not exist') logger.error(f'Directory {destination} does not exist')
conf = config.load_config(constants.CONFIG_FILE) config = Config(constants.CONFIG_FILE)
path_format = config.get_path_definition(conf) opt = config.get_options()
# if no exclude list was passed in we check if there's a config # if no exclude list was passed in we check if there's a config
if len(exclude_regex) == 0: if len(exclude_regex) == 0:
if 'Exclusions' in conf: exclude_regex = opt['exclude_regex']
exclude_regex = [value for key, value in conf.items('Exclusions')]
exclude_regex_list = set(exclude_regex) exclude_regex_list = set(exclude_regex)
# Initialize Db # Initialize Db
db = Db(destination) db = Db(destination)
if 'Path' in conf and 'day_begins' in conf['Path']: filesystem = FileSystem(cache, opt['day_begins'], dry_run, exclude_regex_list,
config_directory = conf['Path'] filter_by_ext, logger, max_deep, mode, opt['path_format'])
day_begins = int(config_directory['day_begins'])
else: loc = GeoLocation(opt['geocoder'], opt['prefer_english_names'], opt['timeout'])
day_begins = 0
filesystem = FileSystem(cache, day_begins, dry_run, exclude_regex_list,
filter_by_ext, logger, max_deep, mode, path_format)
summary, has_errors = filesystem.sort_files(paths, destination, db, summary, has_errors = filesystem.sort_files(paths, destination, db,
remove_duplicates, ignore_tags) loc, remove_duplicates, ignore_tags)
if clean: if clean:
remove_empty_folders(destination, logger) remove_empty_folders(destination, logger)

View File

@ -1,47 +1,92 @@
"""Load config file as a singleton."""
from configparser import RawConfigParser from configparser import RawConfigParser
from os import path from os import path
from ordigi import constants from ordigi import constants
from geopy.geocoders import options as gopt
def write(conf_file, config): class Config:
with open(conf_file, 'w') as conf_file: """Manage config file"""
config.write(conf_file)
return True
return False def __init__(self, conf_path=None, conf={}):
self.conf_path = conf_path
if conf_path == None:
self.conf = conf
else:
self.conf = self.load_config()
def load_config(file): def write(self, conf):
if not path.exists(file): with open(self.conf_path, 'w') as conf_path:
return {} conf.write(conf_path)
return True
config = RawConfigParser() return False
config.read(file)
return config
def get_path_definition(config): def load_config(self):
"""Returns a list of folder definitions. if not path.exists(self.conf_path):
return {}
Each element in the list represents a folder. conf = RawConfigParser()
Fallback folders are supported and are nested lists. conf.read(self.conf_path)
return conf
:returns: string def get_option(self, option, section):
"""
if 'Path' in config: if section in self.conf and option in self.conf[section]:
if 'format' in config['Path']: return self.conf[section][option]
return config['Path']['format']
elif 'dirs_path' and 'name' in config['Path']:
return config['Path']['dirs_path'] + '/' + config['Path']['name']
return constants.default_path + '/' + constants.default_name return False
def get_geocoder(): def get_path_definition(self):
config = load_config(constants.CONFIG_FILE) """Returns a list of folder definitions.
if 'Geolocation' in config and 'geocoder' in config['Geolocation']:
geocoder = config['Geolocation']['geocoder']
if geocoder in ('Nominatim', ):
return geocoder
return constants.default_geocoder Each element in the list represents a folder.
Fallback folders are supported and are nested lists.
:returns: string
"""
if 'Path' in self.conf:
if 'format' in self.conf['Path']:
return self.conf['Path']['format']
elif 'dirs_path' and 'name' in self.conf['Path']:
return self.conf['Path']['dirs_path'] + '/' + self.conf['Path']['name']
return constants.default_path + '/' + constants.default_name
def get_options(self):
    """Collect the settings read from the configuration into one dict.

    Geolocation entries are read through ``self.get_option``; path and
    exclusion entries are read from ``self.conf`` directly. A fallback is
    used for every entry that is missing.

    :returns: dict with the keys ``geocoder``, ``prefer_english_names``,
        ``timeout``, ``path_format``, ``day_begins`` and ``exclude_regex``
    :raises ValueError: if ``timeout`` or ``day_begins`` is set to text
        that is not a number
    """
    options = {}

    # Only geocoders listed here are accepted; anything else falls back.
    geocoder = self.get_option('geocoder', 'Geolocation')
    if geocoder and geocoder in ('Nominatim', ):
        options['geocoder'] = geocoder
    else:
        options['geocoder'] = constants.default_geocoder

    # Config values are strings and bool('False') is True, so the text
    # has to be interpreted rather than cast.
    prefer_english_names = self.get_option('prefer_english_names', 'Geolocation')
    if isinstance(prefer_english_names, str):
        prefer_english_names = (
            prefer_english_names.strip().lower() in ('1', 'yes', 'true', 'on')
        )
    options['prefer_english_names'] = bool(prefer_english_names)

    # Hand the geocoder a number, not the raw config string.
    timeout = self.get_option('timeout', 'Geolocation')
    if timeout:
        options['timeout'] = float(timeout)
    else:
        options['timeout'] = gopt.default_timeout

    options['path_format'] = self.get_path_definition()

    if 'Path' in self.conf and 'day_begins' in self.conf['Path']:
        options['day_begins'] = int(self.conf['Path']['day_begins'])
    else:
        options['day_begins'] = 0

    # Always provide the key so callers can index it unconditionally.
    if 'Exclusions' in self.conf:
        options['exclude_regex'] = list(self.conf['Exclusions'].values())
    else:
        options['exclude_regex'] = []

    return options

View File

@ -15,7 +15,6 @@ import time
from datetime import datetime, timedelta from datetime import datetime, timedelta
from ordigi import constants from ordigi import constants
from ordigi import geolocation
from ordigi import media from ordigi import media
from ordigi.media import Media, get_all_subclasses from ordigi.media import Media, get_all_subclasses
@ -189,7 +188,7 @@ class FileSystem(object):
return folder_name return folder_name
def get_part(self, item, mask, metadata, db, subdirs): def get_part(self, item, mask, metadata, db, subdirs, loc):
"""Parse a specific folder's name given a mask and metadata. """Parse a specific folder's name given a mask and metadata.
:param item: Name of the item as defined in the path (i.e. date from %date) :param item: Name of the item as defined in the path (i.e. date from %date)
@ -215,7 +214,7 @@ class FileSystem(object):
if date is not None: if date is not None:
part = date.strftime(mask) part = date.strftime(mask)
elif item in ('location', 'city', 'state', 'country'): elif item in ('location', 'city', 'state', 'country'):
place_name = geolocation.place_name( place_name = loc.place_name(
metadata['latitude'], metadata['latitude'],
metadata['longitude'], metadata['longitude'],
db, db,
@ -251,7 +250,7 @@ class FileSystem(object):
return part return part
def get_path(self, metadata, db, subdirs='', whitespace_sub='_'): def get_path(self, metadata, db, loc, subdirs='', whitespace_sub='_'):
"""path_format: {%Y-%d-%m}/%u{city}/{album} """path_format: {%Y-%d-%m}/%u{city}/{album}
Returns file path. Returns file path.
@ -272,7 +271,7 @@ class FileSystem(object):
# parts = re.split(mask, this_part) # parts = re.split(mask, this_part)
# parts = this_part.split('%')[1:] # parts = this_part.split('%')[1:]
part = self.get_part(item, matched.group()[1:-1], metadata, db, part = self.get_part(item, matched.group()[1:-1], metadata, db,
subdirs) subdirs, loc)
part = part.strip() part = part.strip()
@ -570,7 +569,7 @@ class FileSystem(object):
return result return result
def sort_files(self, paths, destination, db, remove_duplicates=False, def sort_files(self, paths, destination, db, loc, remove_duplicates=False,
ignore_tags=set()): ignore_tags=set()):
""" """
Sort files into appropriate folder Sort files into appropriate folder
@ -592,7 +591,7 @@ class FileSystem(object):
if media: if media:
metadata = media.get_metadata() metadata = media.get_metadata()
# Get the destination path according to metadata # Get the destination path according to metadata
file_path = self.get_path(metadata, db, subdirs=subdirs) file_path = self.get_path(metadata, db, loc, subdirs=subdirs)
else: else:
# Keep same directory structure # Keep same directory structure
file_path = os.path.relpath(src_path, path) file_path = os.path.relpath(src_path, path)

View File

@ -1,153 +1,119 @@
"""Look up geolocation information for media objects."""
from os import path from os import path
import geopy import geopy
from geopy.geocoders import Nominatim from geopy.geocoders import Nominatim, options
import logging import logging
from ordigi import constants from ordigi import config
from ordigi.config import load_config, get_geocoder
__KEY__ = None __KEY__ = None
__DEFAULT_LOCATION__ = 'Unknown Location' __DEFAULT_LOCATION__ = 'Unknown Location'
__PREFER_ENGLISH_NAMES__ = None
def coordinates_by_name(name, db): class GeoLocation:
# Try to get cached location first """Look up geolocation information for media objects."""
cached_coordinates = db.get_location_coordinates(name)
if(cached_coordinates is not None):
return {
'latitude': cached_coordinates[0],
'longitude': cached_coordinates[1]
}
# If the name is not cached then we go ahead with an API lookup def __init__(self, geocoder='Nominatim', prefer_english_names=False, timeout=options.default_timeout):
geocoder = get_geocoder() self.geocoder = geocoder
if geocoder == 'Nominatim': self.prefer_english_names = prefer_english_names
locator = Nominatim(user_agent='myGeocoder') self.timeout = timeout
geolocation_info = locator.geocode(name)
if geolocation_info is not None: def coordinates_by_name(self, name, db, timeout=options.default_timeout):
# Try to get cached location first
cached_coordinates = db.get_location_coordinates(name)
if(cached_coordinates is not None):
return { return {
'latitude': geolocation_info.latitude, 'latitude': cached_coordinates[0],
'longitude': geolocation_info.longitude 'longitude': cached_coordinates[1]
} }
else:
raise NameError(geocoder)
return None # If the name is not cached then we go ahead with an API lookup
geocoder = self.geocoder
if geocoder == 'Nominatim':
def decimal_to_dms(decimal): locator = Nominatim(user_agent='myGeocoder', timeout=timeout)
decimal = float(decimal) geolocation_info = locator.geocode(name)
decimal_abs = abs(decimal) if geolocation_info is not None:
minutes, seconds = divmod(decimal_abs*3600, 60) return {
degrees, minutes = divmod(minutes, 60) 'latitude': geolocation_info.latitude,
degrees = degrees 'longitude': geolocation_info.longitude
sign = 1 if decimal >= 0 else -1 }
return (degrees, minutes, seconds, sign)
def dms_to_decimal(degrees, minutes, seconds, direction=' '):
    """Convert a degrees/minutes/seconds coordinate to decimal degrees.

    :param degrees: degrees component
    :param minutes: arc-minutes component
    :param seconds: arc-seconds component
    :param direction: compass reference; a value whose first character is
        W or S (either case) gives a negative result. An empty or None
        value is treated as positive.
    :returns: signed decimal degrees
    """
    # Guard first: direction[0] would raise IndexError on an empty string.
    negative = bool(direction) and direction[0] in 'WSws'
    sign = -1 if negative else 1
    return (degrees + minutes / 60 + seconds / 3600) * sign
def dms_string(decimal, type='latitude'):
    """Format a decimal coordinate as a degrees/minutes/seconds string.

    The result has the form ``<deg> deg <min>' <sec>" <N|S|E|W>``.

    :param decimal: coordinate in decimal degrees (anything ``float()``
        accepts, since ``decimal_to_dms`` converts it)
    :param type: ``'latitude'`` (N/S) or ``'longitude'`` (E/W)
    :returns: the formatted string
    :raises ValueError: if ``type`` is neither of the two supported values
    """
    # Validate up front; otherwise the direction letter would be unbound.
    if type == 'latitude':
        positive, negative = 'N', 'S'
    elif type == 'longitude':
        positive, negative = 'E', 'W'
    else:
        raise ValueError(
            "type must be 'latitude' or 'longitude', got {!r}".format(type))

    # Use the sign computed from the parsed float so numeric strings work.
    degrees, minutes, seconds, sign = decimal_to_dms(decimal)
    direction = positive if sign >= 0 else negative
    return '{} deg {}\' {}" {}'.format(degrees, minutes, seconds, direction)
def get_prefer_english_names():
    """Tell whether the configuration asks for English place names.

    The value comes from the ``prefer_english_names`` entry of the
    ``Geolocation`` section and is kept in ``__PREFER_ENGLISH_NAMES__``
    once it has been read, so the config file is not parsed again.

    :returns: bool; False when the section or the entry is missing
    """
    global __PREFER_ENGLISH_NAMES__
    if __PREFER_ENGLISH_NAMES__ is not None:
        return __PREFER_ENGLISH_NAMES__

    config = load_config(constants.CONFIG_FILE)
    # load_config returns {} for a missing file, so the section itself may
    # be absent; indexing it directly would raise KeyError.
    if('Geolocation' not in config
            or 'prefer_english_names' not in config['Geolocation']):
        return False

    # Config values are strings and bool('False') is True: interpret the text.
    raw_value = str(config['Geolocation']['prefer_english_names'])
    __PREFER_ENGLISH_NAMES__ = raw_value.strip().lower() in ('1', 'yes', 'true', 'on')
    return __PREFER_ENGLISH_NAMES__
def place_name(lat, lon, db, cache=True, logger=logging.getLogger()):
    """Resolve a coordinate pair to a dict of place names.

    :param lat: latitude, or None
    :param lon: longitude, or None
    :param db: location store; queried with ``get_location_name`` and
        updated with ``add_location`` / ``update_location_db``
    :param cache: when true, a stored name within 3000 of the point is
        returned instead of querying the geocoder
    :param logger: logger handed to the geocoder lookup
    :returns: dict holding a ``default`` entry plus whichever of city, town,
        village, state and country the geocoder supplied
    :raises NameError: if the configured geocoder is not ``Nominatim``
    """
    fallback = {'default': __DEFAULT_LOCATION__}
    if lat is None or lon is None:
        return fallback

    # Normalise both coordinates to float before any lookup.
    lat = lat if isinstance(lat, float) else float(lat)
    lon = lon if isinstance(lon, float) else float(lon)

    if cache:
        # 3 km radius for a cache hit.
        stored = db.get_location_name(lat, lon, 3000)
        # Only a dict counts as a hit, which forces an upgrade of old
        # string entries in the location db. See gh-160.
        if isinstance(stored, dict):
            return stored

    geocoder = get_geocoder()
    if geocoder != 'Nominatim':
        raise NameError(geocoder)
    info = lookup_osm(lat, lon, logger)

    names = {}
    if info is not None and 'address' in info:
        address = info['address']
        # gh-386: town is supported, ranked after city for backwards
        # compatibility.
        for level in ('city', 'town', 'village', 'state', 'country'):
            if level not in address:
                continue
            names[level] = address[level]
            # The first (most specific) level found becomes the default.
            names.setdefault('default', address[level])

    if not names:
        return fallback

    db.add_location(lat, lon, names)
    # TODO: Maybe this should only be done on exit and not for every write.
    db.update_location_db()
    return names
def lookup_osm(lat, lon, logger=logging.getLogger()):
prefer_english_names = get_prefer_english_names()
try:
locator = Nominatim(user_agent='myGeocoder')
coords = (lat, lon)
if(prefer_english_names):
lang='en'
else: else:
lang='local' raise NameError(geocoder)
return locator.reverse(coords, language=lang).raw
except geopy.exc.GeocoderUnavailable as e:
logger.error(e)
return None
# Fix *** TypeError: `address` must not be None
except (TypeError, ValueError) as e:
logger.error(e)
return None return None
def place_name(self, lat, lon, db, cache=True, logger=logging.getLogger(), timeout=None):
    """Resolve a coordinate pair to a dict of place names.

    :param lat: latitude, or None
    :param lon: longitude, or None
    :param db: location store; queried with ``get_location_name`` and
        updated with ``add_location`` / ``update_location_db``
    :param cache: when true, a stored name within 3000 of the point is
        returned instead of querying the geocoder
    :param logger: logger handed to the geocoder lookup
    :param timeout: geocoder timeout; None (the default) means use the
        timeout this instance was created with
    :returns: dict holding a ``default`` entry plus whichever of city, town,
        village, state and country the geocoder supplied
    :raises NameError: if ``self.geocoder`` is not ``Nominatim``
    """
    lookup_place_name_default = {'default': __DEFAULT_LOCATION__}
    if(lat is None or lon is None):
        return lookup_place_name_default

    # Fall back to the configured timeout; otherwise the value given to
    # the constructor would never be used.
    if timeout is None:
        timeout = self.timeout
    # A value read from the config file arrives as text.
    if isinstance(timeout, str):
        timeout = float(timeout)

    # Convert lat/lon to floats
    if(not isinstance(lat, float)):
        lat = float(lat)
    if(not isinstance(lon, float)):
        lon = float(lon)

    # Try to get cached location first
    # 3km distance radius for a match
    cached_place_name = None
    if cache:
        cached_place_name = db.get_location_name(lat, lon, 3000)
    # We check that it's a dict to coerce an upgrade of the location
    # db from a string location to a dictionary. See gh-160.
    if(isinstance(cached_place_name, dict)):
        return cached_place_name

    lookup_place_name = {}
    geocoder = self.geocoder
    if geocoder == 'Nominatim':
        geolocation_info = self.lookup_osm(lat, lon, logger, timeout)
    else:
        raise NameError(geocoder)

    if(geolocation_info is not None and 'address' in geolocation_info):
        address = geolocation_info['address']
        # gh-386 adds support for town
        # taking precedence after city for backwards compatibility
        for loc in ['city', 'town', 'village', 'state', 'country']:
            if(loc in address):
                lookup_place_name[loc] = address[loc]
                # In many cases the desired key is not available so we
                # set the most specific as the default.
                if('default' not in lookup_place_name):
                    lookup_place_name['default'] = address[loc]

    if(lookup_place_name):
        db.add_location(lat, lon, lookup_place_name)
        # TODO: Maybe this should only be done on exit and not for every write.
        db.update_location_db()

    if('default' not in lookup_place_name):
        lookup_place_name = lookup_place_name_default

    return lookup_place_name
def lookup_osm(self, lat, lon, logger=logging.getLogger(), timeout=options.default_timeout):
    """Reverse-geocode a coordinate pair with Nominatim.

    :param lat: latitude
    :param lon: longitude
    :param logger: logger that receives any lookup error
    :param timeout: timeout passed to the ``Nominatim`` geocoder
    :returns: the raw response of the reverse lookup, or None when nothing
        was found or the lookup failed
    """
    try:
        locator = Nominatim(user_agent='myGeocoder', timeout=timeout)
        coords = (lat, lon)
        if(self.prefer_english_names):
            lang='en'
        else:
            lang='local'
        locator_reverse = locator.reverse(coords, language=lang)
        if locator_reverse is not None:
            return locator_reverse.raw
        else:
            return None
    # `except A or B` evaluates the `or` first and so only catches A;
    # a tuple is needed to catch both exception types.
    except (geopy.exc.GeocoderUnavailable, geopy.exc.GeocoderServiceError) as e:
        logger.error(e)
        return None
    # Fix *** TypeError: `address` must not be None
    except (TypeError, ValueError) as e:
        logger.error(e)
        return None

View File

@ -6,7 +6,7 @@ from pathlib import Path
import shutil import shutil
import tempfile import tempfile
from ordigi import config from ordigi.config import Config
from ordigi.exiftool import _ExifToolProc from ordigi.exiftool import _ExifToolProc
ORDIGI_PATH = Path(__file__).parent.parent ORDIGI_PATH = Path(__file__).parent.parent
@ -41,7 +41,8 @@ def conf_path():
'geocoder': 'Nominatium' 'geocoder': 'Nominatium'
} }
conf_path = Path(tmp_path, "ordigi.conf") conf_path = Path(tmp_path, "ordigi.conf")
config.write(conf_path, conf) config = Config(conf_path)
config.write(conf)
yield conf_path yield conf_path

View File

@ -4,7 +4,7 @@ import shutil
import tempfile import tempfile
from unittest import mock from unittest import mock
from ordigi import config from ordigi.config import Config
# Helpers # Helpers
import random import random
@ -21,7 +21,8 @@ class TestConfig:
@pytest.fixture(scope="module") @pytest.fixture(scope="module")
def conf(self, conf_path): def conf(self, conf_path):
return config.load_config(conf_path) config = Config(conf_path)
return config.load_config()
def test_write(self, conf_path): def test_write(self, conf_path):
assert conf_path.is_file() assert conf_path.is_file()
@ -38,20 +39,21 @@ class TestConfig:
def test_load_config_no_exist(self): def test_load_config_no_exist(self):
# test file not exist # test file not exist
conf = config.load_config('filename') config = Config('filename')
assert conf == {} assert config.conf == {}
def test_load_config_invalid(self, conf_path): def test_load_config_invalid(self, conf_path):
# test invalid config # test invalid config
write_random_file(conf_path) write_random_file(conf_path)
with pytest.raises(Exception) as e: with pytest.raises(Exception) as e:
config.load_config(conf_path) config = Config(conf_path)
assert e.typename == 'MissingSectionHeaderError' assert e.typename == 'MissingSectionHeaderError'
def test_get_path_definition(self, conf): def test_get_path_definition(self, conf):
""" """
Get path definition from config Get path definition from config
""" """
path = config.get_path_definition(conf) config = Config(conf=conf)
path = config.get_path_definition()
assert path == '%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}/{%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}' assert path == '%u{%Y-%m}/{city}|{city}-{%Y}/{folders[:1]}/{folder}/{%Y-%m-%b-%H-%M-%S}-{basename}.%l{ext}'

View File

@ -10,9 +10,10 @@ from time import sleep
from .conftest import copy_sample_files from .conftest import copy_sample_files
from ordigi import constants from ordigi import constants
from ordigi.database import Db from ordigi.database import Db
from ordigi.filesystem import FileSystem
from ordigi.media import Media
from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exiftool from ordigi.exiftool import ExifToolCaching, exiftool_is_running, terminate_exiftool
from ordigi.filesystem import FileSystem
from ordigi.geolocation import GeoLocation
from ordigi.media import Media
@pytest.mark.skip() @pytest.mark.skip()
@ -67,12 +68,13 @@ class TestFilesystem:
exif_data = ExifToolCaching(str(file_path)).asdict() exif_data = ExifToolCaching(str(file_path)).asdict()
metadata = media.get_metadata() metadata = media.get_metadata()
loc = GeoLocation()
for item, regex in items.items(): for item, regex in items.items():
for mask in masks: for mask in masks:
matched = re.search(regex, mask) matched = re.search(regex, mask)
if matched: if matched:
part = filesystem.get_part(item, mask[1:-1], part = filesystem.get_part(item, mask[1:-1],
metadata, Db(tmp_path), subdirs) metadata, Db(tmp_path), subdirs, loc)
# check if part is correct # check if part is correct
assert isinstance(part, str), file_path assert isinstance(part, str), file_path
if item == 'basename': if item == 'basename':
@ -138,8 +140,9 @@ class TestFilesystem:
def test_sort_files(self, tmp_path): def test_sort_files(self, tmp_path):
db = Db(tmp_path) db = Db(tmp_path)
filesystem = FileSystem(path_format=self.path_format) filesystem = FileSystem(path_format=self.path_format)
loc = GeoLocation()
summary, has_errors = filesystem.sort_files([self.src_paths], tmp_path, db) summary, has_errors = filesystem.sort_files([self.src_paths],
tmp_path, db, loc)
# Summary is created and there is no errors # Summary is created and there is no errors
assert summary, summary assert summary, summary
@ -154,7 +157,8 @@ class TestFilesystem:
filesystem = FileSystem(path_format=self.path_format, mode=mode) filesystem = FileSystem(path_format=self.path_format, mode=mode)
# copy mode # copy mode
src_path = Path(self.src_paths, 'photo.png') src_path = Path(self.src_paths, 'photo.png')
dest_path = Path(tmp_path,'photo_copy.png') name = 'photo_' + mode + '.png'
dest_path = Path(tmp_path, name)
src_checksum = filesystem.checksum(src_path) src_checksum = filesystem.checksum(src_path)
result_copy = filesystem.sort_file(src_path, dest_path) result_copy = filesystem.sort_file(src_path, dest_path)
assert result_copy assert result_copy