Add --exclude-regex parameter to exclude directories on import #340 (#342)

The `--exclude-regex` parameter and the `[Exclusions]` config section let you pass in regular expressions which are matched against source file paths; files whose paths match are ignored.
This commit is contained in:
Michael Wong 2019-10-28 14:19:51 +11:00 committed by Jaisen Mathai
parent 3ad6c0db35
commit 75e65901a9
5 changed files with 211 additions and 10 deletions

View File

@ -128,6 +128,9 @@ Options:
--trash After copying files, move the old files to the
trash.
--allow-duplicates Import the file even if it's already been imported.
--debug Override the value in constants.py with True.
--exclude-regex TEXT Regular expression for directories or files to
exclude.
--help Show this message and exit.
```
@ -168,6 +171,20 @@ Options:
Usage: elodie.py verify
```
### Excluding folders and files from being imported
If there are specific folders or files which you do not want imported, you can provide regular expressions; any file whose path matches one of them is skipped during import.
You can specify an exclusion at run time by using the `--exclude-regex` argument of the `import` command. You can pass multiple `--exclude-regex` arguments and all folder/file paths which match will be (silently) skipped.
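If there are certain file or folder paths you *never* want to import then you can also add an `[Exclusions]` section to your `config.ini` file. Similar to the command line argument, you can provide multiple exclusions: each entry's name is only a label and its value is the regular expression. Note that the `[Exclusions]` section is only consulted when no `--exclude-regex` argument is passed on the command line. Here is an example.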
```
[Exclusions]
synology_folders=@eaDir
thumbnails=.thumbnails
```
### Create your own folder structure
OK, so what if you don't like the folders being named `2015-07-Jul/Mountain View`? No problem!

View File

@ -19,6 +19,7 @@ from elodie import constants
from elodie import geolocation
from elodie import log
from elodie.compatability import _decode
from elodie.config import load_config
from elodie.filesystem import FileSystem
from elodie.localstorage import Db
from elodie.media.base import Base, get_all_subclasses
@ -97,8 +98,10 @@ def _batch(debug):
help='Import the file even if it\'s already been imported.')
@click.option('--debug', default=False, is_flag=True,
help='Override the value in constants.py with True.')
@click.option('--exclude-regex', default=set(), multiple=True,
help='Regular expression for directories or files to exclude.')
@click.argument('paths', nargs=-1, type=click.Path())
def _import(destination, source, file, album_from_folder, trash, allow_duplicates, debug, paths):
def _import(destination, source, file, album_from_folder, trash, allow_duplicates, debug, exclude_regex, paths):
"""Import files or directories by reading their EXIF and organizing them accordingly.
"""
constants.debug = debug
@ -115,12 +118,22 @@ def _import(destination, source, file, album_from_folder, trash, allow_duplicate
paths.add(source)
if file:
paths.add(file)
# if no exclude list was passed in we check if there's a config
if len(exclude_regex) == 0:
config = load_config()
if 'Exclusions' in config:
exclude_regex = [value for key, value in config.items('Exclusions')]
exclude_regex_list = set(exclude_regex)
for path in paths:
path = os.path.expanduser(path)
if os.path.isdir(path):
files.update(FILESYSTEM.get_all_files(path, None))
files.update(FILESYSTEM.get_all_files(path, None, exclude_regex_list))
else:
files.add(path)
if not FILESYSTEM.should_exclude(path, exclude_regex_list, True):
files.add(path)
for current_file in files:
dest_path = import_file(current_file, destination, album_from_folder,

View File

@ -85,7 +85,7 @@ class FileSystem(object):
return False
def get_all_files(self, path, extensions=None):
def get_all_files(self, path, extensions=None, exclude_regex_list=set()):
"""Recursively get all files which match a path and extension.
:param str path string: Path to start recursive file listing
@ -99,11 +99,19 @@ class FileSystem(object):
for cls in subclasses:
extensions.update(cls.extensions)
# Create a list of compiled regular expressions to match against the file path
compiled_regex_list = [re.compile(regex) for regex in exclude_regex_list]
for dirname, dirnames, filenames in os.walk(path):
for filename in filenames:
# If file extension is in `extensions` then append to the list
if os.path.splitext(filename)[1][1:].lower() in extensions:
yield os.path.join(dirname, filename)
# If file extension is in `extensions`
# And if file path is not in exclude regexes
# Then append to the list
filename_path = os.path.join(dirname, filename)
if (
os.path.splitext(filename)[1][1:].lower() in extensions and
not self.should_exclude(filename_path, compiled_regex_list, False)
):
yield filename_path
def get_current_directory(self):
"""Get the current working directory.
@ -625,3 +633,15 @@ class FileSystem(object):
# assume local time zone.
date_taken_in_seconds = time.mktime(date_taken)
os.utime(file_path, (time.time(), (date_taken_in_seconds)))
def should_exclude(self, path, regex_list=None, needs_compiled=False):
    """Decide whether ``path`` matches any of the exclusion regexes.

    :param str path: File or directory path to test.
    :param regex_list: Iterable of exclusion patterns. Entries are expected
        to be compiled pattern objects unless ``needs_compiled`` is True, in
        which case they are regex strings. ``None`` or an empty collection
        means nothing is excluded.
    :param bool needs_compiled: When True, every entry of ``regex_list`` is
        passed through ``re.compile`` before matching.
    :returns: bool -- True if at least one pattern is found anywhere in
        ``path`` (``search`` semantics, so patterns are not anchored),
        otherwise False.
    """
    # Nothing to match against, so nothing is excluded. Using None as the
    # default avoids sharing one mutable set across every call.
    if not regex_list:
        return False

    if needs_compiled:
        regex_list = [re.compile(regex) for regex in regex_list]

    return any(regex.search(path) for regex in regex_list)

View File

@ -282,7 +282,7 @@ def test_import_invalid_file_exit_code():
helper.reset_dbs()
runner = CliRunner()
result = runner.invoke(elodie._import, ['--destination', folder_destination, origin_invalid, origin_valid])
result = runner.invoke(elodie._import, ['--destination', folder_destination, '--allow-duplicates', origin_invalid, origin_valid])
helper.restore_dbs()
shutil.rmtree(folder)
@ -290,6 +290,126 @@ def test_import_invalid_file_exit_code():
assert result.exit_code == 1, result.exit_code
def test_import_file_with_single_exclude():
    """A single file is skipped when one ``--exclude-regex`` matches its path.

    The pattern is the first five characters of the source file's own path,
    so it is expected to match; the import should then report neither a
    success nor an error.
    """
    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--exclude-regex', origin_valid[0:5],
        '--allow-duplicates',
        origin_valid,
    ])

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 0' in result.output, result.output
    assert 'Error 0' in result.output, result.output
def test_import_file_with_multiple_exclude():
    """A single file is skipped when any one of several exclusions matches.

    Two ``--exclude-regex`` values are passed: one that does not occur in
    the path and one taken from the first five characters of the source
    path. The import should report neither a success nor an error.
    """
    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--exclude-regex', 'does not exist in path',
        '--exclude-regex', origin_valid[0:5],
        '--allow-duplicates',
        origin_valid,
    ])

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 0' in result.output, result.output
    assert 'Error 0' in result.output, result.output
def test_import_file_with_non_matching_exclude():
    """A single file is still imported when the exclusion does not match.

    The only ``--exclude-regex`` value does not occur in the source path, so
    the import should report exactly one success and no errors.
    """
    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--exclude-regex', 'does not exist in path',
        '--allow-duplicates',
        origin_valid,
    ])

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 1' in result.output, result.output
    assert 'Error 0' in result.output, result.output
def test_import_directory_with_matching_exclude():
    """Files under a ``--source`` directory are skipped when excluded.

    The pattern is characters 1-4 of the source folder path, so it is
    expected to match every file below that folder; the import should
    report neither a success nor an error.
    """
    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--source', folder,
        '--exclude-regex', folder[1:5],
        '--allow-duplicates',
    ])

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 0' in result.output, result.output
    assert 'Error 0' in result.output, result.output
def test_import_directory_with_non_matching_exclude():
    """Files under a ``--source`` directory are imported when not excluded.

    The only ``--exclude-regex`` value is ``non-matching``, which is not
    expected to occur in the working folder path, so the import should
    report exactly one success and no errors.
    """
    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--source', folder,
        '--exclude-regex', 'non-matching',
        '--allow-duplicates',
    ])

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 1' in result.output, result.output
    assert 'Error 0' in result.output, result.output
@mock.patch('elodie.config.config_file', '%s/config.ini-import-file-with-single-config-exclude' % gettempdir())
def test_import_file_with_single_config_exclude():
    """A file is skipped when a single ``[Exclusions]`` config entry matches.

    No ``--exclude-regex`` is passed, so the exclusion has to come from the
    patched config file. Its only entry, ``valid``, occurs in the source
    file name ``valid.jpg``; the import should report neither a success nor
    an error.
    """
    config_string = """
[Exclusions]
name1=valid
    """
    with open('%s/config.ini-import-file-with-single-config-exclude' % gettempdir(), 'w') as f:
        f.write(config_string)

    # Drop any config object already stored on load_config so the patched
    # file is read (presumably load_config memoizes on this attribute).
    if hasattr(load_config, 'config'):
        del load_config.config

    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--allow-duplicates',
        origin_valid,
        '--debug',
    ])

    # Do not let this test's config leak into tests that run afterwards.
    if hasattr(load_config, 'config'):
        del load_config.config

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 0' in result.output, result.output
    assert 'Error 0' in result.output, result.output
@mock.patch('elodie.config.config_file', '%s/config.ini-import-file-with-multiple-config-exclude' % gettempdir())
def test_import_file_with_multiple_config_exclude():
    """A file is skipped when one of several ``[Exclusions]`` entries matches.

    No ``--exclude-regex`` is passed, so the exclusions have to come from
    the patched config file. Of its two entries only ``valid`` occurs in the
    source file name ``valid.jpg``; the import should report neither a
    success nor an error.
    """
    config_string = """
[Exclusions]
name1=notvalidatall
name2=valid
    """
    with open('%s/config.ini-import-file-with-multiple-config-exclude' % gettempdir(), 'w') as f:
        f.write(config_string)

    # Drop any config object already stored on load_config so the patched
    # file is read (presumably load_config memoizes on this attribute).
    if hasattr(load_config, 'config'):
        del load_config.config

    temporary_folder, folder = helper.create_working_folder()
    temporary_folder_destination, folder_destination = helper.create_working_folder()

    origin_valid = '%s/valid.jpg' % folder
    shutil.copyfile(helper.get_file('plain.jpg'), origin_valid)

    runner = CliRunner()
    result = runner.invoke(elodie._import, [
        '--destination', folder_destination,
        '--allow-duplicates',
        origin_valid,
        '--debug',
    ])

    # Do not let this test's config leak into tests that run afterwards.
    if hasattr(load_config, 'config'):
        del load_config.config

    # Remove the working folders before asserting so that neither a passing
    # nor a failing run leaves them behind; cleanup itself is best-effort.
    shutil.rmtree(folder, ignore_errors=True)
    shutil.rmtree(folder_destination, ignore_errors=True)

    assert 'Success 0' in result.output, result.output
    assert 'Error 0' in result.output, result.output
def test_update_location_on_audio():
temporary_folder, folder = helper.create_working_folder()
temporary_folder_destination, folder_destination = helper.create_working_folder()
@ -637,11 +757,12 @@ def test_cli_batch_plugin_googlephotos():
gp.after('', '', final_file_path_1, sample_metadata_1)
gp.after('', '', final_file_path_2, sample_metadata_1)
runner = CliRunner()
result = runner.invoke(elodie._batch)
if hasattr(load_config, 'config'):
del load_config.config
runner = CliRunner()
result = runner.invoke(elodie._batch)
assert "elodie/elodie/tests/files/plain.jpg uploaded successfully.\"}\n" in result.output, result.output
assert "elodie/elodie/tests/files/no-exif.jpg uploaded successfully.\"}\n" in result.output, result.output

View File

@ -1126,6 +1126,36 @@ def test_set_utime_without_exif_date():
assert final_stat.st_mtime == time.mktime(metadata_final['date_taken']), (final_stat.st_mtime, time.mktime(metadata_final['date_taken']))
assert initial_checksum == final_checksum
def test_should_exclude_with_no_exclude_arg():
    """With no regex list supplied, should_exclude reports False."""
    fs = FileSystem()
    excluded = fs.should_exclude('/some/path')

    assert excluded == False, excluded
def test_should_exclude_with_non_matching_regex():
    """A single compiled pattern absent from the path does not exclude it."""
    fs = FileSystem()
    patterns = {re.compile('foobar')}
    excluded = fs.should_exclude('/some/path', patterns)

    assert excluded == False, excluded
def test_should_exclude_with_matching_regex():
    """A single compiled pattern found in the path excludes it."""
    fs = FileSystem()
    patterns = {re.compile('some')}
    excluded = fs.should_exclude('/some/path', patterns)

    assert excluded == True, excluded
def test_should_not_exclude_with_multiple_with_non_matching_regex():
    """Several compiled patterns, none found in the path, do not exclude it."""
    fs = FileSystem()
    patterns = {re.compile('foobar'), re.compile('dne')}
    excluded = fs.should_exclude('/some/path', patterns)

    assert excluded == False, excluded
def test_should_exclude_with_multiple_with_one_matching_regex():
    """One matching pattern among several is enough to exclude the path."""
    fs = FileSystem()
    patterns = {re.compile('foobar'), re.compile('some')}
    excluded = fs.should_exclude('/some/path', patterns)

    assert excluded == True, excluded
def test_should_exclude_with_complex_matching_regex():
    """A pattern with wildcards and an end anchor excludes a long real-world path."""
    filesystem = FileSystem()
    # Raw string: `\.` is a regex escape, not a Python string escape. In a
    # plain literal it is an invalid escape sequence that newer Python
    # versions warn about; the pattern text itself is unchanged.
    result = filesystem.should_exclude(
        '/var/folders/j9/h192v5v95gd_fhpv63qzyd1400d9ct/T/T497XPQH2R/UATR2GZZTX/2016-04-Apr/London/2016-04-07_11-15-26-valid-sample-title.txt',
        {re.compile(r'London.*\.txt$')}
    )

    assert result == True, result
@mock.patch('elodie.config.config_file', '%s/config.ini-does-not-exist' % gettempdir())
def test_get_folder_path_definition_default():
if hasattr(load_config, 'config'):