ordigi/elodie/filesystem.py

294 lines
9.9 KiB
Python
Raw Normal View History

"""
General file system methods.

.. moduleauthor:: Jaisen Mathai <jaisen@jmathai.com>
"""
2016-03-12 20:09:28 +01:00
from __future__ import print_function
from builtins import object
2016-01-08 23:49:06 +01:00
2015-10-02 09:20:27 +02:00
import os
import re
import shutil
2015-10-02 09:20:27 +02:00
import time
2015-10-08 11:22:30 +02:00
from elodie import geolocation
from elodie import constants
from elodie.localstorage import Db
2016-10-09 23:12:16 +02:00
from elodie.media.media import Media
2015-10-08 11:22:30 +02:00
2016-01-08 23:49:06 +01:00
class FileSystem(object):
    """A class for interacting with the file system."""

    def create_directory(self, directory_path):
        """Create a directory if it does not already exist.

        :param str directory_path: A fully qualified path of the directory
            to create.
        :returns: bool -- True if the directory exists once this method
            returns, False otherwise.
        """
        try:
            os.makedirs(directory_path)
        except OSError:
            # OSError is raised both when the path already exists and for
            # failures such as missing permissions. Only report success when
            # a directory is really there (an existing regular file is not).
            return os.path.isdir(directory_path)

        return True

    def delete_directory_if_empty(self, directory_path):
        """Delete a directory only if it's empty.

        Instead of listing the directory first we simply attempt the removal
        and treat the resulting OSError as "not deleted".

        :param str directory_path: A fully qualified path of the directory
            to delete.
        :returns: bool -- True if the directory was removed, False otherwise.
        """
        try:
            os.rmdir(directory_path)
        except OSError:
            return False

        return True

    def get_all_files(self, path, extensions=None):
        """Recursively get all files which match a path and extension.

        File names are lower cased before being compared, the extensions are
        used as given.

        :param str path: Path to start recursive file listing
        :param tuple(str) extensions: File extensions to include (whitelist).
            A list or set is accepted as well. None includes every file.
        :returns: list(str) -- paths of all matching files.
        """
        # str.endswith() only accepts a string or a tuple of strings.
        if isinstance(extensions, (list, set, frozenset)):
            extensions = tuple(extensions)

        files = []
        for dirname, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                if(
                    extensions is None or
                    filename.lower().endswith(extensions)
                ):
                    files.append(os.path.join(dirname, filename))

        return files

    def get_current_directory(self):
        """Get the current working directory.

        :returns: str
        """
        return os.getcwd()

    def get_file_name(self, media):
        """Generate file name for a photo or video using its metadata.

        We use an ISO8601-like format for the file name prefix. Instead of
        colons as the separator for hours, minutes and seconds we use a hyphen.
        https://en.wikipedia.org/wiki/ISO_8601#General_principles

        The metadata is expected to provide `base_name`, `extension` and a
        `date_taken` value usable by `time.strftime`; `title` is optional.

        :param media: A Photo or Video instance
        :type media: :class:`~elodie.media.photo.Photo` or
            :class:`~elodie.media.video.Video`
        :returns: str or None for non-photo or non-videos
        """
        if not media.is_valid():
            return None

        metadata = media.get_metadata()
        if metadata is None:
            return None

        # We want to remove the date prefix we add to the name.
        # This helps when re-running the program on files which were already
        # processed.
        base_name = re.sub(
            r'^\d{4}-\d{2}-\d{2}_\d{2}-\d{2}-\d{2}-',
            '',
            metadata['base_name']
        )
        if len(base_name) == 0:
            base_name = metadata['base_name']

        # If the file has a title we use that in the file name
        # (i.e. img_1234-my-favorite-photo.jpg)
        title = metadata['title'] if 'title' in metadata else None
        if title is not None and len(title) > 0:
            # The final name is lower cased, so compare lower cased values.
            # Otherwise a title already present in a previously generated
            # name is not recognized and gets appended a second time.
            title_sanitized = re.sub(r'\W+', '-', title.strip()).lower()
            base_name = base_name.lower().replace(
                '-%s' % title_sanitized,
                ''
            )
            base_name = '%s-%s' % (base_name, title_sanitized)

        file_name = '%s-%s.%s' % (
            time.strftime(
                '%Y-%m-%d_%H-%M-%S',
                metadata['date_taken']
            ),
            base_name,
            metadata['extension'])

        return file_name.lower()

    def get_folder_name_by_date(self, time_obj):
        """Get date based folder name.

        The name is formatted as `%Y-%m-%b`; `%b` is the abbreviated month
        name of the current locale.

        :param time time_obj: Time object to be used to determine folder name.
        :returns: str
        """
        return time.strftime('%Y-%m-%b', time_obj)

    def get_folder_path(self, metadata):
        """Get folder path by various parameters.

        The path starts with the date folder (when `date_taken` is set),
        followed by the album or, when there is no album but coordinates are
        available, the place name returned by `geolocation.place_name`.

        :param dict metadata: Metadata providing the keys `date_taken`,
            `album`, `latitude` and `longitude`.
        :returns: str
        """
        path = []
        if metadata['date_taken'] is not None:
            path.append(self.get_folder_name_by_date(metadata['date_taken']))

        if metadata['album'] is not None:
            path.append(metadata['album'])
        elif(
            metadata['latitude'] is not None and
            metadata['longitude'] is not None
        ):
            place_name = geolocation.place_name(
                metadata['latitude'],
                metadata['longitude']
            )
            if place_name is not None:
                path.append(place_name)

        # if we don't have a 2nd level directory we use 'Unknown Location'
        if len(path) < 2:
            path.append('Unknown Location')

        return os.path.join(*path)

    def process_file(self, _file, destination, media, **kwargs):
        """Copy or move a media file into its folder below `destination`.

        The target folder comes from `get_folder_path` and the target name
        from `get_file_name`. The file's checksum is recorded through `Db`
        once the file has been transferred.

        :param str _file: Path of the file to process.
        :param str destination: Root directory the file is placed under.
        :param media: Media instance describing `_file`.
        :param bool move: Keyword argument. Move the file instead of copying
            it. Defaults to False.
        :param bool allowDuplicate: Keyword argument. Process the file even
            if its checksum is already known. Defaults to False.
        :returns: str path of the new file, or None if the file was skipped.
        """
        move = kwargs.get('move', False)
        allow_duplicate = kwargs.get('allowDuplicate', False)

        if not media.is_valid():
            print('%s is not a valid media file. Skipping...' % _file)
            return

        metadata = media.get_metadata()
        if metadata is None:
            # Without metadata neither folder nor file name can be derived.
            if constants.debug is True:
                print('Could not get metadata for %s. Skipping...' % _file)
            return

        directory_name = self.get_folder_path(metadata)
        dest_directory = os.path.join(destination, directory_name)

        file_name = self.get_file_name(media)
        if file_name is None:
            if constants.debug is True:
                print('Could not get file name for %s. Skipping...' % _file)
            return

        dest_path = os.path.join(dest_directory, file_name)

        db = Db()
        checksum = db.checksum(_file)
        if checksum is None:
            if constants.debug is True:
                print('Could not get checksum for %s. Skipping...' % _file)
            return

        # If duplicates are not allowed and this hash exists in the db then we
        # return
        if allow_duplicate is False and db.check_hash(checksum) is True:
            if constants.debug is True:
                print('%s already exists at %s. Skipping...' % (
                    _file,
                    db.get_hash(checksum)
                ))
            return

        if not self.create_directory(dest_directory):
            print('Could not create directory %s. Skipping...' % dest_directory)
            return

        if move is True:
            # Remember the timestamps so they can be restored after the move.
            source_stat = os.stat(_file)
            shutil.move(_file, dest_path)
            os.utime(dest_path, (source_stat.st_atime, source_stat.st_mtime))
        else:
            # Do not use copy2(), will have an issue when copying to a
            # network/mounted drive. Using copy and a manual
            # set_date_from_filename gets the job done.
            shutil.copy(_file, dest_path)
            self.set_date_from_filename(dest_path)

        db.add_hash(checksum, dest_path)
        db.update_hash_db()

        return dest_path

    def set_date_from_filename(self, file):
        """Set the modification time on the file based on the file name.

        Noop if the file name does not contain a valid date in the format
        YYYY-MM-DD_HH-MM-SS (i.e. 2016-10-21_22-01-31-IMG_0001.JPG).

        :param str file: Path of the file to update.
        """
        file_name = os.path.basename(file)
        date_match = re.search(
            r'(\d{4})-(\d{2})-(\d{2})_(\d{2})-(\d{2})-(\d{2})',
            file_name
        )
        if date_match is None:
            return

        try:
            date_taken = time.strptime(
                '{}-{}-{} {}:{}:{}'.format(*date_match.groups()),
                '%Y-%m-%d %H:%M:%S'
            )
            modified = time.mktime(date_taken)
        except (ValueError, OverflowError):
            # The digits matched the pattern but are not a real date
            # (i.e. month 13), so there is nothing to apply.
            return

        os.utime(file, (time.time(), modified))

    def set_date_from_path_video(self, video):
        """Set the modification time on the file based on the file path.

        Noop if the path doesn't match the format YYYY-MM/DD-IMG_0001.JPG.

        :param elodie.media.video.Video video: An instance of Video.
        """
        video_file_path = video.get_file_path()

        # If the directory matches we get back a match with
        # groups() = (year, month)
        directory = os.path.dirname(video_file_path)
        year_month_match = re.search(r'(\d{4})-(\d{2})', directory)
        if year_month_match is None:
            return
        (year, month) = year_month_match.groups()

        # The day is optional and taken from the start of the file name.
        day_match = re.search(
            r'^(\d{2})',
            os.path.basename(video_file_path)
        )

        try:
            if day_match is not None:
                date_taken = time.strptime(
                    '{}-{}-{}'.format(year, month, day_match.group(1)),
                    '%Y-%m-%d'
                )
            else:
                date_taken = time.strptime(
                    '{}-{}'.format(year, month),
                    '%Y-%m'
                )
            modified = time.mktime(date_taken)
        except (ValueError, OverflowError):
            # The digits found in the path are not a real date.
            return

        os.utime(video_file_path, (time.time(), modified))