A script to remove whitespace and other funky characters from filenames.
75 lines
2.1 KiB

from os.path import basename, dirname
from os.path import join as joinpath
from re import sub
from collections.abc import Iterable
from unidecode import unidecode
class Normalisename:
    """Callable that normalises the final component of one or more paths.

    Runs of whitespace in the filename are replaced by ``separator``; every
    remaining character that is neither alphanumeric nor whitelisted is
    dropped, and the result is passed through ``unidecode``.  Whenever the
    normalised path differs from the input path, ``operation`` is called
    with ``(path, normalpath)``.
    """

    def __init__(self, separator, whitelist, operation=None):
        """Store the configuration.

        Args:
            separator: String substituted for each run of whitespace.
            whitelist: Iterable of extra characters allowed in filenames.
            operation: Callable invoked as ``operation(old, new)`` when a
                path changes.  A falsy value selects :meth:`noop`.

        Raises:
            ValueError: If ``operation`` is truthy but not callable.
        """
        self._separator = separator
        self._whitelist = set(whitelist)
        self._operation = None
        # Assign through the property so the callable check is applied.
        self.operation = operation or self.noop

    @property
    def operation(self):
        """Callable invoked with ``(path, normalpath)`` when they differ."""
        return self._operation

    @operation.setter
    def operation(self, value):
        if not callable(value):
            raise ValueError('Operation must be callable!')
        self._operation = value

    @staticmethod
    def noop(*_):
        """Accept any positional arguments and do nothing."""

    @property
    def separator(self):
        """String that replaces each run of whitespace."""
        return self._separator

    @property
    def whitelist(self):
        """Set of allowed non-alphanumeric characters, separator included."""
        return self._whitelist.union({self.separator})

    def __call__(self, path_or_paths):
        """Normalise a single path or every path of an iterable.

        Args:
            path_or_paths: A ``str`` path or an iterable of ``str`` paths.

        Returns:
            The normalised path for a ``str`` argument, otherwise a list of
            normalised paths.

        Raises:
            ValueError: If the argument is neither a ``str`` nor iterable.
        """
        # ``str`` is itself iterable, so it has to be tested first.
        if isinstance(path_or_paths, str):
            return self.normalise(path_or_paths)
        if isinstance(path_or_paths, Iterable):
            return self.normalise_all(path_or_paths)
        raise ValueError('Argument must be str or Iterable[str]!')

    def normalise_all(self, paths):
        """Return a list with :meth:`normalise` applied to each path."""
        return [self.normalise(path) for path in paths]

    def normalise(self, path):
        """Normalise the last component of ``path`` and return the new path.

        One trailing slash is removed first, so the last component of a
        path such as ``'a/b/'`` is ``'b'``.  ``operation`` is called with
        the stripped path and the normalised path only when they differ.
        """
        path = self.strip_trailing_slash(path)
        directory = dirname(path)
        filename = basename(path)
        normalpath = joinpath(directory, self.normalname(filename))
        if path != normalpath:
            self.operation(path, normalpath)  # pylint: disable=not-callable
        return normalpath

    @staticmethod
    def strip_trailing_slash(path):
        """Return ``path`` without one trailing ``'/'``, if it has one.

        ``endswith`` is used rather than indexing so that an empty string
        is returned unchanged instead of raising ``IndexError``.
        """
        if path.endswith('/'):
            path = path[:-1]
        return path

    def normalname(self, filename):
        """Return the normalised form of a bare filename."""
        # The whitelist property builds a new set on every access; fetch it
        # once instead of once per character.
        allowed = self.whitelist
        kept = ''.join(
            ch for ch in sub(r'\s+', self.separator, filename)
            if ch.isalnum() or ch in allowed
        )
        # NOTE(review): filtering happens before transliteration, so any
        # character unidecode emits is not re-checked against the
        # whitelist -- confirm this ordering is intended.
        return unidecode(kept)

    def check_normal(self, path):
        """Return True if the last component of ``path`` is already normal."""
        filename = basename(path)
        return filename == self.normalname(filename)