A script to remove whitespace and other funky characters from filenames.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.1 KiB

from os.path import basename, dirname
from os.path import join as joinpath
from re import sub
from collections.abc import Iterable
from unidecode import unidecode
class Normalisename:
    """Callable that normalises the last component of file paths.

    Whitespace runs in the file name are replaced by ``separator``, the name
    is transliterated with ``unidecode``, and every character that is neither
    alphanumeric nor whitelisted is dropped.  When the resulting path differs
    from the input, ``operation(old_path, new_path)`` is invoked; what that
    callable does (rename, log, ...) is entirely up to the caller.

    Args:
        separator: String substituted for each run of whitespace.
        whitelist: Iterable of extra characters allowed in file names.
        operation: Optional callable taking ``(old_path, new_path)``.
            Defaults to :meth:`noop`.

    Raises:
        ValueError: If ``operation`` is given but is not callable.
    """

    def __init__(self, separator, whitelist, operation=None):
        self._separator = separator
        self._whitelist = set(whitelist)
        self._operation = None
        # Assign through the property so the callable check applies here too.
        self.operation = operation or self.noop

    @property
    def operation(self):
        """Callable invoked as ``operation(old_path, new_path)`` on change."""
        return self._operation

    @operation.setter
    def operation(self, value):
        if not callable(value):
            raise ValueError('Operation must be callable!')
        self._operation = value

    @staticmethod
    def noop(*_):
        """Accept any positional arguments and do nothing."""

    @property
    def separator(self):
        """String that replaces each run of whitespace."""
        return self._separator

    @property
    def whitelist(self):
        """Set of allowed non-alphanumeric characters, separator included."""
        # Union with the string itself so that every character of a
        # multi-character separator survives the per-character filter.
        return self._whitelist.union(self.separator)

    def __call__(self, path_or_paths):
        """Normalise a single path or every path of an iterable.

        Returns:
            The normalised path for a ``str`` argument, otherwise a list of
            normalised paths.

        Raises:
            ValueError: If the argument is neither ``str`` nor iterable.
        """
        # ``str`` is itself iterable, so it has to be tested first.
        if isinstance(path_or_paths, str):
            return self.normalise(path_or_paths)
        if isinstance(path_or_paths, Iterable):
            return self.normalise_all(path_or_paths)
        raise ValueError('Argument must be str or Iterable[str]!')

    def normalise_all(self, paths):
        """Return a list with :meth:`normalise` applied to each path."""
        return [self.normalise(path) for path in paths]

    def normalise(self, path):
        """Normalise the final component of ``path`` and return the new path.

        ``self.operation(path, normalpath)`` is called only when the
        normalised path differs from the (slash-stripped) input path.
        """
        path = self.strip_trailing_slash(path)
        directory = dirname(path)
        filename = basename(path)
        normalpath = joinpath(directory, self.normalname(filename))
        if path != normalpath:
            self.operation(path, normalpath)  # pylint: disable=not-callable
        return normalpath

    @staticmethod
    def strip_trailing_slash(path):
        """Return ``path`` without one trailing ``'/'``.

        The empty string and the bare root ``'/'`` are returned unchanged.
        """
        if len(path) > 1 and path.endswith('/'):
            return path[:-1]
        return path

    def normalname(self, filename):
        """Return the normalised form of a bare file name."""
        # Transliteration may itself emit whitespace or punctuation, so the
        # filter runs again afterwards; this keeps the result idempotent,
        # which check_normal relies on.
        return self._sanitise(unidecode(self._sanitise(filename)))

    def _sanitise(self, text):
        """Replace whitespace runs and drop characters that are not allowed."""
        allowed = self.whitelist  # built once instead of once per character
        replaced = sub(r'\s+', self.separator, text)
        return ''.join(
            ch for ch in replaced
            if ch.isalnum() or ch in allowed
        )

    def check_normal(self, path):
        """Return True when the file name of ``path`` is already normalised."""
        # Strip the slash first so 'dir/' is judged by 'dir', as in normalise.
        filename = basename(self.strip_trailing_slash(path))
        return filename == self.normalname(filename)