Implement independent directory tree hashing without tar

This patch implements the os.walk()-based directory hashing mentioned
in 3fb80a43. Rough benchmarking shows that while its performance is not
terrible, the GNU tar-based solution almost always outperforms it.
Therefore it is used only as a fallback implementation when GNU tar is
not found while generating an identicon from a directory tree.
This commit is contained in:
2022-02-14 22:20:29 +01:00
parent 3179be1f76
commit bde2b0067e
2 changed files with 126 additions and 42 deletions

44
main.py
View File

@ -1,16 +1,13 @@
#!/usr/bin/env python3
# pylint: disable=consider-using-with
# (this code contains some IO stream juggling)
from sys import stdin
from sys import exit as sysexit
from io import BytesIO
from subprocess import Popen, PIPE
from pathlib import Path
import click
from blake3 import blake3
from identicon import Identicon
from stream import get_deterministic_stream, ClosableStream
DIGEST_SIZE = 20
@ -52,49 +49,12 @@ def get_input_stream(kwargs):
if (text := kwargs['text']) is not None:
stream = ClosableStream(BytesIO(text.encode()))
elif file := kwargs['file']:
stream = get_deterministic_stream(file)
stream = get_deterministic_stream(file, BUF_SIZE)
elif not stdin.isatty():
stream = ClosableStream(stdin.buffer)
return stream
class ClosableStream:
    """Pair a readable stream with the callable that releases it.

    The wrapped object is exposed as ``stream``; ``close()`` invokes the
    supplied ``close_func``. When no ``close_func`` is given, ``close()``
    is a no-op (useful for streams this code does not own).

    Instances can also be used as context managers, in which case
    ``close()`` is called automatically when the ``with`` block exits.
    """

    def __init__(self, stream, close_func=None):
        """Store ``stream`` and the optional zero-argument ``close_func``."""
        self.stream = stream
        self._close_func = close_func or (lambda: None)

    def close(self):
        """Run the close callable and return whatever it returns."""
        return self._close_func()

    def __enter__(self):
        """Return this wrapper so ``with ... as s`` gives access to ``s.stream``."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the stream on block exit; never suppress exceptions."""
        self.close()
        return False
def get_deterministic_stream(file):
    """Return a ``ClosableStream`` over the contents of ``file``.

    A directory is delegated to ``get_deterministic_tar_stream``; any
    other path is opened in binary mode and the returned wrapper's
    ``close()`` closes that file handle.
    """
    if not Path(file).is_dir():
        # The handle must outlive this function, so it is closed through
        # the wrapper rather than with a ``with`` block.
        handle = open(file, 'rb')
        return ClosableStream(handle, handle.close)
    return get_deterministic_tar_stream(file)
def get_deterministic_tar_stream(file):
    """Stream a tar archive of the directory ``file`` via the ``tar`` command.

    The options sort entries by name and override or drop per-file
    metadata (mtime, owner, group, mode, ACLs, xattrs, SELinux context)
    so that the archive bytes depend only on the tree's names and
    contents.

    Returns:
        A ``ClosableStream`` whose ``stream`` is tar's stdout pipe.

    Raises:
        RuntimeError: from the returned object's ``close()`` if tar exits
            with a non-zero status; the message carries tar's stderr.
    """
    cmd = (
        'tar',
        # tar blocks are 512 bytes, so this makes one tar record
        # BUF_SIZE bytes (assuming BUF_SIZE is a multiple of 512).
        f'--blocking-factor={BUF_SIZE//512}',
        '--sort=name',
        '--mtime=UTC 1970-01-01',
        '--owner=root:0', '--group=root:0', '--numeric-owner',
        '--mode=a=rwX', '--no-acls', '--no-xattrs', '--no-selinux',
        '-C', file, '-cf', '-', '.'
    )
    p = Popen(cmd, stdout=PIPE, stderr=PIPE)

    def wait_and_check_exitcode():
        # communicate() drains whatever is left on both pipes, closes
        # them and reaps the process. A bare wait() can deadlock once
        # tar blocks on a full stdout/stderr pipe buffer.
        _, err = p.communicate()
        if p.returncode != 0:
            # Decode leniently so undecodable bytes in tar's output
            # cannot mask the real failure with a UnicodeDecodeError.
            message = err.decode(errors='replace')
            raise RuntimeError(f'Tar failed: {message}')

    return ClosableStream(p.stdout, wait_and_check_exitcode)
def print_usage_and_exit():
command = main
with click.Context(command) as ctx: