Implement independent directory tree hashing without tar
This patch implements the os.walk()-based directory hashing mentioned
in 3fb80a43. Rough benchmarking shows that, while its performance is not
terrible, the GNU tar-based solution almost always outperforms it.
The os.walk() implementation is therefore used as a fallback in case
GNU tar is not found when generating an identicon from a directory tree.
This commit is contained in:
44
main.py
44
main.py
@@ -1,16 +1,13 @@
|
||||
#!/usr/bin/env python3
|
||||
# pylint: disable=consider-using-with
|
||||
# (this code contains some IO stream juggling)
|
||||
from sys import stdin
|
||||
from sys import exit as sysexit
|
||||
from io import BytesIO
|
||||
from subprocess import Popen, PIPE
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
from blake3 import blake3
|
||||
|
||||
from identicon import Identicon
|
||||
from stream import get_deterministic_stream, ClosableStream
|
||||
|
||||
|
||||
DIGEST_SIZE = 20
|
||||
@@ -52,49 +49,12 @@ def get_input_stream(kwargs):
|
||||
if (text := kwargs['text']) is not None:
|
||||
stream = ClosableStream(BytesIO(text.encode()))
|
||||
elif file := kwargs['file']:
|
||||
stream = get_deterministic_stream(file)
|
||||
stream = get_deterministic_stream(file, BUF_SIZE)
|
||||
elif not stdin.isatty():
|
||||
stream = ClosableStream(stdin.buffer)
|
||||
return stream
|
||||
|
||||
|
||||
class ClosableStream:
    """Pair a readable stream with the callable that releases it.

    Attributes:
        stream: The wrapped stream object, exposed as-is for reading.

    The wrapper can also be used as a context manager, in which case
    ``close()`` is invoked on exit even if the body raises.
    """

    def __init__(self, stream, close_func=None):
        """Store *stream* and the cleanup callable.

        Args:
            stream: The object consumers read from.
            close_func: Zero-argument callable run by ``close()``. When
                omitted (or falsy), closing is a no-op.
        """
        self.stream = stream
        self._close_func = close_func or (lambda: None)

    def close(self):
        """Run the cleanup callable and return whatever it returns."""
        return self._close_func()

    def __enter__(self):
        """Return the wrapper itself so ``with ... as s`` yields it."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Run the cleanup callable; never suppress the body's exception."""
        self.close()
        return False
|
||||
|
||||
|
||||
def get_deterministic_stream(file):
    """Open *file* as a ClosableStream of bytes.

    A regular path is opened in binary mode and the wrapper's close hook
    is the file object's own ``close``. A directory is delegated to
    ``get_deterministic_tar_stream``.
    """
    if not Path(file).is_dir():
        # The handle deliberately outlives this function; the caller
        # releases it through the returned wrapper.
        handle = open(file, 'rb')
        return ClosableStream(handle, handle.close)

    return get_deterministic_tar_stream(file)
|
||||
|
||||
|
||||
def get_deterministic_tar_stream(file):
    """Stream a tar archive of the directory *file* from a ``tar`` child.

    The archive is written to the child's stdout, which is exposed as
    the ``stream`` of the returned ClosableStream. The options passed to
    tar fix entry order, mtime, ownership and mode and disable ACLs,
    xattrs and SELinux labels (presumably so the byte stream depends
    only on names and contents; see the GNU tar manual).

    Args:
        file: Path of the directory to archive (passed to ``tar -C``).

    Returns:
        ClosableStream whose ``close()`` waits for tar to exit.

    Raises:
        RuntimeError: From ``close()`` when tar exits with a non-zero
            status; the message carries tar's stderr output.

    NOTE(review): ``close()`` waits for the child, so the consumer is
    expected to have read the stream to EOF first; otherwise tar may
    still be blocked writing to stdout.
    """
    # Function-scope import so the block stands alone without touching
    # the file's import section.
    from tempfile import TemporaryFile

    cmd = (
        'tar',
        f'--blocking-factor={BUF_SIZE//512}',
        '--sort=name',
        '--mtime=UTC 1970-01-01',
        '--owner=root:0', '--group=root:0', '--numeric-owner',
        '--mode=a=rwX', '--no-acls', '--no-xattrs', '--no-selinux',
        '-C', file, '-cf', '-', '.'
    )

    # stderr goes to an anonymous temp file rather than a pipe: nobody
    # reads stderr while stdout is being streamed, so a pipe could fill
    # up and stall tar part-way through the archive.
    stderr_file = TemporaryFile()
    try:
        p = Popen(cmd, stdout=PIPE, stderr=stderr_file)
    except BaseException:
        # Spawning failed (e.g. tar is not installed): release the temp
        # file, then let the original error propagate unchanged.
        stderr_file.close()
        raise

    def wait_and_check_exitcode():
        """Reap tar, report failure, and always release both handles."""
        try:
            exit_code = p.wait()
            if exit_code != 0:
                stderr_file.seek(0)
                # errors='replace' keeps a non-UTF-8 diagnostic from
                # masking the real failure with a UnicodeDecodeError.
                message = stderr_file.read().decode(errors='replace')
                raise RuntimeError(f'Tar failed: {message}')
        finally:
            p.stdout.close()
            stderr_file.close()

    return ClosableStream(p.stdout, wait_and_check_exitcode)
|
||||
|
||||
|
||||
def print_usage_and_exit():
|
||||
command = main
|
||||
with click.Context(command) as ctx:
|
||||
|
||||
Reference in New Issue
Block a user