Source code for pathlib_mate.hashes

# -*- coding: utf-8 -*-

import hashlib

# Default number of bytes requested per ``read`` call when hashing a file
# (1 << 6 == 64). Used as the default ``chunk_size`` of the functions below.
DEFAULT_CHUNK_SIZE = 1 << 6


def get_text_fingerprint(text, hash_meth, encoding="utf-8"):  # pragma: no cover
    """
    Return the hexadecimal digest of a piece of text.

    :param text: the string to hash; it is encoded to bytes before hashing.
    :param hash_meth: a hash constructor such as ``hashlib.md5``; it is
        called with no arguments to create the hash object.
    :param encoding: encoding used to turn ``text`` into bytes,
        ``"utf-8"`` by default.
    :return: the result of ``hexdigest()`` on the hash object.
    """
    hasher = hash_meth()
    payload = text.encode(encoding)
    hasher.update(payload)
    return hasher.hexdigest()
def get_file_fingerprint(abspath, hash_meth, nbytes=0, chunk_size=DEFAULT_CHUNK_SIZE):
    """
    Return the hexadecimal digest of a file, or of its first ``nbytes`` bytes.

    The file is opened in binary mode and fed to the hash object in pieces
    of at most ``chunk_size`` bytes, so it is never loaded in one read.

    :param abspath: path of the file to hash; passed directly to ``open``.
    :param hash_meth: a hash constructor such as ``hashlib.md5``; it is
        called with no arguments to create the hash object.
    :param nbytes: hash only the first ``nbytes`` bytes of the file.
        ``0`` or ``None`` means hash the entire content. If the file is
        shorter than ``nbytes``, the whole file is hashed.
    :param chunk_size: maximum number of bytes requested per ``read`` call.
    :return: the result of ``hexdigest()`` on the hash object.
    :raises ValueError: if ``nbytes`` is negative or ``chunk_size`` is
        smaller than 1.
    """
    # ``None`` is treated as "no limit", the same as 0.
    if nbytes is None:
        nbytes = 0
    if nbytes < 0:
        # The message names the parameter that is actually being checked.
        raise ValueError("nbytes cannot smaller than 0")
    if chunk_size < 1:
        raise ValueError("chunk_size cannot smaller than 1")

    limited = bool(nbytes)
    remaining = nbytes

    m = hash_meth()
    with open(abspath, "rb") as f:
        while True:
            # Never request more than what is still wanted when a limit is set.
            want = min(chunk_size, remaining) if limited else chunk_size
            if want == 0:
                break
            data = f.read(want)
            if not data:
                # End of file: stop instead of issuing further empty reads,
                # even if fewer than ``nbytes`` bytes were available.
                break
            m.update(data)
            if limited:
                # Count the bytes actually returned, not the bytes requested.
                remaining -= len(data)
    return m.hexdigest()
def md5file(abspath, nbytes=0, chunk_size=DEFAULT_CHUNK_SIZE):
    """
    Return the md5 hash value of a file, or of its leading part.

    :param abspath: the absolute path to the file.
    :param nbytes: only hash the first N bytes of the file; ``0`` (the
        default) hashes the whole file.
    :param chunk_size: number of bytes read per chunk; forwarded to
        ``get_file_fingerprint``.

    Estimated processing time, as recorded by the original author on
    CPU = i7-4600U 2.10GHz - 2.70GHz, RAM = 8.00 GB
    (1 second can process 0.25GB data):

    - 0.59G - 2.43 sec
    - 1.3G - 5.68 sec
    - 1.9G - 7.72 sec
    - 2.5G - 10.32 sec
    - 3.9G - 16.0 sec
    """
    digest = get_file_fingerprint(
        abspath,
        hashlib.md5,
        nbytes=nbytes,
        chunk_size=chunk_size,
    )
    return digest
def sha256file(abspath, nbytes=0, chunk_size=DEFAULT_CHUNK_SIZE):
    """
    Return the sha256 hash value of a file, or of its leading part.

    :param abspath: the absolute path to the file.
    :param nbytes: only hash the first N bytes of the file; ``0`` (the
        default) hashes the whole file.
    :param chunk_size: number of bytes read per chunk; forwarded to
        ``get_file_fingerprint``.
    """
    digest = get_file_fingerprint(
        abspath,
        hashlib.sha256,
        nbytes=nbytes,
        chunk_size=chunk_size,
    )
    return digest
def sha512file(abspath, nbytes=0, chunk_size=DEFAULT_CHUNK_SIZE):
    """
    Return the sha512 hash value of a file, or of its leading part.

    :param abspath: the absolute path to the file.
    :param nbytes: only hash the first N bytes of the file; ``0`` (the
        default) hashes the whole file.
    :param chunk_size: number of bytes read per chunk; forwarded to
        ``get_file_fingerprint``.
    """
    digest = get_file_fingerprint(
        abspath,
        hashlib.sha512,
        nbytes=nbytes,
        chunk_size=chunk_size,
    )
    return digest