#! /usr/bin/env python3
"""
LICENSE
Apache License 2.0

---

PURPOSE
This script calculates checksums (MD5, SHA1 and ADLER32) of a file.
These checksums are then saved under a duplicated directory tree under
DigestRootDir in separate files each. The checksums are intended to be
returned by mod_want_digest for the Apache httpd on the HTTP header
"Want-Digest" with a GET request. The module takes care of files
transferred to the web server by HTTP itself, but not of files copied
to the file system by other means -- those are handled by this script.
A precalculation of the checksums is preferable to calculating the
checksums on the fly because that takes too much time in case of
larger files (>~250MB).

---

AUTHOR
T. Wetzel, tim.wetzel@desy.de, 2021
"""

## imports
import os
import stat
from argparse import ArgumentParser
import hashlib
import zlib

## globals
#__all__ = ['...']
CHUNK_SIZE = 16384
valid_extensions = ['md5', 'sha', 'adler32']
# Digest files are world-readable/writable and digest directories world-rwx
# so that both this script and the httpd worker can maintain them.
file_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IWGRP | stat.S_IROTH | stat.S_IWOTH
dir_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO


## utility functions
def full_digest_save_path(file_path, extension='', digest_dir=None):
    """Map *file_path* to its location inside the digest tree.

    Returns digest_dir/file_path with *extension* appended; a leading dot
    is added to the extension when missing. *digest_dir* defaults to the
    global DIGEST_ROOT_DIR set by main().
    """
    if digest_dir is None:
        global DIGEST_ROOT_DIR
        digest_dir = DIGEST_ROOT_DIR
    # add the extension dot if not there
    if extension != '' and not extension.startswith('.'):
        extension = '.' + extension
    # strip leading slashes: os.path.join would otherwise discard digest_dir
    if file_path.startswith('/'):
        file_path = file_path.lstrip('/')
    return os.path.join(digest_dir, file_path + extension)


def digest_exists(file_path):
    """Return the list of digest extensions already present for *file_path*."""
    return [extension for extension in valid_extensions
            if os.path.isfile(full_digest_save_path(file_path, extension))]


def _make_digest_dir(path):
    """Create directory *path* (with parents) and open up its permissions.

    os.makedirs' mode argument is subject to the process umask, hence the
    explicit chmod afterwards.
    """
    if not os.path.isdir(path):
        os.makedirs(path, mode=0o0777, exist_ok=True)
        os.chmod(path, mode=dir_mode)


def _compute_digests(file_path):
    """Read *file_path* once in CHUNK_SIZE chunks and return the tuple
    (md5_hexdigest, sha1_hexdigest, adler32_int)."""
    md5_digest = hashlib.md5()
    sha_digest = hashlib.sha1()
    # zlib documents 1 as the correct start value for a running adler32.
    adler_digest = 1
    with open(file_path, 'rb') as file_handle:
        # read the file in chunks: it may be large and binary, not line based
        for chunk in iter(lambda: file_handle.read(CHUNK_SIZE), b''):
            md5_digest.update(chunk)
            sha_digest.update(chunk)
            adler_digest = zlib.adler32(chunk, adler_digest)
    return md5_digest.hexdigest(), sha_digest.hexdigest(), adler_digest


## main functions
def on_creation(f, update=False):
    """Create (or, with update=True, refresh) the digest files for *f*.

    Covers both creation and update of the digests associated with the
    file in path f. A '<f>.lock' file in the digest tree serves as a
    simple mutex against concurrent workers.

    Raises FileNotFoundError when *f* does not exist.
    """
    # nothing to do if all digests already exist (unless updating)
    if not update:
        if set(digest_exists(f)) == set(valid_extensions):
            print(f"Digests for {f} already exist. Exiting.")
            exit(os.EX_OK)
    # check if file really exists
    if not os.path.isfile(f):
        # BUGFIX: the original raised IOError and then had an unreachable
        # exit(os.EX_NOINPUT) after the raise; keep raising so callers can
        # catch it (FileNotFoundError is a subclass of IOError/OSError).
        raise FileNotFoundError(f"File {f} not found.")
    # BUGFIX: initialize the lock flag on every path instead of relying on
    # control flow to have assigned it before 'if lock:' below.
    lock = False
    lockfile = full_digest_save_path(f, extension='lock')
    if os.path.isfile(lockfile):
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        exit(os.EX_TEMPFAIL)
    _make_digest_dir(os.path.dirname(lockfile))
    lock = True
    open(lockfile, 'w').close()
    md5_hex, sha_hex, adler = _compute_digests(f)
    # check if save path for digest files exists and create if necessary
    digest_save_path = full_digest_save_path(f)
    # BUGFIX: the original chmod'ed os.path.dirname(lockfile) here (same
    # directory by construction, but clearly a copy-paste slip).
    _make_digest_dir(os.path.dirname(digest_save_path))
    # save one digest file per checksum
    # NOTE(review): ADLER32 is stored as a zero-padded *decimal* string;
    # Want-Digest implementations usually expect 8 hex digits -- confirm
    # against mod_want_digest before changing the format.
    for ext, content in (('.md5', md5_hex),
                         ('.sha', sha_hex),
                         ('.adler32', str(adler).zfill(8))):
        with open(digest_save_path + ext, 'w') as digest_file:
            digest_file.write(content)
        os.chmod(digest_save_path + ext, file_mode)
    if lock:
        os.remove(lockfile)
    print(f"written digests for {f}")
    # TODO: write to logfile


def on_update(f):
    """
    on_update(f) should update the digests associated with the file in
    path f but the functionality is also achieved by on_creation(), so
    until there is a reason to split the two, on_creation() is used.
    """
    pass


def on_delete(f):
    """
    on_delete(f) takes care of deleting digests associated with the
    deleted file in f. It also recursively removes directories in the
    path until one of them is not empty.
    """
    rm_path = full_digest_save_path(f)
    # only act once the source file really has been deleted
    if os.path.isfile(f):
        # BUGFIX: the original message claimed rm_path "could not be
        # deleted" although deletion was never attempted.
        print(f"File {f} still exists, not deleting digests under {rm_path}.")
        return
    lock = False  # BUGFIX: defined on every path
    lockfile = full_digest_save_path(f, extension='lock')
    if os.path.isfile(lockfile):
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        # BUGFIX: was exit(0) -- a success code on a temporary failure;
        # use the same code as on_creation().
        exit(os.EX_TEMPFAIL)
    _make_digest_dir(os.path.dirname(lockfile))
    lock = True
    open(lockfile, 'a').close()
    for extension in valid_extensions:
        target = rm_path + '.' + extension
        try:
            os.remove(target)
            print(f"Deleted digest {target} for {f}.")
        except OSError:  # narrowed from a bare except
            print(f"unable to remove file {target}.")
    if lock:
        os.remove(lockfile)
    # if the digest directory is now empty, recursively delete dirs upwards
    try:
        os.removedirs(os.path.dirname(rm_path))
        print(f"removing {os.path.dirname(rm_path)}.")
    except OSError:
        pass


def on_move(f, o):
    """
    on_move(f, o) moves the digests associated with file o to new
    location f in the DigestRootDir.
    """
    old_path = full_digest_save_path(o)
    new_path = full_digest_save_path(f)
    lock = False  # BUGFIX: defined on every path
    lockfile = full_digest_save_path(f, extension='lock')
    if os.path.isfile(lockfile):
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        # BUGFIX: was exit(0); keep exit codes consistent with on_creation().
        exit(os.EX_TEMPFAIL)
    # this also guarantees the destination directory exists for os.rename
    _make_digest_dir(os.path.dirname(lockfile))
    lock = True
    open(lockfile, 'a').close()
    # move checksum files to the new path
    for extension in valid_extensions:
        src = old_path + '.' + extension
        dst = new_path + '.' + extension
        try:
            os.rename(src, dst)
        except OSError:  # narrowed from a bare except
            print(f"unable to move file {src} to {dst}.")
    if lock:
        os.remove(lockfile)
    # if the old directory is empty, recursively delete dirs upwards
    try:
        os.removedirs(os.path.dirname(old_path))
    except OSError:
        print(f"unable to remove directory {os.path.dirname(old_path)}, possibly not empty.")
    print(f"on_move of {f}")


def main():
    """Parse command line arguments and dispatch to the handler functions."""
    parser = ArgumentParser()
    parser.add_argument('-m', '--method',
                        help='Choice of method to call. Allowed: "create", "modify", "delete", "move"')
    parser.add_argument('-f', '--file_path',
                        help='File for which a digest is to be created/updated/deleted.')
    parser.add_argument('-o', '--old_path',
                        help='Old file path of file that has been moved to new path specified in -f. '
                             'Digests associated with the old file path are deleted.')
    parser.add_argument('-d', '--digest_root_dir',
                        help='Root directory for saving the digests.',
                        default='/var/www/hashes')
    args = parser.parse_args()
    global DIGEST_ROOT_DIR
    DIGEST_ROOT_DIR = args.digest_root_dir
    # NOTE: the original also checked 'args is None', but parse_args()
    # never returns None -- that branch was dead code.
    if args.file_path is None:
        print("No file_path supplied, I don't know which file you want to tend me to, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)
    if args.method == 'create':
        on_creation(args.file_path)
    elif args.method == 'modify':
        on_creation(args.file_path, update=True)
    elif args.method == 'delete':
        on_delete(args.file_path)
    elif args.method == 'move':
        # BUGFIX: on_move() would crash on a missing old path
        if args.old_path is None:
            print("Method 'move' requires -o/--old_path, exiting.")
            parser.print_help()
            exit(os.EX_USAGE)
        on_move(args.file_path, args.old_path)
    else:
        print("The method you specified is not implemented, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)


## on calling the script directly:
if __name__ == '__main__':
    ## argument parsing
    main()
    exit(os.EX_OK)