Skip to content
Snippets Groups Projects
dir_watcher.py 8.95 KiB
Newer Older
#! /usr/bin/env python3

"""
LICENSE
Apache License 2.0
---
PURPOSE
This script calculates checksums (MD5, SHA1 and ADLER32) of a file.
These checksums are then saved under a duplicated directory tree
under DigestRootDir in separate files each.
The checksums are intended to be returned by mod_want_digest
for the Apache httpd on the HTTP header "Want-Digest" with a
GET request.
The module takes care of files transferred to the web server
by HTTP itself, but not of files that are copied to the file system
by other means. Precalculating the checksums is preferable
to calculating them on the fly because the latter takes too
much time for larger files (>~250MB).
---
AUTHOR
T. Wetzel, tim.wetzel@desy.de, 2021
"""

## imports
import os
import stat
from argparse import ArgumentParser

import hashlib
import zlib

## globals
#__all__ = ['...']

# number of bytes requested per read() while streaming a file through the digests
CHUNK_SIZE = 16 * 1024
# suffixes of the digest files kept for every source file
valid_extensions = [
    'md5',
    'sha',
    'adler32',
]
# permission bits handed to os.chmod():
#   file_mode -> rw-rw-rw- (0666), applied to the digest files
#   dir_mode  -> rwxrwxrwx (0777), applied to the digest directories
file_mode = (
    stat.S_IRUSR | stat.S_IWUSR
    | stat.S_IRGRP | stat.S_IWGRP
    | stat.S_IROTH | stat.S_IWOTH
)
dir_mode = file_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH

## utility functions

def full_digest_save_path(file_path, extension='', digest_dir=None):
    """Map a source file path onto its location below the digest root.

    file_path   path of the source file; leading slashes are dropped so
                that the result always stays below the digest root.
    extension   optional suffix ('md5', '.sha', 'lock', ...); a leading
                dot is added when it is missing.
    digest_dir  root of the digest tree; the module-level
                DIGEST_ROOT_DIR is used when this is None.

    Returns the joined path as a string.
    """
    root = DIGEST_ROOT_DIR if digest_dir is None else digest_dir
    suffix = extension
    if suffix != '' and not suffix.startswith('.'):
        suffix = '.' + suffix
    # os.path.join() would discard the root if handed an absolute path,
    # hence the leading slashes have to go first.
    relative = file_path.lstrip('/')
    return os.path.join(root, relative + suffix)

def digest_exists(file_path):
    """Return the digest suffixes for which a digest file of file_path exists.

    Every entry of valid_extensions is probed below the digest root; the
    result keeps the order of valid_extensions and is empty when no digest
    file is present.
    """
    return [
        suffix
        for suffix in valid_extensions
        if os.path.isfile(full_digest_save_path(file_path, suffix))
    ]

## main functions
def on_creation(f, update=False):
    """ Create (or, with update=True, refresh) the digest files of file f.

        The MD5, SHA1 and ADLER32 checksums of f are calculated in one pass
        and written to '<digest path>.md5', '.sha' and '.adler32' below the
        digest root. A '<digest path>.lock' file guards the operation and is
        always removed again, even if hashing or writing fails.

        f       path of the source file.
        update  when False and all digests already exist, nothing is done.

        Raises IOError if f is not an existing regular file.
        Raises SystemExit(os.EX_OK) if nothing has to be done and
        SystemExit(os.EX_TEMPFAIL) if the lock file already exists.
    """
    # unless an update is requested, complete digest sets are left alone
    if not update:
        existing_digests = digest_exists(f)
        if set(existing_digests) == set(valid_extensions):
            print(f"Digests for {f} already exist. Exiting.")
            raise SystemExit(os.EX_OK)

    # check if file really exists
    if not os.path.isfile(f):
        raise IOError(f"File {f} not found.")

    digest_save_path = full_digest_save_path(f)
    digest_dir = os.path.dirname(digest_save_path)
    lockfile = full_digest_save_path(f, extension='lock')

    # the lock file and the digest files share one directory; create it
    # once and open it up explicitly because makedirs() honours the umask
    if not os.path.isdir(digest_dir):
        os.makedirs(digest_dir, mode=0o777, exist_ok=True)
        os.chmod(digest_dir, mode=dir_mode)

    # mode 'x' fails if the file is already there, so testing for the lock
    # and taking it is a single atomic step
    try:
        open(lockfile, 'x').close()
    except FileExistsError:
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        raise SystemExit(os.EX_TEMPFAIL) from None

    try:
        md5_digest = hashlib.md5()
        sha_digest = hashlib.sha1()
        # 1 is the documented start value of zlib.adler32(); feeding the
        # previous result back in yields a correct running checksum.
        adler_digest = 1
        with open(f, 'rb') as file_handle:
            # read in chunks: the file may be binary (not line based)
            # and too large to be held in memory at once
            for chunk in iter(lambda: file_handle.read(CHUNK_SIZE), b''):
                md5_digest.update(chunk)
                sha_digest.update(chunk)
                adler_digest = zlib.adler32(chunk, adler_digest)

        checksums = {
            'md5': md5_digest.hexdigest(),
            'sha': sha_digest.hexdigest(),
            # NOTE(review): the statement writing the ADLER32 value was
            # missing in the original; zero-padded 8-digit lowercase hex is
            # assumed here - confirm the format mod_want_digest expects.
            'adler32': f"{adler_digest:08x}",
        }
        for extension, value in checksums.items():
            digest_file_path = f"{digest_save_path}.{extension}"
            with open(digest_file_path, 'w') as digest_file:
                digest_file.write(value)
            os.chmod(digest_file_path, file_mode)
    finally:
        # never leave a stale lock behind, whatever happened above
        os.remove(lockfile)

    print(f"written digests for {f}")
    # TODO: write to logfile

def on_update(f):
    """ Placeholder for refreshing the digests of the file in path f.

        Deliberately does nothing: on_creation() already covers updates
        (main() calls it with update=True for the 'modify' method), so as
        long as there is no reason to split the two, this stays empty.
    """
    return None

def on_delete(f):
    """ Delete the digest files that belong to the deleted source file f.

        Nothing is removed while f still exists. Otherwise every digest
        file ('.md5', '.sha', '.adler32') is removed on a best-effort basis
        under the protection of a '<digest path>.lock' file, and afterwards
        the now possibly empty digest directories are pruned.

        Raises SystemExit(0) if the lock file already exists.
    """
    rm_path = full_digest_save_path(f)

    # only act if the source file is really gone
    if os.path.isfile(f):
        print(f"File {rm_path} could not be deleted.")
        return

    lockfile = full_digest_save_path(f, extension='lock')
    lock_dir = os.path.dirname(lockfile)
    os.makedirs(lock_dir, mode=0o777, exist_ok=True)

    # mode 'x' fails if the file is already there, so testing for the lock
    # and taking it is a single atomic step
    try:
        open(lockfile, 'x').close()
    except FileExistsError:
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        raise SystemExit(0) from None

    try:
        # makedirs() honours the umask, so open the directory up explicitly
        os.chmod(lock_dir, mode=dir_mode)
        for extension in valid_extensions:
            digest_file_path = f"{rm_path}.{extension}"
            try:
                os.remove(digest_file_path)
            except OSError:
                # best effort: a missing digest must not stop the others
                print(f"unable to remove file {digest_file_path}.")
            else:
                print(f"Deleted digest {digest_file_path} for {f}.")
    finally:
        # never leave a stale lock behind, whatever happened above
        os.remove(lockfile)

    # NOTE: os.removedirs() removes the leaf and then every empty parent,
    # so it may climb above the digest root if that is empty as well.
    try:
        os.removedirs(os.path.dirname(rm_path))
        print(f"removing {os.path.dirname(rm_path)}.")
    except OSError:
        # directory not empty (or not removable): leave it in place
        pass

def on_move(f, o):
    """ Move the digests of the file formerly at path o to the location
        that corresponds to its new path f below the digest root.

        Each digest file ('.md5', '.sha', '.adler32') is renamed on a
        best-effort basis under the protection of a '<new digest path>.lock'
        file. Afterwards the old digest directory is pruned if it is empty.

        f   new path of the source file.
        o   old path of the source file.

        Raises SystemExit(0) if the lock file already exists.
    """
    old_path = full_digest_save_path(o)
    new_path = full_digest_save_path(f)

    lockfile = full_digest_save_path(f, extension='lock')
    lock_dir = os.path.dirname(lockfile)
    # this also creates the target directory for the renames below
    os.makedirs(lock_dir, mode=0o777, exist_ok=True)

    # mode 'x' fails if the file is already there, so testing for the lock
    # and taking it is a single atomic step
    try:
        open(lockfile, 'x').close()
    except FileExistsError:
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        raise SystemExit(0) from None

    try:
        # makedirs() honours the umask, so open the directory up explicitly
        os.chmod(lock_dir, mode=dir_mode)
        # move checksum files to new path
        for extension in valid_extensions:
            source = f"{old_path}.{extension}"
            target = f"{new_path}.{extension}"
            try:
                os.rename(source, target)
            except OSError:
                # best effort: a missing digest must not stop the others
                print(f"unable to move file {source} to {target}.")
    finally:
        # never leave a stale lock behind, whatever happened above
        os.remove(lockfile)

    # if old directory is empty, recursively delete dirs upwards if also empty.
    try:
        os.removedirs(os.path.dirname(old_path))
    except OSError:
        print(f"unable to remove directory {os.path.dirname(old_path)}, possibly not empty.")

    print(f"on_move of {f}")

def main():
    parser = ArgumentParser()
    parser.add_argument('-m', '--method', help='Choice of method to call. Allowed: "create", "modify", "delete", "move"')
    parser.add_argument('-f', '--file_path', help='File for which a digest is to be created/updated/deleted.')
    parser.add_argument('-o', '--old_path', help='Old file path of file that has been moved to new path specified in -f. Digests associated with the old file path are deleted.')
    parser.add_argument('-d', '--digest_root_dir', help='Root directory for saving the digests.', default='/var/www/hashes')
    args = parser.parse_args()

    global DIGEST_ROOT_DIR
    DIGEST_ROOT_DIR = args.digest_root_dir

    if args is None:
        print("No arguments supplied, I don't know what to do here, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)
    elif args.file_path is None:
        print("No file_path supplied, I don't know which file you want to tend me to, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)

    # TODO: check for lockfile here

    if args.method == 'create':
        on_creation(args.file_path)
    elif args.method == 'modify':
        on_creation(args.file_path, update=True)
    elif args.method == 'delete':
        on_delete(args.file_path)
    elif args.method == 'move':
        on_move(args.file_path, args.old_path)
    else:
        print("The method you specified is not implemented, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)


## on calling the script directly (nothing below runs on import):
if __name__ == '__main__':
    ## argument parsing and the dispatch to the requested handler happen in main()
    main()

    # reached only if main() returned normally, i.e. no handler exited early
    exit(os.EX_OK)