#! /usr/bin/env python3
"""
LICENSE
Apache License 2.0
---
PURPOSE
This script calculates checksums (MD5, SHA1 and ADLER32) of a file.
These checksums are then saved, each in a separate file, under a
duplicated directory tree below DigestRootDir.
The checksums are intended to be returned by mod_want_digest for the
Apache httpd when a GET request carries the "Want-Digest" header.
The module itself takes care of files transferred to the web server
via HTTP, but this script handles files that are copied to the file
system by other means. Precalculating the checksums is preferable to
calculating them on the fly, which takes too much time for larger
files (>~250MB).
---
AUTHOR
T. Wetzel, tim.wetzel@desy.de, 2021
"""
## imports
import os
import stat
from argparse import ArgumentParser
import hashlib
import zlib
## globals
#__all__ = ['...']
CHUNK_SIZE = 16384
valid_extensions = ['md5', 'sha', 'adler32']
# modes for chmod: 0666 for digest files, 0777 for directories
file_mode = stat.S_IRUSR|stat.S_IWUSR|stat.S_IRGRP|stat.S_IWGRP|stat.S_IROTH|stat.S_IWOTH
dir_mode = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
## utility functions
def full_digest_save_path(file_path, extension='', digest_dir=None):
    if digest_dir is None:
        global DIGEST_ROOT_DIR
        digest_dir = DIGEST_ROOT_DIR
    # add the extension dot if not there.
    if extension != '' and not extension.startswith('.'):
        extension = '.'+extension
    # if file_path starts with a slash, discard it
    if file_path.startswith('/'):
        file_path = file_path.lstrip('/')
    # join the given path with the digest root directory
    full_path = os.path.join(digest_dir, file_path+extension)
    return full_path
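
# Illustration (a sketch, assuming DIGEST_ROOT_DIR == '/var/www/hashes' and a
# hypothetical file path):
#   full_digest_save_path('/data/run001.raw', 'md5')
#   -> '/var/www/hashes/data/run001.raw.md5'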
def digest_exists(file_path):
    exists = []
    for extension in valid_extensions:
        if os.path.isfile(full_digest_save_path(file_path, extension)):
            exists.append(extension)
    return exists

## main functions
def on_creation(f, update=False):
    """ This function covers both creation and update of the digests
    associated with the file in path f.
    """
    # check if checksums already exist under DIGEST_ROOT_DIR
    if not update:
        existing_digests = digest_exists(f)
        if set(existing_digests) == set(valid_extensions):
            print(f"Digests for {f} already exist. Exiting.")
            exit(os.EX_OK)
    # check if the file really exists
    if os.path.isfile(f):
        # check for lock file
        lockfile = full_digest_save_path(f, extension='lock')
        if os.path.isfile(lockfile):
            print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
            exit(os.EX_TEMPFAIL)
        else:
            if not os.path.isdir(os.path.dirname(lockfile)):
                os.makedirs(os.path.dirname(lockfile), mode=0o0777, exist_ok=True)
                os.chmod(os.path.dirname(lockfile), mode=dir_mode)
            lock = True
            open(lockfile, 'w').close()
        # prepare digests
        md5_digest = hashlib.md5()
        sha_digest = hashlib.sha1()
        # adler_digest starts at 1 and is carried along as the running checksum;
        # according to the zlib documentation the default starting value is 1,
        # which ensures a correct running checksum.
        adler_digest = 1
        with open(f, 'rb') as file_handle:
            # read the file in chunks because it might be binary
            # and not line-based
            for chunk in iter(lambda: file_handle.read(CHUNK_SIZE), b''):
                # update MD5, SHA1 and ADLER32
                md5_digest.update(chunk)
                sha_digest.update(chunk)
                adler_digest = zlib.adler32(chunk, adler_digest)
        # check if the save path for the digest files exists and create it if necessary
        digest_save_path = full_digest_save_path(f)
        if not os.path.isdir(os.path.dirname(digest_save_path)):
            os.makedirs(os.path.dirname(digest_save_path), mode=0o0777, exist_ok=True)
            os.chmod(os.path.dirname(digest_save_path), mode=dir_mode)
        # save digest files
        with open(digest_save_path+'.md5', 'w') as md5_file:
            md5_file.write(str(md5_digest.hexdigest()))
        os.chmod(digest_save_path+'.md5', file_mode)
        with open(digest_save_path+'.sha', 'w') as sha_file:
            sha_file.write(str(sha_digest.hexdigest()))
        os.chmod(digest_save_path+'.sha', file_mode)
        with open(digest_save_path+'.adler32', 'w') as adler_file:
            adler_file.write(str(adler_digest).zfill(8))
        os.chmod(digest_save_path+'.adler32', file_mode)
        if lock:
            os.remove(lockfile)
        print(f"written digests for {f}")
    else:
        print(f"File {f} not found.")
        exit(os.EX_NOINPUT)
    # TODO: write to logfile

def on_update(f):
    """ on_update(f) should update the digests associated with the file
    in path f, but that functionality is already covered by on_creation(),
    so until there is a reason to split the two, on_creation() is used.
    """
    pass

def on_delete(f):
    """ on_delete(f) takes care of deleting digests associated with the
    deleted file in f. It also recursively removes directories in the path
    until one of them is not empty.
    """
    # check if the file has really been deleted
    rm_path = full_digest_save_path(f)
    if not os.path.isfile(f):
        lockfile = full_digest_save_path(f, extension='lock')
        if os.path.isfile(lockfile):
            print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
            exit(os.EX_TEMPFAIL)
        else:
            os.makedirs(os.path.dirname(lockfile), mode=0o0777, exist_ok=True)
            os.chmod(os.path.dirname(lockfile), mode=dir_mode)
            lock = True
            open(lockfile, 'a').close()
        for extension in valid_extensions:
            try:
                os.remove(rm_path+'.'+extension)
                print(f"Deleted digest {rm_path+'.'+extension} for {f}.")
            except OSError:
                print(f"unable to remove file {rm_path+'.'+extension}.")
                continue
        if lock:
            os.remove(lockfile)
        try:
            os.removedirs(os.path.dirname(rm_path))
            print(f"removing {os.path.dirname(rm_path)}.")
        except OSError:
            pass
    else:
        print(f"File {f} still exists, not deleting digests under {rm_path}.")

def on_move(f, o):
    """ on_move(f, o) moves the digests associated with file o to the new
    location f in the DigestRootDir.
    """
    old_path = full_digest_save_path(o)
    new_path = full_digest_save_path(f)
    lockfile = full_digest_save_path(f, extension='lock')
    if os.path.isfile(lockfile):
        print(f"lock file {lockfile} exists, someone else is busy here. Exiting.")
        exit(os.EX_TEMPFAIL)
    else:
        os.makedirs(os.path.dirname(lockfile), mode=0o0777, exist_ok=True)
        os.chmod(os.path.dirname(lockfile), mode=dir_mode)
        lock = True
        open(lockfile, 'a').close()
    # move the checksum files to the new path
    for extension in valid_extensions:
        try:
            os.rename(old_path+'.'+extension, new_path+'.'+extension)
        except OSError:
            print(f"unable to move file {old_path+'.'+extension} to {new_path+'.'+extension}.")
            continue
    if lock:
        os.remove(lockfile)
    # if the old directory is now empty, recursively delete directories upwards
    # while they are also empty.
    try:
        os.removedirs(os.path.dirname(old_path))
    except OSError:
        print(f"unable to remove directory {os.path.dirname(old_path)}, possibly not empty.")
    print(f"on_move of {f}")

def main():
    parser = ArgumentParser()
    parser.add_argument('-m', '--method', help='Choice of method to call. Allowed: "create", "modify", "delete", "move"')
    parser.add_argument('-f', '--file_path', help='File for which a digest is to be created/updated/deleted.')
    parser.add_argument('-o', '--old_path', help='Old file path of a file that has been moved to the new path specified in -f. Digests associated with the old file path are moved along.')
    parser.add_argument('-d', '--digest_root_dir', help='Root directory for saving the digests.', default='/var/www/hashes')
    args = parser.parse_args()
    global DIGEST_ROOT_DIR
    DIGEST_ROOT_DIR = args.digest_root_dir
    if args.file_path is None:
        print("No file_path supplied, I don't know which file you want me to handle, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)
    # TODO: check for lockfile here
    if args.method == 'create':
        on_creation(args.file_path)
    elif args.method == 'modify':
        on_creation(args.file_path, update=True)
    elif args.method == 'delete':
        on_delete(args.file_path)
    elif args.method == 'move':
        on_move(args.file_path, args.old_path)
    else:
        print("The method you specified is not implemented, exiting.")
        parser.print_help()
        exit(os.EX_USAGE)

## on calling the script directly:
if __name__ == '__main__':
    ## argument parsing
    main()
    exit(os.EX_OK)