1
"""Processing of implementation manifests.

A manifest is a string representing a directory tree, with the property
that two trees will generate identical manifest strings if and only if:

 - They have exactly the same set of files, directories and symlinks.
 - For each pair of corresponding directories in the two sets:
   - The mtimes are the same (OldSHA1 only).
 - For each pair of corresponding files in the two sets:
   - The size, executable flag and mtime are the same.
   - The contents have matching secure hash values.
 - For each pair of corresponding symlinks in the two sets:
   - The sizes (lengths of the targets) are the same; symlink mtimes are not recorded.
   - The targets have matching secure hash values.

The manifest is typically processed with a secure hash itself. So, the idea is that
any significant change to the contents of the tree will change the secure hash value
of the manifest.

A top-level ".manifest" file is ignored.
"""
23
24
25
26
27
28 import os, stat, base64
29 from zeroinstall import SafeException, _, logger
30 from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair, format_algorithm_digest_pair
31
32 import hashlib
33 sha1_new = hashlib.sha1
34
class Algorithm(object):
    """Abstract base class for algorithms.
    An algorithm knows how to generate a manifest from a directory tree.
    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @raise NotImplementedError: always; subclasses must override this."""
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always; subclasses must override this."""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @raise NotImplementedError: always; subclasses must override this."""
        raise NotImplementedError('Abstract')
54
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""

    rating = 10

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        Directory lines are "D mtime /path", file lines are "F"/"X digest mtime size name"
        and symlink lines are "S digest size name".
        @type root: str
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an item is not a file, directory or symlink"""
        def recurse(sub):
            # The manifest is line-oriented, so a newline inside a name would make
            # the output ambiguous.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            # 'sub' always uses '/' separators; convert for the local platform.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                subdir = sub
                if not subdir.endswith('/'):
                    subdir += '/'
                for x in sorted(os.listdir(full)):
                    for y in recurse(subdir + x):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                with open(full, 'rb') as stream:
                    d = sha1_new(stream.read()).hexdigest()
                # Any execute bit marks the file as executable ("X").
                kind = "X" if m & 0o111 else "F"
                yield "%s %s %s %s %s" % (kind, d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full).encode('utf-8')
                d = sha1_new(target).hexdigest()
                # Symlink lines carry no mtime: only digest, target length and name.
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Create a new, empty SHA-1 digest object."""
        return sha1_new()

    def getID(self, digest):
        """@rtype: str"""
        return 'sha1=' + digest.hexdigest()
115
def get_algorithm(name):
    """Look-up an L{Algorithm} by name.
    @type name: str
    @rtype: L{Algorithm}
    @raise BadDigest: if the name is unknown."""
    try:
        return algorithms[name]
    except KeyError:
        raise BadDigest(_("Unknown algorithm '%s'") % name)
125
def generate_manifest(root, alg = 'sha1'):
    """Return the manifest line iterator for 'root' using the algorithm named 'alg'.
    @type root: str
    @type alg: str
    @deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead."""
    return get_algorithm(alg).generate_manifest(root)
131
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @type dir: str
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest here is deprecated.
    @type digest_or_alg: L{Algorithm}
    @return: the digest object, updated with the manifest contents
    @raise SafeException: if 'dir' already contains a .manifest"""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: a ready-made digest implies the old 'sha1' format.
        digest = digest_or_alg
        alg = get_algorithm('sha1')
    # Build the text in one pass rather than by repeated string concatenation.
    manifest = ''.join(line + '\n' for line in alg.generate_manifest(dir))
    manifest = manifest.encode('utf-8')
    digest.update(manifest)

    # The directory must be writable to create the file; it is made read-only
    # again as soon as the file has been opened.
    os.chmod(dir, 0o755)
    with open(mfile, 'wb') as stream:
        os.chmod(dir, 0o555)
        stream.write(manifest)
    os.chmod(mfile, 0o444)
    return digest
161
def splitID(id):
    """Take an ID in the form 'alg=value' and return a tuple (alg, value).
    The first element is the L{Algorithm} object looked up by name, not the name itself.
    @type id: str
    @rtype: (L{Algorithm}, str)
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg, digest = parse_algorithm_digest_pair(id)
    return (get_algorithm(alg), digest)
169
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & umask'.
    If the digest does not match, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # O_BINARY only exists on Windows, where os.open() would otherwise
    # translate newlines and corrupt the copy.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    with open(src, 'rb') as src_obj:
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                data = src_obj.read(65536)
                if not data: break
                digest.update(data)
                # os.write may write fewer bytes than requested.
                while data:
                    written = os.write(dest_fd, data)
                    assert written >= 0
                    data = data[written:]
        finally:
            os.close(dest_fd)
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
              "Expected: %(required_digest)s\n"
              "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
204
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - Dir's name must be a digest (in the form "alg=value"), unless required_digest is given.
     - The calculated digest of the contents must match this name.
     - The digest of the .manifest file must also match. A missing .manifest
       is reported as a verification failure.
    @type root: str
    @type required_digest: str | None
    @raise BadDigest: if verification fails. The exception's 'detail' attribute describes the mismatch."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest of the manifest generated from the actual directory contents.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line.encode('utf-8'))
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest of the recorded .manifest file, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        # The recorded manifest is trustworthy, so show how the contents differ from it.
        import difflib
        with open(manifest_file, 'rt') as stream:
            diff = difflib.unified_diff(stream.readlines(), lines,
                            'Recorded', 'Actual')
        error.detail += _("The .manifest file matches the directory name.\n" \
                "The contents of the directory have changed:\n") + \
                ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
262
263
264
265
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @type source: str
    @type target: str
    @param manifest_data: the raw (UTF-8 encoded) manifest contents
    @type manifest_data: bytes
    @type required_digest: str
    @raise BadDigest: if manifest_data does not match required_digest
    @raise SafeException: if the algorithm is 'sha1', files are missing from source,
    or the finished copy does not have the required digest"""
    import tempfile

    alg = splitID(required_digest)[0]

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    # First check that the manifest we were given really has the required digest.
    digest = alg.new_digest()
    digest.update(manifest_data)
    manifest_digest = alg.getID(digest)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        logger.info(_("Target directory '%s' already exists"), target_impl)
        return

    # The manifest is now known to be good; copy only the items it lists.
    wanted = _parse_manifest(manifest_data.decode('utf-8'))

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        _copy_files(alg, wanted, source, tmpdir)

        # _copy_files removes every entry it copied, so anything left was not found.
        if wanted:
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        '\n- '.join(wanted.keys()))

        # Remove write permission from the copied directories.
        for root, dirs, files in os.walk(tmpdir):
            for d in dirs:
                path = os.path.join(root, d)
                mode = os.stat(path).st_mode
                os.chmod(path, mode & 0o555)

        # Double-check the copy by regenerating the manifest from it.
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                        "Unless the target was modified during the copy, this is a BUG\n"
                        "in 0store and should be reported.\n"
                        "Expected: %(required_digest)s\n"
                        "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        try:
            os.chmod(tmpdir, 0o755)
            os.rename(tmpdir, target_impl)
            os.chmod(target_impl, 0o555)
            tmpdir = None
        except OSError:
            # If the final directory exists anyway, treat that as success.
            if not os.path.isdir(target_impl):
                raise
    finally:
        if tmpdir is not None:
            logger.info(_("Deleting tmpdir '%s'"), tmpdir)
            from zeroinstall.support import ro_rmtree
            ro_rmtree(tmpdir)
341
343 """Parse a manifest file.
344 @param manifest_data: the contents of the manifest file
345 @type manifest_data: str
346 @return: a mapping from paths to information about that path
347 @rtype: {str: tuple}"""
348 wanted = {}
349 dir = ''
350 for line in manifest_data.split('\n'):
351 if not line: break
352 if line[0] == 'D':
353 data = line.split(' ', 1)
354 if len(data) != 2: raise BadDigest(_("Bad line '%s'") % line)
355 path = data[-1]
356 if not path.startswith('/'): raise BadDigest(_("Not absolute: '%s'") % line)
357 path = path[1:]
358 dir = path
359 elif line[0] == 'S':
360 data = line.split(' ', 3)
361 path = os.path.join(dir, data[-1])
362 if len(data) != 4: raise BadDigest(_("Bad line '%s'") % line)
363 else:
364 data = line.split(' ', 4)
365 path = os.path.join(dir, data[-1])
366 if len(data) != 5: raise BadDigest(_("Bad line '%s'") % line)
367 if path in wanted:
368 raise BadDigest(_('Duplicate entry "%s"') % line)
369 wanted[path] = data[:-1]
370 return wanted
371
373 """Scan for files under 'source'. For each one:
374 If it is in wanted and has the right details (or they can be fixed; e.g. mtime),
375 then copy it into 'target'.
376 If it's not in wanted, warn and skip it.
377 On exit, wanted contains only files that were not found.
378 @type alg: L{Algorithm}
379 @type wanted: {str: tuple}
380 @type source: str
381 @type target: str"""
382 dir = ''
383 for line in alg.generate_manifest(source):
384 if line[0] == 'D':
385 type, name = line.split(' ', 1)
386 assert name.startswith('/')
387 dir = name[1:]
388 path = dir
389 elif line[0] == 'S':
390 type, actual_digest, actual_size, name = line.split(' ', 3)
391 path = os.path.join(dir, name)
392 else:
393 assert line[0] in 'XF'
394 type, actual_digest, actual_mtime, actual_size, name = line.split(' ', 4)
395 path = os.path.join(dir, name)
396 try:
397 required_details = wanted.pop(path)
398 except KeyError:
399 logger.warning(_("Skipping file not in manifest: '%s'"), path)
400 continue
401 if required_details[0] != type:
402 raise BadDigest(_("Item '%s' has wrong type!") % path)
403 if type == 'D':
404 os.mkdir(os.path.join(target, path))
405 elif type in 'XF':
406 required_type, required_digest, required_mtime, required_size = required_details
407 if required_size != actual_size:
408 raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
409 "%(required_size)s according to manifest)") %
410 {'path': path, 'actual_size': actual_size, 'required_size': required_size})
411 required_mtime = int(required_mtime)
412 dest_path = os.path.join(target, path)
413 if type == 'X':
414 mode = 0o555
415 else:
416 mode = 0o444
417 copy_with_verify(os.path.join(source, path),
418 dest_path,
419 mode,
420 alg,
421 required_digest)
422 os.utime(dest_path, (required_mtime, required_mtime))
423 elif type == 'S':
424 required_type, required_digest, required_size = required_details
425 if required_size != actual_size:
426 raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
427 "%(required_size)s according to manifest)") %
428 {'path': path, 'actual_size': actual_size, 'required_size': required_size})
429 symlink_target = os.readlink(os.path.join(source, path))
430 symlink_digest = alg.new_digest()
431 symlink_digest.update(symlink_target.encode('utf-8'))
432 if symlink_digest.hexdigest() != required_digest:
433 raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
434 "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
435 dest_path = os.path.join(target, path)
436 os.symlink(symlink_target, dest_path)
437 else:
438 raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': type, 'path': path})
439
class HashLibAlgorithm(Algorithm):
    """Manifest algorithm backed by a hashlib constructor.
    Directory lines are "D /path" (no mtime); within each directory all files and
    symlinks are listed before any subdirectory is entered."""

    # Constructor for digest objects; set per instance in __init__.
    new_digest = None

    def __init__(self, name, rating, hash_name = None):
        """@param name: the algorithm's name
        @type name: str
        @param rating: how much we like this algorithm (higher is better)
        @type rating: int
        @param hash_name: name of the hashlib constructor to use, if it differs from 'name'
        @type hash_name: str | None"""
        self.name = name
        self.new_digest = getattr(hashlib, hash_name or name)
        self.rating = rating

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.
        @type root: str
        @raise BadDigest: if a name contains a newline
        @raise SafeException: if an item is not a file, directory or symlink"""
        def recurse(sub):
            # The manifest is line-oriented, so a newline inside a name would make
            # the output ambiguous.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            dirs = []
            for leaf in sorted(os.listdir(full)):
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    # Regular files named '.manifest' are skipped in every directory.
                    if leaf == '.manifest': continue

                    with open(path, 'rb') as stream:
                        d = new_digest(stream.read()).hexdigest()
                    # Any execute bit marks the file as executable ("X").
                    kind = "X" if m & 0o111 else "F"
                    yield "%s %s %s %s %s" % (kind, d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path).encode('utf-8')
                    d = new_digest(target).hexdigest()
                    # Symlink lines carry no mtime: only digest, target length and name.
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are visited after this directory's own entries.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)

            if not sub.endswith('/'):
                sub += '/'
            for x in dirs:
                # 'sub' is a '/'-separated manifest path, so join with plain concatenation.
                for y in recurse(sub + x): yield y
            return

        for x in recurse('/'): yield x
506
507 - def getID(self, digest):
517
# Registry of supported algorithms, keyed by the name used in digest IDs.
# For the HashLibAlgorithm entries the second argument is the rating
# (higher is better) and the optional third is the hashlib constructor name.
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1new', 50, 'sha1'),
    'sha256': HashLibAlgorithm('sha256', 80),
    'sha256new': HashLibAlgorithm('sha256new', 90, 'sha256'),
}
524
525
def fixup_permissions(root):
    """Set permissions recursively for children of root:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Every directory visited (including root itself) and every file is set to
    mode 555 or 444. Symlinks are left alone.
    @type root: str
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""

    for main, dirs, files in os.walk(root):
        # '.' stands for the directory currently being walked.
        for x in ['.'] + files:
            full = os.path.join(main, x)

            raw_mode = os.lstat(full).st_mode
            if stat.S_ISLNK(raw_mode): continue

            mode = stat.S_IMODE(raw_mode)
            if mode & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': full, 'mode': oct(mode)})
            os.chmod(full, 0o555 if mode & 0o111 else 0o444)
547