Package zeroinstall :: Package zerostore :: Module manifest
[frames | no frames]

Source Code for Module zeroinstall.zerostore.manifest

  1   
  2  """Processing of implementation manifests. 
  3   
  4  A manifest is a string representing a directory tree, with the property 
  5  that two trees will generate identical manifest strings if and only if: 
  6   
  7   - They have exactly the same set of files, directories and symlinks. 
  8   - For each pair of corresponding directories in the two sets: 
  9     - The mtimes are the same (OldSHA1 only). 
 10   - For each pair of corresponding files in the two sets: 
 11     - The size, executable flag and mtime are the same. 
 12     - The contents have matching secure hash values. 
 13   - For each pair of corresponding symlinks in the two sets: 
 14     - The mtime and size are the same. 
 15     - The targets have matching secure hash values. 
 16   
 17  The manifest is typically processed with a secure hash itself. So, the idea is that 
 18  any significant change to the contents of the tree will change the secure hash value 
 19  of the manifest. 
 20   
 21  A top-level ".manifest" file is ignored. 
 22  """ 
 23   
 24  # Copyright (C) 2009, Thomas Leonard 
 25  # See the README file for details, or visit http://0install.net. 
 26   
 27   
 28  import os, stat, base64 
 29  from zeroinstall import SafeException, _, logger 
 30  from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair, format_algorithm_digest_pair 
 31   
 32  import hashlib 
 33  sha1_new = hashlib.sha1 
 34   
class Algorithm(object):
    """Abstract base class for algorithms.

    An algorithm knows how to generate a manifest from a directory tree.

    @ivar rating: how much we like this algorithm (higher is better)
    @type rating: int
    """

    def generate_manifest(self, root):
        """Returns an iterator that yields each line of the manifest for the directory
        tree rooted at 'root'.
        @raise NotImplementedError: always; concrete algorithms must override this."""
        # NotImplementedError is a subclass of Exception, so callers that
        # caught the old generic Exception continue to work.
        raise NotImplementedError('Abstract')

    def new_digest(self):
        """Create a new digest. Call update() on the returned object to digest the data.
        Call getID() to turn it into a full ID string.
        @raise NotImplementedError: always; concrete algorithms must override this."""
        raise NotImplementedError('Abstract')

    def getID(self, digest):
        """Convert a digest (from new_digest) to a full ID.
        @raise NotImplementedError: always; concrete algorithms must override this."""
        raise NotImplementedError('Abstract')
54
class OldSHA1(Algorithm):
    """@deprecated: Injector versions before 0.20 only supported this algorithm."""

    rating = 10

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.

        Entries are visited depth-first in sorted name order. Directory lines
        ("D") carry the mtime and the full Unix-style path; file ("F"/"X") and
        symlink ("S") lines carry only the leaf name. A top-level ".manifest"
        entry is skipped.
        @type root: str
        @raise BadDigest: if a path contains a newline.
        @raise SafeException: if an entry is not a file, directory or symlink."""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest("Newline in filename '%s'" % sub)
            assert sub.startswith('/')

            if sub == '/.manifest': return

            # 'sub' is always Unix style; convert to the native separator
            # before touching the filesystem.
            full = os.path.join(root, sub[1:].replace('/', os.sep))
            info = os.lstat(full)

            m = info.st_mode
            if stat.S_ISDIR(m):
                if sub != '/':
                    yield "D %s %s" % (int(info.st_mtime), sub)
                items = os.listdir(full)
                items.sort()
                subdir = sub
                if not subdir.endswith('/'):
                    subdir += '/'
                for x in items:
                    for y in recurse(subdir + x):
                        yield y
                return

            assert sub[1:]
            leaf = os.path.basename(sub[1:])
            if stat.S_ISREG(m):
                # Hash in fixed-size chunks so that very large files are
                # never held in memory all at once.
                file_digest = sha1_new()
                with open(full, 'rb') as stream:
                    while True:
                        chunk = stream.read(65536)
                        if not chunk: break
                        file_digest.update(chunk)
                d = file_digest.hexdigest()
                if m & 0o111:
                    yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                else:
                    yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
            elif stat.S_ISLNK(m):
                target = os.readlink(full).encode('utf-8')
                d = sha1_new(target).hexdigest()
                # Note: Can't use utime on symlinks, so skip mtime
                # Note: eCryptfs may report length as zero, so count ourselves instead
                yield "S %s %s %s" % (d, len(target), leaf)
            else:
                raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                        full)
        for x in recurse('/'): yield x

    def new_digest(self):
        """Return a fresh SHA-1 digest object (see L{Algorithm.new_digest})."""
        return sha1_new()

    def getID(self, digest):
        """Format a digest as a 'sha1=<hex>' ID string.
        @rtype: str"""
        return 'sha1=' + digest.hexdigest()
115
def get_algorithm(name):
    """Return the L{Algorithm} registered under 'name' in the algorithms table.
    @type name: str
    @rtype: L{Algorithm}
    @raise BadDigest: if the name is unknown."""
    if name in algorithms:
        return algorithms[name]
    raise BadDigest(_("Unknown algorithm '%s'") % name)
125
def generate_manifest(root, alg = 'sha1'):
    """Generate the manifest for 'root' using the algorithm named 'alg'.
    @type root: str
    @type alg: str
    @deprecated: use L{get_algorithm} and L{Algorithm.generate_manifest} instead."""
    algorithm = get_algorithm(alg)
    return algorithm.generate_manifest(root)
131
def add_manifest_file(dir, digest_or_alg):
    """Writes a .manifest file into 'dir', and returns the digest.
    You should call fixup_permissions before this to ensure that the permissions are correct.
    On exit, dir itself has mode 555. Subdirectories are not changed.
    @param dir: root of the implementation
    @type dir: str
    @param digest_or_alg: should be an instance of Algorithm. Passing a digest here is deprecated.
    @type digest_or_alg: L{Algorithm}
    @return: the digest object, updated with the manifest contents
    @raise SafeException: if 'dir' already contains a .manifest entry."""
    mfile = os.path.join(dir, '.manifest')
    if os.path.islink(mfile) or os.path.exists(mfile):
        raise SafeException(_("Directory '%s' already contains a .manifest file!") % dir)
    if isinstance(digest_or_alg, Algorithm):
        alg = digest_or_alg
        digest = alg.new_digest()
    else:
        # Deprecated calling convention: a digest object was passed in, which
        # implies the old 'sha1' algorithm.
        digest = digest_or_alg
        alg = get_algorithm('sha1')
    # Build the manifest in one pass rather than by repeated string concatenation.
    manifest = ''.join(line + '\n' for line in alg.generate_manifest(dir))
    manifest = manifest.encode('utf-8')
    digest.update(manifest)

    # The directory must be writable to create the file; it is made
    # read-only again as soon as the file has been opened.
    os.chmod(dir, 0o755)
    with open(mfile, 'wb') as stream:
        os.chmod(dir, 0o555)
        stream.write(manifest)
    os.chmod(mfile, 0o444)
    return digest
161
def splitID(id):
    """Split an ID of the form 'alg=value' into its algorithm object and digest value.
    @type id: str
    @rtype: (L{Algorithm}, str)
    @raise BadDigest: if the algorithm isn't known or the ID has the wrong format."""
    alg_name, digest_value = parse_algorithm_digest_pair(id)
    algorithm = get_algorithm(alg_name)
    return (algorithm, digest_value)
169
def copy_with_verify(src, dest, mode, alg, required_digest):
    """Copy path src to dest, checking that the contents give the right digest.
    dest must not exist. New file is created with a mode of 'mode & ~umask'.
    If the digest does not match, dest is removed again before raising.
    @param src: source filename
    @type src: str
    @param dest: target filename
    @type dest: str
    @param mode: target mode
    @type mode: int
    @param alg: algorithm to generate digest
    @type alg: L{Algorithm}
    @param required_digest: expected digest value (hex string)
    @type required_digest: str
    @raise BadDigest: the contents of the file don't match required_digest"""
    # O_BINARY only exists on Windows, where a descriptor opened without it
    # is in text mode and would translate newlines in the copied data.
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, 'O_BINARY', 0)
    with open(src, 'rb') as src_obj:
        dest_fd = os.open(dest, flags, mode)
        try:
            digest = alg.new_digest()
            while True:
                # os.write is unbuffered, so use a reasonably large chunk to
                # keep the number of system calls down.
                data = src_obj.read(65536)
                if not data: break
                digest.update(data)
                while data:
                    # os.write may write only part of the data; loop until done.
                    written = os.write(dest_fd, data)
                    data = data[written:]
        finally:
            os.close(dest_fd)
    actual = digest.hexdigest()
    if actual == required_digest: return
    os.unlink(dest)
    raise BadDigest(_("Copy failed: file '%(src)s' has wrong digest (may have been tampered with)\n"
             "Expected: %(required_digest)s\n"
             "Actual: %(actual_digest)s") % {'src': src, 'required_digest': required_digest, 'actual_digest': actual})
204
def verify(root, required_digest = None):
    """Ensure that directory 'root' generates the given digest.
    For a non-error return:
     - The required digest (by default, root's own name) must be in the form "alg=value"
     - The calculated digest of the contents must match it.
     - There must be a .manifest file, and its digest must also match.
    @type root: str
    @type required_digest: str | None
    @raise BadDigest: if verification fails. The exception's 'detail' attribute
    describes which of the three digests disagree."""
    if required_digest is None:
        required_digest = os.path.basename(root)
    alg = splitID(required_digest)[0]

    # Digest of the manifest regenerated from the actual directory contents.
    digest = alg.new_digest()
    lines = []
    for line in alg.generate_manifest(root):
        line += '\n'
        digest.update(line.encode('utf-8'))
        lines.append(line)
    actual_digest = alg.getID(digest)

    # Digest of the manifest recorded on disk, if there is one.
    manifest_file = os.path.join(root, '.manifest')
    if os.path.isfile(manifest_file):
        digest = alg.new_digest()
        with open(manifest_file, 'rb') as stream:
            digest.update(stream.read())
        manifest_digest = alg.getID(digest)
    else:
        manifest_digest = None

    if required_digest == actual_digest == manifest_digest:
        return

    error = BadDigest(_("Cached item does NOT verify."))

    error.detail = _(" Expected: %(required_digest)s\n"
             " Actual: %(actual_digest)s\n"
             ".manifest digest: %(manifest_digest)s\n\n") \
             % {'required_digest': required_digest, 'actual_digest': actual_digest, 'manifest_digest': manifest_digest or _('No .manifest file')}

    if manifest_digest is None:
        error.detail += _("No .manifest, so no further details available.")
    elif manifest_digest == actual_digest:
        error.detail += _("The .manifest file matches the actual contents. Very strange!")
    elif manifest_digest == required_digest:
        import difflib
        import io
        # The manifest is written as UTF-8, so decode it as UTF-8 rather than
        # with the locale's default encoding. Undecodable bytes are replaced:
        # this text is only diagnostic and must not mask the BadDigest.
        with io.open(manifest_file, 'rt', encoding = 'utf-8', errors = 'replace') as stream:
            diff = difflib.unified_diff(stream.readlines(), lines,
                            'Recorded', 'Actual')
            error.detail += _("The .manifest file matches the directory name.\n"
                    "The contents of the directory have changed:\n") + \
                    ''.join(diff)
    elif required_digest == actual_digest:
        error.detail += _("The directory contents are correct, but the .manifest file is wrong!")
    else:
        error.detail += _("The .manifest file matches neither of the other digests. Odd.")
    raise error
262 263 # XXX: Be more careful about the source tree changing under us. In particular, what happens if: 264 # - A regular file suddenly turns into a symlink? 265 # - We find a device file (users can hard-link them if on the same device)
def copy_tree_with_verify(source, target, manifest_data, required_digest):
    """Copy directory source to be a subdirectory of target if it matches the required_digest.
    manifest_data is normally source/.manifest. source and manifest_data are not trusted
    (will typically be under the control of another user).
    The copy is first done to a temporary directory in target, then renamed to the final name
    only if correct. Therefore, an invalid 'target/required_digest' will never exist.
    A successful return means that target/required_digest now exists (whether we created it or not).
    @type source: str
    @type target: str
    @param manifest_data: raw (undecoded) contents of the manifest; it is digested as-is
    and then decoded as UTF-8
    @type manifest_data: bytes
    @type required_digest: str
    @raise BadDigest: if manifest_data does not hash to required_digest.
    @raise SafeException: if the algorithm is the old 'sha1', if files are missing
    from source, or if the finished copy does not give the required digest."""
    import tempfile

    alg, digest_value = splitID(required_digest)

    if isinstance(alg, OldSHA1):
        raise SafeException(_("Sorry, the 'sha1' algorithm does not support copying."))

    digest = alg.new_digest()
    digest.update(manifest_data)
    manifest_digest = alg.getID(digest)

    if manifest_digest != required_digest:
        raise BadDigest(_("Manifest has been tampered with!\n"
                "Manifest digest: %(actual_digest)s\n"
                "Directory name : %(required_digest)s")
                % {'actual_digest': manifest_digest, 'required_digest': required_digest})

    target_impl = os.path.join(target, required_digest)
    if os.path.isdir(target_impl):
        logger.info(_("Target directory '%s' already exists"), target_impl)
        return

    # We've checked that the source's manifest matches required_digest, so it
    # is what we want. Make a list of all the files we need to copy...

    wanted = _parse_manifest(manifest_data.decode('utf-8'))

    tmpdir = tempfile.mkdtemp(prefix = 'tmp-copy-', dir = target)
    try:
        # _copy_files removes each entry it copies from 'wanted'.
        _copy_files(alg, wanted, source, tmpdir)

        if wanted:
            raise SafeException(_('Copy failed; files missing from source:') + '\n- ' +
                        '\n- '.join(wanted.keys()))

        # Make directories read-only (files are already RO)
        for root, dirs, files in os.walk(tmpdir):
            for d in dirs:
                path = os.path.join(root, d)
                # os.walk lists symlinks to directories among 'dirs'. stat and
                # chmod follow links, so they would act on the link's target
                # (possibly outside the copied tree) or fail on a dangling link.
                if os.path.islink(path):
                    continue
                mode = os.stat(path).st_mode
                os.chmod(path, mode & 0o555)

        # Check that the copy is correct
        actual_digest = alg.getID(add_manifest_file(tmpdir, alg))
        if actual_digest != required_digest:
            raise SafeException(_("Copy failed; double-check of target gave the wrong digest.\n"
                        "Unless the target was modified during the copy, this is a BUG\n"
                        "in 0store and should be reported.\n"
                        "Expected: %(required_digest)s\n"
                        "Actual: %(actual_digest)s") % {'required_digest': required_digest, 'actual_digest': actual_digest})
        try:
            os.chmod(tmpdir, 0o755)        # need write permission to rename on MacOS X
            os.rename(tmpdir, target_impl)
            os.chmod(target_impl, 0o555)
            # Renamed successfully: nothing left for the cleanup below to delete.
            tmpdir = None
        except OSError:
            if not os.path.isdir(target_impl):
                raise
            # else someone else installed it already - return success
    finally:
        if tmpdir is not None:
            logger.info(_("Deleting tmpdir '%s'"), tmpdir)
            from zeroinstall.support import ro_rmtree
            ro_rmtree(tmpdir)
341
def _parse_manifest(manifest_data):
    """Turn the text of a manifest into a lookup table of its entries.

    Parsing stops at the first empty line. "D" lines set the directory that
    the following file and symlink lines are relative to.
    @param manifest_data: the contents of the manifest file
    @type manifest_data: str
    @return: a mapping from paths to information about that path (the
    fields of the line, without the trailing name)
    @rtype: {str: tuple}
    @raise BadDigest: on a malformed line, a non-absolute directory or a duplicate path."""
    entries = {}
    current_dir = ''
    for entry in manifest_data.split('\n'):
        if not entry:
            break
        kind = entry[0]
        if kind == 'D':
            fields = entry.split(' ', 1)
            if len(fields) != 2:
                raise BadDigest(_("Bad line '%s'") % entry)
            path = fields[-1]
            if not path.startswith('/'):
                raise BadDigest(_("Not absolute: '%s'") % entry)
            # Drop the leading '/' so the key is relative to the tree root.
            path = path[1:]
            current_dir = path
        else:
            # Symlink lines have four fields; every other kind has five.
            expected = 4 if kind == 'S' else 5
            fields = entry.split(' ', expected - 1)
            path = os.path.join(current_dir, fields[-1])
            if len(fields) != expected:
                raise BadDigest(_("Bad line '%s'") % entry)
        if path in entries:
            raise BadDigest(_('Duplicate entry "%s"') % entry)
        entries[path] = fields[:-1]
    return entries
371
def _copy_files(alg, wanted, source, target):
    """Walk the manifest generated for 'source' and copy every wanted entry.

    Each entry that appears in wanted with the right details is recreated
    under 'target' (directories are created, files are copied with digest
    verification and given the mtime from the manifest, symlinks are
    re-created). Entries not in wanted are logged and skipped.
    On exit, wanted contains only the entries that were not found.
    @type alg: L{Algorithm}
    @type wanted: {str: tuple}
    @type source: str
    @type target: str
    @raise BadDigest: if an entry has a different type from the one in wanted.
    @raise SafeException: if a size or symlink target does not match, or the type is unknown."""
    current_dir = ''
    for entry in alg.generate_manifest(source):
        marker = entry[0]
        if marker == 'D':
            kind, name = entry.split(' ', 1)
            assert name.startswith('/')
            current_dir = name[1:]
            path = current_dir
        elif marker == 'S':
            kind, actual_digest, actual_size, name = entry.split(' ', 3)
            path = os.path.join(current_dir, name)
        else:
            assert marker in 'XF'
            kind, actual_digest, actual_mtime, actual_size, name = entry.split(' ', 4)
            path = os.path.join(current_dir, name)

        if path not in wanted:
            logger.warning(_("Skipping file not in manifest: '%s'"), path)
            continue
        required_details = wanted.pop(path)

        if required_details[0] != kind:
            raise BadDigest(_("Item '%s' has wrong type!") % path)

        if kind == 'D':
            os.mkdir(os.path.join(target, path))
            continue

        if kind in 'XF':
            required_type, required_digest, required_mtime, required_size = required_details
            if required_size != actual_size:
                raise SafeException(_("File '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                            "%(required_size)s according to manifest)") %
                            {'path': path, 'actual_size': actual_size, 'required_size': required_size})
            required_mtime = int(required_mtime)
            dest_path = os.path.join(target, path)
            # Executables keep their X bits; everything is created read-only.
            mode = 0o555 if kind == 'X' else 0o444
            copy_with_verify(os.path.join(source, path),
                     dest_path,
                     mode,
                     alg,
                     required_digest)
            os.utime(dest_path, (required_mtime, required_mtime))
            continue

        if kind != 'S':
            raise SafeException(_("Unknown manifest type %(type)s for '%(path)s'") % {'type': kind, 'path': path})

        required_type, required_digest, required_size = required_details
        if required_size != actual_size:
            raise SafeException(_("Symlink '%(path)s' has wrong size (%(actual_size)s bytes, but should be "
                        "%(required_size)s according to manifest)") %
                        {'path': path, 'actual_size': actual_size, 'required_size': required_size})
        # Re-read the link and check its target against the manifest digest
        # before re-creating it.
        symlink_target = os.readlink(os.path.join(source, path))
        symlink_digest = alg.new_digest()
        symlink_digest.update(symlink_target.encode('utf-8'))
        if symlink_digest.hexdigest() != required_digest:
            raise SafeException(_("Symlink '%(path)s' has wrong target (digest should be "
                        "%(digest)s according to manifest)") % {'path': path, 'digest': required_digest})
        dest_path = os.path.join(target, path)
        os.symlink(symlink_target, dest_path)
439
class HashLibAlgorithm(Algorithm):
    """An L{Algorithm} whose digests come from a constructor in the hashlib module.

    @ivar name: the algorithm name used as the prefix of generated IDs
    @ivar rating: how much we like this algorithm (higher is better)
    """
    new_digest = None        # Constructor for digest objects

    def __init__(self, name, rating, hash_name = None):
        """@param name: algorithm name, as used in IDs
        @type name: str
        @type rating: int
        @param hash_name: name of the hashlib constructor to use (defaults to 'name')
        @type hash_name: str | None"""
        self.name = name
        self.new_digest = getattr(hashlib, hash_name or name)
        self.rating = rating

    def generate_manifest(self, root):
        """Yield the manifest lines for the tree rooted at 'root'.

        Within each directory, the files and symlinks are listed first (in
        sorted order), followed by each subdirectory in turn. Directory lines
        ("D") carry only the Unix-style path; file ("F"/"X") and symlink ("S")
        lines carry the leaf name.
        @type root: str
        @raise BadDigest: if a path contains a newline.
        @raise SafeException: if an entry is not a file, directory or symlink."""
        def recurse(sub):
            # To ensure that a line-by-line comparison of the manifests
            # is possible, we require that filenames don't contain newlines.
            # Otherwise, you can name a file so that the part after the \n
            # would be interpreted as another line in the manifest.
            if '\n' in sub: raise BadDigest(_("Newline in filename '%s'") % sub)
            assert sub.startswith('/')

            full = os.path.join(root, sub[1:])
            info = os.lstat(full)
            new_digest = self.new_digest

            m = info.st_mode
            if not stat.S_ISDIR(m): raise Exception(_('Not a directory: "%s"') % full)
            if sub != '/':
                yield "D %s" % sub
            items = os.listdir(full)
            items.sort()
            dirs = []
            for leaf in items:
                path = os.path.join(root, sub[1:], leaf)
                info = os.lstat(path)
                m = info.st_mode

                if stat.S_ISREG(m):
                    # Note: this skips a regular file called .manifest in
                    # whichever directory is being listed.
                    if leaf == '.manifest': continue

                    # Hash in fixed-size chunks so that very large files are
                    # never held in memory all at once.
                    file_digest = new_digest()
                    with open(path, 'rb') as stream:
                        while True:
                            chunk = stream.read(65536)
                            if not chunk: break
                            file_digest.update(chunk)
                    d = file_digest.hexdigest()
                    if m & 0o111:
                        yield "X %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                    else:
                        yield "F %s %s %s %s" % (d, int(info.st_mtime), info.st_size, leaf)
                elif stat.S_ISLNK(m):
                    target = os.readlink(path).encode('utf-8')
                    d = new_digest(target).hexdigest()
                    # Note: Can't use utime on symlinks, so skip mtime
                    # Note: eCryptfs may report length as zero, so count ourselves instead
                    yield "S %s %s %s" % (d, len(target), leaf)
                elif stat.S_ISDIR(m):
                    # Subdirectories are emitted after all of this directory's
                    # own files and symlinks.
                    dirs.append(leaf)
                else:
                    raise SafeException(_("Unknown object '%s' (not a file, directory or symlink)") %
                            path)

            if not sub.endswith('/'):
                sub += '/'
            for x in dirs:
                # Note: "sub" is always Unix style. Don't use os.path.join here.
                for y in recurse(sub + x): yield y
            return

        for x in recurse('/'): yield x

    def getID(self, digest):
        """Format a digest as a full 'algorithm + digest' ID string.
        @rtype: str"""
        if self.name in ('sha1new', 'sha256'):
            digest_str = digest.hexdigest()
        else:
            # Base32-encode newer algorithms to make the digest shorter.
            # We can't use base64 as Windows is case insensitive.
            # There's no need for padding (and = characters in paths cause problems for some software).
            digest_str = base64.b32encode(digest.digest()).rstrip(b'=').decode('ascii')
        return format_algorithm_digest_pair(self.name, digest_str)

# Registry of the supported algorithms, keyed by the name used in digest IDs.
algorithms = {
    'sha1': OldSHA1(),
    'sha1new': HashLibAlgorithm('sha1new', 50, 'sha1'),
    'sha256': HashLibAlgorithm('sha256', 80),
    'sha256new': HashLibAlgorithm('sha256new', 90, 'sha256'),
}
def fixup_permissions(root):
    """Normalise the permissions of root and of every directory and file below it:
     - If any X bit is set, they all must be.
     - World readable, non-writable.
    Symlinks are left untouched.
    @type root: str
    @raise Exception: if there are unsafe special bits set (setuid, etc)."""

    for parent, subdirs, filenames in os.walk(root):
        # '.' stands for the directory currently being visited, so each
        # directory is fixed up along with the files it contains.
        for entry in ['.'] + filenames:
            target = os.path.join(parent, entry)

            st_mode = os.lstat(target).st_mode
            if stat.S_ISLNK(st_mode):
                continue

            perms = stat.S_IMODE(st_mode)
            if perms & ~0o777:
                raise Exception(_("Unsafe mode: extracted file '%(filename)s' had special bits set in mode '%(mode)s'") % {'filename': target, 'mode': oct(perms)})
            os.chmod(target, 0o555 if perms & 0o111 else 0o444)
547