Package zeroinstall :: Package zerostore :: Module unpack
[frames | no frames]

Source Code for Module zeroinstall.zerostore.unpack

  1  """Unpacking archives of various formats.""" 
  2   
  3  # Copyright (C) 2009, Thomas Leonard 
  4  # See the README file for details, or visit http://0install.net. 
  5   
  6  from zeroinstall import _, logger 
  7  import os, subprocess 
  8  import shutil 
  9  import glob 
 10  import traceback 
 11  from tempfile import mkdtemp, mkstemp 
 12  import re 
 13  import errno 
 14  from zeroinstall import SafeException 
 15  from zeroinstall.support import find_in_path, ro_rmtree 
 16   
 17  _cpio_version = None 
18 -def _get_cpio_version():
19 global _cpio_version 20 if _cpio_version is None: 21 child = subprocess.Popen(['cpio', '--version'], stdout = subprocess.PIPE, 22 stderr = subprocess.STDOUT, universal_newlines = True) 23 out, unused = child.communicate() 24 child.stdout.close() 25 child.wait() 26 _cpio_version = out.split('\n', 1)[0] 27 logger.debug(_("cpio version = %s"), _cpio_version) 28 return _cpio_version
29
def _gnu_cpio():
    """Report whether the installed cpio identifies itself as GNU cpio.
    @rtype: bool"""
    is_gnu = '(GNU cpio)' in _get_cpio_version()
    logger.debug(_("Is GNU cpio = %s"), is_gnu)
    return is_gnu
_tar_version = None


def _get_tar_version():
    """Return the first line printed by `tar --version`.

    The command is run at most once; the result is cached in the
    module-level _tar_version.
    @rtype: str"""
    global _tar_version
    if _tar_version is None:
        child = subprocess.Popen(['tar', '--version'], stdout = subprocess.PIPE,
                                 stderr = subprocess.STDOUT, universal_newlines = True)
        # communicate() reads until EOF, closes the pipe and waits for the
        # child, so no separate stdout.close()/wait() is needed afterwards.
        out, unused = child.communicate()
        _tar_version = out.split('\n', 1)[0]
        logger.debug(_("tar version = %s"), _tar_version)
    return _tar_version
48
def _gnu_tar():
    """Report whether the installed tar identifies itself as GNU tar.
    @rtype: bool"""
    is_gnu = '(GNU tar)' in _get_tar_version()
    logger.debug(_("Is GNU tar = %s"), is_gnu)
    return is_gnu
54
def recent_gnu_tar():
    """Report whether tar is GNU tar with a version number greater than 1.13.92.

    Returns False when tar is not GNU tar, or when no version number can be
    parsed from its version line (a warning is logged in the latter case).
    @rtype: bool
    @deprecated: should be private"""
    is_recent = False
    if _gnu_tar():
        match = re.search(r'\)\s*(\d+(\.\d+)*)', _get_tar_version())
        if match is None:
            logger.warning(_("Failed to extract GNU tar version number"))
        else:
            # Compare component-wise as integers, e.g. [1, 26] > [1, 13, 92]
            components = [int(part) for part in match.group(1).split('.')]
            is_recent = components > [1, 13, 92]
    logger.debug(_("Recent GNU tar = %s"), is_recent)
    return is_recent
# Path of the pola-run sandboxing helper, or None when sandboxing is off.
# Disabled, as Plash does not currently support fchmod(2).
# While this is None, _exec_maybe_sandboxed() execs programs directly.
_pola_run = None
#_pola_run = find_in_path('pola-run')
#if _pola_run:
#    info('Found pola-run: %s', _pola_run)
#else:
#    info('pola-run not found; archive extraction will not be sandboxed')
def type_from_url(url):
    """Guess the MIME type of an archive from the (case-insensitive) suffix of its URL.
    Returns None if the suffix is not recognised.
    @type url: str
    @rtype: str | None"""
    known_suffixes = (
        ('.rpm', 'application/x-rpm'),
        ('.deb', 'application/x-deb'),
        ('.tar.bz2', 'application/x-bzip-compressed-tar'),
        ('.tar.gz', 'application/x-compressed-tar'),
        ('.tar.lzma', 'application/x-lzma-compressed-tar'),
        ('.tar.xz', 'application/x-xz-compressed-tar'),
        ('.tbz', 'application/x-bzip-compressed-tar'),
        ('.tgz', 'application/x-compressed-tar'),
        ('.tlz', 'application/x-lzma-compressed-tar'),
        ('.txz', 'application/x-xz-compressed-tar'),
        ('.tar', 'application/x-tar'),
        ('.zip', 'application/zip'),
        ('.cab', 'application/vnd.ms-cab-compressed'),
        ('.dmg', 'application/x-apple-diskimage'),
        ('.gem', 'application/x-ruby-gem'),
    )
    lowered = url.lower()
    for suffix, mime_type in known_suffixes:
        if lowered.endswith(suffix):
            return mime_type
    return None
98
def check_type_ok(mime_type):
    """Verify that the external tools needed to unpack an archive of this type are installed.
    @type mime_type: str
    @raise SafeException: if the needed software is not available, or the type is unsupported"""
    assert mime_type

    # Types that need no up-front check. For bzip2 we can fall back to
    # Python's built-in tar.bz2 support; lzma we can get through Zero Install.
    if mime_type in ('application/x-bzip-compressed-tar',
                     'application/x-lzma-compressed-tar',
                     'application/x-compressed-tar',
                     'application/x-tar',
                     'application/x-ruby-gem'):
        return

    if mime_type == 'application/x-rpm':
        if find_in_path('rpm2cpio'):
            return
        raise SafeException(_("This package looks like an RPM, but you don't have the rpm2cpio command "
                "I need to extract it. Install the 'rpm' package first (this works even if "
                "you're on a non-RPM-based distribution such as Debian)."))

    if mime_type == 'application/x-deb':
        if find_in_path('ar'):
            return
        raise SafeException(_("This package looks like a Debian package, but you don't have the 'ar' command "
                "I need to extract it. Install the package containing it (sometimes called 'binutils') "
                "first. This works even if you're on a non-Debian-based distribution such as Red Hat)."))

    if mime_type == 'application/zip':
        if find_in_path('unzip'):
            return
        raise SafeException(_("This package looks like a zip-compressed archive, but you don't have the 'unzip' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/vnd.ms-cab-compressed':
        if find_in_path('cabextract'):
            return
        raise SafeException(_("This package looks like a Microsoft Cabinet archive, but you don't have the 'cabextract' command "
                "I need to extract it. Install the package containing it first."))

    if mime_type == 'application/x-apple-diskimage':
        if find_in_path('hdiutil'):
            return
        raise SafeException(_("This package looks like a Apple Disk Image, but you don't have the 'hdiutil' command "
                "I need to extract it."))

    if mime_type == 'application/x-xz-compressed-tar':
        if find_in_path('unxz'):
            return
        raise SafeException(_("This package looks like a xz-compressed package, but you don't have the 'unxz' command "
                "I need to extract it. Install the package containing it (it's probably called 'xz-utils') "
                "first."))

    # Anything else is a type this version does not know how to unpack.
    from zeroinstall import version
    raise SafeException(_("Unsupported archive type '%(type)s' (for injector version %(version)s)") % {'type': mime_type, 'version': version})
140
def _exec_maybe_sandboxed(writable, prog, *args):
    """Replace the current process with 'prog', run under pola-run when it is configured.

    With a sandbox, only the 'writable' directory (if given) is made writable.
    If no sandbox is available (_pola_run is None), 'prog' is exec'd directly.
    @raise Exception: if 'prog' cannot be found in $PATH"""
    resolved = find_in_path(prog)
    if not resolved:
        raise Exception(_("'%s' not found in $PATH") % prog)

    if _pola_run is None:
        # No sandbox: exec the program as-is.
        os.execlp(resolved, resolved, *args)

    # We have pola-shell :-)
    sandbox_args = ['--prog', resolved, '-f', '/']
    for arg in args:
        sandbox_args.extend(('-a', arg))
    if writable:
        sandbox_args.extend(('-fw', writable))
    os.execl(_pola_run, _pola_run, *sandbox_args)
155
def unpack_archive_over(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Like unpack_archive, except that we unpack to a temporary directory first and
    then move things over, checking that we're not following symlinks at each stage.
    Use this when you want to unpack an archive into a directory which already has
    stuff in it.
    @type url: str
    @type data: file
    @type destdir: str
    @type extract: str | None
    @type type: str | None
    @type start_offset: int
    @raise SafeException: if a directory or file would be unpacked over a symlink,
    or a directory over a non-directory
    @note: Since 0.49, the leading "extract" component is removed (unlike unpack_archive).
    @since: 0.28"""
    import stat

    # Validate before creating the temporary directory, so that a bad
    # 'extract' value cannot leave a stray directory behind in destdir.
    assert extract is None or os.sep not in extract, extract

    tmpdir = mkdtemp(dir = destdir)
    try:
        mtimes = []

        unpack_archive(url, data, tmpdir, extract, type, start_offset)

        if extract is None:
            srcdir = tmpdir
        else:
            srcdir = os.path.join(tmpdir, extract)
            assert not os.path.islink(srcdir)

        stem_len = len(srcdir)
        for root, dirs, files in os.walk(srcdir):
            relative_root = root[stem_len + 1:] or '.'
            target_root = os.path.join(destdir, relative_root)
            try:
                info = os.lstat(target_root)
            except OSError as ex:
                if ex.errno != errno.ENOENT:
                    raise   # Some odd error.
                # Doesn't exist. OK.
                os.mkdir(target_root)
            else:
                if stat.S_ISLNK(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over symlink "%s"!') % relative_root)
                elif not stat.S_ISDIR(info.st_mode):
                    raise SafeException(_('Attempt to unpack dir over non-directory "%s"!') % relative_root)
            # 'root' as yielded by os.walk already starts with srcdir. Joining
            # it onto srcdir again would double the prefix whenever srcdir is
            # a relative path, so stat it directly.
            mtimes.append((relative_root, os.lstat(root).st_mtime))

            for s in dirs:  # Symlinks are counted as directories
                src = os.path.join(srcdir, relative_root, s)
                if os.path.islink(src):
                    # Move the link itself, like a file (os.walk does not descend into it).
                    files.append(s)

            for f in files:
                src = os.path.join(srcdir, relative_root, f)
                dest = os.path.join(destdir, relative_root, f)
                if os.path.islink(dest):
                    raise SafeException(_('Attempt to unpack file over symlink "%s"!') %
                                        os.path.join(relative_root, f))
                os.rename(src, dest)

        # Moving files in changed the directory mtimes; restore the unpacked
        # ones. The first entry is the top-level directory, which is skipped.
        for path, mtime in mtimes[1:]:
            os.utime(os.path.join(destdir, path), (mtime, mtime))
    finally:
        ro_rmtree(tmpdir)
218
def unpack_archive(url, data, destdir, extract = None, type = None, start_offset = 0):
    """Unpack stream 'data' into directory 'destdir'. If extract is given, extract just
    that sub-directory from the archive (i.e. destdir/extract will exist afterwards).
    The format is taken from 'type' if given, otherwise worked out from the name in 'url'.
    @type url: str
    @type data: file
    @type destdir: str
    @type extract: str | None
    @type type: str | None
    @type start_offset: int
    @raise SafeException: if the type is unknown or cannot be guessed from the URL"""
    if type is None:
        type = type_from_url(url)
        if type is None:
            raise SafeException(_("Unknown extension (and no MIME type given) in '%s'") % url)

    # tar-based formats: MIME type -> decompression scheme passed to extract_tar
    tar_compression = {
        'application/x-bzip-compressed-tar': 'bzip2',
        'application/x-tar': None,
        'application/x-lzma-compressed-tar': 'lzma',
        'application/x-xz-compressed-tar': 'xz',
        'application/x-compressed-tar': 'gzip',
    }
    # everything else: MIME type -> dedicated extractor
    extractors = {
        'application/x-deb': extract_deb,
        'application/x-rpm': extract_rpm,
        'application/zip': extract_zip,
        'application/vnd.ms-cab-compressed': extract_cab,
        'application/x-apple-diskimage': extract_dmg,
        'application/x-ruby-gem': extract_gem,
    }

    if type in tar_compression:
        extract_tar(data, destdir, extract, tar_compression[type], start_offset)
    elif type in extractors:
        extractors[type](data, destdir, extract, start_offset)
    else:
        raise SafeException(_('Unknown MIME type "%(type)s" for "%(url)s"') % {'type': type, 'url': url})
255
def extract_deb(stream, destdir, extract = None, start_offset = 0):
    """Unpack the data tarball of a Debian package into destdir.

    The package is copied to destdir/archive.deb, its data.tar* member is
    pulled out with 'ar', and that tarball is then unpacked with extract_tar.
    Both intermediate files are removed again.
    @type stream: file
    @type destdir: str
    @type start_offset: int
    @raise SafeException: if 'extract' is given, the file has no data.tar*
    member, or extraction fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Debs'))

    stream.seek(start_offset)
    # ar can't read from stdin, so make a copy...
    deb_copy_name = os.path.join(destdir, 'archive.deb')
    with open(deb_copy_name, 'wb') as deb_copy:
        shutil.copyfileobj(stream, deb_copy)

    # Recognised names of the data member -> compression scheme for extract_tar
    compression_by_member = {
        'data.tar': None,
        'data.tar.gz': 'gzip',
        'data.tar.bz2': 'bzip2',
        'data.tar.lzma': 'lzma',
        'data.tar.xz': 'xz',
    }

    try:
        p = subprocess.Popen(('ar', 't', 'archive.deb'), stdout=subprocess.PIPE, cwd=destdir, universal_newlines=True)
        o = p.communicate()[0]
        for line in o.split('\n'):
            if line in compression_by_member:
                data_tar = line
                data_compression = compression_by_member[line]
                break
        else:
            raise SafeException(_("File is not a Debian package."))

        _extract(stream, destdir, ('ar', 'x', 'archive.deb', data_tar))
    finally:
        # Remove our copy even when 'ar' fails or the file is not a deb, so a
        # failed extraction doesn't leave archive.deb behind in destdir.
        os.unlink(deb_copy_name)

    data_name = os.path.join(destdir, data_tar)
    with open(data_name, 'rb') as data_stream:
        # Unlink right away so the tarball isn't left among the unpacked
        # files; the contents are still read through the open handle.
        os.unlink(data_name)
        extract_tar(data_stream, destdir, None, data_compression)
296
def extract_rpm(stream, destdir, extract = None, start_offset = 0):
    """Unpack an RPM into destdir by converting it to a cpio archive with
    rpm2cpio (written to a temporary file) and unpacking that with cpio.
    @type stream: file
    @type destdir: str
    @type start_offset: int
    @raise SafeException: if 'extract' is given, or rpm2cpio or cpio fails"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for RPMs'))
    # Temporary file that receives rpm2cpio's output
    fd, cpiopath = mkstemp('-rpm-tmp')
    try:
        child = os.fork()
        if child == 0:
            # In the child: wire the RPM stream (at start_offset) to stdin and
            # the temporary file to stdout, then exec rpm2cpio.
            try:
                try:
                    os.dup2(stream.fileno(), 0)
                    os.lseek(0, start_offset, 0)
                    os.dup2(fd, 1)
                    _exec_maybe_sandboxed(None, 'rpm2cpio', '-')
                except:
                    traceback.print_exc()
            finally:
                # Only reached if the exec failed; never return into the
                # parent's code from the child.
                os._exit(1)
        id, status = os.waitpid(child, 0)
        assert id == child
        # NOTE(review): 'status' is the raw os.waitpid() status value, which is
        # what the message below reports as the "exit code".
        if status != 0:
            raise SafeException(_("rpm2cpio failed; can't unpack RPM archive; exit code %d") % status)
        os.close(fd)
        fd = None   # So the 'finally' clause doesn't close it a second time

        args = ['cpio', '-mid']
        if _gnu_cpio():
            args.append('--quiet')

        with open(cpiopath, 'rb') as cpio_stream:
            _extract(cpio_stream, destdir, args)
            # Set the mtime of every directory under 'tmp' to 0, since cpio doesn't
            # preserve directory mtimes.
            for root, dirs, files in os.walk(destdir):
                os.utime(root, (0, 0))
    finally:
        if fd is not None:
            os.close(fd)
        os.unlink(cpiopath)
335
def extract_gem(stream, destdir, extract = None, start_offset = 0):
    """Unpack a Ruby gem: an uncompressed tar archive whose 'data.tar.gz'
    member holds the actual files.

    The payload is first extracted to a temporary directory inside destdir,
    then unpacked into destdir; the temporary directory is always removed.
    @type stream: file
    @type destdir: str
    @type extract: str | None
    @type start_offset: int
    @since: 0.53"""
    payload = 'data.tar.gz'
    tmpdir = mkdtemp(dir = destdir)
    try:
        # Pass start_offset through: extract_tar seeks the stream itself, so
        # seeking here and relying on its default of 0 would undo the offset.
        extract_tar(stream, destdir=tmpdir, extract=payload, decompress=None, start_offset=start_offset)
        with open(os.path.join(tmpdir, payload), 'rb') as payload_stream:
            extract_tar(payload_stream, destdir=destdir, extract=extract, decompress='gzip')
    finally:
        ro_rmtree(tmpdir)
353
def extract_cab(stream, destdir, extract, start_offset = 0):
    """Unpack a Microsoft Cabinet archive into destdir using cabextract.

    The archive is copied to destdir/archive.cab for cabextract to read, and
    the copy is removed again afterwards.
    @type stream: file
    @type destdir: str
    @type start_offset: int
    @raise SafeException: if 'extract' is given or cabextract fails
    @since: 0.24"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for Cabinet files'))

    stream.seek(start_offset)
    # cabextract can't read from stdin, so make a copy...
    cab_copy_name = os.path.join(destdir, 'archive.cab')
    with open(cab_copy_name, 'wb') as cab_copy:
        shutil.copyfileobj(stream, cab_copy)

    try:
        _extract(stream, destdir, ['cabextract', '-s', '-q', 'archive.cab'])
    finally:
        # Don't leave our copy among the unpacked files, even on failure.
        os.unlink(cab_copy_name)
368
def extract_dmg(stream, destdir, extract, start_offset = 0):
    """Unpack an Apple disk image into destdir.

    The image is copied to destdir/archive.dmg, attached with hdiutil at a
    temporary mountpoint, its top-level entries are copied with 'cp -pR', and
    the image is detached again. The mountpoint and the copy are cleaned up
    even if one of the steps fails.
    @type stream: file
    @type destdir: str
    @type start_offset: int
    @raise SafeException: if 'extract' is given
    @raise subprocess.CalledProcessError: if hdiutil or cp fails
    @since: 0.46"""
    if extract:
        raise SafeException(_('Sorry, but the "extract" attribute is not yet supported for DMGs'))

    stream.seek(start_offset)
    # hdiutil can't read from stdin, so make a copy...
    dmg_copy_name = os.path.join(destdir, 'archive.dmg')
    with open(dmg_copy_name, 'wb') as dmg_copy:
        shutil.copyfileobj(stream, dmg_copy)

    try:
        mountpoint = mkdtemp(prefix='archive')
        try:
            subprocess.check_call(["hdiutil", "attach", "-quiet", "-mountpoint", mountpoint, "-nobrowse", dmg_copy_name])
            try:
                subprocess.check_call(["cp", "-pR"] + glob.glob("%s/*" % mountpoint) + [destdir])
            finally:
                # Always detach, so a failed copy doesn't leave the image mounted.
                subprocess.check_call(["hdiutil", "detach", "-quiet", mountpoint])
        finally:
            os.rmdir(mountpoint)
    finally:
        os.unlink(dmg_copy_name)
387
def extract_zip(stream, destdir, extract, start_offset = 0):
    """Unpack a zip archive into destdir using unzip.

    The archive is copied to destdir/archive.zip for unzip to read, and the
    copy is removed again afterwards. If 'extract' is given, only entries
    under that top-level directory are unpacked.
    @type stream: file
    @type destdir: str
    @type extract: str
    @type start_offset: int
    @raise SafeException: if 'extract' contains illegal characters or unzip fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to zip
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    stream.seek(start_offset)
    # unzip can't read from stdin, so make a copy...
    zip_copy_name = os.path.join(destdir, 'archive.zip')
    with open(zip_copy_name, 'wb') as zip_copy:
        shutil.copyfileobj(stream, zip_copy)

    args = ['unzip', '-q', '-o', 'archive.zip']

    if extract:
        args.append(extract + '/*')

    try:
        _extract(stream, destdir, args)
    finally:
        # Don't leave our copy among the unpacked files, even on failure.
        os.unlink(zip_copy_name)
412
def _find_decompressor(prog):
    """Return the path of 'prog' from $PATH, falling back to the '_<prog>' file next to this module.
    @type prog: str
    @rtype: str"""
    path = find_in_path(prog)
    if not path:
        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '_' + prog))
    return path


def _extract_tar_gnu(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar stream by piping it through the GNU tar command."""
    ext_cmd = ['tar']
    if decompress == 'bzip2':
        ext_cmd.append('--bzip2')
    elif decompress == 'gzip':
        ext_cmd.append('-z')
    elif decompress == 'lzma':
        ext_cmd.append('--use-compress-program=' + _find_decompressor('unlzma'))
    elif decompress == 'xz':
        ext_cmd.append('--use-compress-program=' + _find_decompressor('unxz'))

    if recent_gnu_tar():
        ext_cmd.extend(('-x', '--no-same-owner', '--no-same-permissions'))
    else:
        ext_cmd.extend(('xf', '-'))

    if extract:
        ext_cmd.append(extract)

    _extract(stream, destdir, ext_cmd, start_offset)


def _extract_tar_python(stream, destdir, extract, decompress, start_offset):
    """Unpack a tar stream with Python's tarfile module (used when GNU tar is unavailable).

    This will probably be a lot slower than GNU tar; however, it is portable.
    tarfile is not used for lzma and xz here: those are handled by first
    uncompressing the stream to a temporary file, which is simple to do but
    less efficient than piping through the program."""
    import tarfile
    import tempfile

    temp = None
    try:
        if decompress is None:
            rmode = 'r|'
        elif decompress == 'bzip2':
            rmode = 'r|bz2'
        elif decompress == 'gzip':
            rmode = 'r|gz'
        elif decompress in ('lzma', 'xz'):
            decompressor = _find_decompressor('unlzma' if decompress == 'lzma' else 'unxz')
            temp = tempfile.NamedTemporaryFile(suffix='.tar', mode='w+b')
            # start_offset refers to the compressed input, so apply it before
            # decompressing; the decompressed copy is then read from its start.
            stream.seek(start_offset)
            subprocess.check_call(decompressor, stdin=stream, stdout=temp)
            rmode = 'r|'
            stream = temp
            start_offset = 0
        else:
            raise SafeException(_('GNU tar unavailable; unsupported compression format: %s') % decompress)

        stream.seek(start_offset)
        # Python 2.5.1 crashes if name is None; see Python bug #1706850
        tar = tarfile.open(name = '', mode = rmode, fileobj = stream)
        try:
            # Read the umask (the only way is to set it and put it back)
            current_umask = os.umask(0)
            os.umask(current_umask)

            uid = gid = None
            try:
                uid = os.geteuid()
                gid = os.getegid()
            except (AttributeError, OSError):
                # e.g. platforms without geteuid()/getegid()
                logger.debug(_("Can't get uid/gid"))

            def chmod_extract(tarinfo):
                # If any X bit is set, they all must be
                if tarinfo.mode & 0o111:
                    tarinfo.mode |= 0o111

                # Everyone gets read and write (subject to the umask)
                # No special bits are allowed.
                tarinfo.mode = ((tarinfo.mode | 0o666) & ~current_umask) & 0o777

                # Don't change owner, even if run as root. Note that root's
                # uid/gid is 0, so these must be tests against None rather
                # than truth tests. tarfile prefers the symbolic owner names
                # over the numeric ids when chown-ing, so blank those too.
                if uid is not None:
                    tarinfo.uid = uid
                    tarinfo.uname = ''
                if gid is not None:
                    tarinfo.gid = gid
                    tarinfo.gname = ''
                tar.extract(tarinfo, destdir)

            extracted_anything = False
            ext_dirs = []

            for tarinfo in tar:
                if not extract or \
                   tarinfo.name.startswith(extract + '/') or \
                   tarinfo.name == extract:
                    # The archive is untrusted: refuse absolute member names
                    # and names that climb out of destdir through '..'.
                    # NOTE(review): link members whose *target* points outside
                    # destdir are not checked here.
                    member = os.path.normpath(tarinfo.name)
                    if os.path.isabs(member) or member == os.pardir or \
                       member.startswith(os.pardir + os.sep):
                        raise SafeException(_('Archive member "%s" would be extracted outside the target directory') % tarinfo.name)

                    if tarinfo.isdir():
                        ext_dirs.append(tarinfo)

                    chmod_extract(tarinfo)
                    extracted_anything = True

            # Due to a bug in tarfile (python versions < 2.5), we have to manually
            # set the mtime of each directory that we extract after extracting everything.
            for tarinfo in ext_dirs:
                dirname = os.path.join(destdir, tarinfo.name)
                os.utime(dirname, (tarinfo.mtime, tarinfo.mtime))
        finally:
            tar.close()
    finally:
        if temp is not None:
            temp.close()

    if extract and not extracted_anything:
        raise SafeException(_('Unable to find specified file = %s in archive') % extract)


def extract_tar(stream, destdir, extract, decompress, start_offset = 0):
    """Unpack a (possibly compressed) tar archive from 'stream' into destdir.

    GNU tar is used when available; otherwise Python's tarfile module is used.
    @param extract: if set, unpack only this top-level entry of the archive
    @param decompress: one of None, 'bzip2', 'gzip', 'lzma' or 'xz'
    @type stream: file
    @type destdir: str
    @type extract: str
    @type decompress: str
    @type start_offset: int
    @raise SafeException: if 'extract' contains illegal characters, is not found
    in the archive, or the extraction fails"""
    if extract:
        # Limit the characters we accept, to avoid sending dodgy
        # strings to tar
        if not re.match('^[a-zA-Z0-9][- _a-zA-Z0-9.]*$', extract):
            raise SafeException(_('Illegal character in extract attribute'))

    assert decompress in [None, 'bzip2', 'gzip', 'lzma', 'xz']

    if _gnu_tar():
        _extract_tar_gnu(stream, destdir, extract, decompress, start_offset)
    else:
        _extract_tar_python(stream, destdir, extract, decompress, start_offset)
def _extract(stream, destdir, command, start_offset = 0):
    """Run 'command' inside destdir in a child process, with
    stream seeked to 'start_offset' as stdin.
    @type stream: file
    @type destdir: str
    @type command: [str]
    @type start_offset: int
    @raise SafeException: if the command exits with a non-zero status; the
    message includes whatever the command wrote to stderr"""

    # Some zip archives are missing timezone information; force consistent results
    child_env = os.environ.copy()
    child_env['TZ'] = 'GMT'

    stream.seek(start_offset)

    # TODO: use pola-run if available, once it supports fchmod
    child = subprocess.Popen(command, cwd = destdir, stdin = stream, stderr = subprocess.PIPE, env = child_env)

    # communicate() also waits for the child, so returncode is set afterwards
    unused, cerr = child.communicate()

    status = child.returncode
    if status != 0:
        if not isinstance(cerr, str):
            # stderr arrives as bytes on Python 3; decode it so the message
            # shows the text rather than a b'...' repr.
            cerr = cerr.decode('utf-8', 'replace')
        raise SafeException(_('Failed to extract archive (using %(command)s); exit code %(status)d:\n%(err)s') % {'command': command, 'status': status, 'err': cerr.strip()})
565