Package zeroinstall :: Package zerostore :: Module optimise
[frames | no frames]

Source Code for Module zeroinstall.zerostore.optimise

  1  """Optimise the cache.""" 
  2   
  3  # Copyright (C) 2009, Thomas Leonard 
  4  # See the README file for details, or visit http://0install.net. 
  5   
  6  from __future__ import print_function 
  7   
  8  from zeroinstall import _, logger 
  9  import os, sys 
 10   
def _already_linked(a, b):
    """Report whether two paths refer to the same file on disk.

    Both paths are stat'ed (following symlinks) and are considered the same
    file when their device number and inode number both match.

    @param a: path of the first file
    @type a: str
    @param b: path of the second file
    @type b: str
    @return: True if both paths resolve to the same device and inode
    @rtype: bool"""
    info_a, info_b = os.stat(a), os.stat(b)
    same_device = info_a.st_dev == info_b.st_dev
    return same_device and info_a.st_ino == info_b.st_ino
18
def _byte_identical(a, b):
    """Compare the contents of two files byte for byte.

    Both files are opened in binary mode and read in lock-step, one chunk at
    a time, so neither file is ever held in memory in full. The comparison
    stops at the first chunk that differs.

    @param a: path of the first file
    @type a: str
    @param b: path of the second file
    @type b: str
    @return: True if both files have exactly the same contents
    @rtype: bool"""
    # 64 KiB per read keeps the Python-level loop short for large files while
    # still bounding memory use.
    chunk_size = 64 * 1024
    with open(a, 'rb') as af, open(b, 'rb') as bf:
        while True:
            adata = af.read(chunk_size)
            bdata = bf.read(chunk_size)
            if adata != bdata:
                # Covers both differing bytes and one file ending early.
                return False
            if not adata:
                # Both reads returned empty data: end of both files reached.
                return True
32 53
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Every entry of C{impl_dir} whose name parses as an algorithm/digest pair
    is treated as an implementation and its C{.manifest} file is read. Files
    whose manifest entries share the same type, digest, mtime and size as a
    file seen earlier are linked to that first copy. Implementations using
    the 'sha1' algorithm are skipped. A progress line is written to stdout.

    @param impl_dir: a $cache/0install.net/implementations directory
    @type impl_dir: str
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be generated"""

    first_copy = {}  # (type, digest, mtime, size) -> (impl, directory, name)
    dup_size = uniq_size = already_linked = man_size = 0

    import random
    from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair

    # Pick a name inside impl_dir that does not exist yet; it is handed to
    # _link as its temporary path. (The loop variable must not be called "_",
    # which is the translation function in this module.)
    for _attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    dirs = os.listdir(impl_dir)
    total = len(dirs)
    msg = ""

    def clear():
        # Overwrite the previous progress message with spaces and return the
        # cursor to the start of the line. Reads the current value of msg.
        print("\r" + (" " * len(msg)) + "\r", end='')

    for i, impl in enumerate(dirs):
        clear()
        msg = _("[%(done)d / %(total)d] Reading manifests...") % {'done': i, 'total': total}
        print(msg, end='')
        sys.stdout.flush()

        try:
            alg, manifest_digest = parse_algorithm_digest_pair(impl)
        except BadDigest:
            logger.warning(_("Skipping non-implementation '%s'"), impl)
            continue
        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            ms = open(manifest_path, 'rt')
        except (OSError, IOError) as ex:
            # IOError is what open() raises on Python 2; on Python 3 it is an
            # alias of OSError. The dict keys must match the placeholders in
            # the format string or the warning cannot be formatted.
            logger.warning(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"),
                           {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        # The with-block guarantees the manifest is closed on every exit path,
        # including "continue" and a failed assertion.
        with ms:
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            subdir = ""
            for line in ms:
                if line[0] == 'D':
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    subdir = path[1:-1]  # Strip slash and newline
                    continue

                if line[0] == "S":
                    # Symlink entries only contribute their size; they are
                    # never linked together.
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert line[0] in "FX"

                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]  # Strip newline
                size = int(size)

                key = (itype, digest, mtime, size)
                loc_path = (impl, subdir, path)

                first_loc = first_copy.get(key, None)
                if first_loc:
                    first_full = os.path.join(impl_dir, *first_loc)
                    new_full = os.path.join(impl_dir, *loc_path)
                    if _already_linked(first_full, new_full):
                        already_linked += size
                    else:
                        _link(first_full, new_full, tmpfile)
                        dup_size += size
                else:
                    first_copy[key] = loc_path
                    uniq_size += size

    clear()
    return (uniq_size, dup_size, already_linked, man_size)