1 """Optimise the cache."""
2
3
4
5
6 from __future__ import print_function
7
8 from zeroinstall import _, logger
9 import os, sys
10
def _already_linked(a, b):
    """Report whether two paths refer to the same underlying file.

    Both paths are stat'ed and their (device, inode) pairs compared, so
    two hard links to one file compare equal.
    @type a: str
    @type b: str
    @rtype: bool"""
    stat_a, stat_b = os.stat(a), os.stat(b)
    if stat_a.st_dev != stat_b.st_dev:
        # Different devices can never share an inode.
        return False
    return stat_a.st_ino == stat_b.st_ino
18
def _byte_identical(a, b):
    """Compare the contents of two files byte-for-byte.

    Both files are opened in binary mode and read in lock-step; the
    comparison stops at the first chunk that differs.
    @type a: str
    @type b: str
    @rtype: bool"""
    # Read in large chunks: the result is the same as with small reads, but
    # far fewer Python-level loop iterations are needed for big files.
    chunk_size = 64 * 1024
    with open(a, 'rb') as af:
        with open(b, 'rb') as bf:
            while True:
                adata = af.read(chunk_size)
                bdata = bf.read(chunk_size)
                if adata != bdata:
                    # Covers both differing content and differing length.
                    return False
                if not adata:
                    # Both files hit end-of-file at the same point.
                    return True
32
def _link(a, b, tmpfile):
    """Replace 'b' with a hard link to 'a', leaving 'a' in place.

    A hard link to 'a' is first created at 'tmpfile' and then renamed
    over 'b'. The directory containing 'b' is given owner-write
    permission for the duration and its previous mode is restored
    afterwards, whether or not the replacement succeeded.
    @type a: str
    @type b: str
    @type tmpfile: str"""
    identical = _byte_identical(a, b)
    if not identical:
        # NOTE(review): this only warns; the replacement below still happens.
        logger.warning(_("Files should be identical, but they're not!\n%(file_a)s\n%(file_b)s"), {'file_a': a, 'file_b': b})

    parent = os.path.dirname(b)
    saved_mode = os.lstat(parent).st_mode
    # Add the owner-write bit so entries in the directory can be replaced.
    os.chmod(parent, saved_mode | 0o200)
    try:
        os.link(a, tmpfile)
        try:
            os.rename(tmpfile, b)
        except:
            # Don't leave the temporary link behind if the rename failed.
            os.unlink(tmpfile)
            raise
    finally:
        os.chmod(parent, saved_mode)
53
def optimise(impl_dir):
    """Scan an implementation cache directory for duplicate files, and
    hard-link any duplicates together to save space.

    Each sub-directory whose name parses as an algorithm/digest pair has
    its '.manifest' file read. Entries of type F and X are keyed on
    (type, digest, mtime, size); the first file seen for a key is kept and
    later files with the same key are replaced by hard links to it.
    Directories using the 'sha1' algorithm are skipped. Progress is printed
    to standard output.
    @param impl_dir: a $cache/0install.net/implementations directory
    @type impl_dir: str
    @return: (unique bytes, duplicated bytes, already linked, manifest size)
    @rtype: (int, int, int, int)
    @raise Exception: if no unused temporary file name could be found"""

    first_copy = {}
    dup_size = uniq_size = already_linked = man_size = 0

    import random
    from zeroinstall.zerostore import BadDigest, parse_algorithm_digest_pair

    # Pick a scratch name inside impl_dir, used when replacing duplicates.
    for attempt in range(10):
        tmpfile = os.path.join(impl_dir, 'optimise-%d' % random.randint(0, 1000000))
        if not os.path.exists(tmpfile):
            break
    else:
        raise Exception(_("Can't generate unused tempfile name!"))

    dirs = os.listdir(impl_dir)
    total = len(dirs)
    msg = ""
    def clear():
        # Blank out the previous progress message on the current line.
        print("\r" + (" " * len(msg)) + "\r", end='')
    for i, impl in enumerate(dirs):
        clear()
        msg = _("[%(done)d / %(total)d] Reading manifests...") % {'done': i, 'total': total}
        print(msg, end='')
        sys.stdout.flush()

        try:
            alg, manifest_digest = parse_algorithm_digest_pair(impl)
        except BadDigest:
            logger.warning(_("Skipping non-implementation '%s'"), impl)
            continue
        manifest_path = os.path.join(impl_dir, impl, '.manifest')
        try:
            ms = open(manifest_path, 'rt')
        except (IOError, OSError) as ex:
            # IOError is listed explicitly: on Python 2 open() raises it and
            # it is not a subclass of OSError there.
            # The dict key must match the %(manifest_path)s placeholder.
            logger.warning(_("Failed to read manifest file '%(manifest_path)s': %(exception)s"), {'manifest_path': manifest_path, 'exception': str(ex)})
            continue

        # The 'with' block closes the manifest on every exit path,
        # including errors raised while parsing or linking.
        with ms:
            if alg == 'sha1':
                continue

            man_size += os.path.getsize(manifest_path)

            current_dir = ""
            for line in ms:
                if line[0] == 'D':
                    itype, path = line.split(' ', 1)
                    assert path.startswith('/')
                    # Drop the leading '/' and the trailing newline.
                    current_dir = path[1:-1]
                    continue

                if line[0] == "S":
                    itype, digest, size, rest = line.split(' ', 3)
                    uniq_size += int(size)
                    continue

                assert line[0] in "FX"

                itype, digest, mtime, size, path = line.split(' ', 4)
                path = path[:-1]	# strip the trailing newline
                size = int(size)

                key = (itype, digest, mtime, size)
                loc_path = (impl, current_dir, path)

                first_loc = first_copy.get(key, None)
                if first_loc:
                    first_full = os.path.join(impl_dir, *first_loc)
                    new_full = os.path.join(impl_dir, *loc_path)
                    if _already_linked(first_full, new_full):
                        already_linked += size
                    else:
                        _link(first_full, new_full, tmpfile)
                        dup_size += size
                else:
                    first_copy[key] = loc_path
                    uniq_size += size
    clear()
    return (uniq_size, dup_size, already_linked, man_size)
142