##// END OF EJS Templates
store: eliminate reference cycle in fncachestore...
Simon Heimberg -
r9133:996c1cd8 default
parent child Browse files
Show More
@@ -1,332 +1,333 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''escape directory names under 'data/' that look like revlog files

    Paths that do not start with 'data/' are returned unchanged.
    Otherwise every directory component ending in '.hg', '.i' or '.d'
    gets an extra '.hg' suffix ('foo.i/' -> 'foo.i.hg/').
    '''
    if not path.startswith('data/'):
        return path
    # '.hg/' has to be handled first: the other two replacements produce
    # text ending in '.hg/' which must not be escaped a second time.
    for ending in ('.hg/', '.i/', '.d/'):
        path = path.replace(ending, ending[:-1] + '.hg/')
    return path
23 23
def decodedir(path):
    '''undo the directory escaping applied by encodedir

    Paths that do not start with 'data/' are returned unchanged.
    '''
    if not path.startswith('data/'):
        return path
    # Same replacements as the encoder, applied in the opposite order.
    unescape = (
        ('.d.hg/', '.d/'),
        ('.i.hg/', '.i/'),
        ('.hg.hg/', '.hg/'),
    )
    for escaped, plain in unescape:
        path = path.replace(escaped, plain)
    return path
31 31
def _buildencodefun():
    '''build the reversible filename encoder and its decoder

    Returns a pair (encode, decode).  The encoder maps control characters,
    characters from 126 upwards and Windows-reserved punctuation to '~xx'
    (two hex digits), and uppercase letters and '_' to '_' followed by the
    lowercase letter.  The decoder raises KeyError on input that is not a
    valid encoding.
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']

    # start with the identity mapping, then override the special cases
    forward = {}
    for code in range(127):
        forward[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        forward[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        forward[chr(code)] = escape + chr(code).lower()

    # the inverse table: encoded token (1 to 3 chars) -> original char
    backward = dict([(token, char) for char, token in forward.items()])

    def decode(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in backward:
                    yield backward[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encodename(s):
        return "".join([forward[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join(list(decode(s))))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
59 59
60 60 def _build_lower_encodefun():
61 61 win_reserved = [ord(x) for x in '\\:*?"<>|']
62 62 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
63 63 for x in (range(32) + range(126, 256) + win_reserved):
64 64 cmap[chr(x)] = "~%02x" % x
65 65 for x in range(ord("A"), ord("Z")+1):
66 66 cmap[chr(x)] = chr(x).lower()
67 67 return lambda s: "".join([cmap[c] for c in s])
68 68
69 69 lowerencode = _build_lower_encodefun()
70 70
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask path components that Windows cannot store

    For each non-empty '/'-separated component:
    - if the part before the first '.' is a reserved device name, the
      third character is replaced by its '~xx' code ('aux' -> 'au~78');
    - a trailing period or space is replaced by its '~xx' code
      ('foo...' -> 'foo..~2e').
    Empty components are dropped from the result.
    '''
    encoded = []
    for name in path.split('/'):
        if not name:
            continue
        stem = name.split('.')[0]
        if stem and stem in _windows_reserved_filenames:
            name = "%s~%02x%s" % (name[:2], ord(name[2]), name[3:])
        last = name[-1]
        if last in '. ':
            name = "%s~%02x" % (name[:-1], ord(last))
        encoded.append(name)
    return '/'.join(encoded)
88 88
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encodes path with a length limit

    Only paths that begin with 'data/' are encoded; anything else is
    returned unchanged.

    Default encoding (reversible):

    Uppercase letters 'X' become '_x' and reserved or illegal characters
    become '~xx', where xx is the two digit hex code of the character
    (see encodefilename).  Path components that are Windows reserved
    filenames are masked by encoding their third character
    ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    Used when the default-encoded path is longer than
    MAX_PATH_LEN_IN_HGSTORE.  The result is built from:
    - 'dh/' in place of the leading 'data/';
    - up to DIR_PREFIX_LEN characters of each directory level of the
      lowerencoded path, for as many levels as fit into
      _MAX_SHORTENED_DIRS_LEN;
    - a filler taken from the start of the lowerencoded basename (the
      part after the last path separator), as long as possible without
      exceeding MAX_PATH_LEN_IN_HGSTORE;
    - the sha digest of the full path;
    - the original extension (e.g. '.i' or '.d').
    '''
    if not path.startswith('data/'):
        return path

    # escape directories ending with .i and .d
    path = encodedir(path)
    tail = path[len('data/'):]

    plain = 'data/' + auxencode(encodefilename(tail))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    # too long for the reversible form: fall back to the hashed layout
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]

    kept = []
    for component in components[:-1]:
        short = component[:DIR_PREFIX_LEN]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        candidate = '/'.join(kept) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        kept.append(short)

    dirs = '/'.join(kept)
    if dirs:
        dirs += '/'

    head = 'dh/' + dirs
    trailer = digest + ext
    room = MAX_PATH_LEN_IN_HGSTORE - len(head) - len(trailer)
    if room > 0:
        return head + basename[:room] + trailer
    return head + trailer
154 154
def _calcmode(path):
    '''return the mode new files under path should be created with

    Returns the st_mode of path, or None when path cannot be stat'ed or
    when its permission bits already match what the umask would produce
    (so that no explicit chmod is needed).
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all nine rwx permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
165 165
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def diropener(f, *args, **kw):
            # only directory names are escaped in this store layout
            return rawopener(encodedir(f), *args, **kw)
        self.opener = diropener

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) tuples

        Lists the regular files ending in '.d' or '.i' found below
        relpath (relative to the store path); subdirectories are only
        visited when recurse is true.
        '''
        joinpath = self.pathjoiner
        root = joinpath(self.path, relpath)
        # number of leading characters to drop to get a store-relative name
        striplen = len(self.path) + len(os.sep)
        found = []
        if not os.path.isdir(root):
            return sorted(found)
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = joinpath(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    stored = util.pconvert(full[striplen:])
                    found.append((decodedir(stored), stored, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        return sorted(found)

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below 'data' '''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''return the names to copy, relative to the repository metadata dir'''
        names = ['requires']
        names.extend(_data.split())
        return names
213 213
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory with encodefilename-encoded names'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def encodingopener(f, *args, **kw):
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None for names that cannot be decoded.
        '''
        for unencoded, encoded, size in self._walk('data', True):
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy, relative to the repository metadata dir'''
        storefiles = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
237 237
class fncache(object):
    '''set-like view of the 'fncache' file

    The file holds one store file name per line.  It is read lazily, on
    the first add / membership test / iteration; self.entries stays None
    until then.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        '''opener is called as opener('fncache', mode) to access the file'''
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set.  Raises util.Abort on a
        malformed line; in that case self.entries is left untouched so
        that a half-read cache is never mistaken for a loaded one.
        '''
        entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = entries
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                entries.add(decodedir(line[:-1]))
        finally:
            # close the file even when aborting on a bad line
            fp.close()
        self.entries = entries

    def rewrite(self, files):
        '''replace the fncache file contents with the given file names'''
        # materialize once: files is iterated for writing and for the set
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file unless it is already listed'''
        if self.entries is None:
            self._load()
        if fn in self.entries:
            return
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(encodedir(fn) + '\n')
        finally:
            fp.close()
        # remember the entry so that later membership tests see it and
        # the same name is not appended to the file again
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
281 281
class fncachestore(basicstore):
    '''store using hybridencode'd names plus an fncache file listing them'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc

        # The closure below deliberately uses the locals op and fnc
        # instead of attributes of self: an opener that referenced self
        # while being stored on self would form a reference cycle.
        def fncacheopener(path, mode='r', *args, **kw):
            # record every data file opened for writing in the fncache
            if (mode not in ('r', 'rb')
                and path.startswith('data/')
                and path not in fnc):
                fnc.add(path)
            return op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry on disk

        Entries whose file no longer exists are skipped; if any were
        found, the fncache file is rewritten without them once the
        iteration has finished.
        '''
        rewrite = False
        existing = []
        pjoin = self.pathjoiner
        spath = self.path
        for f in self.fncache:
            ef = hybridencode(f)
            try:
                size = os.stat(pjoin(spath, ef)).st_size
            except OSError:
                # nonexistent entry
                rewrite = True
                continue
            yield f, ef, size
            existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the names to copy, relative to the repository metadata dir'''
        d = _data + ' dh fncache'
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in d.split()])
325 326
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, and anything else selects basicstore.
    pathjoiner defaults to os.path.join.
    '''
    pathjoiner = pathjoiner or os.path.join
    if 'store' not in requirements:
        storeclass = basicstore
    elif 'fncache' in requirements:
        storeclass = fncachestore
    else:
        storeclass = encodedstore
    return storeclass(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now