##// END OF EJS Templates
store: fncache may contain non-existent entries (fixes b9a56b816ff2)
Adrian Buehlmann -
r17784:73e1ab39 default
parent child Browse files
Show More
@@ -1,545 +1,546 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import scmutil, util, parsers
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# use parsers.encodedir when the parsers module provides one, otherwise
# fall back to the pure Python _encodedir
try:
    encodedir = parsers.encodedir
except AttributeError:
    encodedir = _encodedir
33 33
def decodedir(path):
    '''undo the ".hg" escaping of directory names ending in .d, .i or .hg

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # every escaped form contains ".hg/", so anything else is returned as is
    if ".hg/" not in path:
        return path
    unescape = ((".d.hg/", ".d/"),
                (".i.hg/", ".i/"),
                (".hg.hg/", ".hg/"))
    for escaped, plain in unescape:
        path = path.replace(escaped, plain)
    return path
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
99 99 _encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''apply the directory escaping, then the per-character encoding

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
107 107
def decodefilename(s):
    '''undo the per-character encoding, then the directory escaping

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z")+1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
135 135 lowerencode = _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
184 184 _maxstorepathlen = 120
185 185 _dirprefixlen = 8
186 186 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
187 187
def _hashencode(path, dotencode):
    '''return the non-reversible hashed store name for path

    The result is 'dh/', followed by shortened directory names, an
    optional filler taken from the start of the basename, the sha digest of
    path, and the basename's extension. The first component of path is
    dropped before the directories are shortened.
    '''
    digest = _sha(path).hexdigest()
    segments = lowerencode(path).split('/')[1:]
    parts = _auxencode(segments, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)

    shortdirs = []
    used = 0
    for segment in parts[:-1]:
        prefix = segment[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        needed = len(prefix)
        if used:
            # account for the separator after the previous directories
            needed += used + 1
        if needed > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        used = needed

    dirpart = ''.join([d + '/' for d in shortdirs])
    # use whatever room is left for a readable prefix of the basename
    room = _maxstorepathlen - len('dh/' + dirpart + digest + ext)
    filler = ''
    if room > 0:
        filler = basename[:room]
    return 'dh/' + dirpart + filler + digest + ext
218 218
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    default = '/'.join(_auxencode(segments, dotencode))
    if len(default) <= _maxstorepathlen:
        return default
    # too long for the reversible form: fall back to the hashed one
    return _hashencode(path, dotencode)
256 256
def _pathencode(path):
    '''return the reversible dotencoded form of path, or None if too long

    None is returned when either path itself or its encoded form exceeds
    _maxstorepathlen.
    '''
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(encodedir(path)).split('/')
        encoded = '/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return None

# use parsers.pathencode instead when the parsers module provides one
try:
    _pathencode = parsers.pathencode
except AttributeError:
    pass
267 267
def _dothybridencode(f):
    '''hybrid-encode f with leading periods and spaces escaped'''
    encoded = _pathencode(f)
    if encoded is not None:
        return encoded
    # _pathencode gave up on the length: use the hashed form
    return _hashencode(encodedir(f), True)
273 273
def _plainhybridencode(f):
    '''hybrid-encode f without escaping leading periods or spaces'''
    return _hybridencode(f, dotencode=False)
276 276
277 277 def _calcmode(vfs):
278 278 try:
279 279 # files in .hg/ will be created using this mode
280 280 mode = vfs.stat().st_mode
281 281 # avoid some useless chmods
282 282 if (0777 & ~util.umask) == (0777 & mode):
283 283 mode = None
284 284 except OSError:
285 285 mode = None
286 286 return mode
287 287
288 288 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
289 289 ' phaseroots obsstore')
290 290
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        # names are passed through encodedir before reaching the raw vfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the path of f below the store directory'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in .d or .i below relpath are listed;
        subdirectories are only visited when recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if not self.rawvfs.isdir(root):
            return found
        readdir = self.rawvfs.readdir
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns (unencoded, encoded, size) for the files below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''return the names listed for copying this kind of store'''
        return ['requires'] + _data.split()

    def write(self):
        '''nothing to write for this kind of store'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "data/" + path
        # file?
        if os.path.exists(self.join(path + ".i")):
            return True
        # dir?
        if not path.endswith("/"):
            path += "/"
        return os.path.exists(self.join(path))
355 355
class encodedstore(basicstore):
    '''store below <path>/store whose names go through encodefilename'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for names that cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                # not a valid encoded name
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the path of f below the store directory'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return the names listed for copying this kind of store'''
        storefiles = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
380 380
class fncache(object):
    '''set of file names backed by the "fncache" file of a store vfs

    The file is read lazily, on the first add(), membership test or
    iteration. Changes are kept in memory until write() is called.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # set of names once loaded, None before the first access
        self.entries = None
        # True when entries holds names that were not written out yet
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set. Raises util.Abort if the file
        contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # reread line by line to report where the bad entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # also release the file when aborting on an invalid entry
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one name per line, to the fncache file'''
        fp = self.vfs('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file and the in-memory entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if they changed since load or last write'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
438 438
class _fncachevfs(scmutil.abstractvfs):
    '''vfs wrapper that encodes names and records data files in the fncache'''
    def __init__(self, vfs, fnc, encode):
        self.vfs = vfs
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped vfs
    def _getmustaudit(self):
        return self.vfs.mustaudit

    def _setmustaudit(self, onoff):
        self.vfs.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''open the encoded form of path through the wrapped vfs

        Paths below data/ opened in any mode other than 'r' or 'rb' are
        added to the fncache first.
        '''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        '''join through the wrapped vfs, encoding path unless it is empty'''
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
463 463
464 464 class fncachestore(basicstore):
465 465 def __init__(self, path, vfstype, dotencode):
466 466 if dotencode:
467 467 encode = _dothybridencode
468 468 else:
469 469 encode = _plainhybridencode
470 470 self.encode = encode
471 471 vfs = vfstype(path + '/store')
472 472 self.path = vfs.base
473 473 self.pathsep = self.path + '/'
474 474 self.createmode = _calcmode(vfs)
475 475 vfs.createmode = self.createmode
476 476 self.rawvfs = vfs
477 477 fnc = fncache(vfs)
478 478 self.fncache = fnc
479 479 self.vfs = _fncachevfs(vfs, fnc, encode)
480 480 self.opener = self.vfs
481 481
482 482 def join(self, f):
483 483 return self.pathsep + self.encode(f)
484 484
485 485 def getsize(self, path):
486 486 return self.rawvfs.stat(path).st_size
487 487
488 488 def datafiles(self):
489 489 rewrite = False
490 490 existing = []
491 491 for f in sorted(self.fncache):
492 492 ef = self.encode(f)
493 493 try:
494 494 yield f, ef, self.getsize(ef)
495 495 existing.append(f)
496 496 except OSError, err:
497 497 if err.errno != errno.ENOENT:
498 498 raise
499 499 # nonexistent entry
500 500 rewrite = True
501 501 if rewrite:
502 502 # rewrite fncache to remove nonexistent entries
503 503 # (may be caused by rollback / strip)
504 504 self.fncache.rewrite(existing)
505 505
506 506 def copylist(self):
507 507 d = ('data dh fncache phaseroots obsstore'
508 508 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
509 509 return (['requires', '00changelog.i'] +
510 510 ['store/' + f for f in d.split()])
511 511
512 512 def write(self):
513 513 self.fncache.write()
514 514
515 515 def _exists(self, f):
516 516 ef = self.encode(f)
517 517 try:
518 518 self.getsize(ef)
519 519 return True
520 520 except OSError, err:
521 521 if err.errno != errno.ENOENT:
522 522 raise
523 523 # nonexistent entry
524 524 return False
525 525
526 526 def __contains__(self, path):
527 527 '''Checks if the store contains path'''
528 528 path = "/".join(("data", path))
529 529 # check for files (exact match)
530 if path + '.i' in self.fncache:
530 e = path + '.i'
531 if e in self.fncache and self._exists(e):
531 532 return True
532 533 # now check for directories (prefix match)
533 534 if not path.endswith('/'):
534 535 path += '/'
535 536 for e in self.fncache:
536 if e.startswith(path):
537 if e.startswith(path) and self._exists(e):
537 538 return True
538 539 return False
539 540
def store(requirements, path, vfstype):
    '''return the store object matching the repository requirements'''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now