##// END OF EJS Templates
store: assert the fncache have been loaded if dirty...
Boris Feld -
r38718:89d93dd1 default
parent child Browse files
Show More
@@ -1,592 +1,593 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 node,
19 19 policy,
20 20 pycompat,
21 21 util,
22 22 vfs as vfsmod,
23 23 )
24 24
25 25 parsers = policy.importmod(r'parsers')
26 26
27 27 # This avoids a collision between a file named foo and a dir named
28 28 # foo.i or foo.d
def _encodedir(path):
    '''Escape directory names that could be mistaken for revlog files.

    Every directory component ending in ".hg", ".i" or ".d" gets an extra
    ".hg" suffix. ".hg/" is rewritten first, so the suffixes added by the
    two later replacements are not escaped a second time. All occurrences
    in ``path`` are rewritten, so ``path`` may contain several
    newline-separated paths (see the last example).

    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    '''
    return (path
            .replace(".hg/", ".hg.hg/")
            .replace(".i/", ".i.hg/")
            .replace(".d/", ".d.hg/"))
44 44
# Use the encodedir exposed by the parsers module when it provides one;
# otherwise fall back to the pure Python _encodedir.
encodedir = getattr(parsers, 'encodedir', _encodedir)
46 46
def decodedir(path):
    '''Undo the directory escaping performed by encodedir.

    Returns ``path`` unchanged when it contains no ".hg/" at all. Otherwise
    the three escaped suffixes are reverted, handling ".d.hg/" and ".i.hg/"
    before ".hg.hg/".

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # cheap early exit: nothing can be escaped without a ".hg/" component
    if ".hg/" not in path:
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))
62 62
def _reserved():
    ''' characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions

    Yields the integer code of each such character: first 0..31, then
    126..255, then the Windows special characters in the order listed below.
    '''
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x
79 79
def _buildencodefun():
    '''Build the reversible (encode, decode) pair for store file names.

    The encoder maps each single character through a table:

    * characters yielded by _reserved() become "~xx" (two hex digits)
    * "A".."Z" and "_" become "_" followed by the lowercased character
    * every other character below 127 is kept as is

    The decoder inverts that table by trying tokens of length 1, 2 and 3 at
    each position; it raises KeyError when no token matches.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord("A"), ord("Z") + 1))

    # start from the identity mapping, then override the special cases
    cmap = dict((x, x) for x in asciistr)
    for x in _reserved():
        cmap[xchr(x)] = "~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse table: encoded token -> original character
    dmap = {}
    for k, v in cmap.iteritems():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 ("a"), 2 ("_a") or 3 ("~3a") long
            for l in xrange(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no token of any length matched at this position
                raise KeyError

    return (lambda s: ''.join([cmap[s[c:c + 1]] for c in xrange(len(s))]),
            lambda s: ''.join(list(decode(s))))

_encodefname, _decodefname = _buildencodefun()
134 134
def encodefilename(s):
    '''Apply encodedir, then the character encoding, to a store path.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    return _encodefname(encodedir(s))
141 141
def decodefilename(s):
    '''Inverse of encodefilename: decode characters, then directories.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    return decodedir(_decodefname(s))
148 148
def _buildlowerencodefun():
    '''Build the lossy lowercasing encoder.

    Like the encoder from _buildencodefun, except that "A".."Z" are simply
    lowercased and "_" is left alone, so the result cannot be decoded.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    xchr = pycompat.bytechr
    cmap = dict([(xchr(x), xchr(x)) for x in xrange(127)])
    for x in _reserved():
        cmap[xchr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return "".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode

# use the parsers module's lowerencode when it has one, else build our own
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
172 172
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    path is a list of path segments; it is modified in place and also
    returned. Empty segments are left untouched.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    '''
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in '. ':
            # encode leading period or space ('.foo' -> '~2efoo')
            n = "~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # l is the length of the part before the first period
            l = n.find('.')
            if l == -1:
                l = len(n)
            if ((l == 3 and n[:3] in _winres3) or
                (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
                        and n[:3] in _winres4)):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + "~%02x" % ord(n[-1:])
    return path
219 219
# longest encoded store path allowed before the hashed encoding is used
_maxstorepathlen = 120
# number of characters kept from each directory level in a hashed path
_dirprefixlen = 8
# upper bound on the combined length of the shortened directory levels
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
223 223
def _hashencode(path, dotencode):
    '''Return the non-reversible hashed ("dh/") encoding of path.

    The result is built from 'dh/', shortened directory prefixes, as much of
    the basename as fits into _maxstorepathlen, the hex SHA-1 digest of the
    full ``path`` and the basename's extension. ``dotencode`` is passed
    through to _auxencode.
    '''
    digest = node.hex(hashlib.sha1(path).digest())
    le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in '. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        if sdirslen == 0:
            t = len(d)
        else:
            # account for the '/' joining this level to the previous ones
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = '/'.join(sdirs)
    if len(dirs) > 0:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with the start of the basename to keep the name recognizable
        filler = basename[:spaceleft]
        res = 'dh/' + dirs + filler + digest + ext
    return res
254 254
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    ef = _encodefname(path).split('/')
    res = '/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
292 292
def _pathencode(path):
    '''Encode path like _hybridencode(path, True), with an early shortcut.

    When the unencoded ``path`` is already longer than _maxstorepathlen the
    hashed encoding is used directly, skipping the default encoding.
    '''
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split('/')
    res = '/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res

# use the parsers module's pathencode when it has one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
304 304
def _plainhybridencode(f):
    '''Hybrid-encode f without encoding leading periods or spaces.'''
    return _hybridencode(f, False)
307 307
def _calcmode(vfs):
    '''Return the mode to create files with, or None for the default.

    The mode is taken from ``vfs.stat()``. None is returned when its
    permission bits already equal what the umask would produce, or when the
    stat call raises OSError.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
318 318
# space-separated store entries; split by the copylist() methods
_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')
321 321
def isrevlog(f, kind, st):
    '''Tell whether f is a regular file whose name ends in ".i" or ".d".

    ``kind`` is compared against stat.S_IFREG; ``st`` is accepted so the
    function fits the (name, kind, stat) filter signature but is not used.
    '''
    return kind == stat.S_IFREG and f[-2:] in ('.i', '.d')
324 324
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs accesses files by their on-disk name; vfs applies encodedir
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse, filefilter=isrevlog):
        '''returns a sorted list of (unencoded, encoded, size)

        Only entries accepted by filefilter are listed. Subdirectories of
        relpath are visited only when recurse is true.
        '''
        path = self.path
        if relpath:
            path += '/' + relpath
        # number of leading characters to drop to get a store-relative name
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + '/' + f
                    if filefilter(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def datafiles(self):
        '''list the entries found under "data" followed by those in "meta"'''
        return self._walk('data', True) + self._walk('meta', True)

    def topfiles(self):
        '''iterate over the top-level store entries, in reverse sort order'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        '''return 'requires' plus the entries named in _data'''
        return ['requires'] + _data.split()

    def write(self, tr):
        # nothing to write for this store type
        pass

    def invalidatecaches(self):
        # no caches in this store type
        pass

    def markremoved(self, fn):
        # removals are not tracked by this store type
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path = path + "/"
        return self.vfs.exists(path)
398 398
class encodedstore(basicstore):
    '''store kept in a "store" subdirectory, using encodefilename for names'''
    def __init__(self, path, vfstype):
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        The unencoded name is None when the on-disk name cannot be decoded.
        '''
        for a, b, size in super(encodedstore, self).datafiles():
            try:
                a = decodefilename(a)
            except KeyError:
                a = None
            yield a, b, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return the entries to copy, with store entries under "store/"'''
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in _data.split()])
423 423
class fncache(object):
    '''lazily loaded set of file names, backed by the "fncache" file

    ``entries`` stays None until the first add/remove/membership test or
    iteration triggers _load(). ``_dirty`` records whether ``entries`` has
    changes that write() still has to save.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) yields an empty set. Raises
        error.Abort, naming the first offending line, when the file
        contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # close the file even when reading fails or an invalid entry aborts
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            fp.close()

    def write(self, tr):
        '''save the entries through transaction tr, if they were modified'''
        if self._dirty:
            # a dirty cache whose entries were dropped would lose data
            assert self.entries is not None
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''add fn, marking the cache dirty only if it was not present'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''remove fn, marking the cache dirty only if it was present'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
484 485
class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    '''vfs proxy that encodes paths and records written files in a fncache'''
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        '''open path through the wrapped vfs, using its encoded name

        Paths under 'data/' or 'meta/' opened in any mode other than 'r' or
        'rb' are added to the fncache, unless the shortcut below applies.
        '''
        encoded = self.encode(path)
        if mode not in ('r', 'rb') and (path.startswith('data/') or
                                        path.startswith('meta/')):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and 'a' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        '''join path onto the wrapped vfs, encoding it unless it is empty'''
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)
512 513
class fncachestore(basicstore):
    '''store that lists its data files in a fncache

    Paths are encoded with _pathencode when dotencode is true and with
    _plainhybridencode otherwise.
    '''
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the already-encoded path'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry

        Entries whose file no longer exists (ENOENT) are skipped.
        '''
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                yield f, ef, self.getsize(ef)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        '''return the entries to copy, with store entries under "store/"'''
        d = ('data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        # drop the loaded entries; they are reloaded on next use
        self.fncache.entries = None

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the file for unencoded name f exists on disk'''
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        e = path + '.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
586 587
def store(requirements, path, vfstype):
    '''Create the store object matching the repository requirements.

    Returns a fncachestore when both 'store' and 'fncache' are required
    (with dotencode enabled if 'dotencode' is required too), an encodedstore
    when only 'store' is required, and a basicstore otherwise.
    '''
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, vfstype, 'dotencode' in requirements)
        return encodedstore(path, vfstype)
    return basicstore(path, vfstype)
General Comments 0
You need to be logged in to leave comments. Login now