##// END OF EJS Templates
store: write fncache only once if there are both adds and removes...
Pulkit Goyal -
r40779:df8ed31a default
parent child Browse files
Show More
@@ -1,627 +1,629 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 node,
19 19 policy,
20 20 pycompat,
21 21 util,
22 22 vfs as vfsmod,
23 23 )
24 24
25 25 parsers = policy.importmod(r'parsers')
26 26
27 27 def _matchtrackedpath(path, matcher):
28 28 """parses a fncache entry and returns whether the entry is tracking a path
29 29 matched by matcher or not.
30 30
31 31 If matcher is None, returns True"""
32 32
33 33 if matcher is None:
34 34 return True
35 35 path = decodedir(path)
36 36 if path.startswith('data/'):
37 37 return matcher(path[len('data/'):-len('.i')])
38 38 elif path.startswith('meta/'):
39 39 return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')] or '.')
40 40
41 41 raise error.ProgrammingError("cannot decode path %s" % path)
42 42
43 43 # This avoids a collision between a file named foo and a dir named
44 44 # foo.i or foo.d
45 45 def _encodedir(path):
46 46 '''
47 47 >>> _encodedir(b'data/foo.i')
48 48 'data/foo.i'
49 49 >>> _encodedir(b'data/foo.i/bla.i')
50 50 'data/foo.i.hg/bla.i'
51 51 >>> _encodedir(b'data/foo.i.hg/bla.i')
52 52 'data/foo.i.hg.hg/bla.i'
53 53 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
54 54 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
55 55 '''
56 56 return (path
57 57 .replace(".hg/", ".hg.hg/")
58 58 .replace(".i/", ".i.hg/")
59 59 .replace(".d/", ".d.hg/"))
60 60
# use the 'encodedir' attribute of the parsers module when it has one,
# the pure-Python _encodedir above otherwise
encodedir = getattr(parsers, 'encodedir', _encodedir)
62 62
def decodedir(path):
    '''undo the directory escaping applied by _encodedir

    A path that contains no '.hg/' is returned as is, without attempting
    any replacement.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # same order as the original chain: '.d', then '.i', then '.hg'
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
78 78
79 79 def _reserved():
80 80 ''' characters that are problematic for filesystems
81 81
82 82 * ascii escapes (0..31)
83 83 * ascii hi (126..255)
84 84 * windows specials
85 85
86 86 these characters will be escaped by encodefunctions
87 87 '''
88 88 winreserved = [ord(x) for x in u'\\:*?"<>|']
89 89 for x in range(32):
90 90 yield x
91 91 for x in range(126, 256):
92 92 yield x
93 93 for x in winreserved:
94 94 yield x
95 95
def _buildencodefun():
    '''build the (encode, decode) pair for the reversible name encoding

    Values 0..126 map to themselves unless overridden: every value
    yielded by _reserved() becomes '~' followed by two hex digits, and
    capitals as well as '_' become '_' followed by the lowercased
    character.  The decoder uses the inverted table and raises KeyError
    when no token of 1 to 3 characters matches at the current position.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    xchr = pycompat.bytechr

    # identity mapping first, overridden by the two loops below
    cmap = dict((c, c) for c in list(map(xchr, range(127))))
    for code in _reserved():
        cmap[xchr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[xchr(code)] = escape + xchr(code).lower()

    # reverse table: encoded token -> original character
    dmap = {}
    for plain, token in cmap.iteritems():
        dmap[token] = plain

    def encode(s):
        return ''.join([cmap[s[pos:pos + 1]]
                        for pos in pycompat.xrange(len(s))])

    def decodeiter(s):
        pos = 0
        while pos < len(s):
            # tokens are 1 ('a'), 2 ('_a') or 3 ('~3a') characters long
            for width in pycompat.xrange(1, 4):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def decode(s):
        return ''.join(list(decodeiter(s)))

    return encode, decode
149 149
# module-level encoder/decoder pair, built once at import time
_encodefname, _decodefname = _buildencodefun()
151 151
def encodefilename(s):
    '''apply encodedir(), then _encodefname(), to s

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
158 158
def decodefilename(s):
    '''reverse encodefilename(): _decodefname() first, then decodedir()

    KeyError raised by _decodefname() is not caught here.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    namedecoded = _decodefname(s)
    return decodedir(namedecoded)
165 165
def _buildlowerencodefun():
    '''build the non-reversible lowercasing encoder

    Same table as the reversible encoding, except that capitals are
    simply lowercased (no '_' prefix) and '_' maps to itself.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    xchr = pycompat.bytechr

    # identity mapping first, overridden by the two loops below
    cmap = {}
    for code in pycompat.xrange(127):
        cmap[xchr(code)] = xchr(code)
    for code in _reserved():
        cmap[xchr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        cmap[xchr(code)] = xchr(code).lower()

    def lowerencode(s):
        return "".join([cmap[c] for c in pycompat.iterbytestr(s)])
    return lowerencode
187 187
# use the 'lowerencode' attribute of the parsers module when it exists and
# is truthy, otherwise build the pure-Python encoder
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
189 189
190 190 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
191 191 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
192 192 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
193 193 def _auxencode(path, dotencode):
194 194 '''
195 195 Encodes filenames containing names reserved by Windows or which end in
196 196 period or space. Does not touch other single reserved characters c.
197 197 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
198 198 Additionally encodes space or period at the beginning, if dotencode is
199 199 True. Parameter path is assumed to be all lowercase.
200 200 A segment only needs encoding if a reserved name appears as a
201 201 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
202 202 doesn't need encoding.
203 203
204 204 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
205 205 >>> _auxencode(s.split(b'/'), True)
206 206 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
207 207 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
208 208 >>> _auxencode(s.split(b'/'), False)
209 209 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
210 210 >>> _auxencode([b'foo. '], True)
211 211 ['foo.~20']
212 212 >>> _auxencode([b' .foo'], True)
213 213 ['~20.foo']
214 214 '''
215 215 for i, n in enumerate(path):
216 216 if not n:
217 217 continue
218 218 if dotencode and n[0] in '. ':
219 219 n = "~%02x" % ord(n[0:1]) + n[1:]
220 220 path[i] = n
221 221 else:
222 222 l = n.find('.')
223 223 if l == -1:
224 224 l = len(n)
225 225 if ((l == 3 and n[:3] in _winres3) or
226 226 (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
227 227 and n[:3] in _winres4)):
228 228 # encode third letter ('aux' -> 'au~78')
229 229 ec = "~%02x" % ord(n[2:3])
230 230 n = n[0:2] + ec + n[3:]
231 231 path[i] = n
232 232 if n[-1] in '. ':
233 233 # encode last period or space ('foo...' -> 'foo..~2e')
234 234 path[i] = n[:-1] + "~%02x" % ord(n[-1:])
235 235 return path
236 236
# longest path produced by the default encoding before hashing kicks in
_maxstorepathlen = 120
# number of characters kept from each directory level in hashed paths
_dirprefixlen = 8
# upper bound on the combined length of the shortened directory levels
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''return the hashed ('dh/...') form of path

    The result is 'dh/', the shortened directory levels, a filler taken
    from the start of the basename, the hex sha1 digest of path and the
    extension of the basename.  Directory levels are truncated to
    _dirprefixlen characters and are taken only while their joined length
    stays within _maxshortdirslen.  The filler uses whatever room is left
    below _maxstorepathlen.
    '''
    digest = node.hex(hashlib.sha1(path).digest())
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    parts = _auxencode(lowered, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    used = 0
    for part in parts[:-1]:
        short = part[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        needed = len(short)
        if used != 0:
            # account for the '/' separating this level from the previous
            needed += used + 1
        if needed > _maxshortdirslen:
            break
        shortdirs.append(short)
        used = needed

    dirs = '/'.join(shortdirs)
    if len(dirs) > 0:
        dirs += '/'

    filler = ''
    spaceleft = _maxstorepathlen - len('dh/' + dirs + digest + ext)
    if spaceleft > 0:
        filler = basename[:spaceleft]
    return 'dh/' + dirs + filler + digest + ext
271 271
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    direncoded = encodedir(path)
    segments = _encodefname(direncoded).split('/')
    default = '/'.join(_auxencode(segments, dotencode))
    if len(default) <= _maxstorepathlen:
        return default
    # too long for the reversible form: hash the dir-encoded path instead
    return _hashencode(direncoded, dotencode)
309 309
def _pathencode(path):
    '''encode path with dotencode enabled, hashing it when too long

    The hashed form is used either when the raw path already exceeds
    _maxstorepathlen or when its default encoding does.
    '''
    direncoded = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direncoded).split('/')
        default = '/'.join(_auxencode(segments, True))
        if len(default) <= _maxstorepathlen:
            return default
    return _hashencode(direncoded, True)
319 319
# use the 'pathencode' attribute of the parsers module when it has one,
# the pure-Python _pathencode above otherwise
_pathencode = getattr(parsers, 'pathencode', _pathencode)
321 321
def _plainhybridencode(f):
    '''hybrid-encode f with dotencode turned off'''
    return _hybridencode(f, dotencode=False)
324 324
325 325 def _calcmode(vfs):
326 326 try:
327 327 # files in .hg/ will be created using this mode
328 328 mode = vfs.stat().st_mode
329 329 # avoid some useless chmods
330 330 if (0o777 & ~util.umask) == (0o777 & mode):
331 331 mode = None
332 332 except OSError:
333 333 mode = None
334 334 return mode
335 335
336 336 _data = ('narrowspec data meta 00manifest.d 00manifest.i'
337 337 ' 00changelog.d 00changelog.i phaseroots obsstore')
338 338
def isrevlog(f, kind, st):
    '''tell whether f is a regular file whose name ends in '.i' or '.d'

    st is accepted for signature compatibility with other file filters
    and is not looked at.
    '''
    if kind != stat.S_IFREG:
        return False
    return f[-2:] in ('.i', '.d')
341 341
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        mode = _calcmode(rawvfs)
        rawvfs.createmode = mode
        self.path = rawvfs.base
        self.createmode = mode
        self.rawvfs = rawvfs
        # paths given to self.vfs go through encodedir() first
        self.vfs = vfsmod.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the location of f inside the store, dir-encoded'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse, filefilter=isrevlog):
        '''return a sorted list of (unencoded, encoded, size)

        Only entries accepted by filefilter are reported.  Subdirectories
        of relpath are visited when recurse is true.  The list is empty
        when relpath is not a directory.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        # length of the store path plus the separator following it
        striplen = len(self.path) + 1
        found = []
        if self.rawvfs.isdir(root):
            readdir = self.rawvfs.readdir
            pending = [root]
            while pending:
                current = pending.pop()
                for name, kind, st in readdir(current, stat=True):
                    full = current + '/' + name
                    if filefilter(name, kind, st):
                        encoded = util.pconvert(full[striplen:])
                        found.append((decodedir(encoded), encoded,
                                      st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def datafiles(self, matcher=None):
        '''return the entries found under 'data' followed by those under
        'meta'; matcher is accepted but not used here'''
        data = self._walk('data', True)
        meta = self._walk('meta', True)
        return data + meta

    def topfiles(self):
        '''return the entries at the top of the store, in reverse order'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self, matcher=None):
        '''yields (unencoded, encoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        '''
        # yield data files first
        for entry in self.datafiles(matcher):
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        '''return 'requires' followed by the names listed in _data'''
        return ['requires'] + _data.split()

    def write(self, tr):
        '''nothing to write for this kind of store'''
        pass

    def invalidatecaches(self):
        '''nothing is cached by this kind of store'''
        pass

    def markremoved(self, fn):
        '''nothing to record for this kind of store'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # file?
        if self.vfs.exists(base + ".i"):
            return True
        # dir?
        if base.endswith("/"):
            dirpath = base
        else:
            dirpath = base + "/"
        return self.vfs.exists(dirpath)
419 419
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory, with names passed through
    encodefilename()'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        mode = _calcmode(rawvfs)
        rawvfs.createmode = mode
        self.path = rawvfs.base
        self.createmode = mode
        self.rawvfs = rawvfs
        self.vfs = vfsmod.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self, matcher=None):
        '''yield (unencoded, encoded, size) for data and meta files

        The unencoded name is None when the on-disk name cannot be
        decoded; such entries are always yielded.  Decodable entries are
        yielded only when they pass _matchtrackedpath().
        '''
        for unencoded, encoded, size in super(encodedstore, self).datafiles():
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                unencoded = None
            if unencoded is None or _matchtrackedpath(unencoded, matcher):
                yield unencoded, encoded, size

    def join(self, f):
        '''return the location of f inside the store, fully encoded'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return 'requires', '00changelog.i' and the names listed in
        _data prefixed with 'store/' '''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
446 446
class fncache(object):
    '''in-memory view of the 'fncache' file

    entries holds the names read from the file (None until loaded),
    addls the names added since then, and _dirty records that an entry
    was removed so the file has to be rewritten rather than appended to.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) gives an empty set.  An
        empty line in the file raises error.Abort naming the line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # rescan to report the first offending line number
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # close the file even when reading fails or we abort above
            fp.close()

    def write(self, tr):
        '''write pending changes to the fncache file

        When an entry was removed the whole file is rewritten once, with
        the pending additions folded in.  When there are only additions
        they are appended, and entries is reset to None afterwards.
        tr.addbackup('fncache') is called before either kind of write.
        '''
        # NOTE(review): fp.close() is deliberately not moved into a
        # finally clause here; presumably closing an atomictemp file is
        # what publishes it, so closing after a failed write could
        # install a partial file -- confirm against the vfs implementation.
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False
        elif self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='ab', atomictemp=True)
            fp.write(encodedir('\n'.join(self.addls) + '\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def add(self, fn):
        '''record fn as a new entry unless the loaded file already has it'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        '''forget fn

        Dropping a pending addition leaves the cache clean; dropping a
        loaded entry marks it dirty.  Unknown names are ignored.
        '''
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # pending additions are checked first, which avoids a load
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
523 525
class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    '''vfs proxy that encodes paths and registers files opened for
    writing under 'data/' or 'meta/' in the fncache'''
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        encoded = self.encode(path)
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith(('data/', 'meta/')):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            known = self.fncache.entries is None and self.vfs.exists(encoded)
            if known and 'a' in mode:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                if not self.vfs.stat(encoded).st_size:
                    known = False
            if not known:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # an empty path is passed through without being encoded
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
551 553
class fncachestore(basicstore):
    '''store whose data files are listed in a fncache file

    Names are encoded with _pathencode when dotencode is true and with
    _plainhybridencode otherwise.
    '''
    def __init__(self, path, vfstype, dotencode):
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + '/store')
        mode = _calcmode(rawvfs)
        rawvfs.createmode = mode
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        self.createmode = mode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the location of f inside the store, encoded'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size reported by the raw vfs for path'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self, matcher=None):
        '''yield (unencoded, encoded, size) for the fncache entries that
        pass _matchtrackedpath(), in sorted order

        Entries whose file is missing (ENOENT) are skipped; any other
        OSError is propagated.
        '''
        for name in sorted(self.fncache):
            if _matchtrackedpath(name, matcher):
                encoded = self.encode(name)
                try:
                    yield name, encoded, self.getsize(encoded)
                except OSError as err:
                    if err.errno != errno.ENOENT:
                        raise

    def copylist(self):
        '''return 'requires', '00changelog.i' and the store entries to
        copy, prefixed with 'store/' '''
        d = ('narrowspec data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in d.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self, tr):
        '''write out the fncache'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''drop the loaded fncache entries and the pending additions'''
        self.fncache.addls = set()
        self.fncache.entries = None

    def markremoved(self, fn):
        '''remove fn from the fncache'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the file backing f can be stat()ed

        ENOENT means False; any other OSError is propagated.
        '''
        ef = self.encode(f)
        try:
            self.getsize(ef)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # check for files (exact match)
        revlog = base + '.i'
        if revlog in self.fncache and self._exists(revlog):
            return True
        # now check for directories (prefix match)
        prefix = base if base.endswith('/') else base + '/'
        return any(e.startswith(prefix) and self._exists(e)
                   for e in self.fncache)
General Comments 0
You need to be logged in to leave comments. Login now