store: don't read the whole fncache in memory...
Pulkit Goyal
r42144:a5648708 default
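The patch replaces the single fp.read() of the whole fncache with a bounded, chunked read (see the new fncache._load in the hunk below). A minimal standalone sketch of the same pattern, assuming an illustrative function name that is not part of Mercurial's API:

    import functools

    FNCACHE_CHUNKSIZE = 10 ** 6  # mirrors fncache_chunksize in the patch

    def read_fncache_entries(fp, chunksize=FNCACHE_CHUNKSIZE):
        """Collect newline-terminated entries without reading the whole file.

        Bytes after the last newline of a chunk are carried over to the next
        iteration, so an entry is never split across a chunk boundary. The
        file is assumed to end with a newline, as Mercurial writes it.
        """
        entries = set()
        chunk = b''
        # iter(callable, sentinel) keeps calling fp.read(chunksize) until it
        # returns b'', i.e. end of file
        for piece in iter(functools.partial(fp.read, chunksize), b''):
            chunk += piece
            try:
                p = chunk.rindex(b'\n')
                entries.update(chunk[:p + 1].splitlines())
                chunk = chunk[p + 1:]
            except ValueError:
                # no newline in the buffer yet: the current entry is longer
                # than the chunk size, so keep accumulating
                pass
        return entries

The carry-over buffer is what keeps memory usage bounded by roughly one chunk plus the longest single entry, instead of the size of the whole fncache file.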
@@ -1,633 +1,650 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 import functools
11 12 import hashlib
12 13 import os
13 14 import stat
14 15
15 16 from .i18n import _
16 17 from . import (
17 18 error,
18 19 node,
19 20 policy,
20 21 pycompat,
21 22 util,
22 23 vfs as vfsmod,
23 24 )
24 25
25 26 parsers = policy.importmod(r'parsers')
27 # how many bytes should be read from fncache in one read
28 # It is done to prevent loading large fncache files into memory
29 fncache_chunksize = 10 ** 6
26 30
27 31 def _matchtrackedpath(path, matcher):
28 32 """parses a fncache entry and returns whether the entry is tracking a path
29 33 matched by matcher or not.
30 34
31 35 If matcher is None, returns True"""
32 36
33 37 if matcher is None:
34 38 return True
35 39 path = decodedir(path)
36 40 if path.startswith('data/'):
37 41 return matcher(path[len('data/'):-len('.i')])
38 42 elif path.startswith('meta/'):
39 43 return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')] or '.')
40 44
41 45 raise error.ProgrammingError("cannot decode path %s" % path)
42 46
43 47 # This avoids a collision between a file named foo and a dir named
44 48 # foo.i or foo.d
45 49 def _encodedir(path):
46 50 '''
47 51 >>> _encodedir(b'data/foo.i')
48 52 'data/foo.i'
49 53 >>> _encodedir(b'data/foo.i/bla.i')
50 54 'data/foo.i.hg/bla.i'
51 55 >>> _encodedir(b'data/foo.i.hg/bla.i')
52 56 'data/foo.i.hg.hg/bla.i'
53 57 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
54 58 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
55 59 '''
56 60 return (path
57 61 .replace(".hg/", ".hg.hg/")
58 62 .replace(".i/", ".i.hg/")
59 63 .replace(".d/", ".d.hg/"))
60 64
61 65 encodedir = getattr(parsers, 'encodedir', _encodedir)
62 66
63 67 def decodedir(path):
64 68 '''
65 69 >>> decodedir(b'data/foo.i')
66 70 'data/foo.i'
67 71 >>> decodedir(b'data/foo.i.hg/bla.i')
68 72 'data/foo.i/bla.i'
69 73 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
70 74 'data/foo.i.hg/bla.i'
71 75 '''
72 76 if ".hg/" not in path:
73 77 return path
74 78 return (path
75 79 .replace(".d.hg/", ".d/")
76 80 .replace(".i.hg/", ".i/")
77 81 .replace(".hg.hg/", ".hg/"))
78 82
79 83 def _reserved():
80 84 ''' characters that are problematic for filesystems
81 85
82 86 * ascii escapes (0..31)
83 87 * ascii hi (126..255)
84 88 * windows specials
85 89
86 90 these characters will be escaped by encodefunctions
87 91 '''
88 92 winreserved = [ord(x) for x in u'\\:*?"<>|']
89 93 for x in range(32):
90 94 yield x
91 95 for x in range(126, 256):
92 96 yield x
93 97 for x in winreserved:
94 98 yield x
95 99
96 100 def _buildencodefun():
97 101 '''
98 102 >>> enc, dec = _buildencodefun()
99 103
100 104 >>> enc(b'nothing/special.txt')
101 105 'nothing/special.txt'
102 106 >>> dec(b'nothing/special.txt')
103 107 'nothing/special.txt'
104 108
105 109 >>> enc(b'HELLO')
106 110 '_h_e_l_l_o'
107 111 >>> dec(b'_h_e_l_l_o')
108 112 'HELLO'
109 113
110 114 >>> enc(b'hello:world?')
111 115 'hello~3aworld~3f'
112 116 >>> dec(b'hello~3aworld~3f')
113 117 'hello:world?'
114 118
115 119 >>> enc(b'the\\x07quick\\xADshot')
116 120 'the~07quick~adshot'
117 121 >>> dec(b'the~07quick~adshot')
118 122 'the\\x07quick\\xadshot'
119 123 '''
120 124 e = '_'
121 125 xchr = pycompat.bytechr
122 126 asciistr = list(map(xchr, range(127)))
123 127 capitals = list(range(ord("A"), ord("Z") + 1))
124 128
125 129 cmap = dict((x, x) for x in asciistr)
126 130 for x in _reserved():
127 131 cmap[xchr(x)] = "~%02x" % x
128 132 for x in capitals + [ord(e)]:
129 133 cmap[xchr(x)] = e + xchr(x).lower()
130 134
131 135 dmap = {}
132 136 for k, v in cmap.iteritems():
133 137 dmap[v] = k
134 138 def decode(s):
135 139 i = 0
136 140 while i < len(s):
137 141 for l in pycompat.xrange(1, 4):
138 142 try:
139 143 yield dmap[s[i:i + l]]
140 144 i += l
141 145 break
142 146 except KeyError:
143 147 pass
144 148 else:
145 149 raise KeyError
146 150 return (lambda s: ''.join([cmap[s[c:c + 1]]
147 151 for c in pycompat.xrange(len(s))]),
148 152 lambda s: ''.join(list(decode(s))))
149 153
150 154 _encodefname, _decodefname = _buildencodefun()
151 155
152 156 def encodefilename(s):
153 157 '''
154 158 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
155 159 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
156 160 '''
157 161 return _encodefname(encodedir(s))
158 162
159 163 def decodefilename(s):
160 164 '''
161 165 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
162 166 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
163 167 '''
164 168 return decodedir(_decodefname(s))
165 169
166 170 def _buildlowerencodefun():
167 171 '''
168 172 >>> f = _buildlowerencodefun()
169 173 >>> f(b'nothing/special.txt')
170 174 'nothing/special.txt'
171 175 >>> f(b'HELLO')
172 176 'hello'
173 177 >>> f(b'hello:world?')
174 178 'hello~3aworld~3f'
175 179 >>> f(b'the\\x07quick\\xADshot')
176 180 'the~07quick~adshot'
177 181 '''
178 182 xchr = pycompat.bytechr
179 183 cmap = dict([(xchr(x), xchr(x)) for x in pycompat.xrange(127)])
180 184 for x in _reserved():
181 185 cmap[xchr(x)] = "~%02x" % x
182 186 for x in range(ord("A"), ord("Z") + 1):
183 187 cmap[xchr(x)] = xchr(x).lower()
184 188 def lowerencode(s):
185 189 return "".join([cmap[c] for c in pycompat.iterbytestr(s)])
186 190 return lowerencode
187 191
188 192 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
189 193
190 194 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
191 195 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
192 196 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
193 197 def _auxencode(path, dotencode):
194 198 '''
195 199 Encodes filenames containing names reserved by Windows or which end in
196 200 period or space. Does not touch other single reserved characters c.
197 201 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
198 202 Additionally encodes space or period at the beginning, if dotencode is
199 203 True. Parameter path is assumed to be all lowercase.
200 204 A segment only needs encoding if a reserved name appears as a
201 205 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
202 206 doesn't need encoding.
203 207
204 208 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
205 209 >>> _auxencode(s.split(b'/'), True)
206 210 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
207 211 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
208 212 >>> _auxencode(s.split(b'/'), False)
209 213 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
210 214 >>> _auxencode([b'foo. '], True)
211 215 ['foo.~20']
212 216 >>> _auxencode([b' .foo'], True)
213 217 ['~20.foo']
214 218 '''
215 219 for i, n in enumerate(path):
216 220 if not n:
217 221 continue
218 222 if dotencode and n[0] in '. ':
219 223 n = "~%02x" % ord(n[0:1]) + n[1:]
220 224 path[i] = n
221 225 else:
222 226 l = n.find('.')
223 227 if l == -1:
224 228 l = len(n)
225 229 if ((l == 3 and n[:3] in _winres3) or
226 230 (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
227 231 and n[:3] in _winres4)):
228 232 # encode third letter ('aux' -> 'au~78')
229 233 ec = "~%02x" % ord(n[2:3])
230 234 n = n[0:2] + ec + n[3:]
231 235 path[i] = n
232 236 if n[-1] in '. ':
233 237 # encode last period or space ('foo...' -> 'foo..~2e')
234 238 path[i] = n[:-1] + "~%02x" % ord(n[-1:])
235 239 return path
236 240
237 241 _maxstorepathlen = 120
238 242 _dirprefixlen = 8
239 243 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
240 244
241 245 def _hashencode(path, dotencode):
242 246 digest = node.hex(hashlib.sha1(path).digest())
243 247 le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
244 248 parts = _auxencode(le, dotencode)
245 249 basename = parts[-1]
246 250 _root, ext = os.path.splitext(basename)
247 251 sdirs = []
248 252 sdirslen = 0
249 253 for p in parts[:-1]:
250 254 d = p[:_dirprefixlen]
251 255 if d[-1] in '. ':
252 256 # Windows can't access dirs ending in period or space
253 257 d = d[:-1] + '_'
254 258 if sdirslen == 0:
255 259 t = len(d)
256 260 else:
257 261 t = sdirslen + 1 + len(d)
258 262 if t > _maxshortdirslen:
259 263 break
260 264 sdirs.append(d)
261 265 sdirslen = t
262 266 dirs = '/'.join(sdirs)
263 267 if len(dirs) > 0:
264 268 dirs += '/'
265 269 res = 'dh/' + dirs + digest + ext
266 270 spaceleft = _maxstorepathlen - len(res)
267 271 if spaceleft > 0:
268 272 filler = basename[:spaceleft]
269 273 res = 'dh/' + dirs + filler + digest + ext
270 274 return res
271 275
272 276 def _hybridencode(path, dotencode):
273 277 '''encodes path with a length limit
274 278
275 279 Encodes all paths that begin with 'data/', according to the following.
276 280
277 281 Default encoding (reversible):
278 282
279 283 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
280 284 characters are encoded as '~xx', where xx is the two digit hex code
281 285 of the character (see encodefilename).
282 286 Relevant path components consisting of Windows reserved filenames are
283 287 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
284 288
285 289 Hashed encoding (not reversible):
286 290
287 291 If the default-encoded path is longer than _maxstorepathlen, a
288 292 non-reversible hybrid hashing of the path is done instead.
289 293 This encoding uses up to _dirprefixlen characters of all directory
290 294 levels of the lowerencoded path, but not more levels than can fit into
291 295 _maxshortdirslen.
292 296 Then follows the filler followed by the sha digest of the full path.
293 297 The filler is the beginning of the basename of the lowerencoded path
294 298 (the basename is everything after the last path separator). The filler
295 299 is as long as possible, filling in characters from the basename until
296 300 the encoded path has _maxstorepathlen characters (or all chars of the
297 301 basename have been taken).
298 302 The extension (e.g. '.i' or '.d') is preserved.
299 303
300 304 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
301 305 encoding was used.
302 306 '''
303 307 path = encodedir(path)
304 308 ef = _encodefname(path).split('/')
305 309 res = '/'.join(_auxencode(ef, dotencode))
306 310 if len(res) > _maxstorepathlen:
307 311 res = _hashencode(path, dotencode)
308 312 return res
309 313
310 314 def _pathencode(path):
311 315 de = encodedir(path)
312 316 if len(path) > _maxstorepathlen:
313 317 return _hashencode(de, True)
314 318 ef = _encodefname(de).split('/')
315 319 res = '/'.join(_auxencode(ef, True))
316 320 if len(res) > _maxstorepathlen:
317 321 return _hashencode(de, True)
318 322 return res
319 323
320 324 _pathencode = getattr(parsers, 'pathencode', _pathencode)
321 325
322 326 def _plainhybridencode(f):
323 327 return _hybridencode(f, False)
324 328
325 329 def _calcmode(vfs):
326 330 try:
327 331 # files in .hg/ will be created using this mode
328 332 mode = vfs.stat().st_mode
329 333 # avoid some useless chmods
330 334 if (0o777 & ~util.umask) == (0o777 & mode):
331 335 mode = None
332 336 except OSError:
333 337 mode = None
334 338 return mode
335 339
336 340 _data = ('narrowspec data meta 00manifest.d 00manifest.i'
337 341 ' 00changelog.d 00changelog.i phaseroots obsstore')
338 342
339 343 def isrevlog(f, kind, st):
340 344 return kind == stat.S_IFREG and f[-2:] in ('.i', '.d')
341 345
342 346 class basicstore(object):
343 347 '''base class for local repository stores'''
344 348 def __init__(self, path, vfstype):
345 349 vfs = vfstype(path)
346 350 self.path = vfs.base
347 351 self.createmode = _calcmode(vfs)
348 352 vfs.createmode = self.createmode
349 353 self.rawvfs = vfs
350 354 self.vfs = vfsmod.filtervfs(vfs, encodedir)
351 355 self.opener = self.vfs
352 356
353 357 def join(self, f):
354 358 return self.path + '/' + encodedir(f)
355 359
356 360 def _walk(self, relpath, recurse, filefilter=isrevlog):
357 361 '''yields (unencoded, encoded, size)'''
358 362 path = self.path
359 363 if relpath:
360 364 path += '/' + relpath
361 365 striplen = len(self.path) + 1
362 366 l = []
363 367 if self.rawvfs.isdir(path):
364 368 visit = [path]
365 369 readdir = self.rawvfs.readdir
366 370 while visit:
367 371 p = visit.pop()
368 372 for f, kind, st in readdir(p, stat=True):
369 373 fp = p + '/' + f
370 374 if filefilter(f, kind, st):
371 375 n = util.pconvert(fp[striplen:])
372 376 l.append((decodedir(n), n, st.st_size))
373 377 elif kind == stat.S_IFDIR and recurse:
374 378 visit.append(fp)
375 379 l.sort()
376 380 return l
377 381
378 382 def datafiles(self, matcher=None):
379 383 return self._walk('data', True) + self._walk('meta', True)
380 384
381 385 def topfiles(self):
382 386 # yield manifest before changelog
383 387 return reversed(self._walk('', False))
384 388
385 389 def walk(self, matcher=None):
386 390 '''yields (unencoded, encoded, size)
387 391
388 392 if a matcher is passed, only storage files of the tracked paths
389 393 matched by the matcher are yielded
390 394 '''
391 395 # yield data files first
392 396 for x in self.datafiles(matcher):
393 397 yield x
394 398 for x in self.topfiles():
395 399 yield x
396 400
397 401 def copylist(self):
398 402 return ['requires'] + _data.split()
399 403
400 404 def write(self, tr):
401 405 pass
402 406
403 407 def invalidatecaches(self):
404 408 pass
405 409
406 410 def markremoved(self, fn):
407 411 pass
408 412
409 413 def __contains__(self, path):
410 414 '''Checks if the store contains path'''
411 415 path = "/".join(("data", path))
412 416 # file?
413 417 if self.vfs.exists(path + ".i"):
414 418 return True
415 419 # dir?
416 420 if not path.endswith("/"):
417 421 path = path + "/"
418 422 return self.vfs.exists(path)
419 423
420 424 class encodedstore(basicstore):
421 425 def __init__(self, path, vfstype):
422 426 vfs = vfstype(path + '/store')
423 427 self.path = vfs.base
424 428 self.createmode = _calcmode(vfs)
425 429 vfs.createmode = self.createmode
426 430 self.rawvfs = vfs
427 431 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
428 432 self.opener = self.vfs
429 433
430 434 def datafiles(self, matcher=None):
431 435 for a, b, size in super(encodedstore, self).datafiles():
432 436 try:
433 437 a = decodefilename(a)
434 438 except KeyError:
435 439 a = None
436 440 if a is not None and not _matchtrackedpath(a, matcher):
437 441 continue
438 442 yield a, b, size
439 443
440 444 def join(self, f):
441 445 return self.path + '/' + encodefilename(f)
442 446
443 447 def copylist(self):
444 448 return (['requires', '00changelog.i'] +
445 449 ['store/' + f for f in _data.split()])
446 450
447 451 class fncache(object):
448 452 # the filename used to be partially encoded
449 453 # hence the encodedir/decodedir dance
450 454 def __init__(self, vfs):
451 455 self.vfs = vfs
452 456 self.entries = None
453 457 self._dirty = False
454 458 # set of new additions to fncache
455 459 self.addls = set()
456 460
457 461 def _load(self):
458 462 '''fill the entries from the fncache file'''
459 463 self._dirty = False
460 464 try:
461 465 fp = self.vfs('fncache', mode='rb')
462 466 except IOError:
463 467 # skip nonexistent file
464 468 self.entries = set()
465 469 return
466 self.entries = set(decodedir(fp.read()).splitlines())
470
471 self.entries = set()
472 chunk = b''
473 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
474 chunk += c
475 try:
476 p = chunk.rindex(b'\n')
477 self.entries.update(decodedir(chunk[:p + 1]).splitlines())
478 chunk = chunk[p + 1:]
479 except ValueError:
480 # substring '\n' not found, maybe the entry is bigger than the
481 # chunksize, so let's keep iterating
482 pass
483
467 484 self._checkentries(fp)
468 485 fp.close()
469 486
470 487 def _checkentries(self, fp):
471 488 """ make sure there is no empty string in entries """
472 489 if '' in self.entries:
473 490 fp.seek(0)
474 491 for n, line in enumerate(util.iterfile(fp)):
475 492 if not line.rstrip('\n'):
476 493 t = _('invalid entry in fncache, line %d') % (n + 1)
477 494 raise error.Abort(t)
478 495
479 496 def write(self, tr):
480 497 if self._dirty:
481 498 assert self.entries is not None
482 499 self.entries = self.entries | self.addls
483 500 self.addls = set()
484 501 tr.addbackup('fncache')
485 502 fp = self.vfs('fncache', mode='wb', atomictemp=True)
486 503 if self.entries:
487 504 fp.write(encodedir('\n'.join(self.entries) + '\n'))
488 505 fp.close()
489 506 self._dirty = False
490 507 if self.addls:
491 508 # if we have just new entries, let's append them to the fncache
492 509 tr.addbackup('fncache')
493 510 fp = self.vfs('fncache', mode='ab', atomictemp=True)
494 511 if self.addls:
495 512 fp.write(encodedir('\n'.join(self.addls) + '\n'))
496 513 fp.close()
497 514 self.entries = None
498 515 self.addls = set()
499 516
500 517 def add(self, fn):
501 518 if self.entries is None:
502 519 self._load()
503 520 if fn not in self.entries:
504 521 self.addls.add(fn)
505 522
506 523 def remove(self, fn):
507 524 if self.entries is None:
508 525 self._load()
509 526 if fn in self.addls:
510 527 self.addls.remove(fn)
511 528 return
512 529 try:
513 530 self.entries.remove(fn)
514 531 self._dirty = True
515 532 except KeyError:
516 533 pass
517 534
518 535 def __contains__(self, fn):
519 536 if fn in self.addls:
520 537 return True
521 538 if self.entries is None:
522 539 self._load()
523 540 return fn in self.entries
524 541
525 542 def __iter__(self):
526 543 if self.entries is None:
527 544 self._load()
528 545 return iter(self.entries | self.addls)
529 546
530 547 class _fncachevfs(vfsmod.proxyvfs):
531 548 def __init__(self, vfs, fnc, encode):
532 549 vfsmod.proxyvfs.__init__(self, vfs)
533 550 self.fncache = fnc
534 551 self.encode = encode
535 552
536 553 def __call__(self, path, mode='r', *args, **kw):
537 554 encoded = self.encode(path)
538 555 if mode not in ('r', 'rb') and (path.startswith('data/') or
539 556 path.startswith('meta/')):
540 557 # do not trigger a fncache load when adding a file that already is
541 558 # known to exist.
542 559 notload = self.fncache.entries is None and self.vfs.exists(encoded)
543 560 if notload and 'a' in mode and not self.vfs.stat(encoded).st_size:
544 561 # when appending to an existing file, if the file has size zero,
545 562 # it should be considered as missing. Such zero-size files are
546 563 # the result of truncation when a transaction is aborted.
547 564 notload = False
548 565 if not notload:
549 566 self.fncache.add(path)
550 567 return self.vfs(encoded, mode, *args, **kw)
551 568
552 569 def join(self, path):
553 570 if path:
554 571 return self.vfs.join(self.encode(path))
555 572 else:
556 573 return self.vfs.join(path)
557 574
558 575 class fncachestore(basicstore):
559 576 def __init__(self, path, vfstype, dotencode):
560 577 if dotencode:
561 578 encode = _pathencode
562 579 else:
563 580 encode = _plainhybridencode
564 581 self.encode = encode
565 582 vfs = vfstype(path + '/store')
566 583 self.path = vfs.base
567 584 self.pathsep = self.path + '/'
568 585 self.createmode = _calcmode(vfs)
569 586 vfs.createmode = self.createmode
570 587 self.rawvfs = vfs
571 588 fnc = fncache(vfs)
572 589 self.fncache = fnc
573 590 self.vfs = _fncachevfs(vfs, fnc, encode)
574 591 self.opener = self.vfs
575 592
576 593 def join(self, f):
577 594 return self.pathsep + self.encode(f)
578 595
579 596 def getsize(self, path):
580 597 return self.rawvfs.stat(path).st_size
581 598
582 599 def datafiles(self, matcher=None):
583 600 for f in sorted(self.fncache):
584 601 if not _matchtrackedpath(f, matcher):
585 602 continue
586 603 ef = self.encode(f)
587 604 try:
588 605 yield f, ef, self.getsize(ef)
589 606 except OSError as err:
590 607 if err.errno != errno.ENOENT:
591 608 raise
592 609
593 610 def copylist(self):
594 611 d = ('narrowspec data meta dh fncache phaseroots obsstore'
595 612 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
596 613 return (['requires', '00changelog.i'] +
597 614 ['store/' + f for f in d.split()])
598 615
599 616 def write(self, tr):
600 617 self.fncache.write(tr)
601 618
602 619 def invalidatecaches(self):
603 620 self.fncache.entries = None
604 621 self.fncache.addls = set()
605 622
606 623 def markremoved(self, fn):
607 624 self.fncache.remove(fn)
608 625
609 626 def _exists(self, f):
610 627 ef = self.encode(f)
611 628 try:
612 629 self.getsize(ef)
613 630 return True
614 631 except OSError as err:
615 632 if err.errno != errno.ENOENT:
616 633 raise
617 634 # nonexistent entry
618 635 return False
619 636
620 637 def __contains__(self, path):
621 638 '''Checks if the store contains path'''
622 639 path = "/".join(("data", path))
623 640 # check for files (exact match)
624 641 e = path + '.i'
625 642 if e in self.fncache and self._exists(e):
626 643 return True
627 644 # now check for directories (prefix match)
628 645 if not path.endswith('/'):
629 646 path += '/'
630 647 for e in self.fncache:
631 648 if e.startswith(path) and self._exists(e):
632 649 return True
633 650 return False
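The test changes below install a tiny extension that forces store.fncache_chunksize down to 1 byte, so every fncache read in the test crosses chunk boundaries. A quick self-contained check of the same property, using io.BytesIO as a stand-in for the fncache file (illustrative only):

    import functools
    import io

    data = b'data/a.i\ndata/a.i.hg/b.i\ndata/a.i.hg.hg/c.i\n'
    fp = io.BytesIO(data)
    entries, buf = set(), b''
    for piece in iter(functools.partial(fp.read, 1), b''):  # 1-byte chunks
        buf += piece
        if b'\n' in buf:
            head, _sep, buf = buf.rpartition(b'\n')
            entries.update(head.splitlines())
    assert entries == {b'data/a.i', b'data/a.i.hg/b.i', b'data/a.i.hg.hg/c.i'}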
@@ -1,518 +1,532 b''
1 1 #require repofncache
2 2
3 An extension that sets the fncache chunk size to 1 byte to make sure the
4 chunked-read logic does not break
5
6 $ cat > chunksize.py <<EOF
7 > from __future__ import absolute_import
8 > from mercurial import store
9 > store.fncache_chunksize = 1
10 > EOF
11
12 $ cat >> $HGRCPATH <<EOF
13 > [extensions]
14 > chunksize = $TESTTMP/chunksize.py
15 > EOF
16
3 17 Init repo1:
4 18
5 19 $ hg init repo1
6 20 $ cd repo1
7 21 $ echo "some text" > a
8 22 $ hg add
9 23 adding a
10 24 $ hg ci -m first
11 25 $ cat .hg/store/fncache | sort
12 26 data/a.i
13 27
14 28 Testing a.i/b:
15 29
16 30 $ mkdir a.i
17 31 $ echo "some other text" > a.i/b
18 32 $ hg add
19 33 adding a.i/b
20 34 $ hg ci -m second
21 35 $ cat .hg/store/fncache | sort
22 36 data/a.i
23 37 data/a.i.hg/b.i
24 38
25 39 Testing a.i.hg/c:
26 40
27 41 $ mkdir a.i.hg
28 42 $ echo "yet another text" > a.i.hg/c
29 43 $ hg add
30 44 adding a.i.hg/c
31 45 $ hg ci -m third
32 46 $ cat .hg/store/fncache | sort
33 47 data/a.i
34 48 data/a.i.hg.hg/c.i
35 49 data/a.i.hg/b.i
36 50
37 51 Testing verify:
38 52
39 53 $ hg verify
40 54 checking changesets
41 55 checking manifests
42 56 crosschecking files in changesets and manifests
43 57 checking files
44 58 checked 3 changesets with 3 changes to 3 files
45 59
46 60 $ rm .hg/store/fncache
47 61
48 62 $ hg verify
49 63 checking changesets
50 64 checking manifests
51 65 crosschecking files in changesets and manifests
52 66 checking files
53 67 warning: revlog 'data/a.i' not in fncache!
54 68 warning: revlog 'data/a.i.hg/c.i' not in fncache!
55 69 warning: revlog 'data/a.i/b.i' not in fncache!
56 70 checked 3 changesets with 3 changes to 3 files
57 71 3 warnings encountered!
58 72 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
59 73
60 74 Follow the hint to make sure it works
61 75
62 76 $ hg debugrebuildfncache
63 77 adding data/a.i
64 78 adding data/a.i.hg/c.i
65 79 adding data/a.i/b.i
66 80 3 items added, 0 removed from fncache
67 81
68 82 $ hg verify
69 83 checking changesets
70 84 checking manifests
71 85 crosschecking files in changesets and manifests
72 86 checking files
73 87 checked 3 changesets with 3 changes to 3 files
74 88
75 89 $ cd ..
76 90
77 91 Non store repo:
78 92
79 93 $ hg --config format.usestore=False init foo
80 94 $ cd foo
81 95 $ mkdir tst.d
82 96 $ echo foo > tst.d/foo
83 97 $ hg ci -Amfoo
84 98 adding tst.d/foo
85 99 $ find .hg | sort
86 100 .hg
87 101 .hg/00changelog.i
88 102 .hg/00manifest.i
89 103 .hg/cache
90 104 .hg/cache/branch2-served
91 105 .hg/cache/rbc-names-v1
92 106 .hg/cache/rbc-revs-v1
93 107 .hg/data
94 108 .hg/data/tst.d.hg
95 109 .hg/data/tst.d.hg/foo.i
96 110 .hg/dirstate
97 111 .hg/fsmonitor.state (fsmonitor !)
98 112 .hg/last-message.txt
99 113 .hg/phaseroots
100 114 .hg/requires
101 115 .hg/undo
102 116 .hg/undo.backup.dirstate
103 117 .hg/undo.backupfiles
104 118 .hg/undo.bookmarks
105 119 .hg/undo.branch
106 120 .hg/undo.desc
107 121 .hg/undo.dirstate
108 122 .hg/undo.phaseroots
109 123 .hg/wcache
110 124 .hg/wcache/checkisexec (execbit !)
111 125 .hg/wcache/checklink (symlink !)
112 126 .hg/wcache/checklink-target (symlink !)
113 127 .hg/wcache/manifestfulltextcache (reporevlogstore !)
114 128 $ cd ..
115 129
116 130 Non fncache repo:
117 131
118 132 $ hg --config format.usefncache=False init bar
119 133 $ cd bar
120 134 $ mkdir tst.d
121 135 $ echo foo > tst.d/Foo
122 136 $ hg ci -Amfoo
123 137 adding tst.d/Foo
124 138 $ find .hg | sort
125 139 .hg
126 140 .hg/00changelog.i
127 141 .hg/cache
128 142 .hg/cache/branch2-served
129 143 .hg/cache/rbc-names-v1
130 144 .hg/cache/rbc-revs-v1
131 145 .hg/dirstate
132 146 .hg/fsmonitor.state (fsmonitor !)
133 147 .hg/last-message.txt
134 148 .hg/requires
135 149 .hg/store
136 150 .hg/store/00changelog.i
137 151 .hg/store/00manifest.i
138 152 .hg/store/data
139 153 .hg/store/data/tst.d.hg
140 154 .hg/store/data/tst.d.hg/_foo.i
141 155 .hg/store/phaseroots
142 156 .hg/store/undo
143 157 .hg/store/undo.backupfiles
144 158 .hg/store/undo.phaseroots
145 159 .hg/undo.backup.dirstate
146 160 .hg/undo.bookmarks
147 161 .hg/undo.branch
148 162 .hg/undo.desc
149 163 .hg/undo.dirstate
150 164 .hg/wcache
151 165 .hg/wcache/checkisexec (execbit !)
152 166 .hg/wcache/checklink (symlink !)
153 167 .hg/wcache/checklink-target (symlink !)
154 168 .hg/wcache/manifestfulltextcache (reporevlogstore !)
155 169 $ cd ..
156 170
157 171 Encoding of reserved / long paths in the store
158 172
159 173 $ hg init r2
160 174 $ cd r2
161 175 $ cat <<EOF > .hg/hgrc
162 176 > [ui]
163 177 > portablefilenames = ignore
164 178 > EOF
165 179
166 180 $ hg import -q --bypass - <<EOF
167 181 > # HG changeset patch
168 182 > # User test
169 183 > # Date 0 0
170 184 > # Node ID 1c7a2f7cb77be1a0def34e4c7cabc562ad98fbd7
171 185 > # Parent 0000000000000000000000000000000000000000
172 186 > 1
173 187 >
174 188 > diff --git a/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz b/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz
175 189 > new file mode 100644
176 190 > --- /dev/null
177 191 > +++ b/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/xxxxxxxxx-xxxxxxxxx-xxxxxxxxx-123456789-12.3456789-12345-ABCDEFGHIJKLMNOPRSTUVWXYZ-abcdefghjiklmnopqrstuvwxyz
178 192 > @@ -0,0 +1,1 @@
179 193 > +foo
180 194 > diff --git a/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT b/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT
181 195 > new file mode 100644
182 196 > --- /dev/null
183 197 > +++ b/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT
184 198 > @@ -0,0 +1,1 @@
185 199 > +foo
186 200 > diff --git a/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt b/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt
187 201 > new file mode 100644
188 202 > --- /dev/null
189 203 > +++ b/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt
190 204 > @@ -0,0 +1,1 @@
191 205 > +foo
192 206 > diff --git a/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c b/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c
193 207 > new file mode 100644
194 208 > --- /dev/null
195 209 > +++ b/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c
196 210 > @@ -0,0 +1,1 @@
197 211 > +foo
198 212 > diff --git a/enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider b/enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider
199 213 > new file mode 100644
200 214 > --- /dev/null
201 215 > +++ b/enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider
202 216 > @@ -0,0 +1,1 @@
203 217 > +foo
204 218 > EOF
205 219
206 220 $ find .hg/store -name *.i | sort
207 221 .hg/store/00changelog.i
208 222 .hg/store/00manifest.i
209 223 .hg/store/data/bla.aux/pr~6e/_p_r_n/lpt/co~6d3/nu~6c/coma/foo._n_u_l/normal.c.i
210 224 .hg/store/dh/12345678/12345678/12345678/12345678/12345678/12345678/12345678/12345/xxxxxx168e07b38e65eff86ab579afaaa8e30bfbe0f35f.i
211 225 .hg/store/dh/au~78/second/x.prn/fourth/fi~3afth/sixth/seventh/eighth/nineth/tenth/loremia20419e358ddff1bf8751e38288aff1d7c32ec05.i
212 226 .hg/store/dh/enterpri/openesba/contrib-/corba-bc/netbeans/wsdlexte/src/main/java/org.net7018f27961fdf338a598a40c4683429e7ffb9743.i
213 227 .hg/store/dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilename0d8e1f4187c650e2f1fdca9fd90f786bc0976b6b.i
214 228
215 229 $ cd ..
216 230
217 231 Aborting lock does not prevent fncache writes
218 232
219 233 $ cat > exceptionext.py <<EOF
220 234 > from __future__ import absolute_import
221 235 > import os
222 236 > from mercurial import commands, error, extensions
223 237 >
224 238 > def lockexception(orig, vfs, lockname, wait, releasefn, *args, **kwargs):
225 239 > def releasewrap():
226 240 > l.held = False # ensure __del__ is a noop
227 241 > raise error.Abort("forced lock failure")
228 242 > l = orig(vfs, lockname, wait, releasewrap, *args, **kwargs)
229 243 > return l
230 244 >
231 245 > def reposetup(ui, repo):
232 246 > extensions.wrapfunction(repo, '_lock', lockexception)
233 247 >
234 248 > cmdtable = {}
235 249 >
236 250 > # wrap "commit" command to prevent wlock from being '__del__()'-ed
237 251 > # at the end of dispatching (for intentional "forced lock failure")
238 252 > def commitwrap(orig, ui, repo, *pats, **opts):
239 253 > repo = repo.unfiltered() # to use replaced repo._lock certainly
240 254 > wlock = repo.wlock()
241 255 > try:
242 256 > return orig(ui, repo, *pats, **opts)
243 257 > finally:
244 258 > # multiple 'release()' calls are needed to fully release the wlock,
245 259 > # because the "forced" abort at the final store-lock release
246 260 > # prevents the wlock from being released by the same 'lockmod.release()'
247 261 > for i in range(wlock.held):
248 262 > wlock.release()
249 263 >
250 264 > def extsetup(ui):
251 265 > extensions.wrapcommand(commands.table, b"commit", commitwrap)
252 266 > EOF
253 267 $ extpath=`pwd`/exceptionext.py
254 268 $ hg init fncachetxn
255 269 $ cd fncachetxn
256 270 $ printf "[extensions]\nexceptionext=$extpath\n" >> .hg/hgrc
257 271 $ touch y
258 272 $ hg ci -qAm y
259 273 abort: forced lock failure
260 274 [255]
261 275 $ cat .hg/store/fncache
262 276 data/y.i
263 277
264 278 Aborting transaction prevents fncache change
265 279
266 280 $ cat > ../exceptionext.py <<EOF
267 281 > from __future__ import absolute_import
268 282 > import os
269 283 > from mercurial import commands, error, extensions, localrepo
270 284 >
271 285 > def wrapper(orig, self, *args, **kwargs):
272 286 > tr = orig(self, *args, **kwargs)
273 287 > def fail(tr):
274 288 > raise error.Abort(b"forced transaction failure")
275 289 > # zzz prefix to ensure it sorted after store.write
276 290 > tr.addfinalize(b'zzz-forcefails', fail)
277 291 > return tr
278 292 >
279 293 > def uisetup(ui):
280 294 > extensions.wrapfunction(
281 295 > localrepo.localrepository, b'transaction', wrapper)
282 296 >
283 297 > cmdtable = {}
284 298 >
285 299 > EOF
286 300
287 301 Clean cached version
288 302 $ rm -f "${extpath}c"
289 303 $ rm -Rf "`dirname $extpath`/__pycache__"
290 304
291 305 $ touch z
292 306 $ hg ci -qAm z
293 307 transaction abort!
294 308 rollback completed
295 309 abort: forced transaction failure
296 310 [255]
297 311 $ cat .hg/store/fncache
298 312 data/y.i
299 313
300 314 Aborted transactions can be recovered later
301 315
302 316 $ cat > ../exceptionext.py <<EOF
303 317 > from __future__ import absolute_import
304 318 > import os
305 319 > from mercurial import (
306 320 > commands,
307 321 > error,
308 322 > extensions,
309 323 > localrepo,
310 324 > transaction,
311 325 > )
312 326 >
313 327 > def trwrapper(orig, self, *args, **kwargs):
314 328 > tr = orig(self, *args, **kwargs)
315 329 > def fail(tr):
316 330 > raise error.Abort(b"forced transaction failure")
317 331 > # zzz prefix to ensure it sorted after store.write
318 332 > tr.addfinalize(b'zzz-forcefails', fail)
319 333 > return tr
320 334 >
321 335 > def abortwrapper(orig, self, *args, **kwargs):
322 336 > raise error.Abort(b"forced transaction failure")
323 337 >
324 338 > def uisetup(ui):
325 339 > extensions.wrapfunction(localrepo.localrepository, 'transaction',
326 340 > trwrapper)
327 341 > extensions.wrapfunction(transaction.transaction, '_abort',
328 342 > abortwrapper)
329 343 >
330 344 > cmdtable = {}
331 345 >
332 346 > EOF
333 347
334 348 Clean cached versions
335 349 $ rm -f "${extpath}c"
336 350 $ rm -Rf "`dirname $extpath`/__pycache__"
337 351
338 352 $ hg up -q 1
339 353 $ touch z
340 354 $ hg ci -qAm z 2>/dev/null
341 355 [255]
342 356 $ cat .hg/store/fncache | sort
343 357 data/y.i
344 358 data/z.i
345 359 $ hg recover
346 360 rolling back interrupted transaction
347 361 checking changesets
348 362 checking manifests
349 363 crosschecking files in changesets and manifests
350 364 checking files
351 365 checked 1 changesets with 1 changes to 1 files
352 366 $ cat .hg/store/fncache
353 367 data/y.i
354 368
355 369 $ cd ..
356 370
357 371 debugrebuildfncache does nothing unless repo has fncache requirement
358 372
359 373 $ hg --config format.usefncache=false init nofncache
360 374 $ cd nofncache
361 375 $ hg debugrebuildfncache
362 376 (not rebuilding fncache because repository does not support fncache)
363 377
364 378 $ cd ..
365 379
366 380 debugrebuildfncache works on empty repository
367 381
368 382 $ hg init empty
369 383 $ cd empty
370 384 $ hg debugrebuildfncache
371 385 fncache already up to date
372 386 $ cd ..
373 387
374 388 debugrebuildfncache on an up to date repository no-ops
375 389
376 390 $ hg init repo
377 391 $ cd repo
378 392 $ echo initial > foo
379 393 $ echo initial > .bar
380 394 $ hg commit -A -m initial
381 395 adding .bar
382 396 adding foo
383 397
384 398 $ cat .hg/store/fncache | sort
385 399 data/.bar.i
386 400 data/foo.i
387 401
388 402 $ hg debugrebuildfncache
389 403 fncache already up to date
390 404
391 405 debugrebuildfncache restores deleted fncache file
392 406
393 407 $ rm -f .hg/store/fncache
394 408 $ hg debugrebuildfncache
395 409 adding data/.bar.i
396 410 adding data/foo.i
397 411 2 items added, 0 removed from fncache
398 412
399 413 $ cat .hg/store/fncache | sort
400 414 data/.bar.i
401 415 data/foo.i
402 416
403 417 Rebuild after rebuild should no-op
404 418
405 419 $ hg debugrebuildfncache
406 420 fncache already up to date
407 421
408 422 A single missing file should get restored, an extra file should be removed
409 423
410 424 $ cat > .hg/store/fncache << EOF
411 425 > data/foo.i
412 426 > data/bad-entry.i
413 427 > EOF
414 428
415 429 $ hg debugrebuildfncache
416 430 removing data/bad-entry.i
417 431 adding data/.bar.i
418 432 1 items added, 1 removed from fncache
419 433
420 434 $ cat .hg/store/fncache | sort
421 435 data/.bar.i
422 436 data/foo.i
423 437
424 438 $ cd ..
425 439
426 440 Try a simple variation without dotencode to ensure fncache is ignorant of encoding
427 441
428 442 $ hg --config format.dotencode=false init nodotencode
429 443 $ cd nodotencode
430 444 $ echo initial > foo
431 445 $ echo initial > .bar
432 446 $ hg commit -A -m initial
433 447 adding .bar
434 448 adding foo
435 449
436 450 $ cat .hg/store/fncache | sort
437 451 data/.bar.i
438 452 data/foo.i
439 453
440 454 $ rm .hg/store/fncache
441 455 $ hg debugrebuildfncache
442 456 adding data/.bar.i
443 457 adding data/foo.i
444 458 2 items added, 0 removed from fncache
445 459
446 460 $ cat .hg/store/fncache | sort
447 461 data/.bar.i
448 462 data/foo.i
449 463
450 464 $ cd ..
451 465
452 466 In repositories that have accumulated a large number of files over time, the
453 467 fncache file is going to be large. If we can avoid loading it, so much the better.
454 468 The cache should not be loaded when committing changes to existing files, or when unbundling
455 469 changesets that only contain changes to existing files:
456 470
457 471 $ cat > fncacheloadwarn.py << EOF
458 472 > from __future__ import absolute_import
459 473 > from mercurial import extensions, localrepo
460 474 >
461 475 > def extsetup(ui):
462 476 > def wrapstore(orig, requirements, *args):
463 477 > store = orig(requirements, *args)
464 478 > if b'store' in requirements and b'fncache' in requirements:
465 479 > instrumentfncachestore(store, ui)
466 480 > return store
467 481 > extensions.wrapfunction(localrepo, 'makestore', wrapstore)
468 482 >
469 483 > def instrumentfncachestore(fncachestore, ui):
470 484 > class instrumentedfncache(type(fncachestore.fncache)):
471 485 > def _load(self):
472 486 > ui.warn(b'fncache load triggered!\n')
473 487 > super(instrumentedfncache, self)._load()
474 488 > fncachestore.fncache.__class__ = instrumentedfncache
475 489 > EOF
476 490
477 491 $ fncachextpath=`pwd`/fncacheloadwarn.py
478 492 $ hg init nofncacheload
479 493 $ cd nofncacheload
480 494 $ printf "[extensions]\nfncacheloadwarn=$fncachextpath\n" >> .hg/hgrc
481 495
482 496 A new file should trigger a load, as we'd want to update the fncache set in that case:
483 497
484 498 $ touch foo
485 499 $ hg ci -qAm foo
486 500 fncache load triggered!
487 501
488 502 But modifying that file should not:
489 503
490 504 $ echo bar >> foo
491 505 $ hg ci -qm foo
492 506
493 507 If a transaction has been aborted, the zero-size truncated index file will
494 508 not prevent the fncache from being loaded; rather than actually abort
495 509 a transaction, we simulate the situation by creating a zero-size index file:
496 510
497 511 $ touch .hg/store/data/bar.i
498 512 $ touch bar
499 513 $ hg ci -qAm bar
500 514 fncache load triggered!
501 515
502 516 Unbundling should follow the same rules; existing files should not cause a load:
503 517
504 518 $ hg clone -q . tobundle
505 519 $ echo 'new line' > tobundle/bar
506 520 $ hg -R tobundle ci -qm bar
507 521 $ hg -R tobundle bundle -q barupdated.hg
508 522 $ hg unbundle -q barupdated.hg
509 523
510 524 but adding new files should:
511 525
512 526 $ touch tobundle/newfile
513 527 $ hg -R tobundle ci -qAm newfile
514 528 $ hg -R tobundle bundle -q newfile.hg
515 529 $ hg unbundle -q newfile.hg
516 530 fncache load triggered!
517 531
518 532 $ cd ..