store: cleanup what is recognized as a revlog file...
marmoute
r51362:53af67c7 default
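
The change below narrows REVLOG_FILES_MAIN_EXT and REVLOG_FILES_OTHER_EXT so that temporary `.tmpcensored` files are no longer recognized as revlog files. As a quick illustration, here is a standalone sketch (not part of the changeset) that mirrors revlog_type() and the flag constants as they stand after this cleanup:

REVLOG_FILES_MAIN_EXT = (b'.i',)
REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.dat', b'.n', b'.nd', b'.sda')
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

FILEFLAGS_FILELOG = 1 << 11
FILEFLAGS_REVLOG_MAIN = 1 << 1
FILEFLAGS_REVLOG_OTHER = 1 << 0
FILEFLAGS_VOLATILE = 1 << 20
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER

def revlog_type(f):
    # same structure as the function in the diff below
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None

assert revlog_type(b'data/foo.i') == FILEFLAGS_REVLOG_MAIN
assert revlog_type(b'data/foo.d') == FILETYPE_FILELOG_OTHER
assert revlog_type(b'data/foo.nd') == FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE
# before this change, the suffixes b'i.tmpcensored' and b'd.tmpcensored' made
# these temporary censoring files look like revlog files; now they do not
assert revlog_type(b'data/foo.i.tmpcensored') is None
assert revlog_type(b'data/foo.d.tmpcensored') is None
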
@@ -1,856 +1,855 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13
14 14 from .i18n import _
15 15 from .pycompat import getattr
16 16 from .node import hex
17 17 from . import (
18 18 changelog,
19 19 error,
20 20 manifest,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 vfs as vfsmod,
25 25 )
26 26 from .utils import hashutil
27 27
28 28 parsers = policy.importmod('parsers')
29 29 # how many bytes should be read from fncache in one read
30 30 # It is done to prevent loading large fncache files into memory
31 31 fncache_chunksize = 10 ** 6
32 32
33 33
34 34 def _matchtrackedpath(path, matcher):
35 35 """parses a fncache entry and returns whether the entry is tracking a path
36 36 matched by matcher or not.
37 37
38 38 If matcher is None, returns True"""
39 39
40 40 if matcher is None:
41 41 return True
42 42 path = decodedir(path)
43 43 if path.startswith(b'data/'):
44 44 return matcher(path[len(b'data/') : -len(b'.i')])
45 45 elif path.startswith(b'meta/'):
46 46 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 47
48 48 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 49
50 50
51 51 # This avoids a collision between a file named foo and a dir named
52 52 # foo.i or foo.d
53 53 def _encodedir(path):
54 54 """
55 55 >>> _encodedir(b'data/foo.i')
56 56 'data/foo.i'
57 57 >>> _encodedir(b'data/foo.i/bla.i')
58 58 'data/foo.i.hg/bla.i'
59 59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 60 'data/foo.i.hg.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 63 """
64 64 return (
65 65 path.replace(b".hg/", b".hg.hg/")
66 66 .replace(b".i/", b".i.hg/")
67 67 .replace(b".d/", b".d.hg/")
68 68 )
69 69
70 70
71 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 72
73 73
74 74 def decodedir(path):
75 75 """
76 76 >>> decodedir(b'data/foo.i')
77 77 'data/foo.i'
78 78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 79 'data/foo.i/bla.i'
80 80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 81 'data/foo.i.hg/bla.i'
82 82 """
83 83 if b".hg/" not in path:
84 84 return path
85 85 return (
86 86 path.replace(b".d.hg/", b".d/")
87 87 .replace(b".i.hg/", b".i/")
88 88 .replace(b".hg.hg/", b".hg/")
89 89 )
90 90
91 91
92 92 def _reserved():
93 93 """characters that are problematic for filesystems
94 94
95 95 * ascii escapes (0..31)
96 96 * ascii hi (126..255)
97 97 * windows specials
98 98
99 99 these characters will be escaped by the encode functions
100 100 """
101 101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 102 for x in range(32):
103 103 yield x
104 104 for x in range(126, 256):
105 105 yield x
106 106 for x in winreserved:
107 107 yield x
108 108
109 109
110 110 def _buildencodefun():
111 111 """
112 112 >>> enc, dec = _buildencodefun()
113 113
114 114 >>> enc(b'nothing/special.txt')
115 115 'nothing/special.txt'
116 116 >>> dec(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118
119 119 >>> enc(b'HELLO')
120 120 '_h_e_l_l_o'
121 121 >>> dec(b'_h_e_l_l_o')
122 122 'HELLO'
123 123
124 124 >>> enc(b'hello:world?')
125 125 'hello~3aworld~3f'
126 126 >>> dec(b'hello~3aworld~3f')
127 127 'hello:world?'
128 128
129 129 >>> enc(b'the\\x07quick\\xADshot')
130 130 'the~07quick~adshot'
131 131 >>> dec(b'the~07quick~adshot')
132 132 'the\\x07quick\\xadshot'
133 133 """
134 134 e = b'_'
135 135 xchr = pycompat.bytechr
136 136 asciistr = list(map(xchr, range(127)))
137 137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 138
139 139 cmap = {x: x for x in asciistr}
140 140 for x in _reserved():
141 141 cmap[xchr(x)] = b"~%02x" % x
142 142 for x in capitals + [ord(e)]:
143 143 cmap[xchr(x)] = e + xchr(x).lower()
144 144
145 145 dmap = {}
146 146 for k, v in cmap.items():
147 147 dmap[v] = k
148 148
149 149 def decode(s):
150 150 i = 0
151 151 while i < len(s):
152 152 for l in range(1, 4):
153 153 try:
154 154 yield dmap[s[i : i + l]]
155 155 i += l
156 156 break
157 157 except KeyError:
158 158 pass
159 159 else:
160 160 raise KeyError
161 161
162 162 return (
163 163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 164 lambda s: b''.join(list(decode(s))),
165 165 )
166 166
167 167
168 168 _encodefname, _decodefname = _buildencodefun()
169 169
170 170
171 171 def encodefilename(s):
172 172 """
173 173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 175 """
176 176 return _encodefname(encodedir(s))
177 177
178 178
179 179 def decodefilename(s):
180 180 """
181 181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 183 """
184 184 return decodedir(_decodefname(s))
185 185
186 186
187 187 def _buildlowerencodefun():
188 188 """
189 189 >>> f = _buildlowerencodefun()
190 190 >>> f(b'nothing/special.txt')
191 191 'nothing/special.txt'
192 192 >>> f(b'HELLO')
193 193 'hello'
194 194 >>> f(b'hello:world?')
195 195 'hello~3aworld~3f'
196 196 >>> f(b'the\\x07quick\\xADshot')
197 197 'the~07quick~adshot'
198 198 """
199 199 xchr = pycompat.bytechr
200 200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 201 for x in _reserved():
202 202 cmap[xchr(x)] = b"~%02x" % x
203 203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 204 cmap[xchr(x)] = xchr(x).lower()
205 205
206 206 def lowerencode(s):
207 207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 208
209 209 return lowerencode
210 210
211 211
212 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 213
214 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 217
218 218
219 219 def _auxencode(path, dotencode):
220 220 """
221 221 Encodes filenames containing names reserved by Windows or which end in
222 222 period or space. Does not touch other single reserved characters c.
223 223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 224 Additionally encodes space or period at the beginning, if dotencode is
225 225 True. Parameter path is assumed to be all lowercase.
226 226 A segment only needs encoding if a reserved name appears as a
227 227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 228 doesn't need encoding.
229 229
230 230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 231 >>> _auxencode(s.split(b'/'), True)
232 232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 234 >>> _auxencode(s.split(b'/'), False)
235 235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 236 >>> _auxencode([b'foo. '], True)
237 237 ['foo.~20']
238 238 >>> _auxencode([b' .foo'], True)
239 239 ['~20.foo']
240 240 """
241 241 for i, n in enumerate(path):
242 242 if not n:
243 243 continue
244 244 if dotencode and n[0] in b'. ':
245 245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 246 path[i] = n
247 247 else:
248 248 l = n.find(b'.')
249 249 if l == -1:
250 250 l = len(n)
251 251 if (l == 3 and n[:3] in _winres3) or (
252 252 l == 4
253 253 and n[3:4] <= b'9'
254 254 and n[3:4] >= b'1'
255 255 and n[:3] in _winres4
256 256 ):
257 257 # encode third letter ('aux' -> 'au~78')
258 258 ec = b"~%02x" % ord(n[2:3])
259 259 n = n[0:2] + ec + n[3:]
260 260 path[i] = n
261 261 if n[-1] in b'. ':
262 262 # encode last period or space ('foo...' -> 'foo..~2e')
263 263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 264 return path
265 265
266 266
267 267 _maxstorepathlen = 120
268 268 _dirprefixlen = 8
269 269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 270
271 271
272 272 def _hashencode(path, dotencode):
273 273 digest = hex(hashutil.sha1(path).digest())
274 274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 275 parts = _auxencode(le, dotencode)
276 276 basename = parts[-1]
277 277 _root, ext = os.path.splitext(basename)
278 278 sdirs = []
279 279 sdirslen = 0
280 280 for p in parts[:-1]:
281 281 d = p[:_dirprefixlen]
282 282 if d[-1] in b'. ':
283 283 # Windows can't access dirs ending in period or space
284 284 d = d[:-1] + b'_'
285 285 if sdirslen == 0:
286 286 t = len(d)
287 287 else:
288 288 t = sdirslen + 1 + len(d)
289 289 if t > _maxshortdirslen:
290 290 break
291 291 sdirs.append(d)
292 292 sdirslen = t
293 293 dirs = b'/'.join(sdirs)
294 294 if len(dirs) > 0:
295 295 dirs += b'/'
296 296 res = b'dh/' + dirs + digest + ext
297 297 spaceleft = _maxstorepathlen - len(res)
298 298 if spaceleft > 0:
299 299 filler = basename[:spaceleft]
300 300 res = b'dh/' + dirs + filler + digest + ext
301 301 return res
302 302
303 303
304 304 def _hybridencode(path, dotencode):
305 305 """encodes path with a length limit
306 306
307 307 Encodes all paths that begin with 'data/', according to the following.
308 308
309 309 Default encoding (reversible):
310 310
311 311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 312 characters are encoded as '~xx', where xx is the two digit hex code
313 313 of the character (see encodefilename).
314 314 Relevant path components consisting of Windows reserved filenames are
315 315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 316
317 317 Hashed encoding (not reversible):
318 318
319 319 If the default-encoded path is longer than _maxstorepathlen, a
320 320 non-reversible hybrid hashing of the path is done instead.
321 321 This encoding uses up to _dirprefixlen characters of all directory
322 322 levels of the lowerencoded path, but not more levels than can fit into
323 323 _maxshortdirslen.
324 324 Then follows the filler followed by the sha digest of the full path.
325 325 The filler is the beginning of the basename of the lowerencoded path
326 326 (the basename is everything after the last path separator). The filler
327 327 is as long as possible, filling in characters from the basename until
328 328 the encoded path has _maxstorepathlen characters (or all chars of the
329 329 basename have been taken).
330 330 The extension (e.g. '.i' or '.d') is preserved.
331 331
332 332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 333 encoding was used.
334 334 """
335 335 path = encodedir(path)
336 336 ef = _encodefname(path).split(b'/')
337 337 res = b'/'.join(_auxencode(ef, dotencode))
338 338 if len(res) > _maxstorepathlen:
339 339 res = _hashencode(path, dotencode)
340 340 return res
341 341
342 342
343 343 def _pathencode(path):
344 344 de = encodedir(path)
345 345 if len(path) > _maxstorepathlen:
346 346 return _hashencode(de, True)
347 347 ef = _encodefname(de).split(b'/')
348 348 res = b'/'.join(_auxencode(ef, True))
349 349 if len(res) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 return res
352 352
353 353
354 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 355
356 356
357 357 def _plainhybridencode(f):
358 358 return _hybridencode(f, False)
359 359
360 360
361 361 def _calcmode(vfs):
362 362 try:
363 363 # files in .hg/ will be created using this mode
364 364 mode = vfs.stat().st_mode
365 365 # avoid some useless chmods
366 366 if (0o777 & ~util.umask) == (0o777 & mode):
367 367 mode = None
368 368 except OSError:
369 369 mode = None
370 370 return mode
371 371
372 372
373 373 _data = [
374 374 b'bookmarks',
375 375 b'narrowspec',
376 376 b'data',
377 377 b'meta',
378 378 b'00manifest.d',
379 379 b'00manifest.i',
380 380 b'00changelog.d',
381 381 b'00changelog.i',
382 382 b'phaseroots',
383 383 b'obsstore',
384 384 b'requires',
385 385 ]
386 386
387 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 388 REVLOG_FILES_OTHER_EXT = (
389 389 b'.idx',
390 390 b'.d',
391 391 b'.dat',
392 392 b'.n',
393 393 b'.nd',
394 394 b'.sda',
395 b'd.tmpcensored',
396 395 )
397 396 # files that are "volatile" and might change between listing and streaming
398 397 #
399 398 # note: the ".nd" files are nodemap data and won't "change" but they might be
400 399 # deleted.
401 400 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
402 401
403 402 # some exception to the above matching
404 403 #
405 404 # XXX This is currently not in use because of issue6542
406 405 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
407 406
408 407
409 408 def is_revlog(f, kind, st):
410 409 if kind != stat.S_IFREG:
411 410 return None
412 411 return revlog_type(f)
413 412
414 413
415 414 def revlog_type(f):
416 415 # XXX we need to filter the `undo.` files created by the transaction here, however
417 416 # being naive about it also filters out revlogs named `undo.*`, leading to
418 417 # issue6542. So we no longer use EXCLUDED.
419 418 if f.endswith(REVLOG_FILES_MAIN_EXT):
420 419 return FILEFLAGS_REVLOG_MAIN
421 420 elif f.endswith(REVLOG_FILES_OTHER_EXT):
422 421 t = FILETYPE_FILELOG_OTHER
423 422 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
424 423 t |= FILEFLAGS_VOLATILE
425 424 return t
426 425 return None
427 426
428 427
429 428 # the file is part of changelog data
430 429 FILEFLAGS_CHANGELOG = 1 << 13
431 430 # the file is part of manifest data
432 431 FILEFLAGS_MANIFESTLOG = 1 << 12
433 432 # the file is part of filelog data
434 433 FILEFLAGS_FILELOG = 1 << 11
435 434 # files that are not directly part of a revlog
436 435 FILEFLAGS_OTHER = 1 << 10
437 436
438 437 # the main entry point for a revlog
439 438 FILEFLAGS_REVLOG_MAIN = 1 << 1
440 439 # a secondary file for a revlog
441 440 FILEFLAGS_REVLOG_OTHER = 1 << 0
442 441
443 442 # files that are "volatile" and might change between listing and streaming
444 443 FILEFLAGS_VOLATILE = 1 << 20
445 444
446 445 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
447 446 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
448 447 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
449 448 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
450 449 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
451 450 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
452 451 FILETYPE_OTHER = FILEFLAGS_OTHER
453 452
454 453
455 454 class basicstore:
456 455 '''base class for local repository stores'''
457 456
458 457 def __init__(self, path, vfstype):
459 458 vfs = vfstype(path)
460 459 self.path = vfs.base
461 460 self.createmode = _calcmode(vfs)
462 461 vfs.createmode = self.createmode
463 462 self.rawvfs = vfs
464 463 self.vfs = vfsmod.filtervfs(vfs, encodedir)
465 464 self.opener = self.vfs
466 465
467 466 def join(self, f):
468 467 return self.path + b'/' + encodedir(f)
469 468
470 469 def _walk(self, relpath, recurse):
471 470 '''yields (revlog_type, unencoded, size)'''
472 471 path = self.path
473 472 if relpath:
474 473 path += b'/' + relpath
475 474 striplen = len(self.path) + 1
476 475 l = []
477 476 if self.rawvfs.isdir(path):
478 477 visit = [path]
479 478 readdir = self.rawvfs.readdir
480 479 while visit:
481 480 p = visit.pop()
482 481 for f, kind, st in readdir(p, stat=True):
483 482 fp = p + b'/' + f
484 483 rl_type = is_revlog(f, kind, st)
485 484 if rl_type is not None:
486 485 n = util.pconvert(fp[striplen:])
487 486 l.append((rl_type, decodedir(n), st.st_size))
488 487 elif kind == stat.S_IFDIR and recurse:
489 488 visit.append(fp)
490 489 l.sort()
491 490 return l
492 491
493 492 def changelog(self, trypending, concurrencychecker=None):
494 493 return changelog.changelog(
495 494 self.vfs,
496 495 trypending=trypending,
497 496 concurrencychecker=concurrencychecker,
498 497 )
499 498
500 499 def manifestlog(self, repo, storenarrowmatch):
501 500 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
502 501 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
503 502
504 503 def datafiles(self, matcher=None, undecodable=None):
505 504 """Like walk, but excluding the changelog and root manifest.
506 505
507 506 When [undecodable] is None, revlog names that can't be
508 507 decoded cause an exception. When it is provided, it should
509 508 be a list and the filenames that can't be decoded are added
510 509 to it instead. This is very rarely needed."""
511 510 files = self._walk(b'data', True) + self._walk(b'meta', True)
512 511 for (t, u, s) in files:
513 512 if t is not None:
514 513 yield (FILEFLAGS_FILELOG | t, u, s)
515 514
516 515 def topfiles(self):
517 516 # yield manifest before changelog
518 517 files = reversed(self._walk(b'', False))
519 518 for (t, u, s) in files:
520 519 if u.startswith(b'00changelog'):
521 520 yield (FILEFLAGS_CHANGELOG | t, u, s)
522 521 elif u.startswith(b'00manifest'):
523 522 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
524 523 else:
525 524 yield (FILETYPE_OTHER | t, u, s)
526 525
527 526 def walk(self, matcher=None):
528 527 """return files related to data storage (ie: revlogs)
529 528
530 529 yields (file_type, unencoded, size)
531 530
532 531 if a matcher is passed, only storage files of tracked paths
533 532 that match the matcher are yielded
534 533 """
535 534 # yield data files first
536 535 for x in self.datafiles(matcher):
537 536 yield x
538 537 for x in self.topfiles():
539 538 yield x
540 539
541 540 def copylist(self):
542 541 return _data
543 542
544 543 def write(self, tr):
545 544 pass
546 545
547 546 def invalidatecaches(self):
548 547 pass
549 548
550 549 def markremoved(self, fn):
551 550 pass
552 551
553 552 def __contains__(self, path):
554 553 '''Checks if the store contains path'''
555 554 path = b"/".join((b"data", path))
556 555 # file?
557 556 if self.vfs.exists(path + b".i"):
558 557 return True
559 558 # dir?
560 559 if not path.endswith(b"/"):
561 560 path = path + b"/"
562 561 return self.vfs.exists(path)
563 562
564 563
565 564 class encodedstore(basicstore):
566 565 def __init__(self, path, vfstype):
567 566 vfs = vfstype(path + b'/store')
568 567 self.path = vfs.base
569 568 self.createmode = _calcmode(vfs)
570 569 vfs.createmode = self.createmode
571 570 self.rawvfs = vfs
572 571 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
573 572 self.opener = self.vfs
574 573
575 574 # note: topfiles would also need a decode phase. It is just that in
576 575 # practice we do not have any file outside of `data/` that needs encoding.
577 576 # However that might change, so we should probably add a test and
578 577 # encoding/decoding support for it too. see issue6548
579 578
580 579 def datafiles(self, matcher=None, undecodable=None):
581 580 for t, f1, size in super(encodedstore, self).datafiles():
582 581 try:
583 582 f2 = decodefilename(f1)
584 583 except KeyError:
585 584 if undecodable is None:
586 585 msg = _(b'undecodable revlog name %s') % f1
587 586 raise error.StorageError(msg)
588 587 else:
589 588 undecodable.append(f1)
590 589 continue
591 590 if not _matchtrackedpath(f2, matcher):
592 591 continue
593 592 yield t, f2, size
594 593
595 594 def join(self, f):
596 595 return self.path + b'/' + encodefilename(f)
597 596
598 597 def copylist(self):
599 598 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
600 599
601 600
602 601 class fncache:
603 602 # the filename used to be partially encoded
604 603 # hence the encodedir/decodedir dance
605 604 def __init__(self, vfs):
606 605 self.vfs = vfs
607 606 self._ignores = set()
608 607 self.entries = None
609 608 self._dirty = False
610 609 # set of new additions to fncache
611 610 self.addls = set()
612 611
613 612 def ensureloaded(self, warn=None):
614 613 """read the fncache file if not already read.
615 614
616 615 If the file on disk is corrupted, raise. If warn is provided,
617 616 warn and keep going instead."""
618 617 if self.entries is None:
619 618 self._load(warn)
620 619
621 620 def _load(self, warn=None):
622 621 '''fill the entries from the fncache file'''
623 622 self._dirty = False
624 623 try:
625 624 fp = self.vfs(b'fncache', mode=b'rb')
626 625 except IOError:
627 626 # skip nonexistent file
628 627 self.entries = set()
629 628 return
630 629
631 630 self.entries = set()
632 631 chunk = b''
633 632 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
634 633 chunk += c
635 634 try:
636 635 p = chunk.rindex(b'\n')
637 636 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
638 637 chunk = chunk[p + 1 :]
639 638 except ValueError:
640 639 # substring '\n' not found, maybe the entry is bigger than the
641 640 # chunksize, so let's keep iterating
642 641 pass
643 642
644 643 if chunk:
645 644 msg = _(b"fncache does not end with a newline")
646 645 if warn:
647 646 warn(msg + b'\n')
648 647 else:
649 648 raise error.Abort(
650 649 msg,
651 650 hint=_(
652 651 b"use 'hg debugrebuildfncache' to "
653 652 b"rebuild the fncache"
654 653 ),
655 654 )
656 655 self._checkentries(fp, warn)
657 656 fp.close()
658 657
659 658 def _checkentries(self, fp, warn):
660 659 """make sure there is no empty string in entries"""
661 660 if b'' in self.entries:
662 661 fp.seek(0)
663 662 for n, line in enumerate(fp):
664 663 if not line.rstrip(b'\n'):
665 664 t = _(b'invalid entry in fncache, line %d') % (n + 1)
666 665 if warn:
667 666 warn(t + b'\n')
668 667 else:
669 668 raise error.Abort(t)
670 669
671 670 def write(self, tr):
672 671 if self._dirty:
673 672 assert self.entries is not None
674 673 self.entries = self.entries | self.addls
675 674 self.addls = set()
676 675 tr.addbackup(b'fncache')
677 676 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
678 677 if self.entries:
679 678 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
680 679 fp.close()
681 680 self._dirty = False
682 681 if self.addls:
683 682 # if we have just new entries, let's append them to the fncache
684 683 tr.addbackup(b'fncache')
685 684 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
686 685 if self.addls:
687 686 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
688 687 fp.close()
689 688 self.entries = None
690 689 self.addls = set()
691 690
692 691 def addignore(self, fn):
693 692 self._ignores.add(fn)
694 693
695 694 def add(self, fn):
696 695 if fn in self._ignores:
697 696 return
698 697 if self.entries is None:
699 698 self._load()
700 699 if fn not in self.entries:
701 700 self.addls.add(fn)
702 701
703 702 def remove(self, fn):
704 703 if self.entries is None:
705 704 self._load()
706 705 if fn in self.addls:
707 706 self.addls.remove(fn)
708 707 return
709 708 try:
710 709 self.entries.remove(fn)
711 710 self._dirty = True
712 711 except KeyError:
713 712 pass
714 713
715 714 def __contains__(self, fn):
716 715 if fn in self.addls:
717 716 return True
718 717 if self.entries is None:
719 718 self._load()
720 719 return fn in self.entries
721 720
722 721 def __iter__(self):
723 722 if self.entries is None:
724 723 self._load()
725 724 return iter(self.entries | self.addls)
726 725
727 726
728 727 class _fncachevfs(vfsmod.proxyvfs):
729 728 def __init__(self, vfs, fnc, encode):
730 729 vfsmod.proxyvfs.__init__(self, vfs)
731 730 self.fncache = fnc
732 731 self.encode = encode
733 732
734 733 def __call__(self, path, mode=b'r', *args, **kw):
735 734 encoded = self.encode(path)
736 735 if (
737 736 mode not in (b'r', b'rb')
738 737 and (path.startswith(b'data/') or path.startswith(b'meta/'))
739 738 and revlog_type(path) is not None
740 739 ):
741 740 # do not trigger a fncache load when adding a file that already is
742 741 # known to exist.
743 742 notload = self.fncache.entries is None and self.vfs.exists(encoded)
744 743 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
745 744 # when appending to an existing file, if the file has size zero,
746 745 # it should be considered as missing. Such zero-size files are
747 746 # the result of truncation when a transaction is aborted.
748 747 notload = False
749 748 if not notload:
750 749 self.fncache.add(path)
751 750 return self.vfs(encoded, mode, *args, **kw)
752 751
753 752 def join(self, path):
754 753 if path:
755 754 return self.vfs.join(self.encode(path))
756 755 else:
757 756 return self.vfs.join(path)
758 757
759 758 def register_file(self, path):
760 759 """generic hook point to let fncache steer its stew"""
761 760 if path.startswith(b'data/') or path.startswith(b'meta/'):
762 761 self.fncache.add(path)
763 762
764 763
765 764 class fncachestore(basicstore):
766 765 def __init__(self, path, vfstype, dotencode):
767 766 if dotencode:
768 767 encode = _pathencode
769 768 else:
770 769 encode = _plainhybridencode
771 770 self.encode = encode
772 771 vfs = vfstype(path + b'/store')
773 772 self.path = vfs.base
774 773 self.pathsep = self.path + b'/'
775 774 self.createmode = _calcmode(vfs)
776 775 vfs.createmode = self.createmode
777 776 self.rawvfs = vfs
778 777 fnc = fncache(vfs)
779 778 self.fncache = fnc
780 779 self.vfs = _fncachevfs(vfs, fnc, encode)
781 780 self.opener = self.vfs
782 781
783 782 def join(self, f):
784 783 return self.pathsep + self.encode(f)
785 784
786 785 def getsize(self, path):
787 786 return self.rawvfs.stat(path).st_size
788 787
789 788 def datafiles(self, matcher=None, undecodable=None):
790 789 for f in sorted(self.fncache):
791 790 if not _matchtrackedpath(f, matcher):
792 791 continue
793 792 ef = self.encode(f)
794 793 t = revlog_type(f)
795 794 if t is None:
796 795 # Note: this should not be in the fncache then…
797 796 #
798 797 # However the fncache might contain such files added by
799 798 # previous versions of Mercurial.
800 799 continue
801 800 t |= FILEFLAGS_FILELOG
802 801 try:
803 802 yield t, f, self.getsize(ef)
804 803 except FileNotFoundError:
805 804 pass
806 805
807 806 def copylist(self):
808 807 d = (
809 808 b'bookmarks',
810 809 b'narrowspec',
811 810 b'data',
812 811 b'meta',
813 812 b'dh',
814 813 b'fncache',
815 814 b'phaseroots',
816 815 b'obsstore',
817 816 b'00manifest.d',
818 817 b'00manifest.i',
819 818 b'00changelog.d',
820 819 b'00changelog.i',
821 820 b'requires',
822 821 )
823 822 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
824 823
825 824 def write(self, tr):
826 825 self.fncache.write(tr)
827 826
828 827 def invalidatecaches(self):
829 828 self.fncache.entries = None
830 829 self.fncache.addls = set()
831 830
832 831 def markremoved(self, fn):
833 832 self.fncache.remove(fn)
834 833
835 834 def _exists(self, f):
836 835 ef = self.encode(f)
837 836 try:
838 837 self.getsize(ef)
839 838 return True
840 839 except FileNotFoundError:
841 840 return False
842 841
843 842 def __contains__(self, path):
844 843 '''Checks if the store contains path'''
845 844 path = b"/".join((b"data", path))
846 845 # check for files (exact match)
847 846 e = path + b'.i'
848 847 if e in self.fncache and self._exists(e):
849 848 return True
850 849 # now check for directories (prefix match)
851 850 if not path.endswith(b'/'):
852 851 path += b'/'
853 852 for e in self.fncache:
854 853 if e.startswith(path) and self._exists(e):
855 854 return True
856 855 return False
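
For completeness, the path-encoding helpers defined in this file can be exercised directly. A minimal usage sketch, assuming the `mercurial` package from a matching checkout is importable (`_hybridencode` and `_maxstorepathlen` are private module-level names from this file, used here purely for illustration):

from mercurial import store

# reversible "default" encoding (see encodefilename/decodefilename above):
# uppercase letters become _lowercase, reserved bytes become ~xx escapes
enc = store.encodefilename(b'data/FOO.i')
assert enc == b'data/_f_o_o.i'
assert store.decodefilename(enc) == b'data/FOO.i'

# once the default-encoded form exceeds _maxstorepathlen (120 bytes),
# _hybridencode falls back to the non-reversible hashed form under 'dh/'
long_path = b'data/' + b'x' * 200 + b'.i'
hashed = store._hybridencode(long_path, True)
assert hashed.startswith(b'dh/')
assert len(hashed) <= store._maxstorepathlen
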