py3: fix for Python 3.12 emitting SyntaxWarning on invalid escape sequences...
Mads Kiilerich
r51245:4be9ecc9 stable
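Python 3.12 upgrades the compile-time warning for unrecognized backslash escapes in string and bytes literals from DeprecationWarning to SyntaxWarning, so the `\.` sequences in the non-raw `EXCLUDED` pattern below start producing warnings. The one-line change in this diff switches that literal to a raw bytes string (`br'...'`). A minimal sketch of the behaviour, assuming CPython 3.12 (the file name and surrounding snippet here are illustrative only, not part of the changeset):

import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # old, non-raw literal: '\.' is not a recognized escape sequence
    compile(r"EXCLUDED = b'.*undo\.[^/]+\.(nd?|i)$'", "<store.py>", "exec")

# On 3.12, `caught` holds a SyntaxWarning ("invalid escape sequence '\.'");
# on 3.6-3.11 the same source emits a DeprecationWarning instead.
# The raw-prefixed form compiles without any warning:
compile(r"EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')", "<store.py>", "exec")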
@@ -1,848 +1,848
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13
14 14 from .i18n import _
15 15 from .pycompat import getattr
16 16 from .node import hex
17 17 from . import (
18 18 changelog,
19 19 error,
20 20 manifest,
21 21 policy,
22 22 pycompat,
23 23 util,
24 24 vfs as vfsmod,
25 25 )
26 26 from .utils import hashutil
27 27
28 28 parsers = policy.importmod('parsers')
29 29 # how much bytes should be read from fncache in one read
30 30 # It is done to prevent loading large fncache files into memory
31 31 fncache_chunksize = 10 ** 6
32 32
33 33
34 34 def _matchtrackedpath(path, matcher):
35 35 """parses a fncache entry and returns whether the entry is tracking a path
36 36 matched by matcher or not.
37 37
38 38 If matcher is None, returns True"""
39 39
40 40 if matcher is None:
41 41 return True
42 42 path = decodedir(path)
43 43 if path.startswith(b'data/'):
44 44 return matcher(path[len(b'data/') : -len(b'.i')])
45 45 elif path.startswith(b'meta/'):
46 46 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 47
48 48 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 49
50 50
51 51 # This avoids a collision between a file named foo and a dir named
52 52 # foo.i or foo.d
53 53 def _encodedir(path):
54 54 """
55 55 >>> _encodedir(b'data/foo.i')
56 56 'data/foo.i'
57 57 >>> _encodedir(b'data/foo.i/bla.i')
58 58 'data/foo.i.hg/bla.i'
59 59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 60 'data/foo.i.hg.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 63 """
64 64 return (
65 65 path.replace(b".hg/", b".hg.hg/")
66 66 .replace(b".i/", b".i.hg/")
67 67 .replace(b".d/", b".d.hg/")
68 68 )
69 69
70 70
71 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 72
73 73
74 74 def decodedir(path):
75 75 """
76 76 >>> decodedir(b'data/foo.i')
77 77 'data/foo.i'
78 78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 79 'data/foo.i/bla.i'
80 80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 81 'data/foo.i.hg/bla.i'
82 82 """
83 83 if b".hg/" not in path:
84 84 return path
85 85 return (
86 86 path.replace(b".d.hg/", b".d/")
87 87 .replace(b".i.hg/", b".i/")
88 88 .replace(b".hg.hg/", b".hg/")
89 89 )
90 90
91 91
92 92 def _reserved():
93 93 """characters that are problematic for filesystems
94 94
95 95 * ascii escapes (0..31)
96 96 * ascii hi (126..255)
97 97 * windows specials
98 98
99 99 these characters will be escaped by encodefunctions
100 100 """
101 101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 102 for x in range(32):
103 103 yield x
104 104 for x in range(126, 256):
105 105 yield x
106 106 for x in winreserved:
107 107 yield x
108 108
109 109
110 110 def _buildencodefun():
111 111 """
112 112 >>> enc, dec = _buildencodefun()
113 113
114 114 >>> enc(b'nothing/special.txt')
115 115 'nothing/special.txt'
116 116 >>> dec(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118
119 119 >>> enc(b'HELLO')
120 120 '_h_e_l_l_o'
121 121 >>> dec(b'_h_e_l_l_o')
122 122 'HELLO'
123 123
124 124 >>> enc(b'hello:world?')
125 125 'hello~3aworld~3f'
126 126 >>> dec(b'hello~3aworld~3f')
127 127 'hello:world?'
128 128
129 129 >>> enc(b'the\\x07quick\\xADshot')
130 130 'the~07quick~adshot'
131 131 >>> dec(b'the~07quick~adshot')
132 132 'the\\x07quick\\xadshot'
133 133 """
134 134 e = b'_'
135 135 xchr = pycompat.bytechr
136 136 asciistr = list(map(xchr, range(127)))
137 137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 138
139 139 cmap = {x: x for x in asciistr}
140 140 for x in _reserved():
141 141 cmap[xchr(x)] = b"~%02x" % x
142 142 for x in capitals + [ord(e)]:
143 143 cmap[xchr(x)] = e + xchr(x).lower()
144 144
145 145 dmap = {}
146 146 for k, v in cmap.items():
147 147 dmap[v] = k
148 148
149 149 def decode(s):
150 150 i = 0
151 151 while i < len(s):
152 152 for l in range(1, 4):
153 153 try:
154 154 yield dmap[s[i : i + l]]
155 155 i += l
156 156 break
157 157 except KeyError:
158 158 pass
159 159 else:
160 160 raise KeyError
161 161
162 162 return (
163 163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 164 lambda s: b''.join(list(decode(s))),
165 165 )
166 166
167 167
168 168 _encodefname, _decodefname = _buildencodefun()
169 169
170 170
171 171 def encodefilename(s):
172 172 """
173 173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 175 """
176 176 return _encodefname(encodedir(s))
177 177
178 178
179 179 def decodefilename(s):
180 180 """
181 181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 183 """
184 184 return decodedir(_decodefname(s))
185 185
186 186
187 187 def _buildlowerencodefun():
188 188 """
189 189 >>> f = _buildlowerencodefun()
190 190 >>> f(b'nothing/special.txt')
191 191 'nothing/special.txt'
192 192 >>> f(b'HELLO')
193 193 'hello'
194 194 >>> f(b'hello:world?')
195 195 'hello~3aworld~3f'
196 196 >>> f(b'the\\x07quick\\xADshot')
197 197 'the~07quick~adshot'
198 198 """
199 199 xchr = pycompat.bytechr
200 200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 201 for x in _reserved():
202 202 cmap[xchr(x)] = b"~%02x" % x
203 203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 204 cmap[xchr(x)] = xchr(x).lower()
205 205
206 206 def lowerencode(s):
207 207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 208
209 209 return lowerencode
210 210
211 211
212 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 213
214 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 217
218 218
219 219 def _auxencode(path, dotencode):
220 220 """
221 221 Encodes filenames containing names reserved by Windows or which end in
222 222 period or space. Does not touch other single reserved characters c.
223 223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 224 Additionally encodes space or period at the beginning, if dotencode is
225 225 True. Parameter path is assumed to be all lowercase.
226 226 A segment only needs encoding if a reserved name appears as a
227 227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 228 doesn't need encoding.
229 229
230 230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 231 >>> _auxencode(s.split(b'/'), True)
232 232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 234 >>> _auxencode(s.split(b'/'), False)
235 235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 236 >>> _auxencode([b'foo. '], True)
237 237 ['foo.~20']
238 238 >>> _auxencode([b' .foo'], True)
239 239 ['~20.foo']
240 240 """
241 241 for i, n in enumerate(path):
242 242 if not n:
243 243 continue
244 244 if dotencode and n[0] in b'. ':
245 245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 246 path[i] = n
247 247 else:
248 248 l = n.find(b'.')
249 249 if l == -1:
250 250 l = len(n)
251 251 if (l == 3 and n[:3] in _winres3) or (
252 252 l == 4
253 253 and n[3:4] <= b'9'
254 254 and n[3:4] >= b'1'
255 255 and n[:3] in _winres4
256 256 ):
257 257 # encode third letter ('aux' -> 'au~78')
258 258 ec = b"~%02x" % ord(n[2:3])
259 259 n = n[0:2] + ec + n[3:]
260 260 path[i] = n
261 261 if n[-1] in b'. ':
262 262 # encode last period or space ('foo...' -> 'foo..~2e')
263 263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 264 return path
265 265
266 266
267 267 _maxstorepathlen = 120
268 268 _dirprefixlen = 8
269 269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 270
271 271
272 272 def _hashencode(path, dotencode):
273 273 digest = hex(hashutil.sha1(path).digest())
274 274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 275 parts = _auxencode(le, dotencode)
276 276 basename = parts[-1]
277 277 _root, ext = os.path.splitext(basename)
278 278 sdirs = []
279 279 sdirslen = 0
280 280 for p in parts[:-1]:
281 281 d = p[:_dirprefixlen]
282 282 if d[-1] in b'. ':
283 283 # Windows can't access dirs ending in period or space
284 284 d = d[:-1] + b'_'
285 285 if sdirslen == 0:
286 286 t = len(d)
287 287 else:
288 288 t = sdirslen + 1 + len(d)
289 289 if t > _maxshortdirslen:
290 290 break
291 291 sdirs.append(d)
292 292 sdirslen = t
293 293 dirs = b'/'.join(sdirs)
294 294 if len(dirs) > 0:
295 295 dirs += b'/'
296 296 res = b'dh/' + dirs + digest + ext
297 297 spaceleft = _maxstorepathlen - len(res)
298 298 if spaceleft > 0:
299 299 filler = basename[:spaceleft]
300 300 res = b'dh/' + dirs + filler + digest + ext
301 301 return res
302 302
303 303
304 304 def _hybridencode(path, dotencode):
305 305 """encodes path with a length limit
306 306
307 307 Encodes all paths that begin with 'data/', according to the following.
308 308
309 309 Default encoding (reversible):
310 310
311 311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 312 characters are encoded as '~xx', where xx is the two digit hex code
313 313 of the character (see encodefilename).
314 314 Relevant path components consisting of Windows reserved filenames are
315 315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 316
317 317 Hashed encoding (not reversible):
318 318
319 319 If the default-encoded path is longer than _maxstorepathlen, a
320 320 non-reversible hybrid hashing of the path is done instead.
321 321 This encoding uses up to _dirprefixlen characters of all directory
322 322 levels of the lowerencoded path, but not more levels than can fit into
323 323 _maxshortdirslen.
324 324 Then follows the filler followed by the sha digest of the full path.
325 325 The filler is the beginning of the basename of the lowerencoded path
326 326 (the basename is everything after the last path separator). The filler
327 327 is as long as possible, filling in characters from the basename until
328 328 the encoded path has _maxstorepathlen characters (or all chars of the
329 329 basename have been taken).
330 330 The extension (e.g. '.i' or '.d') is preserved.
331 331
332 332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 333 encoding was used.
334 334 """
335 335 path = encodedir(path)
336 336 ef = _encodefname(path).split(b'/')
337 337 res = b'/'.join(_auxencode(ef, dotencode))
338 338 if len(res) > _maxstorepathlen:
339 339 res = _hashencode(path, dotencode)
340 340 return res
341 341
342 342
343 343 def _pathencode(path):
344 344 de = encodedir(path)
345 345 if len(path) > _maxstorepathlen:
346 346 return _hashencode(de, True)
347 347 ef = _encodefname(de).split(b'/')
348 348 res = b'/'.join(_auxencode(ef, True))
349 349 if len(res) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 return res
352 352
353 353
354 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 355
356 356
357 357 def _plainhybridencode(f):
358 358 return _hybridencode(f, False)
359 359
360 360
361 361 def _calcmode(vfs):
362 362 try:
363 363 # files in .hg/ will be created using this mode
364 364 mode = vfs.stat().st_mode
365 365 # avoid some useless chmods
366 366 if (0o777 & ~util.umask) == (0o777 & mode):
367 367 mode = None
368 368 except OSError:
369 369 mode = None
370 370 return mode
371 371
372 372
373 373 _data = [
374 374 b'bookmarks',
375 375 b'narrowspec',
376 376 b'data',
377 377 b'meta',
378 378 b'00manifest.d',
379 379 b'00manifest.i',
380 380 b'00changelog.d',
381 381 b'00changelog.i',
382 382 b'phaseroots',
383 383 b'obsstore',
384 384 b'requires',
385 385 ]
386 386
387 387 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
388 388 REVLOG_FILES_OTHER_EXT = (
389 389 b'.idx',
390 390 b'.d',
391 391 b'.dat',
392 392 b'.n',
393 393 b'.nd',
394 394 b'.sda',
395 395 b'd.tmpcensored',
396 396 )
397 397 # files that are "volatile" and might change between listing and streaming
398 398 #
399 399 # note: the ".nd" file are nodemap data and won't "change" but they might be
400 400 # deleted.
401 401 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
402 402
403 403 # some exception to the above matching
404 404 #
405 405 # XXX This is currently not in use because of issue6542
406 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
406 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
407 407
408 408
409 409 def is_revlog(f, kind, st):
410 410 if kind != stat.S_IFREG:
411 411 return None
412 412 return revlog_type(f)
413 413
414 414
415 415 def revlog_type(f):
416 416 # XXX we need to filter `undo.` created by the transaction here, however
417 417 # being naive about it also filter revlog for `undo.*` files, leading to
418 418 # issue6542. So we no longer use EXCLUDED.
419 419 if f.endswith(REVLOG_FILES_MAIN_EXT):
420 420 return FILEFLAGS_REVLOG_MAIN
421 421 elif f.endswith(REVLOG_FILES_OTHER_EXT):
422 422 t = FILETYPE_FILELOG_OTHER
423 423 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
424 424 t |= FILEFLAGS_VOLATILE
425 425 return t
426 426 return None
427 427
428 428
429 429 # the file is part of changelog data
430 430 FILEFLAGS_CHANGELOG = 1 << 13
431 431 # the file is part of manifest data
432 432 FILEFLAGS_MANIFESTLOG = 1 << 12
433 433 # the file is part of filelog data
434 434 FILEFLAGS_FILELOG = 1 << 11
435 435 # file that are not directly part of a revlog
436 436 FILEFLAGS_OTHER = 1 << 10
437 437
438 438 # the main entry point for a revlog
439 439 FILEFLAGS_REVLOG_MAIN = 1 << 1
440 440 # a secondary file for a revlog
441 441 FILEFLAGS_REVLOG_OTHER = 1 << 0
442 442
443 443 # files that are "volatile" and might change between listing and streaming
444 444 FILEFLAGS_VOLATILE = 1 << 20
445 445
446 446 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
447 447 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
448 448 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
449 449 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
450 450 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
451 451 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
452 452 FILETYPE_OTHER = FILEFLAGS_OTHER
453 453
454 454
455 455 class basicstore:
456 456 '''base class for local repository stores'''
457 457
458 458 def __init__(self, path, vfstype):
459 459 vfs = vfstype(path)
460 460 self.path = vfs.base
461 461 self.createmode = _calcmode(vfs)
462 462 vfs.createmode = self.createmode
463 463 self.rawvfs = vfs
464 464 self.vfs = vfsmod.filtervfs(vfs, encodedir)
465 465 self.opener = self.vfs
466 466
467 467 def join(self, f):
468 468 return self.path + b'/' + encodedir(f)
469 469
470 470 def _walk(self, relpath, recurse):
471 471 '''yields (revlog_type, unencoded, size)'''
472 472 path = self.path
473 473 if relpath:
474 474 path += b'/' + relpath
475 475 striplen = len(self.path) + 1
476 476 l = []
477 477 if self.rawvfs.isdir(path):
478 478 visit = [path]
479 479 readdir = self.rawvfs.readdir
480 480 while visit:
481 481 p = visit.pop()
482 482 for f, kind, st in readdir(p, stat=True):
483 483 fp = p + b'/' + f
484 484 rl_type = is_revlog(f, kind, st)
485 485 if rl_type is not None:
486 486 n = util.pconvert(fp[striplen:])
487 487 l.append((rl_type, decodedir(n), st.st_size))
488 488 elif kind == stat.S_IFDIR and recurse:
489 489 visit.append(fp)
490 490 l.sort()
491 491 return l
492 492
493 493 def changelog(self, trypending, concurrencychecker=None):
494 494 return changelog.changelog(
495 495 self.vfs,
496 496 trypending=trypending,
497 497 concurrencychecker=concurrencychecker,
498 498 )
499 499
500 500 def manifestlog(self, repo, storenarrowmatch):
501 501 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
502 502 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
503 503
504 504 def datafiles(self, matcher=None, undecodable=None):
505 505 """Like walk, but excluding the changelog and root manifest.
506 506
507 507 When [undecodable] is None, revlogs names that can't be
508 508 decoded cause an exception. When it is provided, it should
509 509 be a list and the filenames that can't be decoded are added
510 510 to it instead. This is very rarely needed."""
511 511 files = self._walk(b'data', True) + self._walk(b'meta', True)
512 512 for (t, u, s) in files:
513 513 yield (FILEFLAGS_FILELOG | t, u, s)
514 514
515 515 def topfiles(self):
516 516 # yield manifest before changelog
517 517 files = reversed(self._walk(b'', False))
518 518 for (t, u, s) in files:
519 519 if u.startswith(b'00changelog'):
520 520 yield (FILEFLAGS_CHANGELOG | t, u, s)
521 521 elif u.startswith(b'00manifest'):
522 522 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
523 523 else:
524 524 yield (FILETYPE_OTHER | t, u, s)
525 525
526 526 def walk(self, matcher=None):
527 527 """return file related to data storage (ie: revlogs)
528 528
529 529 yields (file_type, unencoded, size)
530 530
531 531 if a matcher is passed, storage files of only those tracked paths
532 532 are passed with matches the matcher
533 533 """
534 534 # yield data files first
535 535 for x in self.datafiles(matcher):
536 536 yield x
537 537 for x in self.topfiles():
538 538 yield x
539 539
540 540 def copylist(self):
541 541 return _data
542 542
543 543 def write(self, tr):
544 544 pass
545 545
546 546 def invalidatecaches(self):
547 547 pass
548 548
549 549 def markremoved(self, fn):
550 550 pass
551 551
552 552 def __contains__(self, path):
553 553 '''Checks if the store contains path'''
554 554 path = b"/".join((b"data", path))
555 555 # file?
556 556 if self.vfs.exists(path + b".i"):
557 557 return True
558 558 # dir?
559 559 if not path.endswith(b"/"):
560 560 path = path + b"/"
561 561 return self.vfs.exists(path)
562 562
563 563
564 564 class encodedstore(basicstore):
565 565 def __init__(self, path, vfstype):
566 566 vfs = vfstype(path + b'/store')
567 567 self.path = vfs.base
568 568 self.createmode = _calcmode(vfs)
569 569 vfs.createmode = self.createmode
570 570 self.rawvfs = vfs
571 571 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
572 572 self.opener = self.vfs
573 573
574 574 # note: topfiles would also need a decode phase. It is just that in
575 575 # practice we do not have any file outside of `data/` that needs encoding.
576 576 # However that might change so we should probably add a test and encoding
577 577 # decoding for it too. see issue6548
578 578
579 579 def datafiles(self, matcher=None, undecodable=None):
580 580 for t, f1, size in super(encodedstore, self).datafiles():
581 581 try:
582 582 f2 = decodefilename(f1)
583 583 except KeyError:
584 584 if undecodable is None:
585 585 msg = _(b'undecodable revlog name %s') % f1
586 586 raise error.StorageError(msg)
587 587 else:
588 588 undecodable.append(f1)
589 589 continue
590 590 if not _matchtrackedpath(f2, matcher):
591 591 continue
592 592 yield t, f2, size
593 593
594 594 def join(self, f):
595 595 return self.path + b'/' + encodefilename(f)
596 596
597 597 def copylist(self):
598 598 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
599 599
600 600
601 601 class fncache:
602 602 # the filename used to be partially encoded
603 603 # hence the encodedir/decodedir dance
604 604 def __init__(self, vfs):
605 605 self.vfs = vfs
606 606 self._ignores = set()
607 607 self.entries = None
608 608 self._dirty = False
609 609 # set of new additions to fncache
610 610 self.addls = set()
611 611
612 612 def ensureloaded(self, warn=None):
613 613 """read the fncache file if not already read.
614 614
615 615 If the file on disk is corrupted, raise. If warn is provided,
616 616 warn and keep going instead."""
617 617 if self.entries is None:
618 618 self._load(warn)
619 619
620 620 def _load(self, warn=None):
621 621 '''fill the entries from the fncache file'''
622 622 self._dirty = False
623 623 try:
624 624 fp = self.vfs(b'fncache', mode=b'rb')
625 625 except IOError:
626 626 # skip nonexistent file
627 627 self.entries = set()
628 628 return
629 629
630 630 self.entries = set()
631 631 chunk = b''
632 632 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
633 633 chunk += c
634 634 try:
635 635 p = chunk.rindex(b'\n')
636 636 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
637 637 chunk = chunk[p + 1 :]
638 638 except ValueError:
639 639 # substring '\n' not found, maybe the entry is bigger than the
640 640 # chunksize, so let's keep iterating
641 641 pass
642 642
643 643 if chunk:
644 644 msg = _(b"fncache does not ends with a newline")
645 645 if warn:
646 646 warn(msg + b'\n')
647 647 else:
648 648 raise error.Abort(
649 649 msg,
650 650 hint=_(
651 651 b"use 'hg debugrebuildfncache' to "
652 652 b"rebuild the fncache"
653 653 ),
654 654 )
655 655 self._checkentries(fp, warn)
656 656 fp.close()
657 657
658 658 def _checkentries(self, fp, warn):
659 659 """make sure there is no empty string in entries"""
660 660 if b'' in self.entries:
661 661 fp.seek(0)
662 662 for n, line in enumerate(fp):
663 663 if not line.rstrip(b'\n'):
664 664 t = _(b'invalid entry in fncache, line %d') % (n + 1)
665 665 if warn:
666 666 warn(t + b'\n')
667 667 else:
668 668 raise error.Abort(t)
669 669
670 670 def write(self, tr):
671 671 if self._dirty:
672 672 assert self.entries is not None
673 673 self.entries = self.entries | self.addls
674 674 self.addls = set()
675 675 tr.addbackup(b'fncache')
676 676 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
677 677 if self.entries:
678 678 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
679 679 fp.close()
680 680 self._dirty = False
681 681 if self.addls:
682 682 # if we have just new entries, let's append them to the fncache
683 683 tr.addbackup(b'fncache')
684 684 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
685 685 if self.addls:
686 686 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
687 687 fp.close()
688 688 self.entries = None
689 689 self.addls = set()
690 690
691 691 def addignore(self, fn):
692 692 self._ignores.add(fn)
693 693
694 694 def add(self, fn):
695 695 if fn in self._ignores:
696 696 return
697 697 if self.entries is None:
698 698 self._load()
699 699 if fn not in self.entries:
700 700 self.addls.add(fn)
701 701
702 702 def remove(self, fn):
703 703 if self.entries is None:
704 704 self._load()
705 705 if fn in self.addls:
706 706 self.addls.remove(fn)
707 707 return
708 708 try:
709 709 self.entries.remove(fn)
710 710 self._dirty = True
711 711 except KeyError:
712 712 pass
713 713
714 714 def __contains__(self, fn):
715 715 if fn in self.addls:
716 716 return True
717 717 if self.entries is None:
718 718 self._load()
719 719 return fn in self.entries
720 720
721 721 def __iter__(self):
722 722 if self.entries is None:
723 723 self._load()
724 724 return iter(self.entries | self.addls)
725 725
726 726
727 727 class _fncachevfs(vfsmod.proxyvfs):
728 728 def __init__(self, vfs, fnc, encode):
729 729 vfsmod.proxyvfs.__init__(self, vfs)
730 730 self.fncache = fnc
731 731 self.encode = encode
732 732
733 733 def __call__(self, path, mode=b'r', *args, **kw):
734 734 encoded = self.encode(path)
735 735 if mode not in (b'r', b'rb') and (
736 736 path.startswith(b'data/') or path.startswith(b'meta/')
737 737 ):
738 738 # do not trigger a fncache load when adding a file that already is
739 739 # known to exist.
740 740 notload = self.fncache.entries is None and self.vfs.exists(encoded)
741 741 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
742 742 # when appending to an existing file, if the file has size zero,
743 743 # it should be considered as missing. Such zero-size files are
744 744 # the result of truncation when a transaction is aborted.
745 745 notload = False
746 746 if not notload:
747 747 self.fncache.add(path)
748 748 return self.vfs(encoded, mode, *args, **kw)
749 749
750 750 def join(self, path):
751 751 if path:
752 752 return self.vfs.join(self.encode(path))
753 753 else:
754 754 return self.vfs.join(path)
755 755
756 756 def register_file(self, path):
757 757 """generic hook point to lets fncache steer its stew"""
758 758 if path.startswith(b'data/') or path.startswith(b'meta/'):
759 759 self.fncache.add(path)
760 760
761 761
762 762 class fncachestore(basicstore):
763 763 def __init__(self, path, vfstype, dotencode):
764 764 if dotencode:
765 765 encode = _pathencode
766 766 else:
767 767 encode = _plainhybridencode
768 768 self.encode = encode
769 769 vfs = vfstype(path + b'/store')
770 770 self.path = vfs.base
771 771 self.pathsep = self.path + b'/'
772 772 self.createmode = _calcmode(vfs)
773 773 vfs.createmode = self.createmode
774 774 self.rawvfs = vfs
775 775 fnc = fncache(vfs)
776 776 self.fncache = fnc
777 777 self.vfs = _fncachevfs(vfs, fnc, encode)
778 778 self.opener = self.vfs
779 779
780 780 def join(self, f):
781 781 return self.pathsep + self.encode(f)
782 782
783 783 def getsize(self, path):
784 784 return self.rawvfs.stat(path).st_size
785 785
786 786 def datafiles(self, matcher=None, undecodable=None):
787 787 for f in sorted(self.fncache):
788 788 if not _matchtrackedpath(f, matcher):
789 789 continue
790 790 ef = self.encode(f)
791 791 try:
792 792 t = revlog_type(f)
793 793 assert t is not None, f
794 794 t |= FILEFLAGS_FILELOG
795 795 yield t, f, self.getsize(ef)
796 796 except FileNotFoundError:
797 797 pass
798 798
799 799 def copylist(self):
800 800 d = (
801 801 b'bookmarks',
802 802 b'narrowspec',
803 803 b'data',
804 804 b'meta',
805 805 b'dh',
806 806 b'fncache',
807 807 b'phaseroots',
808 808 b'obsstore',
809 809 b'00manifest.d',
810 810 b'00manifest.i',
811 811 b'00changelog.d',
812 812 b'00changelog.i',
813 813 b'requires',
814 814 )
815 815 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
816 816
817 817 def write(self, tr):
818 818 self.fncache.write(tr)
819 819
820 820 def invalidatecaches(self):
821 821 self.fncache.entries = None
822 822 self.fncache.addls = set()
823 823
824 824 def markremoved(self, fn):
825 825 self.fncache.remove(fn)
826 826
827 827 def _exists(self, f):
828 828 ef = self.encode(f)
829 829 try:
830 830 self.getsize(ef)
831 831 return True
832 832 except FileNotFoundError:
833 833 return False
834 834
835 835 def __contains__(self, path):
836 836 '''Checks if the store contains path'''
837 837 path = b"/".join((b"data", path))
838 838 # check for files (exact match)
839 839 e = path + b'.i'
840 840 if e in self.fncache and self._exists(e):
841 841 return True
842 842 # now check for directories (prefix match)
843 843 if not path.endswith(b'/'):
844 844 path += b'/'
845 845 for e in self.fncache:
846 846 if e.startswith(path) and self._exists(e):
847 847 return True
848 848 return False
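Worth noting: the rewrite is byte-for-byte neutral. Because `\.` is not a recognized escape, the old literal already evaluated to a backslash followed by a dot, so the compiled EXCLUDED pattern is unchanged and only the compile-time warning disappears. A quick sanity check one could run (hypothetical, not part of the changeset; the old form is written with doubled backslashes here so this snippet is itself warning-free):

# same bytes, with and without the raw prefix
assert b'.*undo\\.[^/]+\\.(nd?|i)$' == br'.*undo\.[^/]+\.(nd?|i)$'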