store: change `_walk` return to `(filename, (type, size))`...
marmoute
r51371:1c0244a8 default
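For context, a minimal sketch of the tuple shape that `basicstore._walk` yields before and after this change (the path, size, and variable names are illustrative only, not taken from a real repository):

    FILEFLAGS_REVLOG_MAIN = 1 << 1  # same flag value defined in store.py below

    # before this change: _walk yielded (revlog_type, unencoded_filename, size)
    old_entry = (FILEFLAGS_REVLOG_MAIN, b'data/foo.i', 1234)

    # after this change: the filename comes first and (type, size) is grouped,
    # so a plain l.sort() orders entries by path and callers unpack them as
    # `for u, (t, s) in files:`
    new_entry = (b'data/foo.i', (FILEFLAGS_REVLOG_MAIN, 1234))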
@@ -1,967 +1,971 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # This is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by the encode functions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in cmap.items():
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in range(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
166 166 lambda s: b''.join(list(decode(s))),
167 167 )
168 168
169 169
170 170 _encodefname, _decodefname = _buildencodefun()
171 171
172 172
173 173 def encodefilename(s):
174 174 """
175 175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 177 """
178 178 return _encodefname(encodedir(s))
179 179
180 180
181 181 def decodefilename(s):
182 182 """
183 183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 185 """
186 186 return decodedir(_decodefname(s))
187 187
188 188
189 189 def _buildlowerencodefun():
190 190 """
191 191 >>> f = _buildlowerencodefun()
192 192 >>> f(b'nothing/special.txt')
193 193 'nothing/special.txt'
194 194 >>> f(b'HELLO')
195 195 'hello'
196 196 >>> f(b'hello:world?')
197 197 'hello~3aworld~3f'
198 198 >>> f(b'the\\x07quick\\xADshot')
199 199 'the~07quick~adshot'
200 200 """
201 201 xchr = pycompat.bytechr
202 202 cmap = {xchr(x): xchr(x) for x in range(127)}
203 203 for x in _reserved():
204 204 cmap[xchr(x)] = b"~%02x" % x
205 205 for x in range(ord(b"A"), ord(b"Z") + 1):
206 206 cmap[xchr(x)] = xchr(x).lower()
207 207
208 208 def lowerencode(s):
209 209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 210
211 211 return lowerencode
212 212
213 213
214 214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
269 269 _maxstorepathlen = 120
270 270 _dirprefixlen = 8
271 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
274 274 def _hashencode(path, dotencode):
275 275 digest = hex(hashutil.sha1(path).digest())
276 276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 277 parts = _auxencode(le, dotencode)
278 278 basename = parts[-1]
279 279 _root, ext = os.path.splitext(basename)
280 280 sdirs = []
281 281 sdirslen = 0
282 282 for p in parts[:-1]:
283 283 d = p[:_dirprefixlen]
284 284 if d[-1] in b'. ':
285 285 # Windows can't access dirs ending in period or space
286 286 d = d[:-1] + b'_'
287 287 if sdirslen == 0:
288 288 t = len(d)
289 289 else:
290 290 t = sdirslen + 1 + len(d)
291 291 if t > _maxshortdirslen:
292 292 break
293 293 sdirs.append(d)
294 294 sdirslen = t
295 295 dirs = b'/'.join(sdirs)
296 296 if len(dirs) > 0:
297 297 dirs += b'/'
298 298 res = b'dh/' + dirs + digest + ext
299 299 spaceleft = _maxstorepathlen - len(res)
300 300 if spaceleft > 0:
301 301 filler = basename[:spaceleft]
302 302 res = b'dh/' + dirs + filler + digest + ext
303 303 return res
304 304
305 305
306 306 def _hybridencode(path, dotencode):
307 307 """encodes path with a length limit
308 308
309 309 Encodes all paths that begin with 'data/', according to the following.
310 310
311 311 Default encoding (reversible):
312 312
313 313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 314 characters are encoded as '~xx', where xx is the two digit hex code
315 315 of the character (see encodefilename).
316 316 Relevant path components consisting of Windows reserved filenames are
317 317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 318
319 319 Hashed encoding (not reversible):
320 320
321 321 If the default-encoded path is longer than _maxstorepathlen, a
322 322 non-reversible hybrid hashing of the path is done instead.
323 323 This encoding uses up to _dirprefixlen characters of all directory
324 324 levels of the lowerencoded path, but not more levels than can fit into
325 325 _maxshortdirslen.
326 326 Then follows the filler followed by the sha digest of the full path.
327 327 The filler is the beginning of the basename of the lowerencoded path
328 328 (the basename is everything after the last path separator). The filler
329 329 is as long as possible, filling in characters from the basename until
330 330 the encoded path has _maxstorepathlen characters (or all chars of the
331 331 basename have been taken).
332 332 The extension (e.g. '.i' or '.d') is preserved.
333 333
334 334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 335 encoding was used.
336 336 """
337 337 path = encodedir(path)
338 338 ef = _encodefname(path).split(b'/')
339 339 res = b'/'.join(_auxencode(ef, dotencode))
340 340 if len(res) > _maxstorepathlen:
341 341 res = _hashencode(path, dotencode)
342 342 return res
343 343
344 344
345 345 def _pathencode(path):
346 346 de = encodedir(path)
347 347 if len(path) > _maxstorepathlen:
348 348 return _hashencode(de, True)
349 349 ef = _encodefname(de).split(b'/')
350 350 res = b'/'.join(_auxencode(ef, True))
351 351 if len(res) > _maxstorepathlen:
352 352 return _hashencode(de, True)
353 353 return res
354 354
355 355
356 356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
359 359 def _plainhybridencode(f):
360 360 return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
375 375 _data = [
376 376 b'bookmarks',
377 377 b'narrowspec',
378 378 b'data',
379 379 b'meta',
380 380 b'00manifest.d',
381 381 b'00manifest.i',
382 382 b'00changelog.d',
383 383 b'00changelog.i',
384 384 b'phaseroots',
385 385 b'obsstore',
386 386 b'requires',
387 387 ]
388 388
389 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 390 REVLOG_FILES_OTHER_EXT = (
391 391 b'.idx',
392 392 b'.d',
393 393 b'.dat',
394 394 b'.n',
395 395 b'.nd',
396 396 b'.sda',
397 397 )
398 398 # files that are "volatile" and might change between listing and streaming
399 399 #
400 400 # note: the ".nd" files are nodemap data and won't "change" but they might be
401 401 # deleted.
402 402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
403 403
404 404 # some exception to the above matching
405 405 #
406 406 # XXX This is currently not in use because of issue6542
407 407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408 408
409 409
410 410 def is_revlog(f, kind, st):
411 411 if kind != stat.S_IFREG:
412 412 return None
413 413 return revlog_type(f)
414 414
415 415
416 416 def revlog_type(f):
417 417 # XXX we need to filter out `undo.` files created by the transaction here; however,
418 418 # being naive about it also filters revlogs for `undo.*` files, leading to
419 419 # issue6542. So we no longer use EXCLUDED.
420 420 if f.endswith(REVLOG_FILES_MAIN_EXT):
421 421 return FILEFLAGS_REVLOG_MAIN
422 422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
423 423 t = FILETYPE_FILELOG_OTHER
424 424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
425 425 t |= FILEFLAGS_VOLATILE
426 426 return t
427 427 return None
428 428
429 429
430 430 # the file is part of changelog data
431 431 FILEFLAGS_CHANGELOG = 1 << 13
432 432 # the file is part of manifest data
433 433 FILEFLAGS_MANIFESTLOG = 1 << 12
434 434 # the file is part of filelog data
435 435 FILEFLAGS_FILELOG = 1 << 11
436 436 # files that are not directly part of a revlog
437 437 FILEFLAGS_OTHER = 1 << 10
438 438
439 439 # the main entry point for a revlog
440 440 FILEFLAGS_REVLOG_MAIN = 1 << 1
441 441 # a secondary file for a revlog
442 442 FILEFLAGS_REVLOG_OTHER = 1 << 0
443 443
444 444 # files that are "volatile" and might change between listing and streaming
445 445 FILEFLAGS_VOLATILE = 1 << 20
446 446
447 447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
448 448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
449 449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
450 450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
451 451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
452 452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
453 453 FILETYPE_OTHER = FILEFLAGS_OTHER
454 454
455 455
456 456 @attr.s(slots=True, init=False)
457 457 class BaseStoreEntry:
458 458 """An entry in the store
459 459
460 460 This is returned by `store.walk` and represents some data in the store."""
461 461
462 462 unencoded_path = attr.ib()
463 463 _is_volatile = attr.ib(default=False)
464 464 _file_size = attr.ib(default=None)
465 465
466 466 def __init__(
467 467 self,
468 468 unencoded_path,
469 469 is_volatile=False,
470 470 file_size=None,
471 471 ):
472 472 self.unencoded_path = unencoded_path
473 473 self._is_volatile = is_volatile
474 474 self._file_size = file_size
475 475
476 476 def files(self):
477 477 return [
478 478 StoreFile(
479 479 unencoded_path=self.unencoded_path,
480 480 file_size=self._file_size,
481 481 is_volatile=self._is_volatile,
482 482 )
483 483 ]
484 484
485 485
486 486 @attr.s(slots=True, init=False)
487 487 class SimpleStoreEntry(BaseStoreEntry):
488 488 """A generic entry in the store"""
489 489
490 490 is_revlog = False
491 491
492 492
493 493 @attr.s(slots=True, init=False)
494 494 class RevlogStoreEntry(BaseStoreEntry):
495 495 """A revlog entry in the store"""
496 496
497 497 is_revlog = True
498 498 revlog_type = attr.ib(default=None)
499 499 is_revlog_main = attr.ib(default=None)
500 500
501 501 def __init__(
502 502 self,
503 503 unencoded_path,
504 504 revlog_type,
505 505 is_revlog_main=False,
506 506 is_volatile=False,
507 507 file_size=None,
508 508 ):
509 509 super().__init__(
510 510 unencoded_path=unencoded_path,
511 511 is_volatile=is_volatile,
512 512 file_size=file_size,
513 513 )
514 514 self.revlog_type = revlog_type
515 515 self.is_revlog_main = is_revlog_main
516 516
517 517
518 518 @attr.s(slots=True)
519 519 class StoreFile:
520 520 """a file matching an entry"""
521 521
522 522 unencoded_path = attr.ib()
523 523 _file_size = attr.ib(default=False)
524 524 is_volatile = attr.ib(default=False)
525 525
526 526 def file_size(self, vfs):
527 527 if self._file_size is not None:
528 528 return self._file_size
529 529 try:
530 530 return vfs.stat(self.unencoded_path).st_size
531 531 except FileNotFoundError:
532 532 return 0
533 533
534 534
535 535 class basicstore:
536 536 '''base class for local repository stores'''
537 537
538 538 def __init__(self, path, vfstype):
539 539 vfs = vfstype(path)
540 540 self.path = vfs.base
541 541 self.createmode = _calcmode(vfs)
542 542 vfs.createmode = self.createmode
543 543 self.rawvfs = vfs
544 544 self.vfs = vfsmod.filtervfs(vfs, encodedir)
545 545 self.opener = self.vfs
546 546
547 547 def join(self, f):
548 548 return self.path + b'/' + encodedir(f)
549 549
550 550 def _walk(self, relpath, recurse):
551 551 '''yields (revlog_type, unencoded, size)'''
552 552 path = self.path
553 553 if relpath:
554 554 path += b'/' + relpath
555 555 striplen = len(self.path) + 1
556 556 l = []
557 557 if self.rawvfs.isdir(path):
558 558 visit = [path]
559 559 readdir = self.rawvfs.readdir
560 560 while visit:
561 561 p = visit.pop()
562 562 for f, kind, st in readdir(p, stat=True):
563 563 fp = p + b'/' + f
564 564 rl_type = is_revlog(f, kind, st)
565 565 if rl_type is not None:
566 566 n = util.pconvert(fp[striplen:])
567 l.append((rl_type, decodedir(n), st.st_size))
567 l.append((decodedir(n), (rl_type, st.st_size)))
568 568 elif kind == stat.S_IFDIR and recurse:
569 569 visit.append(fp)
570
570 571 l.sort()
571 572 return l
572 573
573 574 def changelog(self, trypending, concurrencychecker=None):
574 575 return changelog.changelog(
575 576 self.vfs,
576 577 trypending=trypending,
577 578 concurrencychecker=concurrencychecker,
578 579 )
579 580
580 581 def manifestlog(self, repo, storenarrowmatch):
581 582 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
582 583 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
583 584
584 585 def datafiles(
585 586 self, matcher=None, undecodable=None
586 587 ) -> Generator[BaseStoreEntry, None, None]:
587 588 """Like walk, but excluding the changelog and root manifest.
588 589
589 590 When [undecodable] is None, revlog names that can't be
590 591 decoded cause an exception. When it is provided, it should
591 592 be a list and the filenames that can't be decoded are added
592 593 to it instead. This is very rarely needed."""
593 594 files = self._walk(b'data', True) + self._walk(b'meta', True)
594 for (t, u, s) in files:
595 for u, (t, s) in files:
595 596 if t is not None:
596 597 yield RevlogStoreEntry(
597 598 unencoded_path=u,
598 599 revlog_type=FILEFLAGS_FILELOG,
599 600 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
600 601 is_volatile=bool(t & FILEFLAGS_VOLATILE),
601 602 file_size=s,
602 603 )
603 604
604 605 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
605 606 # yield manifest before changelog
606 files = reversed(self._walk(b'', False))
607 for (t, u, s) in files:
607 files = self._walk(b'', False)
608 # key is (type, path) (keeping ordering so we get 00changelog.i last)
609 type_key = lambda x: (x[1][0], x[0])
610 files = sorted(files, reverse=True, key=type_key)
611 for u, (t, s) in files:
608 612 if u.startswith(b'00changelog'):
609 613 yield RevlogStoreEntry(
610 614 unencoded_path=u,
611 615 revlog_type=FILEFLAGS_CHANGELOG,
612 616 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
613 617 is_volatile=bool(t & FILEFLAGS_VOLATILE),
614 618 file_size=s,
615 619 )
616 620 elif u.startswith(b'00manifest'):
617 621 yield RevlogStoreEntry(
618 622 unencoded_path=u,
619 623 revlog_type=FILEFLAGS_MANIFESTLOG,
620 624 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
621 625 is_volatile=bool(t & FILEFLAGS_VOLATILE),
622 626 file_size=s,
623 627 )
624 628 else:
625 629 yield SimpleStoreEntry(
626 630 unencoded_path=u,
627 631 is_volatile=bool(t & FILEFLAGS_VOLATILE),
628 632 file_size=s,
629 633 )
630 634
631 635 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
632 636 """return files related to data storage (ie: revlogs)
633 637
634 638 yields (file_type, unencoded, size)
635 639
636 640 if a matcher is passed, only storage files for tracked paths that
637 641 match the matcher are yielded
638 642 """
639 643 # yield data files first
640 644 for x in self.datafiles(matcher):
641 645 yield x
642 646 for x in self.topfiles():
643 647 yield x
644 648
645 649 def copylist(self):
646 650 return _data
647 651
648 652 def write(self, tr):
649 653 pass
650 654
651 655 def invalidatecaches(self):
652 656 pass
653 657
654 658 def markremoved(self, fn):
655 659 pass
656 660
657 661 def __contains__(self, path):
658 662 '''Checks if the store contains path'''
659 663 path = b"/".join((b"data", path))
660 664 # file?
661 665 if self.vfs.exists(path + b".i"):
662 666 return True
663 667 # dir?
664 668 if not path.endswith(b"/"):
665 669 path = path + b"/"
666 670 return self.vfs.exists(path)
667 671
668 672
669 673 class encodedstore(basicstore):
670 674 def __init__(self, path, vfstype):
671 675 vfs = vfstype(path + b'/store')
672 676 self.path = vfs.base
673 677 self.createmode = _calcmode(vfs)
674 678 vfs.createmode = self.createmode
675 679 self.rawvfs = vfs
676 680 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
677 681 self.opener = self.vfs
678 682
679 683 # note: topfiles would also need a decode phase. It is just that in
680 684 # practice we do not have any file outside of `data/` that needs encoding.
681 685 # However that might change so we should probably add a test and encoding
682 686 # decoding for it too. see issue6548
683 687
684 688 def datafiles(
685 689 self, matcher=None, undecodable=None
686 690 ) -> Generator[BaseStoreEntry, None, None]:
687 691 for entry in super(encodedstore, self).datafiles():
688 692 try:
689 693 f1 = entry.unencoded_path
690 694 f2 = decodefilename(f1)
691 695 except KeyError:
692 696 if undecodable is None:
693 697 msg = _(b'undecodable revlog name %s') % f1
694 698 raise error.StorageError(msg)
695 699 else:
696 700 undecodable.append(f1)
697 701 continue
698 702 if not _matchtrackedpath(f2, matcher):
699 703 continue
700 704 entry.unencoded_path = f2
701 705 yield entry
702 706
703 707 def join(self, f):
704 708 return self.path + b'/' + encodefilename(f)
705 709
706 710 def copylist(self):
707 711 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
708 712
709 713
710 714 class fncache:
711 715 # the filename used to be partially encoded
712 716 # hence the encodedir/decodedir dance
713 717 def __init__(self, vfs):
714 718 self.vfs = vfs
715 719 self._ignores = set()
716 720 self.entries = None
717 721 self._dirty = False
718 722 # set of new additions to fncache
719 723 self.addls = set()
720 724
721 725 def ensureloaded(self, warn=None):
722 726 """read the fncache file if not already read.
723 727
724 728 If the file on disk is corrupted, raise. If warn is provided,
725 729 warn and keep going instead."""
726 730 if self.entries is None:
727 731 self._load(warn)
728 732
729 733 def _load(self, warn=None):
730 734 '''fill the entries from the fncache file'''
731 735 self._dirty = False
732 736 try:
733 737 fp = self.vfs(b'fncache', mode=b'rb')
734 738 except IOError:
735 739 # skip nonexistent file
736 740 self.entries = set()
737 741 return
738 742
739 743 self.entries = set()
740 744 chunk = b''
741 745 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
742 746 chunk += c
743 747 try:
744 748 p = chunk.rindex(b'\n')
745 749 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
746 750 chunk = chunk[p + 1 :]
747 751 except ValueError:
748 752 # substring '\n' not found, maybe the entry is bigger than the
749 753 # chunksize, so let's keep iterating
750 754 pass
751 755
752 756 if chunk:
753 757 msg = _(b"fncache does not ends with a newline")
754 758 if warn:
755 759 warn(msg + b'\n')
756 760 else:
757 761 raise error.Abort(
758 762 msg,
759 763 hint=_(
760 764 b"use 'hg debugrebuildfncache' to "
761 765 b"rebuild the fncache"
762 766 ),
763 767 )
764 768 self._checkentries(fp, warn)
765 769 fp.close()
766 770
767 771 def _checkentries(self, fp, warn):
768 772 """make sure there is no empty string in entries"""
769 773 if b'' in self.entries:
770 774 fp.seek(0)
771 775 for n, line in enumerate(fp):
772 776 if not line.rstrip(b'\n'):
773 777 t = _(b'invalid entry in fncache, line %d') % (n + 1)
774 778 if warn:
775 779 warn(t + b'\n')
776 780 else:
777 781 raise error.Abort(t)
778 782
779 783 def write(self, tr):
780 784 if self._dirty:
781 785 assert self.entries is not None
782 786 self.entries = self.entries | self.addls
783 787 self.addls = set()
784 788 tr.addbackup(b'fncache')
785 789 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
786 790 if self.entries:
787 791 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
788 792 fp.close()
789 793 self._dirty = False
790 794 if self.addls:
791 795 # if we have just new entries, let's append them to the fncache
792 796 tr.addbackup(b'fncache')
793 797 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
794 798 if self.addls:
795 799 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
796 800 fp.close()
797 801 self.entries = None
798 802 self.addls = set()
799 803
800 804 def addignore(self, fn):
801 805 self._ignores.add(fn)
802 806
803 807 def add(self, fn):
804 808 if fn in self._ignores:
805 809 return
806 810 if self.entries is None:
807 811 self._load()
808 812 if fn not in self.entries:
809 813 self.addls.add(fn)
810 814
811 815 def remove(self, fn):
812 816 if self.entries is None:
813 817 self._load()
814 818 if fn in self.addls:
815 819 self.addls.remove(fn)
816 820 return
817 821 try:
818 822 self.entries.remove(fn)
819 823 self._dirty = True
820 824 except KeyError:
821 825 pass
822 826
823 827 def __contains__(self, fn):
824 828 if fn in self.addls:
825 829 return True
826 830 if self.entries is None:
827 831 self._load()
828 832 return fn in self.entries
829 833
830 834 def __iter__(self):
831 835 if self.entries is None:
832 836 self._load()
833 837 return iter(self.entries | self.addls)
834 838
835 839
836 840 class _fncachevfs(vfsmod.proxyvfs):
837 841 def __init__(self, vfs, fnc, encode):
838 842 vfsmod.proxyvfs.__init__(self, vfs)
839 843 self.fncache = fnc
840 844 self.encode = encode
841 845
842 846 def __call__(self, path, mode=b'r', *args, **kw):
843 847 encoded = self.encode(path)
844 848 if (
845 849 mode not in (b'r', b'rb')
846 850 and (path.startswith(b'data/') or path.startswith(b'meta/'))
847 851 and revlog_type(path) is not None
848 852 ):
849 853 # do not trigger a fncache load when adding a file that already is
850 854 # known to exist.
851 855 notload = self.fncache.entries is None and self.vfs.exists(encoded)
852 856 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
853 857 # when appending to an existing file, if the file has size zero,
854 858 # it should be considered as missing. Such zero-size files are
855 859 # the result of truncation when a transaction is aborted.
856 860 notload = False
857 861 if not notload:
858 862 self.fncache.add(path)
859 863 return self.vfs(encoded, mode, *args, **kw)
860 864
861 865 def join(self, path):
862 866 if path:
863 867 return self.vfs.join(self.encode(path))
864 868 else:
865 869 return self.vfs.join(path)
866 870
867 871 def register_file(self, path):
868 872 """generic hook point to let fncache steer its stew"""
869 873 if path.startswith(b'data/') or path.startswith(b'meta/'):
870 874 self.fncache.add(path)
871 875
872 876
873 877 class fncachestore(basicstore):
874 878 def __init__(self, path, vfstype, dotencode):
875 879 if dotencode:
876 880 encode = _pathencode
877 881 else:
878 882 encode = _plainhybridencode
879 883 self.encode = encode
880 884 vfs = vfstype(path + b'/store')
881 885 self.path = vfs.base
882 886 self.pathsep = self.path + b'/'
883 887 self.createmode = _calcmode(vfs)
884 888 vfs.createmode = self.createmode
885 889 self.rawvfs = vfs
886 890 fnc = fncache(vfs)
887 891 self.fncache = fnc
888 892 self.vfs = _fncachevfs(vfs, fnc, encode)
889 893 self.opener = self.vfs
890 894
891 895 def join(self, f):
892 896 return self.pathsep + self.encode(f)
893 897
894 898 def getsize(self, path):
895 899 return self.rawvfs.stat(path).st_size
896 900
897 901 def datafiles(
898 902 self, matcher=None, undecodable=None
899 903 ) -> Generator[BaseStoreEntry, None, None]:
900 904 for f in sorted(self.fncache):
901 905 if not _matchtrackedpath(f, matcher):
902 906 continue
903 907 ef = self.encode(f)
904 908 t = revlog_type(f)
905 909 if t is None:
906 910 # Note: this should not be in the fncache then…
907 911 #
909 913 # However the fncache might contain such files added by
910 914 # previous versions of Mercurial.
910 914 continue
911 915 yield RevlogStoreEntry(
912 916 unencoded_path=f,
913 917 revlog_type=FILEFLAGS_FILELOG,
914 918 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
915 919 is_volatile=bool(t & FILEFLAGS_VOLATILE),
916 920 )
917 921
918 922 def copylist(self):
919 923 d = (
920 924 b'bookmarks',
921 925 b'narrowspec',
922 926 b'data',
923 927 b'meta',
924 928 b'dh',
925 929 b'fncache',
926 930 b'phaseroots',
927 931 b'obsstore',
928 932 b'00manifest.d',
929 933 b'00manifest.i',
930 934 b'00changelog.d',
931 935 b'00changelog.i',
932 936 b'requires',
933 937 )
934 938 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
935 939
936 940 def write(self, tr):
937 941 self.fncache.write(tr)
938 942
939 943 def invalidatecaches(self):
940 944 self.fncache.entries = None
941 945 self.fncache.addls = set()
942 946
943 947 def markremoved(self, fn):
944 948 self.fncache.remove(fn)
945 949
946 950 def _exists(self, f):
947 951 ef = self.encode(f)
948 952 try:
949 953 self.getsize(ef)
950 954 return True
951 955 except FileNotFoundError:
952 956 return False
953 957
954 958 def __contains__(self, path):
955 959 '''Checks if the store contains path'''
956 960 path = b"/".join((b"data", path))
957 961 # check for files (exact match)
958 962 e = path + b'.i'
959 963 if e in self.fncache and self._exists(e):
960 964 return True
961 965 # now check for directories (prefix match)
962 966 if not path.endswith(b'/'):
963 967 path += b'/'
964 968 for e in self.fncache:
965 969 if e.startswith(path) and self._exists(e):
966 970 return True
967 971 return False
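As a rough illustration of how the new `(filename, (type, size))` layout is consumed by `topfiles` above (hypothetical sizes, only the two `.i` main revlogs shown), sorting in reverse on a `(type, path)` key keeps `00changelog.i` last:

    FILEFLAGS_REVLOG_MAIN = 1 << 1  # flag value from store.py above

    files = [
        (b'00changelog.i', (FILEFLAGS_REVLOG_MAIN, 2048)),
        (b'00manifest.i', (FILEFLAGS_REVLOG_MAIN, 1024)),
    ]
    # mirrors the `type_key` lambda added to topfiles: primary key is the revlog
    # type, secondary key is the path; reverse=True walks paths in descending
    # order, so the manifest is yielded before the changelog
    for u, (t, s) in sorted(files, reverse=True, key=lambda x: (x[1][0], x[0])):
        print(u)  # b'00manifest.i' first, then b'00changelog.i'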