##// END OF EJS Templates
store: actually tag tree manifest revlogs as manifest revlogs...
marmoute -
r51375:1fc25227 default
parent child Browse files
Show More
@@ -1,1031 +1,1043 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how much bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
# prefer the C implementation from the parsers module when available,
# falling back to the pure-Python version above
encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
def decodedir(path):
    """Undo the directory escaping performed by :func:`_encodedir`.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was escaped
    if b".hg/" not in path:
        return path
    # reverse order of _encodedir's replacements
    unescaped = path.replace(b".d.hg/", b".d/")
    unescaped = unescaped.replace(b".i.hg/", b".i/")
    return unescaped.replace(b".hg.hg/", b".hg/")
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
def _buildencodefun():
    """Build the (encode, decode) function pair for store filenames.

    Uppercase letters and '_' are escaped as '_' + lowercase letter;
    every byte from _reserved() is escaped as '~' + two hex digits.
    decode() is the exact inverse of encode().

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    # escape character for uppercase letters (and for itself)
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # cmap: single input byte -> encoded byte string
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # dmap: encoded byte string -> original byte (inverse of cmap)
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # greedily match 1-, 2- or 3-byte encoded sequences at position i
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no prefix of length 1..3 decodes: the input is invalid
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )


# module-level singletons used by encodefilename()/decodefilename()
_encodefname, _decodefname = _buildencodefun()
171 171
172 172
def encodefilename(s):
    """Fully encode a store path: directory escaping plus byte encoding.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/_h_e_l_l_o')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/__h__e__l__l__o'
    """
    dirs_escaped = encodedir(s)
    return _encodefname(dirs_escaped)
179 179
180 180
def decodefilename(s):
    """Invert :func:`encodefilename`: byte decoding, then directory
    unescaping.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    name_decoded = _decodefname(s)
    return decodedir(name_decoded)
187 187
188 188
def _buildlowerencodefun():
    """Build the lossy lowercasing encoder used by the hashed scheme.

    Unlike _buildencodefun, uppercase letters are simply lowercased
    (irreversible); reserved bytes still become '~xx'.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity mapping for plain ASCII
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        # lossy: uppercase collapses onto lowercase
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode


# prefer the C implementation when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
# A path segment whose basename is one of these cannot exist on Windows;
# _auxencode masks them by escaping the third letter.
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
# maximum length of a plainly-encoded store path; anything longer falls
# back to the hashed encoding (see _hashencode/_hybridencode)
_maxstorepathlen = 120
# how many leading characters of each directory level survive hashing
_dirprefixlen = 8
# total length budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
def _hashencode(path, dotencode):
    """Encode ``path`` with the non-reversible hashed scheme ('dh/...').

    Used when the plainly-encoded path would exceed _maxstorepathlen.
    See the _hybridencode docstring for a description of the layout.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    # shortened directory components, capped at _maxshortdirslen total
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)  # +1 for the '/' separator
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with the start of the basename up to the _maxstorepathlen budget
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304 304
305 305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the reversible scheme: fall back to hashing
        res = _hashencode(path, dotencode)
    return res
343 343
344 344
def _pathencode(path):
    """Encode a store path, choosing the plain or hashed scheme by length.

    Equivalent to _hybridencode(path, dotencode=True) plus an early length
    check on the unencoded path (encoding never shortens a path, so a
    too-long input can skip straight to hashing).
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


# prefer the C implementation when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
def _plainhybridencode(f):
    """Hybrid-encode ``f`` without leading-dot/space escaping."""
    return _hybridencode(f, dotencode=False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
# store files and directories copied verbatim on clone/upgrade
# (see basicstore.copylist / encodedstore.copylist)
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
388 388
# extension of the main (index) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of companion files that may sit next to a revlog index
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415 415
416 416
def is_revlog(f, kind, st):
    """Classify directory entry ``f``: revlog type flags or None.

    Only regular files can be revlogs; any other ``kind`` yields None.
    ``st`` is accepted for signature compatibility but unused here.
    """
    if kind == stat.S_IFREG:
        return revlog_type(f)
    return None
421 421
422 422
def revlog_type(f):
    """Return FILEFLAGS_* flags describing filename ``f``, or None when it
    is not a revlog file.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            # companion file may vanish between listing and streaming
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
435 435
436 436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# convenience combinations: <kind of data> | <role within the revlog>
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461 461
462 462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # path relative to the store root, before filename encoding
    unencoded_path = attr.ib()
    # True if the file may change/disappear between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes if known at listing time, else None (stat on demand)
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        # a base entry maps to exactly one on-disk file
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
491 491
492 492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # non-revlog store content (e.g. phaseroots, obsstore)
    is_revlog = False
498 498
499 499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of FILEFLAGS_{CHANGELOG,MANIFESTLOG,FILELOG}
    revlog_type = attr.ib(default=None)
    # True for the `.i` index file, False for companion files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
523 523
524 524
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # None means "unknown, stat on demand" (see file_size). The previous
    # default of False defeated the `is not None` check below and made
    # size-less instances report a bogus size of False instead of stat-ing.
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return this file's size in bytes, stat-ing through ``vfs`` when
        no size was recorded at construction.  A missing file counts as 0
        bytes (it may legitimately have been deleted since listing)."""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
540 540
541 541
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix"
    that were seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for filename, data in files_data:
        prefix, suffix = _split_revlog_ext(filename)
        grouped[prefix][suffix] = data
    return sorted(grouped.items())
554 554
555 555
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash style extensions use '-' as the separator, others use '.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
564 564
565 565
566 566 def _ext_key(ext):
567 567 """a key to order revlog suffix
568 568
569 569 important to issue .i after other entry."""
570 570 # the only important part of this order is to keep the `.i` last.
571 571 if ext.endswith(b'.n'):
572 572 return (0, ext)
573 573 elif ext.endswith(b'.nd'):
574 574 return (10, ext)
575 575 elif ext.endswith(b'.d'):
576 576 return (20, ext)
577 577 elif ext.endswith(b'.i'):
578 578 return (50, ext)
579 579 else:
580 580 return (40, ext)
581 581
582 582
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # self.vfs escapes directory names transparently (see encodedir)
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the filesystem path for store-relative path ``f``."""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, (revlog_type, size)) for
        revlog files found under ``relpath``'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        # NOTE: the scraped diff carried both the pre- and post-patch bodies
        # here; only the patched version below is kept.  `data/` holds
        # filelogs while `meta/` holds tree-manifest revlogs, so each
        # directory gets its matching revlog type tag.
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """Yield entries for files at the root of the store: the changelog,
        the root manifest and everything else (phaseroots, obsstore, ...)."""
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
731 736
732 737
class encodedstore(basicstore):
    # store variant whose on-disk names go through full filename encoding
    # (encodefilename) instead of only directory escaping
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like basicstore.datafiles but yields *decoded* paths.

        Entries whose name cannot be decoded are appended to
        [undecodable] when provided, otherwise raise StorageError."""
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            # matching is done on the decoded name, hence after decoding
            if not _matchtrackedpath(f2, matcher):
                continue
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
772 777
773 778
class fncache:
    # In-memory representation of the 'fncache' file: the set of all
    # store-relative revlog paths, used by fncachestore.
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # paths that add() must never record (see addignore)
        self._ignores = set()
        # None until _load() runs, then the set of entries read from disk
        self.entries = None
        # True when entries were removed and a full rewrite is needed
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes: the file did not end with '\n' -> corrupted
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        # full rewrite when entries were removed; plain append otherwise
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        # fn will be skipped by every future add() call
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk; just drop the pending addition
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
898 903
899 904
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records newly written revlog files into the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ are tracked
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # non-empty paths are joined on their encoded form
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
935 940
936 941
class fncachestore(basicstore):
    """store variant backed by an fncache file listing all revlog paths"""

    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield revlog entries recorded in the fncache, excluding the
        changelog and root manifest, filtered by ``matcher`` if given."""
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related. However the
        # fncache might contain such files added by previous versions of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            # NOTE: the scraped diff left both the removed
            # `revlog_type=FILEFLAGS_FILELOG` keyword and the added
            # `revlog_type=rl_type` one in the yield below (a duplicate
            # keyword); resolved to the patched per-prefix tagging.
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                if not _matchtrackedpath(f, matcher):
                    continue
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        # existence check through the raw (encoded) filesystem path
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now