##// END OF EJS Templates
store: do the revlog matching on entry directly...
marmoute -
r51387:b4953fad default
parent child Browse files
Show More
@@ -1,1058 +1,1056 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how much bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
def _match_tracked_entry(entry, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    rl_type = entry.revlog_type
    if rl_type == FILEFLAGS_FILELOG:
        # filelog entries are matched directly against their tracked path
        return matcher(entry.target_id)
    if rl_type == FILEFLAGS_MANIFESTLOG:
        # manifest entries carry a directory id with a trailing slash
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
51 49
52 50
53 51 # This avoids a collision between a file named foo and a dir named
54 52 # foo.i or foo.d
55 53 def _encodedir(path):
56 54 """
57 55 >>> _encodedir(b'data/foo.i')
58 56 'data/foo.i'
59 57 >>> _encodedir(b'data/foo.i/bla.i')
60 58 'data/foo.i.hg/bla.i'
61 59 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 60 'data/foo.i.hg.hg/bla.i'
63 61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 63 """
66 64 return (
67 65 path.replace(b".hg/", b".hg.hg/")
68 66 .replace(b".i/", b".i.hg/")
69 67 .replace(b".d/", b".d.hg/")
70 68 )
71 69
72 70
73 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 72
75 73
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was dir-encoded
    if b".hg/" not in path:
        return path
    # reverse of _encodedir: strip one '.hg' layer from protected dir names
    for encoded, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(encoded, plain)
    return path
92 90
93 91
94 92 def _reserved():
95 93 """characters that are problematic for filesystems
96 94
97 95 * ascii escapes (0..31)
98 96 * ascii hi (126..255)
99 97 * windows specials
100 98
101 99 these characters will be escaped by encodefunctions
102 100 """
103 101 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 102 for x in range(32):
105 103 yield x
106 104 for x in range(126, 256):
107 105 yield x
108 106 for x in winreserved:
109 107 yield x
110 108
111 109
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # start from the identity mapping over 7-bit ascii ...
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # ... escape problematic bytes as '~xx' ...
    for code in _reserved():
        cmap[xchr(code)] = b"~%02x" % code
    # ... and fold capitals (plus the escape char itself) to '_<lower>'
    for code in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(code)] = escape + xchr(code).lower()

    # decoding table is the exact inverse
    dmap = {v: k for k, v in cmap.items()}

    def encode(s):
        return b''.join([cmap[s[c : c + 1]] for c in range(len(s))])

    def decode(s):
        out = []
        i = 0
        while i < len(s):
            # encoded atoms are 1 to 3 bytes long; try the shortest first
            for width in range(1, 4):
                atom = s[i : i + width]
                if atom in dmap:
                    out.append(dmap[atom])
                    i += width
                    break
            else:
                raise KeyError
        return b''.join(out)

    return (encode, decode)
168 166
169 167
170 168 _encodefname, _decodefname = _buildencodefun()
171 169
172 170
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # protect revlog-suffix directory names first, then byte-encode
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
179 177
180 178
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'.replace(b'foo.i.hg.hg', b'foo.i.hg'))
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # reverse of encodefilename: byte-decode first, then un-protect dirs
    byte_decoded = _decodefname(s)
    return decodedir(byte_decoded)
187 185
188 186
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity over ascii, then escape reserved bytes, then lowercase capitals
    cmap = {xchr(x): xchr(x) for x in range(127)}
    cmap.update((xchr(x), b"~%02x" % x) for x in _reserved())
    cmap.update(
        (xchr(x), xchr(x).lower()) for x in range(ord(b"A"), ord(b"Z") + 1)
    )

    def lowerencode(s):
        return b"".join(map(cmap.__getitem__, pycompat.iterbytestr(s)))

    return lowerencode
212 210
213 211
214 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 213
216 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 217
220 218
221 219 def _auxencode(path, dotencode):
222 220 """
223 221 Encodes filenames containing names reserved by Windows or which end in
224 222 period or space. Does not touch other single reserved characters c.
225 223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 224 Additionally encodes space or period at the beginning, if dotencode is
227 225 True. Parameter path is assumed to be all lowercase.
228 226 A segment only needs encoding if a reserved name appears as a
229 227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 228 doesn't need encoding.
231 229
232 230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 231 >>> _auxencode(s.split(b'/'), True)
234 232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 234 >>> _auxencode(s.split(b'/'), False)
237 235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 236 >>> _auxencode([b'foo. '], True)
239 237 ['foo.~20']
240 238 >>> _auxencode([b' .foo'], True)
241 239 ['~20.foo']
242 240 """
243 241 for i, n in enumerate(path):
244 242 if not n:
245 243 continue
246 244 if dotencode and n[0] in b'. ':
247 245 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 246 path[i] = n
249 247 else:
250 248 l = n.find(b'.')
251 249 if l == -1:
252 250 l = len(n)
253 251 if (l == 3 and n[:3] in _winres3) or (
254 252 l == 4
255 253 and n[3:4] <= b'9'
256 254 and n[3:4] >= b'1'
257 255 and n[:3] in _winres4
258 256 ):
259 257 # encode third letter ('aux' -> 'au~78')
260 258 ec = b"~%02x" % ord(n[2:3])
261 259 n = n[0:2] + ec + n[3:]
262 260 path[i] = n
263 261 if n[-1] in b'. ':
264 262 # encode last period or space ('foo...' -> 'foo..~2e')
265 263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 264 return path
267 265
268 266
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


def _hashencode(path, dotencode):
    """non-reversible encoding of an over-long store path

    Builds 'dh/<shortened dirs>/<basename filler><sha1 hex><ext>' so the
    result stays within _maxstorepathlen."""
    digest = hex(hashutil.sha1(path).digest())
    # skip the 'data/' or 'meta/' prefix before lower-encoding
    segments = _auxencode(lowerencode(path[5:]).split(b'/'), dotencode)
    basename = segments[-1]
    _root, ext = os.path.splitext(basename)
    shortdirs = []
    used = 0
    for segment in segments[:-1]:
        prefix = segment[:_dirprefixlen]
        if prefix[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + b'_'
        # account for the '/' separator once a first component exists
        candidate = len(prefix) if used == 0 else used + 1 + len(prefix)
        if candidate > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        used = candidate
    dirs = b'/'.join(shortdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with as much of the original basename as fits
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
304 302
305 303
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    res = b'/'.join(_auxencode(_encodefname(path).split(b'/'), dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the reversible scheme; fall back to hashing
        return _hashencode(path, dotencode)
    return res
343 341
344 342
def _pathencode(path):
    """dot-encoding variant of the hybrid path encoder (see _hybridencode)"""
    dir_safe = encodedir(path)
    if len(path) > _maxstorepathlen:
        # the raw path is already too long; hash unconditionally
        return _hashencode(dir_safe, True)
    encoded = b'/'.join(_auxencode(_encodefname(dir_safe).split(b'/'), True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(dir_safe, True)
    return encoded
354 352
355 353
356 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 355
358 356
def _plainhybridencode(f):
    """hybrid-encode *f* without escaping leading dots/spaces"""
    return _hybridencode(f, dotencode=False)
361 359
362 360
def _calcmode(vfs):
    """mode to create files in .hg/ with, or None to leave modes alone"""
    try:
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods: the umask already produces this mode
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
373 371
374 372
375 373 _data = [
376 374 b'bookmarks',
377 375 b'narrowspec',
378 376 b'data',
379 377 b'meta',
380 378 b'00manifest.d',
381 379 b'00manifest.i',
382 380 b'00changelog.d',
383 381 b'00changelog.i',
384 382 b'phaseroots',
385 383 b'obsstore',
386 384 b'requires',
387 385 ]
388 386
389 387 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 388 REVLOG_FILES_OTHER_EXT = (
391 389 b'.idx',
392 390 b'.d',
393 391 b'.dat',
394 392 b'.n',
395 393 b'.nd',
396 394 b'.sda',
397 395 )
398 396 # file extension that also use a `-SOMELONGIDHASH.ext` form
399 397 REVLOG_FILES_LONG_EXT = (
400 398 b'.nd',
401 399 b'.idx',
402 400 b'.dat',
403 401 b'.sda',
404 402 )
405 403 # files that are "volatile" and might change between listing and streaming
406 404 #
407 405 # note: the ".nd" file are nodemap data and won't "change" but they might be
408 406 # deleted.
409 407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
410 408
411 409 # some exception to the above matching
412 410 #
413 411 # XXX This is currently not in use because of issue6542
414 412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415 413
416 414
def is_revlog(f, kind, st):
    """return the revlog type flags for *f*, or None for non-regular files"""
    if kind == stat.S_IFREG:
        return revlog_type(f)
    return None
421 419
422 420
def revlog_type(f):
    """classify a store file name as revlog main/other (+volatile), or None"""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
435 433
436 434
437 435 # the file is part of changelog data
438 436 FILEFLAGS_CHANGELOG = 1 << 13
439 437 # the file is part of manifest data
440 438 FILEFLAGS_MANIFESTLOG = 1 << 12
441 439 # the file is part of filelog data
442 440 FILEFLAGS_FILELOG = 1 << 11
443 441 # file that are not directly part of a revlog
444 442 FILEFLAGS_OTHER = 1 << 10
445 443
446 444 # the main entry point for a revlog
447 445 FILEFLAGS_REVLOG_MAIN = 1 << 1
448 446 # a secondary file for a revlog
449 447 FILEFLAGS_REVLOG_OTHER = 1 << 0
450 448
451 449 # files that are "volatile" and might change between listing and streaming
452 450 FILEFLAGS_VOLATILE = 1 << 20
453 451
454 452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
455 453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
456 454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
457 455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
458 456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
459 457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
460 458 FILETYPE_OTHER = FILEFLAGS_OTHER
461 459
462 460
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # store-relative path of the backing file, before filename encoding
    unencoded_path = attr.ib()
    # whether the file may change/disappear between listing and streaming
    _is_volatile = attr.ib(default=False)
    # known size in bytes, or None to stat lazily (see StoreFile.file_size)
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        # a base entry is backed by exactly one store file
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
491 489
492 490
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # flat files such as phaseroots or obsstore are not revlogs
    is_revlog = False
498 496
499 497
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of FILEFLAGS_CHANGELOG / FILEFLAGS_MANIFESTLOG / FILEFLAGS_FILELOG
    revlog_type = attr.ib(default=None)
    # tracked path (filelog) or directory (manifestlog); b'' for top revlogs
    target_id = attr.ib(default=None)
    # True for the `.i` entry point, False for auxiliary files (.d, .nd, ...)
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self.unencoded_path
530 528
531 529
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # NOTE(review): the default is False (not None), so a default-constructed
    # StoreFile would return False from file_size() instead of stat-ing; all
    # constructions in this module pass an explicit value — confirm intent
    # before relying on the default.
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        # prefer the pre-recorded size; otherwise stat through `vfs`
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            # a vanished (e.g. volatile) file is reported as empty
            return 0
547 545
548 546
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog prefix
    without extension, the second level is all the file "suffix" that were
    seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, data in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = data
    return sorted(grouped.items())
561 559
562 560
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions use '-<HASH>.ext', the rest plain '.ext'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
571 569
572 570
573 571 def _ext_key(ext):
574 572 """a key to order revlog suffix
575 573
576 574 important to issue .i after other entry."""
577 575 # the only important part of this order is to keep the `.i` last.
578 576 if ext.endswith(b'.n'):
579 577 return (0, ext)
580 578 elif ext.endswith(b'.nd'):
581 579 return (10, ext)
582 580 elif ext.endswith(b'.d'):
583 581 return (20, ext)
584 582 elif ext.endswith(b'.i'):
585 583 return (50, ext)
586 584 else:
587 585 return (40, ext)
588 586
589 587
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # read/write through a vfs that dir-encodes names
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        found = []
        if self.rawvfs.isdir(path):
            readdir = self.rawvfs.readdir
            pending = [path]
            while pending:
                current = pending.pop()
                for name, kind, st in readdir(current, stat=True):
                    full = current + b'/' + name
                    rl_type = is_revlog(name, kind, st)
                    if rl_type is not None:
                        relative = util.pconvert(full[striplen:])
                        found.append((decodedir(relative), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)

        found.sort()
        return found

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                # everything after the 'data/'/'meta/' prefix identifies
                # the tracked target of this revlog
                target_id = revlog.split(b'/', 1)[1]
                for ext, (t, s) in sorted(details.items()):
                    yield RevlogStoreEntry(
                        unencoded_path=revlog + ext,
                        revlog_type=rl_type,
                        target_id=target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        # (keeping ordering so we get 00changelog.i last)
        ext_order = lambda item: _ext_key(item[0])
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                for ext, (t, s) in sorted(details.items(), key=ext_order):
                    yield RevlogStoreEntry(
                        unencoded_path=revlog + ext,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        yield from self.datafiles(matcher)
        yield from self.topfiles()

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
746 744
747 745
class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # this store fully filename-encodes entries on disk
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        # decode the on-disk names yielded by the plain walk; names that
        # cannot be decoded either abort or are collected in `undecodable`
        decoded = []
        for encoded_name, value in super()._walk(relpath, recurse):
            try:
                plain = decodefilename(encoded_name)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % encoded_name
                    raise error.StorageError(msg)
                undecodable.append(encoded_name)
                continue
            decoded.append((plain, value))
        return decoded

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).datafiles(undecodable=undecodable)
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
787 785
788 786
class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        buf = b''
        for piece in iter(functools.partial(fp.read, fncache_chunksize), b''):
            buf += piece
            try:
                newline = buf.rindex(b'\n')
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                continue
            self.entries.update(decodedir(buf[: newline + 1]).splitlines())
            buf = buf[newline + 1 :]

        if buf:
            # leftover bytes mean the file was truncated mid-entry
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                hint = _(
                    b"use 'hg debugrebuildfncache' to "
                    b"rebuild the fncache"
                )
                raise error.Abort(msg, hint=hint)
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' not in self.entries:
            return
        fp.seek(0)
        for n, line in enumerate(fp):
            if line.rstrip(b'\n'):
                continue
            t = _(b'invalid entry in fncache, line %d') % (n + 1)
            if warn:
                warn(t + b'\n')
            else:
                raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            # rewrite the whole file with additions merged in
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never persisted; just drop the pending addition
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
        except KeyError:
            pass
        else:
            self._dirty = True

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
913 911
914 912
class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        writing = mode not in (b'r', b'rb')
        tracked_dir = path.startswith(b'data/') or path.startswith(b'meta/')
        if writing and tracked_dir and revlog_type(path) is not None:
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
950 948
951 949
class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        # dotencode additionally escapes leading '.'/' ' in path segments
        encode = _pathencode if dotencode else _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        for revlog, details in _gather_revlog(files):
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                entry = RevlogStoreEntry(
                    unencoded_path=revlog + ext,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )
                if _match_tracked_entry(entry, matcher):
                    yield entry

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        try:
            self.getsize(self.encode(f))
        except FileNotFoundError:
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        return any(
            e.startswith(path) and self._exists(e) for e in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now