store: introduce boolean property for revlog type...
Author: marmoute
Changeset: r51390:66c55696 (branch: default)
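
This changeset adds `is_changelog`, `is_manifestlog`, and `is_filelog` properties to `RevlogStoreEntry`. They return the masked flag value, so they are truthy/falsy rather than strict booleans, and no call site is converted in this revision. A minimal sketch of how a later cleanup might use them in `_match_tracked_entry`, assuming the flag semantics shown in the diff below (this is not part of the changeset):

    from mercurial import error

    def _match_tracked_entry(entry, matcher):
        # Sketch only: same behaviour as the helper in the diff, but using
        # the new properties instead of comparing revlog_type to constants.
        if matcher is None:
            return True
        if entry.is_filelog:
            # filelog entries: target_id is the tracked file path
            return matcher(entry.target_id)
        elif entry.is_manifestlog:
            # manifest entries: target_id is a directory prefix
            return matcher.visitdir(entry.target_id.rstrip(b'/'))
        raise error.ProgrammingError(b"cannot process entry %r" % entry)
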
@@ -1,1067 +1,1079 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _match_tracked_entry(entry, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 if entry.revlog_type == FILEFLAGS_FILELOG:
45 45 return matcher(entry.target_id)
46 46 elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
47 47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 49
50 50
51 51 # This avoids a collision between a file named foo and a dir named
52 52 # foo.i or foo.d
53 53 def _encodedir(path):
54 54 """
55 55 >>> _encodedir(b'data/foo.i')
56 56 'data/foo.i'
57 57 >>> _encodedir(b'data/foo.i/bla.i')
58 58 'data/foo.i.hg/bla.i'
59 59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 60 'data/foo.i.hg.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 63 """
64 64 return (
65 65 path.replace(b".hg/", b".hg.hg/")
66 66 .replace(b".i/", b".i.hg/")
67 67 .replace(b".d/", b".d.hg/")
68 68 )
69 69
70 70
71 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 72
73 73
74 74 def decodedir(path):
75 75 """
76 76 >>> decodedir(b'data/foo.i')
77 77 'data/foo.i'
78 78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 79 'data/foo.i/bla.i'
80 80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 81 'data/foo.i.hg/bla.i'
82 82 """
83 83 if b".hg/" not in path:
84 84 return path
85 85 return (
86 86 path.replace(b".d.hg/", b".d/")
87 87 .replace(b".i.hg/", b".i/")
88 88 .replace(b".hg.hg/", b".hg/")
89 89 )
90 90
91 91
92 92 def _reserved():
93 93 """characters that are problematic for filesystems
94 94
95 95 * ascii escapes (0..31)
96 96 * ascii hi (126..255)
97 97 * windows specials
98 98
99 99 these characters will be escaped by the encode functions
100 100 """
101 101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 102 for x in range(32):
103 103 yield x
104 104 for x in range(126, 256):
105 105 yield x
106 106 for x in winreserved:
107 107 yield x
108 108
109 109
110 110 def _buildencodefun():
111 111 """
112 112 >>> enc, dec = _buildencodefun()
113 113
114 114 >>> enc(b'nothing/special.txt')
115 115 'nothing/special.txt'
116 116 >>> dec(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118
119 119 >>> enc(b'HELLO')
120 120 '_h_e_l_l_o'
121 121 >>> dec(b'_h_e_l_l_o')
122 122 'HELLO'
123 123
124 124 >>> enc(b'hello:world?')
125 125 'hello~3aworld~3f'
126 126 >>> dec(b'hello~3aworld~3f')
127 127 'hello:world?'
128 128
129 129 >>> enc(b'the\\x07quick\\xADshot')
130 130 'the~07quick~adshot'
131 131 >>> dec(b'the~07quick~adshot')
132 132 'the\\x07quick\\xadshot'
133 133 """
134 134 e = b'_'
135 135 xchr = pycompat.bytechr
136 136 asciistr = list(map(xchr, range(127)))
137 137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 138
139 139 cmap = {x: x for x in asciistr}
140 140 for x in _reserved():
141 141 cmap[xchr(x)] = b"~%02x" % x
142 142 for x in capitals + [ord(e)]:
143 143 cmap[xchr(x)] = e + xchr(x).lower()
144 144
145 145 dmap = {}
146 146 for k, v in cmap.items():
147 147 dmap[v] = k
148 148
149 149 def decode(s):
150 150 i = 0
151 151 while i < len(s):
152 152 for l in range(1, 4):
153 153 try:
154 154 yield dmap[s[i : i + l]]
155 155 i += l
156 156 break
157 157 except KeyError:
158 158 pass
159 159 else:
160 160 raise KeyError
161 161
162 162 return (
163 163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 164 lambda s: b''.join(list(decode(s))),
165 165 )
166 166
167 167
168 168 _encodefname, _decodefname = _buildencodefun()
169 169
170 170
171 171 def encodefilename(s):
172 172 """
173 173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 175 """
176 176 return _encodefname(encodedir(s))
177 177
178 178
179 179 def decodefilename(s):
180 180 """
181 181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 183 """
184 184 return decodedir(_decodefname(s))
185 185
186 186
187 187 def _buildlowerencodefun():
188 188 """
189 189 >>> f = _buildlowerencodefun()
190 190 >>> f(b'nothing/special.txt')
191 191 'nothing/special.txt'
192 192 >>> f(b'HELLO')
193 193 'hello'
194 194 >>> f(b'hello:world?')
195 195 'hello~3aworld~3f'
196 196 >>> f(b'the\\x07quick\\xADshot')
197 197 'the~07quick~adshot'
198 198 """
199 199 xchr = pycompat.bytechr
200 200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 201 for x in _reserved():
202 202 cmap[xchr(x)] = b"~%02x" % x
203 203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 204 cmap[xchr(x)] = xchr(x).lower()
205 205
206 206 def lowerencode(s):
207 207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 208
209 209 return lowerencode
210 210
211 211
212 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 213
214 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 217
218 218
219 219 def _auxencode(path, dotencode):
220 220 """
221 221 Encodes filenames containing names reserved by Windows or which end in
222 222 period or space. Does not touch other single reserved characters c.
223 223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 224 Additionally encodes space or period at the beginning, if dotencode is
225 225 True. Parameter path is assumed to be all lowercase.
226 226 A segment only needs encoding if a reserved name appears as a
227 227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 228 doesn't need encoding.
229 229
230 230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 231 >>> _auxencode(s.split(b'/'), True)
232 232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 234 >>> _auxencode(s.split(b'/'), False)
235 235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 236 >>> _auxencode([b'foo. '], True)
237 237 ['foo.~20']
238 238 >>> _auxencode([b' .foo'], True)
239 239 ['~20.foo']
240 240 """
241 241 for i, n in enumerate(path):
242 242 if not n:
243 243 continue
244 244 if dotencode and n[0] in b'. ':
245 245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 246 path[i] = n
247 247 else:
248 248 l = n.find(b'.')
249 249 if l == -1:
250 250 l = len(n)
251 251 if (l == 3 and n[:3] in _winres3) or (
252 252 l == 4
253 253 and n[3:4] <= b'9'
254 254 and n[3:4] >= b'1'
255 255 and n[:3] in _winres4
256 256 ):
257 257 # encode third letter ('aux' -> 'au~78')
258 258 ec = b"~%02x" % ord(n[2:3])
259 259 n = n[0:2] + ec + n[3:]
260 260 path[i] = n
261 261 if n[-1] in b'. ':
262 262 # encode last period or space ('foo...' -> 'foo..~2e')
263 263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 264 return path
265 265
266 266
267 267 _maxstorepathlen = 120
268 268 _dirprefixlen = 8
269 269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 270
271 271
272 272 def _hashencode(path, dotencode):
273 273 digest = hex(hashutil.sha1(path).digest())
274 274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 275 parts = _auxencode(le, dotencode)
276 276 basename = parts[-1]
277 277 _root, ext = os.path.splitext(basename)
278 278 sdirs = []
279 279 sdirslen = 0
280 280 for p in parts[:-1]:
281 281 d = p[:_dirprefixlen]
282 282 if d[-1] in b'. ':
283 283 # Windows can't access dirs ending in period or space
284 284 d = d[:-1] + b'_'
285 285 if sdirslen == 0:
286 286 t = len(d)
287 287 else:
288 288 t = sdirslen + 1 + len(d)
289 289 if t > _maxshortdirslen:
290 290 break
291 291 sdirs.append(d)
292 292 sdirslen = t
293 293 dirs = b'/'.join(sdirs)
294 294 if len(dirs) > 0:
295 295 dirs += b'/'
296 296 res = b'dh/' + dirs + digest + ext
297 297 spaceleft = _maxstorepathlen - len(res)
298 298 if spaceleft > 0:
299 299 filler = basename[:spaceleft]
300 300 res = b'dh/' + dirs + filler + digest + ext
301 301 return res
302 302
303 303
304 304 def _hybridencode(path, dotencode):
305 305 """encodes path with a length limit
306 306
307 307 Encodes all paths that begin with 'data/', according to the following.
308 308
309 309 Default encoding (reversible):
310 310
311 311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 312 characters are encoded as '~xx', where xx is the two digit hex code
313 313 of the character (see encodefilename).
314 314 Relevant path components consisting of Windows reserved filenames are
315 315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 316
317 317 Hashed encoding (not reversible):
318 318
319 319 If the default-encoded path is longer than _maxstorepathlen, a
320 320 non-reversible hybrid hashing of the path is done instead.
321 321 This encoding uses up to _dirprefixlen characters of all directory
322 322 levels of the lowerencoded path, but not more levels than can fit into
323 323 _maxshortdirslen.
324 324 Then follows the filler followed by the sha digest of the full path.
325 325 The filler is the beginning of the basename of the lowerencoded path
326 326 (the basename is everything after the last path separator). The filler
327 327 is as long as possible, filling in characters from the basename until
328 328 the encoded path has _maxstorepathlen characters (or all chars of the
329 329 basename have been taken).
330 330 The extension (e.g. '.i' or '.d') is preserved.
331 331
332 332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 333 encoding was used.
334 334 """
335 335 path = encodedir(path)
336 336 ef = _encodefname(path).split(b'/')
337 337 res = b'/'.join(_auxencode(ef, dotencode))
338 338 if len(res) > _maxstorepathlen:
339 339 res = _hashencode(path, dotencode)
340 340 return res
341 341
342 342
343 343 def _pathencode(path):
344 344 de = encodedir(path)
345 345 if len(path) > _maxstorepathlen:
346 346 return _hashencode(de, True)
347 347 ef = _encodefname(de).split(b'/')
348 348 res = b'/'.join(_auxencode(ef, True))
349 349 if len(res) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 return res
352 352
353 353
354 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 355
356 356
357 357 def _plainhybridencode(f):
358 358 return _hybridencode(f, False)
359 359
360 360
361 361 def _calcmode(vfs):
362 362 try:
363 363 # files in .hg/ will be created using this mode
364 364 mode = vfs.stat().st_mode
365 365 # avoid some useless chmods
366 366 if (0o777 & ~util.umask) == (0o777 & mode):
367 367 mode = None
368 368 except OSError:
369 369 mode = None
370 370 return mode
371 371
372 372
373 373 _data = [
374 374 b'bookmarks',
375 375 b'narrowspec',
376 376 b'data',
377 377 b'meta',
378 378 b'00manifest.d',
379 379 b'00manifest.i',
380 380 b'00changelog.d',
381 381 b'00changelog.i',
382 382 b'phaseroots',
383 383 b'obsstore',
384 384 b'requires',
385 385 ]
386 386
387 387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 388 REVLOG_FILES_OTHER_EXT = (
389 389 b'.idx',
390 390 b'.d',
391 391 b'.dat',
392 392 b'.n',
393 393 b'.nd',
394 394 b'.sda',
395 395 )
396 396 # file extensions that also use a `-SOMELONGIDHASH.ext` form
397 397 REVLOG_FILES_LONG_EXT = (
398 398 b'.nd',
399 399 b'.idx',
400 400 b'.dat',
401 401 b'.sda',
402 402 )
403 403 # files that are "volatile" and might change between listing and streaming
404 404 #
405 405 # note: the ".nd" files are nodemap data and won't "change" but they might be
406 406 # deleted.
407 407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 408
409 409 # some exceptions to the above matching
410 410 #
411 411 # XXX This is currently not in use because of issue6542
412 412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 413
414 414
415 415 def is_revlog(f, kind, st):
416 416 if kind != stat.S_IFREG:
417 417 return None
418 418 return revlog_type(f)
419 419
420 420
421 421 def revlog_type(f):
422 422 # XXX we need to filter `undo.` created by the transaction here, however
423 423 # being naive about it also filters revlogs for `undo.*` files, leading to
424 424 # issue6542. So we no longer use EXCLUDED.
425 425 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 426 return FILEFLAGS_REVLOG_MAIN
427 427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 428 t = FILETYPE_FILELOG_OTHER
429 429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 430 t |= FILEFLAGS_VOLATILE
431 431 return t
432 432 return None
433 433
434 434
435 435 # the file is part of changelog data
436 436 FILEFLAGS_CHANGELOG = 1 << 13
437 437 # the file is part of manifest data
438 438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 439 # the file is part of filelog data
440 440 FILEFLAGS_FILELOG = 1 << 11
441 441 # files that are not directly part of a revlog
442 442 FILEFLAGS_OTHER = 1 << 10
443 443
444 444 # the main entry point for a revlog
445 445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 446 # a secondary file for a revlog
447 447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 448
449 449 # files that are "volatile" and might change between listing and streaming
450 450 FILEFLAGS_VOLATILE = 1 << 20
451 451
452 452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 458 FILETYPE_OTHER = FILEFLAGS_OTHER
459 459
460 460
461 461 @attr.s(slots=True, init=False)
462 462 class BaseStoreEntry:
463 463 """An entry in the store
464 464
465 465 This is returned by `store.walk` and represents some data in the store."""
466 466
467 467
468 468 @attr.s(slots=True, init=False)
469 469 class SimpleStoreEntry(BaseStoreEntry):
470 470 """A generic entry in the store"""
471 471
472 472 is_revlog = False
473 473
474 474 _entry_path = attr.ib()
475 475 _is_volatile = attr.ib(default=False)
476 476 _file_size = attr.ib(default=None)
477 477
478 478 def __init__(
479 479 self,
480 480 entry_path,
481 481 is_volatile=False,
482 482 file_size=None,
483 483 ):
484 484 super().__init__()
485 485 self._entry_path = entry_path
486 486 self._is_volatile = is_volatile
487 487 self._file_size = file_size
488 488
489 489 def files(self):
490 490 return [
491 491 StoreFile(
492 492 unencoded_path=self._entry_path,
493 493 file_size=self._file_size,
494 494 is_volatile=self._is_volatile,
495 495 )
496 496 ]
497 497
498 498
499 499 @attr.s(slots=True, init=False)
500 500 class RevlogStoreEntry(BaseStoreEntry):
501 501 """A revlog entry in the store"""
502 502
503 503 is_revlog = True
504 504
505 505 revlog_type = attr.ib(default=None)
506 506 target_id = attr.ib(default=None)
507 507 _path_prefix = attr.ib(default=None)
508 508 _details = attr.ib(default=None)
509 509
510 510 def __init__(
511 511 self,
512 512 revlog_type,
513 513 path_prefix,
514 514 target_id,
515 515 details,
516 516 ):
517 517 super().__init__()
518 518 self.revlog_type = revlog_type
519 519 self.target_id = target_id
520 520 self._path_prefix = path_prefix
521 521 assert b'.i' in details, (path_prefix, details)
522 522 self._details = details
523 523
524 @property
525 def is_changelog(self):
526 return self.revlog_type & FILEFLAGS_CHANGELOG
527
528 @property
529 def is_manifestlog(self):
530 return self.revlog_type & FILEFLAGS_MANIFESTLOG
531
532 @property
533 def is_filelog(self):
534 return self.revlog_type & FILEFLAGS_FILELOG
535
524 536 def main_file_path(self):
525 537 """unencoded path of the main revlog file"""
526 538 return self._path_prefix + b'.i'
527 539
528 540 def files(self):
529 541 files = []
530 542 for ext in sorted(self._details, key=_ext_key):
531 543 path = self._path_prefix + ext
532 544 data = self._details[ext]
533 545 files.append(StoreFile(unencoded_path=path, **data))
534 546 return files
535 547
536 548
537 549 @attr.s(slots=True)
538 550 class StoreFile:
539 551 """a file matching an entry"""
540 552
541 553 unencoded_path = attr.ib()
542 554 _file_size = attr.ib(default=None)
543 555 is_volatile = attr.ib(default=False)
544 556
545 557 def file_size(self, vfs):
546 558 if self._file_size is not None:
547 559 return self._file_size
548 560 try:
549 561 return vfs.stat(self.unencoded_path).st_size
550 562 except FileNotFoundError:
551 563 return 0
552 564
553 565
554 566 def _gather_revlog(files_data):
555 567 """group files per revlog prefix
556 568
557 569 This returns a two-level nested dict. The top level key is the revlog prefix
558 570 without extension; the second level maps each file "suffix" seen for this
559 571 revlog to arbitrary file data.
560 572 """
561 573 revlogs = collections.defaultdict(dict)
562 574 for u, value in files_data:
563 575 name, ext = _split_revlog_ext(u)
564 576 revlogs[name][ext] = value
565 577 return sorted(revlogs.items())
566 578
567 579
568 580 def _split_revlog_ext(filename):
569 581 """split the revlog file prefix from the variable extension"""
570 582 if filename.endswith(REVLOG_FILES_LONG_EXT):
571 583 char = b'-'
572 584 else:
573 585 char = b'.'
574 586 idx = filename.rfind(char)
575 587 return filename[:idx], filename[idx:]
576 588
577 589
578 590 def _ext_key(ext):
579 591 """a key to order revlog suffix
580 592
581 593 important to issue .i after other entries."""
582 594 # the only important part of this order is to keep the `.i` last.
583 595 if ext.endswith(b'.n'):
584 596 return (0, ext)
585 597 elif ext.endswith(b'.nd'):
586 598 return (10, ext)
587 599 elif ext.endswith(b'.d'):
588 600 return (20, ext)
589 601 elif ext.endswith(b'.i'):
590 602 return (50, ext)
591 603 else:
592 604 return (40, ext)
593 605
594 606
595 607 class basicstore:
596 608 '''base class for local repository stores'''
597 609
598 610 def __init__(self, path, vfstype):
599 611 vfs = vfstype(path)
600 612 self.path = vfs.base
601 613 self.createmode = _calcmode(vfs)
602 614 vfs.createmode = self.createmode
603 615 self.rawvfs = vfs
604 616 self.vfs = vfsmod.filtervfs(vfs, encodedir)
605 617 self.opener = self.vfs
606 618
607 619 def join(self, f):
608 620 return self.path + b'/' + encodedir(f)
609 621
610 622 def _walk(self, relpath, recurse, undecodable=None):
611 623 '''yields (revlog_type, unencoded, size)'''
612 624 path = self.path
613 625 if relpath:
614 626 path += b'/' + relpath
615 627 striplen = len(self.path) + 1
616 628 l = []
617 629 if self.rawvfs.isdir(path):
618 630 visit = [path]
619 631 readdir = self.rawvfs.readdir
620 632 while visit:
621 633 p = visit.pop()
622 634 for f, kind, st in readdir(p, stat=True):
623 635 fp = p + b'/' + f
624 636 rl_type = is_revlog(f, kind, st)
625 637 if rl_type is not None:
626 638 n = util.pconvert(fp[striplen:])
627 639 l.append((decodedir(n), (rl_type, st.st_size)))
628 640 elif kind == stat.S_IFDIR and recurse:
629 641 visit.append(fp)
630 642
631 643 l.sort()
632 644 return l
633 645
634 646 def changelog(self, trypending, concurrencychecker=None):
635 647 return changelog.changelog(
636 648 self.vfs,
637 649 trypending=trypending,
638 650 concurrencychecker=concurrencychecker,
639 651 )
640 652
641 653 def manifestlog(self, repo, storenarrowmatch):
642 654 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
643 655 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
644 656
645 657 def datafiles(
646 658 self, matcher=None, undecodable=None
647 659 ) -> Generator[BaseStoreEntry, None, None]:
648 660 """Like walk, but excluding the changelog and root manifest.
649 661
650 662 When [undecodable] is None, revlog names that can't be
651 663 decoded cause an exception. When it is provided, it should
652 664 be a list and the filenames that can't be decoded are added
653 665 to it instead. This is very rarely needed."""
654 666 dirs = [
655 667 (b'data', FILEFLAGS_FILELOG),
656 668 (b'meta', FILEFLAGS_MANIFESTLOG),
657 669 ]
658 670 for base_dir, rl_type in dirs:
659 671 files = self._walk(base_dir, True, undecodable=undecodable)
660 672 files = (f for f in files if f[1][0] is not None)
661 673 for revlog, details in _gather_revlog(files):
662 674 file_details = {}
663 675 revlog_target_id = revlog.split(b'/', 1)[1]
664 676 for ext, (t, s) in sorted(details.items()):
665 677 file_details[ext] = {
666 678 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
667 679 'file_size': s,
668 680 }
669 681 yield RevlogStoreEntry(
670 682 path_prefix=revlog,
671 683 revlog_type=rl_type,
672 684 target_id=revlog_target_id,
673 685 details=file_details,
674 686 )
675 687
676 688 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
677 689 files = reversed(self._walk(b'', False))
678 690
679 691 changelogs = collections.defaultdict(dict)
680 692 manifestlogs = collections.defaultdict(dict)
681 693
682 694 for u, (t, s) in files:
683 695 if u.startswith(b'00changelog'):
684 696 name, ext = _split_revlog_ext(u)
685 697 changelogs[name][ext] = (t, s)
686 698 elif u.startswith(b'00manifest'):
687 699 name, ext = _split_revlog_ext(u)
688 700 manifestlogs[name][ext] = (t, s)
689 701 else:
690 702 yield SimpleStoreEntry(
691 703 entry_path=u,
692 704 is_volatile=bool(t & FILEFLAGS_VOLATILE),
693 705 file_size=s,
694 706 )
695 707 # yield manifest before changelog
696 708 top_rl = [
697 709 (manifestlogs, FILEFLAGS_MANIFESTLOG),
698 710 (changelogs, FILEFLAGS_CHANGELOG),
699 711 ]
700 712 assert len(manifestlogs) <= 1
701 713 assert len(changelogs) <= 1
702 714 for data, revlog_type in top_rl:
703 715 for revlog, details in sorted(data.items()):
704 716 file_details = {}
705 717 for ext, (t, s) in details.items():
706 718 file_details[ext] = {
707 719 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
708 720 'file_size': s,
709 721 }
710 722 yield RevlogStoreEntry(
711 723 path_prefix=revlog,
712 724 revlog_type=revlog_type,
713 725 target_id=b'',
714 726 details=file_details,
715 727 )
716 728
717 729 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
718 730 """return files related to data storage (ie: revlogs)
719 731
720 732 yields (file_type, unencoded, size)
721 733
722 734 if a matcher is passed, only storage files for tracked paths
723 735 matched by the matcher are yielded
724 736 """
725 737 # yield data files first
726 738 for x in self.datafiles(matcher):
727 739 yield x
728 740 for x in self.topfiles():
729 741 yield x
730 742
731 743 def copylist(self):
732 744 return _data
733 745
734 746 def write(self, tr):
735 747 pass
736 748
737 749 def invalidatecaches(self):
738 750 pass
739 751
740 752 def markremoved(self, fn):
741 753 pass
742 754
743 755 def __contains__(self, path):
744 756 '''Checks if the store contains path'''
745 757 path = b"/".join((b"data", path))
746 758 # file?
747 759 if self.vfs.exists(path + b".i"):
748 760 return True
749 761 # dir?
750 762 if not path.endswith(b"/"):
751 763 path = path + b"/"
752 764 return self.vfs.exists(path)
753 765
754 766
755 767 class encodedstore(basicstore):
756 768 def __init__(self, path, vfstype):
757 769 vfs = vfstype(path + b'/store')
758 770 self.path = vfs.base
759 771 self.createmode = _calcmode(vfs)
760 772 vfs.createmode = self.createmode
761 773 self.rawvfs = vfs
762 774 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
763 775 self.opener = self.vfs
764 776
765 777 def _walk(self, relpath, recurse, undecodable=None):
766 778 old = super()._walk(relpath, recurse)
767 779 new = []
768 780 for f1, value in old:
769 781 try:
770 782 f2 = decodefilename(f1)
771 783 except KeyError:
772 784 if undecodable is None:
773 785 msg = _(b'undecodable revlog name %s') % f1
774 786 raise error.StorageError(msg)
775 787 else:
776 788 undecodable.append(f1)
777 789 continue
778 790 new.append((f2, value))
779 791 return new
780 792
781 793 def datafiles(
782 794 self, matcher=None, undecodable=None
783 795 ) -> Generator[BaseStoreEntry, None, None]:
784 796 entries = super(encodedstore, self).datafiles(undecodable=undecodable)
785 797 for entry in entries:
786 798 if _match_tracked_entry(entry, matcher):
787 799 yield entry
788 800
789 801 def join(self, f):
790 802 return self.path + b'/' + encodefilename(f)
791 803
792 804 def copylist(self):
793 805 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
794 806
795 807
796 808 class fncache:
797 809 # the filename used to be partially encoded
798 810 # hence the encodedir/decodedir dance
799 811 def __init__(self, vfs):
800 812 self.vfs = vfs
801 813 self._ignores = set()
802 814 self.entries = None
803 815 self._dirty = False
804 816 # set of new additions to fncache
805 817 self.addls = set()
806 818
807 819 def ensureloaded(self, warn=None):
808 820 """read the fncache file if not already read.
809 821
810 822 If the file on disk is corrupted, raise. If warn is provided,
811 823 warn and keep going instead."""
812 824 if self.entries is None:
813 825 self._load(warn)
814 826
815 827 def _load(self, warn=None):
816 828 '''fill the entries from the fncache file'''
817 829 self._dirty = False
818 830 try:
819 831 fp = self.vfs(b'fncache', mode=b'rb')
820 832 except IOError:
821 833 # skip nonexistent file
822 834 self.entries = set()
823 835 return
824 836
825 837 self.entries = set()
826 838 chunk = b''
827 839 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
828 840 chunk += c
829 841 try:
830 842 p = chunk.rindex(b'\n')
831 843 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
832 844 chunk = chunk[p + 1 :]
833 845 except ValueError:
834 846 # substring '\n' not found, maybe the entry is bigger than the
835 847 # chunksize, so let's keep iterating
836 848 pass
837 849
838 850 if chunk:
839 851 msg = _(b"fncache does not ends with a newline")
840 852 if warn:
841 853 warn(msg + b'\n')
842 854 else:
843 855 raise error.Abort(
844 856 msg,
845 857 hint=_(
846 858 b"use 'hg debugrebuildfncache' to "
847 859 b"rebuild the fncache"
848 860 ),
849 861 )
850 862 self._checkentries(fp, warn)
851 863 fp.close()
852 864
853 865 def _checkentries(self, fp, warn):
854 866 """make sure there is no empty string in entries"""
855 867 if b'' in self.entries:
856 868 fp.seek(0)
857 869 for n, line in enumerate(fp):
858 870 if not line.rstrip(b'\n'):
859 871 t = _(b'invalid entry in fncache, line %d') % (n + 1)
860 872 if warn:
861 873 warn(t + b'\n')
862 874 else:
863 875 raise error.Abort(t)
864 876
865 877 def write(self, tr):
866 878 if self._dirty:
867 879 assert self.entries is not None
868 880 self.entries = self.entries | self.addls
869 881 self.addls = set()
870 882 tr.addbackup(b'fncache')
871 883 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
872 884 if self.entries:
873 885 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
874 886 fp.close()
875 887 self._dirty = False
876 888 if self.addls:
877 889 # if we have just new entries, let's append them to the fncache
878 890 tr.addbackup(b'fncache')
879 891 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
880 892 if self.addls:
881 893 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
882 894 fp.close()
883 895 self.entries = None
884 896 self.addls = set()
885 897
886 898 def addignore(self, fn):
887 899 self._ignores.add(fn)
888 900
889 901 def add(self, fn):
890 902 if fn in self._ignores:
891 903 return
892 904 if self.entries is None:
893 905 self._load()
894 906 if fn not in self.entries:
895 907 self.addls.add(fn)
896 908
897 909 def remove(self, fn):
898 910 if self.entries is None:
899 911 self._load()
900 912 if fn in self.addls:
901 913 self.addls.remove(fn)
902 914 return
903 915 try:
904 916 self.entries.remove(fn)
905 917 self._dirty = True
906 918 except KeyError:
907 919 pass
908 920
909 921 def __contains__(self, fn):
910 922 if fn in self.addls:
911 923 return True
912 924 if self.entries is None:
913 925 self._load()
914 926 return fn in self.entries
915 927
916 928 def __iter__(self):
917 929 if self.entries is None:
918 930 self._load()
919 931 return iter(self.entries | self.addls)
920 932
921 933
922 934 class _fncachevfs(vfsmod.proxyvfs):
923 935 def __init__(self, vfs, fnc, encode):
924 936 vfsmod.proxyvfs.__init__(self, vfs)
925 937 self.fncache = fnc
926 938 self.encode = encode
927 939
928 940 def __call__(self, path, mode=b'r', *args, **kw):
929 941 encoded = self.encode(path)
930 942 if (
931 943 mode not in (b'r', b'rb')
932 944 and (path.startswith(b'data/') or path.startswith(b'meta/'))
933 945 and revlog_type(path) is not None
934 946 ):
935 947 # do not trigger a fncache load when adding a file that already is
936 948 # known to exist.
937 949 notload = self.fncache.entries is None and self.vfs.exists(encoded)
938 950 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
939 951 # when appending to an existing file, if the file has size zero,
940 952 # it should be considered as missing. Such zero-size files are
941 953 # the result of truncation when a transaction is aborted.
942 954 notload = False
943 955 if not notload:
944 956 self.fncache.add(path)
945 957 return self.vfs(encoded, mode, *args, **kw)
946 958
947 959 def join(self, path):
948 960 if path:
949 961 return self.vfs.join(self.encode(path))
950 962 else:
951 963 return self.vfs.join(path)
952 964
953 965 def register_file(self, path):
954 966 """generic hook point to lets fncache steer its stew"""
955 967 if path.startswith(b'data/') or path.startswith(b'meta/'):
956 968 self.fncache.add(path)
957 969
958 970
959 971 class fncachestore(basicstore):
960 972 def __init__(self, path, vfstype, dotencode):
961 973 if dotencode:
962 974 encode = _pathencode
963 975 else:
964 976 encode = _plainhybridencode
965 977 self.encode = encode
966 978 vfs = vfstype(path + b'/store')
967 979 self.path = vfs.base
968 980 self.pathsep = self.path + b'/'
969 981 self.createmode = _calcmode(vfs)
970 982 vfs.createmode = self.createmode
971 983 self.rawvfs = vfs
972 984 fnc = fncache(vfs)
973 985 self.fncache = fnc
974 986 self.vfs = _fncachevfs(vfs, fnc, encode)
975 987 self.opener = self.vfs
976 988
977 989 def join(self, f):
978 990 return self.pathsep + self.encode(f)
979 991
980 992 def getsize(self, path):
981 993 return self.rawvfs.stat(path).st_size
982 994
983 995 def datafiles(
984 996 self, matcher=None, undecodable=None
985 997 ) -> Generator[BaseStoreEntry, None, None]:
986 998 files = ((f, revlog_type(f)) for f in self.fncache)
987 999 # Note: all files in fncache should be revlog related; however the
988 1000 # fncache might contain such files added by previous versions of
989 1001 # Mercurial.
990 1002 files = (f for f in files if f[1] is not None)
991 1003 by_revlog = _gather_revlog(files)
992 1004 for revlog, details in by_revlog:
993 1005 file_details = {}
994 1006 if revlog.startswith(b'data/'):
995 1007 rl_type = FILEFLAGS_FILELOG
996 1008 revlog_target_id = revlog.split(b'/', 1)[1]
997 1009 elif revlog.startswith(b'meta/'):
998 1010 rl_type = FILEFLAGS_MANIFESTLOG
999 1011 # drop the initial directory and the `00manifest` file part
1000 1012 tmp = revlog.split(b'/', 1)[1]
1001 1013 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1002 1014 else:
1003 1015 # unreachable
1004 1016 assert False, revlog
1005 1017 for ext, t in details.items():
1006 1018 file_details[ext] = {
1007 1019 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1008 1020 }
1009 1021 entry = RevlogStoreEntry(
1010 1022 path_prefix=revlog,
1011 1023 revlog_type=rl_type,
1012 1024 target_id=revlog_target_id,
1013 1025 details=file_details,
1014 1026 )
1015 1027 if _match_tracked_entry(entry, matcher):
1016 1028 yield entry
1017 1029
1018 1030 def copylist(self):
1019 1031 d = (
1020 1032 b'bookmarks',
1021 1033 b'narrowspec',
1022 1034 b'data',
1023 1035 b'meta',
1024 1036 b'dh',
1025 1037 b'fncache',
1026 1038 b'phaseroots',
1027 1039 b'obsstore',
1028 1040 b'00manifest.d',
1029 1041 b'00manifest.i',
1030 1042 b'00changelog.d',
1031 1043 b'00changelog.i',
1032 1044 b'requires',
1033 1045 )
1034 1046 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1035 1047
1036 1048 def write(self, tr):
1037 1049 self.fncache.write(tr)
1038 1050
1039 1051 def invalidatecaches(self):
1040 1052 self.fncache.entries = None
1041 1053 self.fncache.addls = set()
1042 1054
1043 1055 def markremoved(self, fn):
1044 1056 self.fncache.remove(fn)
1045 1057
1046 1058 def _exists(self, f):
1047 1059 ef = self.encode(f)
1048 1060 try:
1049 1061 self.getsize(ef)
1050 1062 return True
1051 1063 except FileNotFoundError:
1052 1064 return False
1053 1065
1054 1066 def __contains__(self, path):
1055 1067 '''Checks if the store contains path'''
1056 1068 path = b"/".join((b"data", path))
1057 1069 # check for files (exact match)
1058 1070 e = path + b'.i'
1059 1071 if e in self.fncache and self._exists(e):
1060 1072 return True
1061 1073 # now check for directories (prefix match)
1062 1074 if not path.endswith(b'/'):
1063 1075 path += b'/'
1064 1076 for e in self.fncache:
1065 1077 if e.startswith(path) and self._exists(e):
1066 1078 return True
1067 1079 return False
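
For context, the entry objects defined above are what `store.walk()` yields to its consumers. A minimal sketch of iterating them with the new properties, assuming a local repository in the current directory and the internal (unstable) API shown in this file:

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    store = repo.store

    filelog_bytes = 0
    for entry in store.walk():
        if entry.is_revlog and entry.is_filelog:
            # target_id is the tracked path; files() lists the .i/.d/... pieces
            for store_file in entry.files():
                filelog_bytes += store_file.file_size(store.vfs)
    print(filelog_bytes)
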