##// END OF EJS Templates
store: have the revlog determine which files are volatile itself...
marmoute -
r51561:6b522a9e default
parent child Browse files
Show More
@@ -1,1219 +1,1222 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 filelog,
23 23 manifest,
24 24 policy,
25 25 pycompat,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import hashutil
30 30
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
35 35
36 36
37 37 def _match_tracked_entry(entry, matcher):
38 38 """parses a fncache entry and returns whether the entry is tracking a path
39 39 matched by matcher or not.
40 40
41 41 If matcher is None, returns True"""
42 42
43 43 if matcher is None:
44 44 return True
45 45 if entry.is_filelog:
46 46 return matcher(entry.target_id)
47 47 elif entry.is_manifestlog:
48 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 50
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
72 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
def decodedir(path):
    """reverse the escaping performed by `encodedir`

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        # fast path: nothing in this path was escaped
        return path
    # undo in the reverse order of _encodedir, `.hg.hg/` last
    path = path.replace(b".d.hg/", b".d/")
    path = path.replace(b".i.hg/", b".i/")
    path = path.replace(b".hg.hg/", b".hg/")
    return path
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by encodefunctions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
def _buildencodefun():
    """Build the (encode, decode) pair for the reversible filename encoding.

    The encoder maps each input byte through `cmap`; the decoder greedily
    matches 1- to 3-byte sequences against the reverse map `dmap`.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'  # escape character for uppercase letters and itself
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # start with the identity mapping for all low-ascii bytes ...
    cmap = {x: x for x in asciistr}
    # ... then escape reserved bytes as '~xx' (two-digit lowercase hex) ...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ... and uppercase letters (plus '_' itself) as '_' + lowercase
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse map: encoded sequence (1-3 bytes) -> original byte
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # greedy scan: try the shortest encoded sequence first
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no 1-3 byte sequence matched: invalid encoded input
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
167 167
168 168
169 169 _encodefname, _decodefname = _buildencodefun()
170 170
171 171
def encodefilename(s):
    """apply both directory and filename encoding to `s`

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # escape colliding directory names first, then reserved bytes
    dir_encoded = encodedir(s)
    return _encodefname(dir_encoded)
178 178
179 179
def decodefilename(s):
    """reverse `encodefilename`

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the byte-level encoding first, then the directory escaping
    name_decoded = _decodefname(s)
    return decodedir(name_decoded)
186 186
187 187
def _buildlowerencodefun():
    """Build the non-reversible lowercasing encoder used by hashed paths.

    Reserved bytes become '~xx'; uppercase letters are simply lowercased
    (which is why this encoding cannot be reversed).

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for low ascii ...
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # ... '~xx' escapes for reserved bytes ...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ... and plain lowercasing for A-Z
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
211 211
212 212
# prefer the C implementation when the parsers extension provides one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
218 218
219 219
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # note: `path` is a list of segments, mutated and returned in place
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # leading '.' or ' ' gets '~2e'/'~20' escaped
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # `l` is the length of the segment's extension-less basename
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            # reserved iff the basename is exactly 'aux'/'con'/'prn'/'nul',
            # or 'com'/'lpt' followed by a single digit 1-9
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
266 266
267 267
# limits used by the hashed encoding (see _hybridencode for how they interact)
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 271
272 272
def _hashencode(path, dotencode):
    """Non-reversible hashed encoding for over-long store paths.

    Produces 'dh/' + shortened directories + basename filler + sha1 digest
    + original extension, keeping the result within _maxstorepathlen (see
    the docstring of _hybridencode for the full scheme).
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # keep up to _dirprefixlen chars of each directory level, stopping once
    # the joined result would exceed _maxshortdirslen
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)  # +1 for the '/' separator
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # use any space left before the length limit for a basename filler
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
303 303
304 304
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(segments, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the default encoding: fall back to hashing
        return _hashencode(path, dotencode)
    return res
342 342
343 343
def _pathencode(path):
    """dotencode variant of _hybridencode with an early length shortcut

    A raw path longer than _maxstorepathlen cannot possibly fit after
    encoding (encoding never shrinks), so hash it right away.
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    segments = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(segments, True))
    if len(res) <= _maxstorepathlen:
        return res
    return _hashencode(de, True)
353 353
354 354
355 355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 356
357 357
def _plainhybridencode(f):
    """hybrid encoding without dotencode (leading '.'/' ' kept as-is)"""
    return _hybridencode(f, False)
360 360
361 361
362 362 def _calcmode(vfs):
363 363 try:
364 364 # files in .hg/ will be created using this mode
365 365 mode = vfs.stat().st_mode
366 366 # avoid some useless chmods
367 367 if (0o777 & ~util.umask) == (0o777 & mode):
368 368 mode = None
369 369 except OSError:
370 370 mode = None
371 371 return mode
372 372
373 373
# store items returned by `basicstore.copylist` (see also encodedstore)
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# extension of the "main" (entry-point) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the auxiliary revlog files
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 414
415 415
def is_revlog(f, kind, st):
    """return the revlog type flags for `f`, or None for non-regular files"""
    return revlog_type(f) if kind == stat.S_IFREG else None
420 420
421 421
def revlog_type(f):
    """classify filename `f` as a revlog file, returning flags or None"""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            # volatile files may change or vanish before streaming
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
434 434
435 435
# bit flags describing store files; combined into the FILETYPE_* constants
# below and into RevlogStoreEntry.revlog_type

# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
460 460
461 461
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    # unencoded (repository-relative) path of the file
    unencoded_path = attr.ib()
    # cached size in bytes; None until file_size() stats the file
    _file_size = attr.ib(default=None)
    # True when the file may change or vanish between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return (and cache) this file's size in bytes

        `vfs` may be None only if a previous call already cached the size;
        otherwise a ProgrammingError is raised.  A missing file is reported
        as empty (size 0).
        """
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                # the file disappeared (e.g. a volatile file): treat as empty
                self._file_size = 0
        return self._file_size

    def get_stream(self, vfs, copies):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        # requires a prior file_size(vfs) call to have cached the size
        size = self.file_size(None)

        def get_stream():
            # `copies` maps the vfs path to the snapshot file to read from
            actual_path = copies[vfs.join(self.unencoded_path)]
            with open(actual_path, 'rb') as fp:
                yield None  # ready to stream
                if size <= 65536:
                    # small file: one read is cheaper than chunked iteration
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        s = get_stream()
        # prime the generator so the file is opened before we return
        next(s)
        return (self.unencoded_path, s, size)
501 501
502 502
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    def files(self) -> List[StoreFile]:
        """return the list of StoreFile making up this entry (abstract)"""
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """return a list of data stream associated to files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, copies) for f in self.files()]
525 525
526 526
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    # unencoded path of the single file backing this entry
    _entry_path = attr.ib()
    # True when the file may change or vanish between listing and streaming
    _is_volatile = attr.ib(default=False)
    # known size in bytes, or None when it must be stat()ed later
    _file_size = attr.ib(default=None)
    # lazily built list of StoreFile
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None

    def files(self) -> List[StoreFile]:
        """return the single StoreFile backing this entry (cached)"""
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files
560 560
561 561
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # FILEFLAGS_* combination describing the kind of revlog
    revlog_type = attr.ib(default=None)
    # identifier of the tracked entity (file path, manifest dir, or b'')
    target_id = attr.ib(default=None)
    # unencoded path of the revlog files, without extension
    _path_prefix = attr.ib(default=None)
    # mapping of extension -> StoreFile keyword data (e.g. 'file_size')
    _details = attr.ib(default=None)
    # lazily built list of StoreFile
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # a revlog always has its `.i` entry point
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        """return the StoreFile list for this revlog, volatility included

        Whether a file is volatile is determined here from its extension
        instead of being passed in through `details`.
        """
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                data = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" file are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                f = StoreFile(unencoded_path=path, is_volatile=volatile, **data)
                self._files.append(f)
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """return data streams for this revlog's files

        When `repo` and `max_changeset` are available (and the revlog is not
        revlog-v2), the `.i`/`.d` streams are produced by the revlog itself;
        otherwise fall back on the generic per-file streaming.
        """
        if repo is None or max_changeset is None:
            # we are not able to do the higher level streaming
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
            )
        if any(k.endswith(b'.idx') for k in self._details.keys()):
            # This use revlog-v2, ignore for now
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
            )
        name_to_ext = {}
        for ext in self._details.keys():
            name_to_ext[self._path_prefix + ext] = ext
        name_to_size = {}
        for f in self.files():
            name_to_size[f.unencoded_path] = f.file_size(None)
        # stream everything but `.i` and `.d` the generic way
        stream = [
            f.get_stream(vfs, copies)
            for f in self.files()
            if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
        ]

        is_inline = b'.d' not in self._details

        rl = self.get_revlog_instance(repo).get_revlog()
        rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)

        # sanity check: the revlog must describe exactly the sizes we stat()ed
        for name, s, size in rl_stream:
            if name_to_size.get(name, 0) != size:
                msg = _(b"expected %d bytes but %d provided for %s")
                msg %= name_to_size.get(name, 0), size, name
                raise error.Abort(msg)
        stream.extend(rl_stream)
        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)
682 689
683 690
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix"
    that were seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = value
    return sorted(grouped.items())
696 703
697 704
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions use a '-' separator instead of '.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
706 713
707 714
708 715 def _ext_key(ext):
709 716 """a key to order revlog suffix
710 717
711 718 important to issue .i after other entry."""
712 719 # the only important part of this order is to keep the `.i` last.
713 720 if ext.endswith(b'.n'):
714 721 return (0, ext)
715 722 elif ext.endswith(b'.nd'):
716 723 return (10, ext)
717 724 elif ext.endswith(b'.d'):
718 725 return (20, ext)
719 726 elif ext.endswith(b'.i'):
720 727 return (50, ext)
721 728 else:
722 729 return (40, ext)
723 730
724 731
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # all access through self.vfs gets directory-name encoding applied
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the encoded absolute path for store-relative path `f`"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """return the changelog for this store"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """return the manifestlog for this store"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # tree manifests are identified by their directory
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                # volatility is determined by RevlogStoreEntry.files()
                # itself, so only the size is forwarded here
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield store entries living at the root of the store

        This covers phaseroots/obsstore (on request), the changelog and the
        root manifest, plus any other top-level store file."""
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                # volatility is determined by RevlogStoreEntry.files()
                # itself, so only the size is forwarded here
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=file_details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        """return the list of store items to copy (see `_data`)"""
        return _data

    def write(self, tr):
        # nothing to flush for a plain store
        pass

    def invalidatecaches(self):
        # no cache to invalidate for a plain store
        pass

    def markremoved(self, fn):
        # a plain store does not track removals
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
903 908
904 909
class encodedstore(basicstore):
    """store variant whose filenames go through `encodefilename`"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # full filename encoding instead of basicstore's directory-only one
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """walk the on-disk (encoded) names and yield them decoded

        Undecodable names raise a StorageError, or are collected into
        `undecodable` when a list is provided."""
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """like basicstore.data_entries, with matcher filtering applied"""
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
946 951
947 952
class fncache:
    """In-memory view of the ``fncache`` file listing store filenames.

    The filenames used to be partially encoded, hence the
    encodedir/decodedir dance when reading from and writing to disk.
    """

    def __init__(self, vfs):
        self.vfs = vfs
        # names that must never be recorded
        self._ignores = set()
        # None until the on-disk file has been parsed
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fobj = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        # read by bounded chunks so a huge fncache does not land in
        # memory all at once
        pending = b''
        while True:
            data = fobj.read(fncache_chunksize)
            if not data:
                break
            pending += data
            idx = pending.rfind(b'\n')
            if idx == -1:
                # no newline in this chunk: the entry may be bigger than
                # the chunk size, keep accumulating
                continue
            self.entries.update(decodedir(pending[: idx + 1]).splitlines())
            pending = pending[idx + 1 :]

        if pending:
            msg = _(b"fncache does not ends with a newline")
            if not warn:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
            warn(msg + b'\n')
        self._checkentries(fobj, warn)
        fobj.close()

    def _checkentries(self, fobj, warn):
        """make sure there is no empty string in entries"""
        if b'' not in self.entries:
            return
        fobj.seek(0)
        for lineno, raw in enumerate(fobj, 1):
            if raw.rstrip(b'\n'):
                continue
            t = _(b'invalid entry in fncache, line %d') % lineno
            if warn:
                warn(t + b'\n')
            else:
                raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes within transaction ``tr``."""
        if self._dirty:
            # full rewrite: entries were removed and/or added
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fobj = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fobj.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fobj.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fobj = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fobj.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fobj.close()
            # force a reload on next access so entries reflect the file
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Exempt ``fn`` from ever being recorded by add()."""
        self._ignores.add(fn)

    def add(self, fn):
        """Schedule ``fn`` for addition, unless ignored or already known."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``, whether pending or already persisted."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        if fn in self.entries:
            self.entries.remove(fn)
            self._dirty = True

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
1072 1077
1073 1078
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy recording every newly written revlog file in the fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        writing = mode not in (b'r', b'rb')
        tracked = path.startswith(b'data/') or path.startswith(b'meta/')
        if writing and tracked and revlog_type(path) is not None:
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skip_add = (
                self.fncache.entries is None and self.vfs.exists(encoded)
            )
            if (
                skip_add
                and b'r+' in mode
                and not self.vfs.stat(encoded).st_size
            ):
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                skip_add = False
            if not skip_add:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """Absolute path for ``path``, encoded when non-empty."""
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
1109 1114
1110 1115
class fncachestore(basicstore):
    """Store flavor that tracks its revlog files in a ``fncache`` file."""

    def __init__(self, path, vfstype, dotencode):
        # dotencode additionally escapes leading dots/spaces in path parts
        encode = _pathencode if dotencode else _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Encoded, store-relative path for ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Size in bytes of ``path``, bypassing the fncache layer."""
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield one RevlogStoreEntry per revlog listed in the fncache."""
        candidates = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        candidates = (c for c in candidates if c[1] is not None)
        for revlog, details in _gather_revlog(candidates):
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            # per-extension detail dicts are empty: the revlog itself now
            # determines which of its files are volatile
            file_details = {ext: {} for ext in details}
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """Files and directories a clone operation should copy."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        files = [b'requires', b'00changelog.i']
        files.extend(b'store/' + f for f in d)
        return files

    def write(self, tr):
        """Flush pending fncache changes within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the in-memory fncache state, forcing a reload."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Record that ``fn`` no longer exists in the store."""
        self.fncache.remove(fn)

    def _exists(self, f):
        # existence via stat(); FileNotFoundError means absent
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        return any(
            e.startswith(path) and self._exists(e) for e in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now