store: fix a signature mismatch for a vfs subclass...
Matt Harbison
r52775:2391a5fa default
@@ -1,1252 +1,1258 @@
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import collections
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15 import typing
16 16
17 from typing import Generator, List
17 from typing import (
18 Generator,
19 List,
20 Optional,
21 )
18 22
19 23 from .i18n import _
20 24 from .thirdparty import attr
21 25
22 26 # Force pytype to use the non-vendored package
23 27 if typing.TYPE_CHECKING:
24 28 # noinspection PyPackageRequirements
25 29 import attr
26 30
27 31 from .node import hex
28 32 from .revlogutils.constants import (
29 33 INDEX_HEADER,
30 34 KIND_CHANGELOG,
31 35 KIND_FILELOG,
32 36 KIND_MANIFESTLOG,
33 37 )
34 38 from . import (
35 39 changelog,
36 40 error,
37 41 filelog,
38 42 manifest,
39 43 policy,
40 44 pycompat,
41 45 revlog as revlogmod,
42 46 util,
43 47 vfs as vfsmod,
44 48 )
45 49 from .utils import hashutil
46 50
47 51 parsers = policy.importmod('parsers')
48 52 # how many bytes should be read from the fncache in one read
49 53 # This is done to prevent loading large fncache files into memory
50 54 fncache_chunksize = 10**6
51 55
52 56
53 57 def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
54 58 """parses a fncache entry and returns whether the entry is tracking a path
55 59 matched by matcher or not.
56 60
57 61 If matcher is None, returns True"""
58 62
59 63 if matcher is None:
60 64 return True
61 65
62 66 # TODO: make this safe for other entry types. Currently, the various
63 67 # store.data_entry generators only yield RevlogStoreEntry, so the
64 68 # attributes do exist on `entry`.
65 69 # pytype: disable=attribute-error
66 70 if entry.is_filelog:
67 71 return matcher(entry.target_id)
68 72 elif entry.is_manifestlog:
69 73 return matcher.visitdir(entry.target_id.rstrip(b'/'))
70 74 # pytype: enable=attribute-error
71 75 raise error.ProgrammingError(b"cannot process entry %r" % entry)
72 76
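    As a quick illustration of the dispatch above, a minimal sketch using a
    standard mercurial.match matcher (repository root and pattern invented):

        from mercurial import match as matchmod
        m = matchmod.match(b'/repo', b'', patterns=[b'path:foo'])
        # a filelog entry tracking b'foo/bar.txt' is matched on its path:
        assert m(b'foo/bar.txt')
        # a manifestlog entry for directory b'foo/' goes through visitdir:
        assert m.visitdir(b'foo')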
73 77
74 78 # This avoids a collision between a file named foo and a dir named
75 79 # foo.i or foo.d
76 80 def _encodedir(path):
77 81 """
78 82 >>> _encodedir(b'data/foo.i')
79 83 'data/foo.i'
80 84 >>> _encodedir(b'data/foo.i/bla.i')
81 85 'data/foo.i.hg/bla.i'
82 86 >>> _encodedir(b'data/foo.i.hg/bla.i')
83 87 'data/foo.i.hg.hg/bla.i'
84 88 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
85 89 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
86 90 """
87 91 return (
88 92 path.replace(b".hg/", b".hg.hg/")
89 93 .replace(b".i/", b".i.hg/")
90 94 .replace(b".d/", b".d.hg/")
91 95 )
92 96
93 97
94 98 encodedir = getattr(parsers, 'encodedir', _encodedir)
95 99
96 100
97 101 def decodedir(path):
98 102 """
99 103 >>> decodedir(b'data/foo.i')
100 104 'data/foo.i'
101 105 >>> decodedir(b'data/foo.i.hg/bla.i')
102 106 'data/foo.i/bla.i'
103 107 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
104 108 'data/foo.i.hg/bla.i'
105 109 """
106 110 if b".hg/" not in path:
107 111 return path
108 112 return (
109 113 path.replace(b".d.hg/", b".d/")
110 114 .replace(b".i.hg/", b".i/")
111 115 .replace(b".hg.hg/", b".hg/")
112 116 )
113 117
114 118
115 119 def _reserved():
116 120 """characters that are problematic for filesystems
117 121
118 122 * ascii escapes (0..31)
119 123 * ascii hi (126..255)
120 124 * windows specials
121 125
122 126 these characters will be escaped by the encode functions
123 127 """
124 128 winreserved = [ord(x) for x in u'\\:*?"<>|']
125 129 for x in range(32):
126 130 yield x
127 131 for x in range(126, 256):
128 132 yield x
129 133 for x in winreserved:
130 134 yield x
131 135
132 136
133 137 def _buildencodefun():
134 138 """
135 139 >>> enc, dec = _buildencodefun()
136 140
137 141 >>> enc(b'nothing/special.txt')
138 142 'nothing/special.txt'
139 143 >>> dec(b'nothing/special.txt')
140 144 'nothing/special.txt'
141 145
142 146 >>> enc(b'HELLO')
143 147 '_h_e_l_l_o'
144 148 >>> dec(b'_h_e_l_l_o')
145 149 'HELLO'
146 150
147 151 >>> enc(b'hello:world?')
148 152 'hello~3aworld~3f'
149 153 >>> dec(b'hello~3aworld~3f')
150 154 'hello:world?'
151 155
152 156 >>> enc(b'the\\x07quick\\xADshot')
153 157 'the~07quick~adshot'
154 158 >>> dec(b'the~07quick~adshot')
155 159 'the\\x07quick\\xadshot'
156 160 """
157 161 e = b'_'
158 162 xchr = pycompat.bytechr
159 163 asciistr = list(map(xchr, range(127)))
160 164 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
161 165
162 166 cmap = {x: x for x in asciistr}
163 167 for x in _reserved():
164 168 cmap[xchr(x)] = b"~%02x" % x
165 169 for x in capitals + [ord(e)]:
166 170 cmap[xchr(x)] = e + xchr(x).lower()
167 171
168 172 dmap = {}
169 173 for k, v in cmap.items():
170 174 dmap[v] = k
171 175
172 176 def decode(s):
173 177 i = 0
174 178 while i < len(s):
175 179 for l in range(1, 4):
176 180 try:
177 181 yield dmap[s[i : i + l]]
178 182 i += l
179 183 break
180 184 except KeyError:
181 185 pass
182 186 else:
183 187 raise KeyError
184 188
185 189 return (
186 190 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
187 191 lambda s: b''.join(list(decode(s))),
188 192 )
189 193
190 194
191 195 _encodefname, _decodefname = _buildencodefun()
192 196
193 197
194 198 def encodefilename(s):
195 199 """
196 200 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
197 201 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
198 202 """
199 203 return _encodefname(encodedir(s))
200 204
201 205
202 206 def decodefilename(s):
203 207 """
204 208 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
205 209 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
206 210 """
207 211 return decodedir(_decodefname(s))
208 212
209 213
210 214 def _buildlowerencodefun():
211 215 """
212 216 >>> f = _buildlowerencodefun()
213 217 >>> f(b'nothing/special.txt')
214 218 'nothing/special.txt'
215 219 >>> f(b'HELLO')
216 220 'hello'
217 221 >>> f(b'hello:world?')
218 222 'hello~3aworld~3f'
219 223 >>> f(b'the\\x07quick\\xADshot')
220 224 'the~07quick~adshot'
221 225 """
222 226 xchr = pycompat.bytechr
223 227 cmap = {xchr(x): xchr(x) for x in range(127)}
224 228 for x in _reserved():
225 229 cmap[xchr(x)] = b"~%02x" % x
226 230 for x in range(ord(b"A"), ord(b"Z") + 1):
227 231 cmap[xchr(x)] = xchr(x).lower()
228 232
229 233 def lowerencode(s):
230 234 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
231 235
232 236 return lowerencode
233 237
234 238
235 239 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
236 240
237 241 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
238 242 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
239 243 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
240 244
241 245
242 246 def _auxencode(path, dotencode):
243 247 """
244 248 Encodes filenames containing names reserved by Windows or which end in
245 249 period or space. Does not touch other single reserved characters c.
246 250 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
247 251 Additionally encodes space or period at the beginning, if dotencode is
248 252 True. Parameter path is assumed to be all lowercase.
249 253 A segment only needs encoding if a reserved name appears as a
250 254 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
251 255 doesn't need encoding.
252 256
253 257 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
254 258 >>> _auxencode(s.split(b'/'), True)
255 259 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
256 260 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
257 261 >>> _auxencode(s.split(b'/'), False)
258 262 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
259 263 >>> _auxencode([b'foo. '], True)
260 264 ['foo.~20']
261 265 >>> _auxencode([b' .foo'], True)
262 266 ['~20.foo']
263 267 """
264 268 for i, n in enumerate(path):
265 269 if not n:
266 270 continue
267 271 if dotencode and n[0] in b'. ':
268 272 n = b"~%02x" % ord(n[0:1]) + n[1:]
269 273 path[i] = n
270 274 else:
271 275 l = n.find(b'.')
272 276 if l == -1:
273 277 l = len(n)
274 278 if (l == 3 and n[:3] in _winres3) or (
275 279 l == 4
276 280 and n[3:4] <= b'9'
277 281 and n[3:4] >= b'1'
278 282 and n[:3] in _winres4
279 283 ):
280 284 # encode third letter ('aux' -> 'au~78')
281 285 ec = b"~%02x" % ord(n[2:3])
282 286 n = n[0:2] + ec + n[3:]
283 287 path[i] = n
284 288 if n[-1] in b'. ':
285 289 # encode last period or space ('foo...' -> 'foo..~2e')
286 290 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
287 291 return path
288 292
289 293
290 294 _maxstorepathlen = 120
291 295 _dirprefixlen = 8
292 296 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
293 297
294 298
295 299 def _hashencode(path, dotencode):
296 300 digest = hex(hashutil.sha1(path).digest())
297 301 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
298 302 parts = _auxencode(le, dotencode)
299 303 basename = parts[-1]
300 304 _root, ext = os.path.splitext(basename)
301 305 sdirs = []
302 306 sdirslen = 0
303 307 for p in parts[:-1]:
304 308 d = p[:_dirprefixlen]
305 309 if d[-1] in b'. ':
306 310 # Windows can't access dirs ending in period or space
307 311 d = d[:-1] + b'_'
308 312 if sdirslen == 0:
309 313 t = len(d)
310 314 else:
311 315 t = sdirslen + 1 + len(d)
312 316 if t > _maxshortdirslen:
313 317 break
314 318 sdirs.append(d)
315 319 sdirslen = t
316 320 dirs = b'/'.join(sdirs)
317 321 if len(dirs) > 0:
318 322 dirs += b'/'
319 323 res = b'dh/' + dirs + digest + ext
320 324 spaceleft = _maxstorepathlen - len(res)
321 325 if spaceleft > 0:
322 326 filler = basename[:spaceleft]
323 327 res = b'dh/' + dirs + filler + digest + ext
324 328 return res
325 329
326 330
327 331 def _hybridencode(path, dotencode):
328 332 """encodes path with a length limit
329 333
330 334 Encodes all paths that begin with 'data/', according to the following.
331 335
332 336 Default encoding (reversible):
333 337
334 338 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
335 339 characters are encoded as '~xx', where xx is the two digit hex code
336 340 of the character (see encodefilename).
337 341 Relevant path components consisting of Windows reserved filenames are
338 342 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
339 343
340 344 Hashed encoding (not reversible):
341 345
342 346 If the default-encoded path is longer than _maxstorepathlen, a
343 347 non-reversible hybrid hashing of the path is done instead.
344 348 This encoding uses up to _dirprefixlen characters of all directory
345 349 levels of the lowerencoded path, but not more levels than can fit into
346 350 _maxshortdirslen.
347 351 Then follows the filler followed by the sha digest of the full path.
348 352 The filler is the beginning of the basename of the lowerencoded path
349 353 (the basename is everything after the last path separator). The filler
350 354 is as long as possible, filling in characters from the basename until
351 355 the encoded path has _maxstorepathlen characters (or all chars of the
352 356 basename have been taken).
353 357 The extension (e.g. '.i' or '.d') is preserved.
354 358
355 359 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
356 360 encoding was used.
357 361 """
358 362 path = encodedir(path)
359 363 ef = _encodefname(path).split(b'/')
360 364 res = b'/'.join(_auxencode(ef, dotencode))
361 365 if len(res) > _maxstorepathlen:
362 366 res = _hashencode(path, dotencode)
363 367 return res
364 368
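    A minimal sketch of the length boundary described in the docstring above
    (paths invented; _maxstorepathlen is 120):

        short = _hybridencode(b'data/some/FILE.txt.i', True)
        # short == b'data/some/_f_i_l_e.txt.i' (reversible default encoding)
        hashed = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
        # too long for the default encoding, so the hashed form is used:
        assert hashed.startswith(b'dh/') and len(hashed) <= _maxstorepathlen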
365 369
366 370 def _pathencode(path):
367 371 de = encodedir(path)
368 372 if len(path) > _maxstorepathlen:
369 373 return _hashencode(de, True)
370 374 ef = _encodefname(de).split(b'/')
371 375 res = b'/'.join(_auxencode(ef, True))
372 376 if len(res) > _maxstorepathlen:
373 377 return _hashencode(de, True)
374 378 return res
375 379
376 380
377 381 _pathencode = getattr(parsers, 'pathencode', _pathencode)
378 382
379 383
380 384 def _plainhybridencode(f):
381 385 return _hybridencode(f, False)
382 386
383 387
384 388 def _calcmode(vfs):
385 389 try:
386 390 # files in .hg/ will be created using this mode
387 391 mode = vfs.stat().st_mode
388 392 # avoid some useless chmods
389 393 if (0o777 & ~util.umask) == (0o777 & mode):
390 394 mode = None
391 395 except OSError:
392 396 mode = None
393 397 return mode
394 398
395 399
396 400 _data = [
397 401 b'bookmarks',
398 402 b'narrowspec',
399 403 b'data',
400 404 b'meta',
401 405 b'00manifest.d',
402 406 b'00manifest.i',
403 407 b'00changelog.d',
404 408 b'00changelog.i',
405 409 b'phaseroots',
406 410 b'obsstore',
407 411 b'requires',
408 412 ]
409 413
410 414 REVLOG_FILES_EXT = (
411 415 b'.i',
412 416 b'.idx',
413 417 b'.d',
414 418 b'.dat',
415 419 b'.n',
416 420 b'.nd',
417 421 b'.sda',
418 422 )
419 423 # file extensions that also use a `-SOMELONGIDHASH.ext` form
420 424 REVLOG_FILES_LONG_EXT = (
421 425 b'.nd',
422 426 b'.idx',
423 427 b'.dat',
424 428 b'.sda',
425 429 )
426 430 # files that are "volatile" and might change between listing and streaming
427 431 #
428 432 # note: the ".nd" files are nodemap data and won't "change" but they might be
429 433 # deleted.
430 434 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
431 435
432 436 # some exceptions to the above matching
433 437 #
434 438 # XXX This is currently not in use because of issue6542
435 439 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
436 440
437 441
438 442 def is_revlog(f, kind, st):
439 443 if kind != stat.S_IFREG:
440 444 return False
441 445 if f.endswith(REVLOG_FILES_EXT):
442 446 return True
443 447 return False
444 448
445 449
446 450 def is_revlog_file(f):
447 451 if f.endswith(REVLOG_FILES_EXT):
448 452 return True
449 453 return False
450 454
451 455
452 456 @attr.s(slots=True)
453 457 class StoreFile:
454 458 """a file matching a store entry"""
455 459
456 460 unencoded_path = attr.ib()
457 461 _file_size = attr.ib(default=None)
458 462 is_volatile = attr.ib(default=False)
459 463
460 464 def file_size(self, vfs):
461 465 if self._file_size is None:
462 466 if vfs is None:
463 467 msg = b"calling vfs-less file_size without prior call: %s"
464 468 msg %= self.unencoded_path
465 469 raise error.ProgrammingError(msg)
466 470 try:
467 471 self._file_size = vfs.stat(self.unencoded_path).st_size
468 472 except FileNotFoundError:
469 473 self._file_size = 0
470 474 return self._file_size
471 475
472 476 @property
473 477 def has_size(self):
474 478 return self._file_size is not None
475 479
476 480 def get_stream(self, vfs, copies):
477 481 """return data "stream" information for this file
478 482
479 483 (unencoded_file_path, content_iterator, content_size)
480 484 """
481 485 size = self.file_size(None)
482 486
483 487 def get_stream():
484 488 actual_path = copies[vfs.join(self.unencoded_path)]
485 489 with open(actual_path, 'rb') as fp:
486 490 yield None # ready to stream
487 491 if size <= 65536:
488 492 yield fp.read(size)
489 493 else:
490 494 yield from util.filechunkiter(fp, limit=size)
491 495
492 496 s = get_stream()
493 497 next(s)
494 498 return (self.unencoded_path, s, size)
495 499
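    How a consumer is expected to drive the returned triple (a sketch; the
    generator has already been primed past its "ready to stream" yield, so
    iterating it only produces content chunks):

        # 'sf' is a StoreFile whose size was populated by an earlier
        # file_size(vfs) call; 'vfs' and 'copies' as in get_stream() above
        name, chunks, size = sf.get_stream(vfs, copies)
        assert len(b''.join(chunks)) == size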
496 500
497 501 @attr.s(slots=True, init=False)
498 502 class BaseStoreEntry:
499 503 """An entry in the store
500 504
501 505 This is returned by `store.walk` and represents some data in the store."""
502 506
503 507 maybe_volatile = True
504 508
505 509 def files(self) -> List[StoreFile]:
506 510 raise NotImplementedError
507 511
508 512 def get_streams(
509 513 self,
510 514 repo=None,
511 515 vfs=None,
512 516 copies=None,
513 517 max_changeset=None,
514 518 preserve_file_count=False,
515 519 ):
516 520 """return a list of data stream associated to files for this entry
517 521
518 522 return [(unencoded_file_path, content_iterator, content_size), …]
519 523 """
520 524 assert vfs is not None
521 525 return [f.get_stream(vfs, copies) for f in self.files()]
522 526
523 527
524 528 @attr.s(slots=True, init=False)
525 529 class SimpleStoreEntry(BaseStoreEntry):
526 530 """A generic entry in the store"""
527 531
528 532 is_revlog = False
529 533
530 534 maybe_volatile = attr.ib()
531 535 _entry_path = attr.ib()
532 536 _is_volatile = attr.ib(default=False)
533 537 _file_size = attr.ib(default=None)
534 538 _files = attr.ib(default=None)
535 539
536 540 def __init__(
537 541 self,
538 542 entry_path,
539 543 is_volatile=False,
540 544 file_size=None,
541 545 ):
542 546 super().__init__()
543 547 self._entry_path = entry_path
544 548 self._is_volatile = is_volatile
545 549 self._file_size = file_size
546 550 self._files = None
547 551 self.maybe_volatile = is_volatile
548 552
549 553 def files(self) -> List[StoreFile]:
550 554 if self._files is None:
551 555 self._files = [
552 556 StoreFile(
553 557 unencoded_path=self._entry_path,
554 558 file_size=self._file_size,
555 559 is_volatile=self._is_volatile,
556 560 )
557 561 ]
558 562 return self._files
559 563
560 564
561 565 @attr.s(slots=True, init=False)
562 566 class RevlogStoreEntry(BaseStoreEntry):
563 567 """A revlog entry in the store"""
564 568
565 569 is_revlog = True
566 570
567 571 revlog_type = attr.ib(default=None)
568 572 target_id = attr.ib(default=None)
569 573 maybe_volatile = attr.ib(default=True)
570 574 _path_prefix = attr.ib(default=None)
571 575 _details = attr.ib(default=None)
572 576 _files = attr.ib(default=None)
573 577
574 578 def __init__(
575 579 self,
576 580 revlog_type,
577 581 path_prefix,
578 582 target_id,
579 583 details,
580 584 ):
581 585 super().__init__()
582 586 self.revlog_type = revlog_type
583 587 self.target_id = target_id
584 588 self._path_prefix = path_prefix
585 589 assert b'.i' in details, (path_prefix, details)
586 590 for ext in details:
587 591 if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
588 592 self.maybe_volatile = True
589 593 break
590 594 else:
591 595 self.maybe_volatile = False
592 596 self._details = details
593 597 self._files = None
594 598
595 599 @property
596 600 def is_changelog(self):
597 601 return self.revlog_type == KIND_CHANGELOG
598 602
599 603 @property
600 604 def is_manifestlog(self):
601 605 return self.revlog_type == KIND_MANIFESTLOG
602 606
603 607 @property
604 608 def is_filelog(self):
605 609 return self.revlog_type == KIND_FILELOG
606 610
607 611 def main_file_path(self):
608 612 """unencoded path of the main revlog file"""
609 613 return self._path_prefix + b'.i'
610 614
611 615 def files(self) -> List[StoreFile]:
612 616 if self._files is None:
613 617 self._files = []
614 618 for ext in sorted(self._details, key=_ext_key):
615 619 path = self._path_prefix + ext
616 620 file_size = self._details[ext]
617 621 # files that are "volatile" and might change between
618 622 # listing and streaming
619 623 #
620 624 # note: the ".nd" files are nodemap data and won't "change"
621 625 # but they might be deleted.
622 626 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
623 627 f = StoreFile(path, file_size, volatile)
624 628 self._files.append(f)
625 629 return self._files
626 630
627 631 def get_streams(
628 632 self,
629 633 repo=None,
630 634 vfs=None,
631 635 copies=None,
632 636 max_changeset=None,
633 637 preserve_file_count=False,
634 638 ):
635 639 pre_sized = all(f.has_size for f in self.files())
636 640 if pre_sized and (
637 641 repo is None
638 642 or max_changeset is None
639 643 # This uses revlog-v2; ignore for now
640 644 or any(k.endswith(b'.idx') for k in self._details.keys())
641 645 # This is not inline, no race expected
642 646 or b'.d' in self._details
643 647 ):
644 648 return super().get_streams(
645 649 repo=repo,
646 650 vfs=vfs,
647 651 copies=copies,
648 652 max_changeset=max_changeset,
649 653 preserve_file_count=preserve_file_count,
650 654 )
651 655 elif not preserve_file_count:
652 656 stream = [
653 657 f.get_stream(vfs, copies)
654 658 for f in self.files()
655 659 if not f.unencoded_path.endswith((b'.i', b'.d'))
656 660 ]
657 661 rl = self.get_revlog_instance(repo).get_revlog()
658 662 rl_stream = rl.get_streams(max_changeset)
659 663 stream.extend(rl_stream)
660 664 return stream
661 665
662 666 name_to_size = {}
663 667 for f in self.files():
664 668 name_to_size[f.unencoded_path] = f.file_size(None)
665 669
666 670 stream = [
667 671 f.get_stream(vfs, copies)
668 672 for f in self.files()
669 673 if not f.unencoded_path.endswith(b'.i')
670 674 ]
671 675
672 676 index_path = self._path_prefix + b'.i'
673 677
674 678 index_file = None
675 679 try:
676 680 index_file = vfs(index_path)
677 681 header = index_file.read(INDEX_HEADER.size)
678 682 if revlogmod.revlog.is_inline_index(header):
679 683 size = name_to_size[index_path]
680 684
681 685 # no split underneath, just return the stream
682 686 def get_stream():
683 687 fp = index_file
684 688 try:
685 689 fp.seek(0)
686 690 yield None
687 691 if size <= 65536:
688 692 yield fp.read(size)
689 693 else:
690 694 yield from util.filechunkiter(fp, limit=size)
691 695 finally:
692 696 fp.close()
693 697
694 698 s = get_stream()
695 699 next(s)
696 700 index_file = None
697 701 stream.append((index_path, s, size))
698 702 else:
699 703 rl = self.get_revlog_instance(repo).get_revlog()
700 704 rl_stream = rl.get_streams(max_changeset, force_inline=True)
701 705 for name, s, size in rl_stream:
702 706 if name_to_size.get(name, 0) != size:
703 707 msg = _(b"expected %d bytes but %d provided for %s")
704 708 msg %= name_to_size.get(name, 0), size, name
705 709 raise error.Abort(msg)
706 710 stream.extend(rl_stream)
707 711 finally:
708 712 if index_file is not None:
709 713 index_file.close()
710 714
711 715 files = self.files()
712 716 assert len(stream) == len(files), (
713 717 stream,
714 718 files,
715 719 self._path_prefix,
716 720 self.target_id,
717 721 )
718 722 return stream
719 723
720 724 def get_revlog_instance(self, repo):
721 725 """Obtain a revlog instance from this store entry
722 726
723 727 An instance of the appropriate class is returned.
724 728 """
725 729 if self.is_changelog:
726 730 return changelog.changelog(repo.svfs)
727 731 elif self.is_manifestlog:
728 732 mandir = self.target_id
729 733 return manifest.manifestrevlog(
730 734 repo.nodeconstants, repo.svfs, tree=mandir
731 735 )
732 736 else:
733 737 return filelog.filelog(repo.svfs, self.target_id)
734 738
735 739
736 740 def _gather_revlog(files_data):
737 741 """group files per revlog prefix
738 742
739 743 This returns a sorted list of (prefix, details) pairs. The prefix is
740 744 the revlog path without extension; details maps each file "suffix"
741 745 seen for this revlog to arbitrary per-file data.
742 746 """
743 747 revlogs = collections.defaultdict(dict)
744 748 for u, value in files_data:
745 749 name, ext = _split_revlog_ext(u)
746 750 revlogs[name][ext] = value
747 751 return sorted(revlogs.items())
748 752
749 753
750 754 def _split_revlog_ext(filename):
751 755 """split the revlog file prefix from the variable extension"""
752 756 if filename.endswith(REVLOG_FILES_LONG_EXT):
753 757 char = b'-'
754 758 else:
755 759 char = b'.'
756 760 idx = filename.rfind(char)
757 761 return filename[:idx], filename[idx:]
758 762
759 763
760 764 def _ext_key(ext):
761 765 """a key to order revlog suffix
762 766
763 767 important to issue .i after other entry."""
764 768 # the only important part of this order is to keep the `.i` last.
765 769 if ext.endswith(b'.n'):
766 770 return (0, ext)
767 771 elif ext.endswith(b'.nd'):
768 772 return (10, ext)
769 773 elif ext.endswith(b'.d'):
770 774 return (20, ext)
771 775 elif ext.endswith(b'.i'):
772 776 return (50, ext)
773 777 else:
774 778 return (40, ext)
775 779
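    A worked example of the three helpers above (file sizes invented):

        files = [(b'data/foo.d', 2048), (b'data/foo.i', 64)]
        [(prefix, details)] = _gather_revlog(files)
        # prefix == b'data/foo'; details == {b'.d': 2048, b'.i': 64}
        assert sorted(details, key=_ext_key) == [b'.d', b'.i']  # '.i' last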
776 780
777 781 class basicstore:
778 782 '''base class for local repository stores'''
779 783
780 784 def __init__(self, path, vfstype):
781 785 vfs = vfstype(path)
782 786 self.path = vfs.base
783 787 self.createmode = _calcmode(vfs)
784 788 vfs.createmode = self.createmode
785 789 self.rawvfs = vfs
786 790 self.vfs = vfsmod.filtervfs(vfs, encodedir)
787 791 self.opener = self.vfs
788 792
789 793 def join(self, f):
790 794 return self.path + b'/' + encodedir(f)
791 795
792 796 def _walk(self, relpath, recurse, undecodable=None):
793 797 '''returns a sorted list of (unencoded_path, size) pairs'''
794 798 path = self.path
795 799 if relpath:
796 800 path += b'/' + relpath
797 801 striplen = len(self.path) + 1
798 802 l = []
799 803 if self.rawvfs.isdir(path):
800 804 visit = [path]
801 805 readdir = self.rawvfs.readdir
802 806 while visit:
803 807 p = visit.pop()
804 808 for f, kind, st in readdir(p, stat=True):
805 809 fp = p + b'/' + f
806 810 if is_revlog(f, kind, st):
807 811 n = util.pconvert(fp[striplen:])
808 812 l.append((decodedir(n), st.st_size))
809 813 elif kind == stat.S_IFDIR and recurse:
810 814 visit.append(fp)
811 815
812 816 l.sort()
813 817 return l
814 818
815 819 def changelog(self, trypending, concurrencychecker=None):
816 820 return changelog.changelog(
817 821 self.vfs,
818 822 trypending=trypending,
819 823 concurrencychecker=concurrencychecker,
820 824 )
821 825
822 826 def manifestlog(self, repo, storenarrowmatch) -> manifest.ManifestLog:
823 827 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
824 828 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
825 829
826 830 def data_entries(
827 831 self, matcher=None, undecodable=None
828 832 ) -> Generator[BaseStoreEntry, None, None]:
829 833 """Like walk, but excluding the changelog and root manifest.
830 834
831 835 When [undecodable] is None, revlog names that can't be
832 836 decoded cause an exception. When it is provided, it should
833 837 be a list and the filenames that can't be decoded are added
834 838 to it instead. This is very rarely needed."""
835 839 dirs = [
836 840 (b'data', KIND_FILELOG, False),
837 841 (b'meta', KIND_MANIFESTLOG, True),
838 842 ]
839 843 for base_dir, rl_type, strip_filename in dirs:
840 844 files = self._walk(base_dir, True, undecodable=undecodable)
841 845 for revlog, details in _gather_revlog(files):
842 846 revlog_target_id = revlog.split(b'/', 1)[1]
843 847 if strip_filename and b'/' in revlog:
844 848 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
845 849 revlog_target_id += b'/'
846 850 yield RevlogStoreEntry(
847 851 path_prefix=revlog,
848 852 revlog_type=rl_type,
849 853 target_id=revlog_target_id,
850 854 details=details,
851 855 )
852 856
853 857 def top_entries(
854 858 self, phase=False, obsolescence=False
855 859 ) -> Generator[BaseStoreEntry, None, None]:
856 860 if phase and self.vfs.exists(b'phaseroots'):
857 861 yield SimpleStoreEntry(
858 862 entry_path=b'phaseroots',
859 863 is_volatile=True,
860 864 )
861 865
862 866 if obsolescence and self.vfs.exists(b'obsstore'):
863 867 # XXX if we had the file size it could be non-volatile
864 868 yield SimpleStoreEntry(
865 869 entry_path=b'obsstore',
866 870 is_volatile=True,
867 871 )
868 872
869 873 files = reversed(self._walk(b'', False))
870 874
871 875 changelogs = collections.defaultdict(dict)
872 876 manifestlogs = collections.defaultdict(dict)
873 877
874 878 for u, s in files:
875 879 if u.startswith(b'00changelog'):
876 880 name, ext = _split_revlog_ext(u)
877 881 changelogs[name][ext] = s
878 882 elif u.startswith(b'00manifest'):
879 883 name, ext = _split_revlog_ext(u)
880 884 manifestlogs[name][ext] = s
881 885 else:
882 886 yield SimpleStoreEntry(
883 887 entry_path=u,
884 888 is_volatile=False,
885 889 file_size=s,
886 890 )
887 891 # yield manifest before changelog
888 892 top_rl = [
889 893 (manifestlogs, KIND_MANIFESTLOG),
890 894 (changelogs, KIND_CHANGELOG),
891 895 ]
892 896 assert len(manifestlogs) <= 1
893 897 assert len(changelogs) <= 1
894 898 for data, revlog_type in top_rl:
895 899 for revlog, details in sorted(data.items()):
896 900 yield RevlogStoreEntry(
897 901 path_prefix=revlog,
898 902 revlog_type=revlog_type,
899 903 target_id=b'',
900 904 details=details,
901 905 )
902 906
903 907 def walk(
904 908 self, matcher=None, phase=False, obsolescence=False
905 909 ) -> Generator[BaseStoreEntry, None, None]:
906 910 """return files related to data storage (ie: revlogs)
907 911
908 912 yields instance from BaseStoreEntry subclasses
909 913
910 914 if a matcher is passed, storage files of only those tracked paths
911 915 are passed with matches the matcher
912 916 """
913 917 # yield data files first
914 918 for x in self.data_entries(matcher):
915 919 yield x
916 920 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
917 921 yield x
918 922
919 923 def copylist(self):
920 924 return _data
921 925
922 926 def write(self, tr):
923 927 pass
924 928
925 929 def invalidatecaches(self):
926 930 pass
927 931
928 932 def markremoved(self, fn):
929 933 pass
930 934
931 935 def __contains__(self, path):
932 936 '''Checks if the store contains path'''
933 937 path = b"/".join((b"data", path))
934 938 # file?
935 939 if self.vfs.exists(path + b".i"):
936 940 return True
937 941 # dir?
938 942 if not path.endswith(b"/"):
939 943 path = path + b"/"
940 944 return self.vfs.exists(path)
941 945
942 946
943 947 class encodedstore(basicstore):
944 948 def __init__(self, path, vfstype):
945 949 vfs = vfstype(path + b'/store')
946 950 self.path = vfs.base
947 951 self.createmode = _calcmode(vfs)
948 952 vfs.createmode = self.createmode
949 953 self.rawvfs = vfs
950 954 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
951 955 self.opener = self.vfs
952 956
953 957 def _walk(self, relpath, recurse, undecodable=None):
954 958 old = super()._walk(relpath, recurse)
955 959 new = []
956 960 for f1, value in old:
957 961 try:
958 962 f2 = decodefilename(f1)
959 963 except KeyError:
960 964 if undecodable is None:
961 965 msg = _(b'undecodable revlog name %s') % f1
962 966 raise error.StorageError(msg)
963 967 else:
964 968 undecodable.append(f1)
965 969 continue
966 970 new.append((f2, value))
967 971 return new
968 972
969 973 def data_entries(
970 974 self, matcher=None, undecodable=None
971 975 ) -> Generator[BaseStoreEntry, None, None]:
972 976 entries = super(encodedstore, self).data_entries(
973 977 undecodable=undecodable
974 978 )
975 979 for entry in entries:
976 980 if _match_tracked_entry(entry, matcher):
977 981 yield entry
978 982
979 983 def join(self, f):
980 984 return self.path + b'/' + encodefilename(f)
981 985
982 986 def copylist(self):
983 987 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
984 988
985 989
986 990 class fncache:
987 991 # the filename used to be partially encoded
988 992 # hence the encodedir/decodedir dance
989 993 def __init__(self, vfs):
990 994 self.vfs = vfs
991 995 self._ignores = set()
992 996 self.entries = None
993 997 self._dirty = False
994 998 # set of new additions to fncache
995 999 self.addls = set()
996 1000
997 1001 def ensureloaded(self, warn=None):
998 1002 """read the fncache file if not already read.
999 1003
1000 1004 If the file on disk is corrupted, raise. If warn is provided,
1001 1005 warn and keep going instead."""
1002 1006 if self.entries is None:
1003 1007 self._load(warn)
1004 1008
1005 1009 def _load(self, warn=None):
1006 1010 '''fill the entries from the fncache file'''
1007 1011 self._dirty = False
1008 1012 try:
1009 1013 fp = self.vfs(b'fncache', mode=b'rb')
1010 1014 except IOError:
1011 1015 # skip nonexistent file
1012 1016 self.entries = set()
1013 1017 return
1014 1018
1015 1019 self.entries = set()
1016 1020 chunk = b''
1017 1021 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
1018 1022 chunk += c
1019 1023 try:
1020 1024 p = chunk.rindex(b'\n')
1021 1025 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
1022 1026 chunk = chunk[p + 1 :]
1023 1027 except ValueError:
1024 1028 # substring '\n' not found, maybe the entry is bigger than the
1025 1029 # chunksize, so let's keep iterating
1026 1030 pass
1027 1031
1028 1032 if chunk:
1029 1033 msg = _(b"fncache does not ends with a newline")
1030 1034 if warn:
1031 1035 warn(msg + b'\n')
1032 1036 else:
1033 1037 raise error.Abort(
1034 1038 msg,
1035 1039 hint=_(
1036 1040 b"use 'hg debugrebuildfncache' to "
1037 1041 b"rebuild the fncache"
1038 1042 ),
1039 1043 )
1040 1044 self._checkentries(fp, warn)
1041 1045 fp.close()
1042 1046
1043 1047 def _checkentries(self, fp, warn):
1044 1048 """make sure there is no empty string in entries"""
1045 1049 if b'' in self.entries:
1046 1050 fp.seek(0)
1047 1051 for n, line in enumerate(fp):
1048 1052 if not line.rstrip(b'\n'):
1049 1053 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1050 1054 if warn:
1051 1055 warn(t + b'\n')
1052 1056 else:
1053 1057 raise error.Abort(t)
1054 1058
1055 1059 def write(self, tr):
1056 1060 if self._dirty:
1057 1061 assert self.entries is not None
1058 1062 self.entries = self.entries | self.addls
1059 1063 self.addls = set()
1060 1064 tr.addbackup(b'fncache')
1061 1065 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1062 1066 if self.entries:
1063 1067 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1064 1068 fp.close()
1065 1069 self._dirty = False
1066 1070 if self.addls:
1067 1071 # if we have just new entries, let's append them to the fncache
1068 1072 tr.addbackup(b'fncache')
1069 1073 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1070 1074 if self.addls:
1071 1075 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1072 1076 fp.close()
1073 1077 self.entries = None
1074 1078 self.addls = set()
1075 1079
1076 1080 def addignore(self, fn):
1077 1081 self._ignores.add(fn)
1078 1082
1079 1083 def add(self, fn):
1080 1084 if fn in self._ignores:
1081 1085 return
1082 1086 if self.entries is None:
1083 1087 self._load()
1084 1088 if fn not in self.entries:
1085 1089 self.addls.add(fn)
1086 1090
1087 1091 def remove(self, fn):
1088 1092 if self.entries is None:
1089 1093 self._load()
1090 1094 if fn in self.addls:
1091 1095 self.addls.remove(fn)
1092 1096 return
1093 1097 try:
1094 1098 self.entries.remove(fn)
1095 1099 self._dirty = True
1096 1100 except KeyError:
1097 1101 pass
1098 1102
1099 1103 def __contains__(self, fn):
1100 1104 if fn in self.addls:
1101 1105 return True
1102 1106 if self.entries is None:
1103 1107 self._load()
1104 1108 return fn in self.entries
1105 1109
1106 1110 def __iter__(self):
1107 1111 if self.entries is None:
1108 1112 self._load()
1109 1113 return iter(self.entries | self.addls)
1110 1114
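    A hypothetical usage sketch of the class above; entries are kept as
    unencoded paths, and additions accumulate in addls until write(tr)
    persists them (store_vfs and tr are assumed to come from an open
    repository and transaction):

        fnc = fncache(store_vfs)   # store_vfs: a vfs rooted at .hg/store
        fnc.add(b'data/foo.i')     # queued in fnc.addls, not yet on disk
        assert b'data/foo.i' in fnc
        fnc.write(tr)              # appends the new entries to the file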
1111 1115
1112 1116 class _fncachevfs(vfsmod.proxyvfs):
1113 1117 def __init__(self, vfs, fnc, encode):
1114 1118 vfsmod.proxyvfs.__init__(self, vfs)
1115 1119 self.fncache = fnc
1116 1120 self.encode = encode
1117 1121
1118 1122 def __call__(self, path, mode=b'r', *args, **kw):
1119 1123 encoded = self.encode(path)
1120 1124 if (
1121 1125 mode not in (b'r', b'rb')
1122 1126 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1123 1127 and is_revlog_file(path)
1124 1128 ):
1125 1129 # do not trigger a fncache load when adding a file that already is
1126 1130 # known to exist.
1127 1131 notload = self.fncache.entries is None and (
1128 1132 # if the file has size zero, it should be considered as missing.
1129 1133 # Such zero-size files are the result of truncation when a
1130 1134 # transaction is aborted.
1131 1135 self.vfs.exists(encoded)
1132 1136 and self.vfs.stat(encoded).st_size
1133 1137 )
1134 1138 if not notload:
1135 1139 self.fncache.add(path)
1136 1140 return self.vfs(encoded, mode, *args, **kw)
1137 1141
1138 def join(self, path):
1142 def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
1143 insidef = (self.encode(f) for f in insidef)
1144
1139 1145 if path:
1140 return self.vfs.join(self.encode(path))
1146 return self.vfs.join(self.encode(path), *insidef)
1141 1147 else:
1142 return self.vfs.join(path)
1148 return self.vfs.join(path, *insidef)
1143 1149
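    This is the signature fix this changeset is about: _fncachevfs.join
    previously accepted only path, so callers using the vfs.join(path,
    *insidef) form it overrides raised a TypeError. A sketch of the
    now-supported call (svfs is a hypothetical _fncachevfs instance):

        svfs.join(b'data', b'FOO.i')
        # each component is encoded individually, i.e. equivalent to:
        # svfs.vfs.join(svfs.encode(b'data'), svfs.encode(b'FOO.i'))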
1144 1150 def register_file(self, path):
1145 1151 """generic hook point to lets fncache steer its stew"""
1146 1152 if path.startswith(b'data/') or path.startswith(b'meta/'):
1147 1153 self.fncache.add(path)
1148 1154
1149 1155
1150 1156 class fncachestore(basicstore):
1151 1157 def __init__(self, path, vfstype, dotencode):
1152 1158 if dotencode:
1153 1159 encode = _pathencode
1154 1160 else:
1155 1161 encode = _plainhybridencode
1156 1162 self.encode = encode
1157 1163 vfs = vfstype(path + b'/store')
1158 1164 self.path = vfs.base
1159 1165 self.pathsep = self.path + b'/'
1160 1166 self.createmode = _calcmode(vfs)
1161 1167 vfs.createmode = self.createmode
1162 1168 self.rawvfs = vfs
1163 1169 fnc = fncache(vfs)
1164 1170 self.fncache = fnc
1165 1171 self.vfs = _fncachevfs(vfs, fnc, encode)
1166 1172 self.opener = self.vfs
1167 1173
1168 1174 def join(self, f):
1169 1175 return self.pathsep + self.encode(f)
1170 1176
1171 1177 def getsize(self, path):
1172 1178 return self.rawvfs.stat(path).st_size
1173 1179
1174 1180 def data_entries(
1175 1181 self, matcher=None, undecodable=None
1176 1182 ) -> Generator[BaseStoreEntry, None, None]:
1177 1183 # Note: all files in fncache should be revlog related; however, the
1178 1184 # fncache might contain such files added by previous versions of
1179 1185 # Mercurial.
1180 1186 files = ((f, None) for f in self.fncache if is_revlog_file(f))
1181 1187 by_revlog = _gather_revlog(files)
1182 1188 for revlog, details in by_revlog:
1183 1189 if revlog.startswith(b'data/'):
1184 1190 rl_type = KIND_FILELOG
1185 1191 revlog_target_id = revlog.split(b'/', 1)[1]
1186 1192 elif revlog.startswith(b'meta/'):
1187 1193 rl_type = KIND_MANIFESTLOG
1188 1194 # drop the initial directory and the `00manifest` file part
1189 1195 tmp = revlog.split(b'/', 1)[1]
1190 1196 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1191 1197 else:
1192 1198 # unreachable
1193 1199 assert False, revlog
1194 1200 entry = RevlogStoreEntry(
1195 1201 path_prefix=revlog,
1196 1202 revlog_type=rl_type,
1197 1203 target_id=revlog_target_id,
1198 1204 details=details,
1199 1205 )
1200 1206 if _match_tracked_entry(entry, matcher):
1201 1207 yield entry
1202 1208
1203 1209 def copylist(self):
1204 1210 d = (
1205 1211 b'bookmarks',
1206 1212 b'narrowspec',
1207 1213 b'data',
1208 1214 b'meta',
1209 1215 b'dh',
1210 1216 b'fncache',
1211 1217 b'phaseroots',
1212 1218 b'obsstore',
1213 1219 b'00manifest.d',
1214 1220 b'00manifest.i',
1215 1221 b'00changelog.d',
1216 1222 b'00changelog.i',
1217 1223 b'requires',
1218 1224 )
1219 1225 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1220 1226
1221 1227 def write(self, tr):
1222 1228 self.fncache.write(tr)
1223 1229
1224 1230 def invalidatecaches(self):
1225 1231 self.fncache.entries = None
1226 1232 self.fncache.addls = set()
1227 1233
1228 1234 def markremoved(self, fn):
1229 1235 self.fncache.remove(fn)
1230 1236
1231 1237 def _exists(self, f):
1232 1238 ef = self.encode(f)
1233 1239 try:
1234 1240 self.getsize(ef)
1235 1241 return True
1236 1242 except FileNotFoundError:
1237 1243 return False
1238 1244
1239 1245 def __contains__(self, path):
1240 1246 '''Checks if the store contains path'''
1241 1247 path = b"/".join((b"data", path))
1242 1248 # check for files (exact match)
1243 1249 e = path + b'.i'
1244 1250 if e in self.fncache and self._exists(e):
1245 1251 return True
1246 1252 # now check for directories (prefix match)
1247 1253 if not path.endswith(b'/'):
1248 1254 path += b'/'
1249 1255 for e in self.fncache:
1250 1256 if e.startswith(path) and self._exists(e):
1251 1257 return True
1252 1258 return False
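    An illustrative use of the containment check above; paths are
    store-relative and the b'data/' prefix is added internally (store is a
    hypothetical fncachestore):

        b'foo/bar.txt' in store  # file check: data/foo/bar.txt.i exists
        b'foo' in store          # dir check: some entry under data/foo/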