##// END OF EJS Templates
fncache: add attribute to check whether we're using dotencode...
Raphaël Gomès -
r53068:bd43465a default
parent child Browse files
Show More
@@ -1,1262 +1,1263
1 1 # store.py - repository store handling for Mercurial)
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 import collections
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15 import typing
16 16
17 17 from typing import (
18 18 Generator,
19 19 List,
20 20 Optional,
21 21 )
22 22
23 23 from .i18n import _
24 24 from .thirdparty import attr
25 25
26 26 # Force pytype to use the non-vendored package
27 27 if typing.TYPE_CHECKING:
28 28 # noinspection PyPackageRequirements
29 29 import attr
30 30
31 31 from .node import hex
32 32 from .revlogutils.constants import (
33 33 INDEX_HEADER,
34 34 KIND_CHANGELOG,
35 35 KIND_FILELOG,
36 36 KIND_MANIFESTLOG,
37 37 )
38 38 from . import (
39 39 changelog,
40 40 error,
41 41 filelog,
42 42 manifest,
43 43 policy,
44 44 pycompat,
45 45 revlog as revlogmod,
46 46 util,
47 47 vfs as vfsmod,
48 48 )
49 49 from .utils import hashutil
50 50
# optional C implementations of some helpers below; pure-Python fallbacks
# are selected via the getattr(parsers, ...) calls later in this module
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10**6
55 55
56 56
def _match_tracked_entry(entry: "BaseStoreEntry", matcher):
    """Tell whether ``entry`` tracks a path selected by ``matcher``.

    A ``None`` matcher selects everything, so the answer is then always
    True."""
    if matcher is None:
        return True

    # TODO: make this safe for other entry types. Currently, the various
    # store.data_entry generators only yield RevlogStoreEntry, so the
    # attributes used below are guaranteed to exist on `entry`.
    # pytype: disable=attribute-error
    if entry.is_filelog:
        matched = matcher(entry.target_id)
    elif entry.is_manifestlog:
        matched = matcher.visitdir(entry.target_id.rstrip(b'/'))
    else:
        raise error.ProgrammingError(b"cannot process entry %r" % entry)
    # pytype: enable=attribute-error
    return matched
76 76
77 77
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def _encodedir(path):
    """
    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    """
    # append `.hg` to any directory component that looks like a revlog
    # suffix; replacement order matters and mirrors decodedir's reverse
    for plain, escaped in (
        (b".hg/", b".hg.hg/"),
        (b".i/", b".i.hg/"),
        (b".d/", b".d.hg/"),
    ):
        path = path.replace(plain, escaped)
    return path
96 96
97 97
98 98 encodedir = getattr(parsers, 'encodedir', _encodedir)
99 99
100 100
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was dir-encoded in this path
    if b".hg/" not in path:
        return path
    # undo _encodedir; replacement order is the exact reverse of encoding
    for escaped, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(escaped, plain)
    return path
117 117
118 118
def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions
    """
    # control characters
    yield from range(32)
    # DEL and high bytes
    yield from range(126, 256)
    # characters Windows refuses in filenames
    for special in u'\\:*?"<>|':
        yield ord(special)
135 135
136 136
def _buildencodefun():
    """build the reversible (encode, decode) pair used by encodefilename

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # start from the identity mapping for plain ASCII bytes...
    cmap = {x: x for x in asciistr}
    # ...escape problematic bytes as '~' + two hex digits...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ...and fold uppercase (plus the escape char itself) to '_' + lowercase
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse mapping, keyed by encoded token (1 to 3 bytes long)
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # greedily match an encoded token of length 1, 2 or 3 at each
        # position; an unmatchable position means corrupt input
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
193 193
194 194
195 195 _encodefname, _decodefname = _buildencodefun()
196 196
197 197
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # protect directory components first, then escape the characters
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
204 204
205 205
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the character escaping first, then the directory protection
    char_decoded = _decodefname(s)
    return decodedir(char_decoded)
212 212
213 213
def _buildlowerencodefun():
    """build the (non-reversible) lowercasing encoder used by _hashencode

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity mapping for plain ASCII bytes
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # problematic bytes become '~' + two hex digits
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # uppercase simply folds to lowercase (this loses information)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
237 237
238 238
# prefer the C implementation of lowerencode when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
244 244
245 245
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    The ``path`` list is modified in place and also returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space as ~2e / ~20
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            # only the part before the first '.' can collide with a Windows
            # reserved name; com1..com9 / lpt1..lpt9 carry a digit suffix
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
292 292
293 293
# longest path the plain (non-hashed) encoding may produce
_maxstorepathlen = 120
# number of leading characters kept from each directory in hashed encoding
_dirprefixlen = 8
# upper bound on the joined shortened-directories part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
297 297
298 298
def _hashencode(path, dotencode):
    """non-reversible hashed encoding for paths that are too long

    Builds 'dh/' + shortened directory prefixes + as much of the basename
    as fits + sha1 hex digest of the full path + the original extension,
    staying within _maxstorepathlen (see _hybridencode for details).
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        # keep at most _dirprefixlen characters of each directory level
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            # account for the joining '/' before this component
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the original basename as still fits
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
329 329
330 330
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _auxencode(_encodefname(path).split(b'/'), dotencode)
    encoded = b'/'.join(segments)
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # the plain form is too long for the store: fall back to hashing
    return _hashencode(path, dotencode)
368 368
369 369
def _pathencode(path):
    """pure-Python version of the dotencode path encoder

    Same scheme as _hybridencode with dotencode enabled; a C version may
    replace this binding (see the getattr below this function).
    """
    dir_encoded = encodedir(path)
    # a raw path already over the limit cannot survive plain encoding
    # (encoding never shrinks a path), so hash it right away
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _auxencode(_encodefname(dir_encoded).split(b'/'), True)
    plain = b'/'.join(segments)
    if len(plain) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return plain
379 379
380 380
381 381 _pathencode = getattr(parsers, 'pathencode', _pathencode)
382 382
383 383
def _plainhybridencode(f):
    """hybrid-encode ``f`` without encoding leading dots/spaces"""
    return _hybridencode(f, dotencode=False)
386 386
387 387
def _calcmode(vfs):
    """return the permission mode new files under ``vfs`` should use

    Returns None when no explicit chmod is needed (mode already matches
    what the umask would produce, or the directory cannot be stat'ed).
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
398 398
399 399
# store paths returned by basicstore.copylist() when cloning
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# every file extension a revlog-related file may carry
REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
440 440
441 441
def is_revlog(f, kind, st):
    """tell whether directory entry ``f`` of stat type ``kind`` is a revlog

    ``st`` is accepted for call-site symmetry but currently unused.
    """
    # only regular files with a known revlog extension qualify
    return kind == stat.S_IFREG and f.endswith(REVLOG_FILES_EXT)
448 448
449 449
def is_revlog_file(f):
    """tell whether filename ``f`` carries a known revlog extension"""
    return f.endswith(REVLOG_FILES_EXT)
454 454
455 455
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    # path relative to the store root, before filename encoding
    unencoded_path = attr.ib()
    # size in bytes, lazily filled in by file_size(); attrs exposes this
    # private attribute as the `file_size` __init__ argument
    _file_size = attr.ib(default=None)
    # True when the file may change or vanish between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        # stat through ``vfs`` on first use and cache the result; ``vfs``
        # may only be None once the size is already cached
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                # a missing file simply counts as empty
                self._file_size = 0
        return self._file_size

    @property
    def has_size(self):
        # whether file_size() can be answered without a vfs
        return self._file_size is not None

    def get_stream(self, vfs, volatiles):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        # requires a prior file_size(vfs) call so the size is cached
        size = self.file_size(None)

        def get_stream():
            path = vfs.join(self.unencoded_path)
            with volatiles.open(path) as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        # advance to the `yield None` so the file is opened (and, for
        # volatile files, snapshotted) before we return
        s = get_stream()
        next(s)
        return (self.unencoded_path, s, size)
499 499
500 500
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # conservative default; subclasses override when they can tell whether
    # their files are stable
    maybe_volatile = True

    def files(self) -> List[StoreFile]:
        # subclasses must return the list of StoreFile backing this entry
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        volatiles=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        """return a list of data stream associated to files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [f.get_stream(vfs, volatiles) for f in self.files()]
526 526
527 527
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # plain files are not revlogs
    is_revlog = False

    maybe_volatile = attr.ib()
    # store-relative path of the backing file
    _entry_path = attr.ib()
    # whether the file may change between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes when already known, None otherwise
    _file_size = attr.ib(default=None)
    # cache for files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None
        # a simple entry is volatile exactly when its single file is
        self.maybe_volatile = is_volatile

    def files(self) -> List[StoreFile]:
        # lazily build (and cache) the single StoreFile for this entry
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files
563 563
564 564
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # one of KIND_CHANGELOG / KIND_MANIFESTLOG / KIND_FILELOG
    revlog_type = attr.ib(default=None)
    # what the revlog tracks: b'' for top-level logs, a directory for tree
    # manifests, a file path for filelogs
    target_id = attr.ib(default=None)
    maybe_volatile = attr.ib(default=True)
    # unencoded path prefix shared by all files of this revlog
    _path_prefix = attr.ib(default=None)
    # mapping of file extension -> arbitrary per-file data (e.g. size)
    _details = attr.ib(default=None)
    # cache for files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog has at least its index file
        assert b'.i' in details, (path_prefix, details)
        # the entry is volatile as soon as one of its files is
        for ext in details:
            if ext.endswith(REVLOG_FILES_VOLATILE_EXT):
                self.maybe_volatile = True
                break
        else:
            self.maybe_volatile = False
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type == KIND_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type == KIND_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type == KIND_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        # lazily build (and cache) one StoreFile per known extension,
        # ordered so that `.i` comes last (see _ext_key)
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                file_size = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" file are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                f = StoreFile(path, file_size, volatile)
                self._files.append(f)
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        volatiles=None,
        max_changeset=None,
        preserve_file_count=False,
    ):
        """return data streams for this revlog's files

        Handles the case of an inline revlog that may split into separate
        index and data files while being streamed (see the racy branch
        below); otherwise defers to the generic implementation.
        """
        pre_sized = all(f.has_size for f in self.files())
        if pre_sized and (
            repo is None
            or max_changeset is None
            # This use revlog-v2, ignore for now
            or any(k.endswith(b'.idx') for k in self._details.keys())
            # This is not inline, no race expected
            or b'.d' in self._details
        ):
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                volatiles=volatiles,
                max_changeset=max_changeset,
                preserve_file_count=preserve_file_count,
            )
        elif not preserve_file_count:
            # stream the auxiliary files as-is and let the revlog object
            # produce its own streams for the index/data pair
            stream = [
                f.get_stream(vfs, volatiles)
                for f in self.files()
                if not f.unencoded_path.endswith((b'.i', b'.d'))
            ]
            rl = self.get_revlog_instance(repo).get_revlog()
            rl_stream = rl.get_streams(max_changeset)
            stream.extend(rl_stream)
            return stream

        # from here on we must emit exactly one stream per listed file
        name_to_size = {}
        for f in self.files():
            name_to_size[f.unencoded_path] = f.file_size(None)

        stream = [
            f.get_stream(vfs, volatiles)
            for f in self.files()
            if not f.unencoded_path.endswith(b'.i')
        ]

        index_path = self._path_prefix + b'.i'

        index_file = None
        try:
            index_file = vfs(index_path)
            header = index_file.read(INDEX_HEADER.size)
            if revlogmod.revlog.is_inline_index(header):
                size = name_to_size[index_path]

                # no split underneath, just return the stream
                def get_stream():
                    fp = index_file
                    try:
                        fp.seek(0)
                        yield None
                        if size <= 65536:
                            yield fp.read(size)
                        else:
                            yield from util.filechunkiter(fp, limit=size)
                    finally:
                        fp.close()

                s = get_stream()
                next(s)
                # the generator now owns the file handle
                index_file = None
                stream.append((index_path, s, size))
            else:
                # the revlog was split since listing: ask the revlog to
                # re-inline itself, and verify the advertised sizes
                rl = self.get_revlog_instance(repo).get_revlog()
                rl_stream = rl.get_streams(max_changeset, force_inline=True)
                for name, s, size in rl_stream:
                    if name_to_size.get(name, 0) != size:
                        msg = _(b"expected %d bytes but %d provided for %s")
                        msg %= name_to_size.get(name, 0), size, name
                        raise error.Abort(msg)
                stream.extend(rl_stream)
        finally:
            if index_file is not None:
                index_file.close()

        files = self.files()
        assert len(stream) == len(files), (
            stream,
            files,
            self._path_prefix,
            self.target_id,
        )
        return stream

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)
738 738
739 739
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix"
    that were seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = value
    return sorted(grouped.items())
752 752
753 753
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash variants use a `-SOMELONGIDHASH.ext` form, so the prefix
    # ends at the last dash instead of the last dot
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
762 762
763 763
def _ext_key(ext):
    """a key to order revlog suffix

    important to issue .i after other entry."""
    # the only important part of this order is to keep the `.i` last.
    for suffix, rank in (
        (b'.n', 0),
        (b'.nd', 10),
        (b'.d', 20),
        (b'.i', 50),
    ):
        if ext.endswith(suffix):
            return (rank, ext)
    return (40, ext)
779 779
780 780
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # dir-encode paths so a file `foo` cannot clash with a dir `foo.i`
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the filesystem path for store-relative path ``f``"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    if is_revlog(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """instantiate this repository's changelog from the store vfs"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch) -> manifest.manifestlog:
        """instantiate the manifest log rooted at this store's vfs"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', KIND_FILELOG, False),
            (b'meta', KIND_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            for revlog, details in _gather_revlog(files):
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # manifest logs are addressed by directory: drop the
                    # filename part but keep a trailing slash
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield store entries found at the root of the store

        ``phase``/``obsolescence`` control whether the phaseroots and
        obsstore files are included when present."""
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, KIND_MANIFESTLOG),
            (changelogs, KIND_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        """store-relative paths to copy when cloning"""
        return _data

    def write(self, tr):
        # the basic store has nothing pending to flush
        pass

    def invalidatecaches(self):
        # no caches to invalidate in the base implementation
        pass

    def markremoved(self, fn):
        # the base store keeps no record of removed files
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
945 945
946 946
class encodedstore(basicstore):
    """store variant whose on-disk filenames go through encodefilename"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """walk like basicstore, decoding on-disk names back

        Undecodable names raise a StorageError, or, when ``undecodable``
        is a list, are appended to it and skipped."""
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # same as the parent class, filtered through the matcher
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
988 988
989 989
class fncache:
    """Maintain the list of store files kept in `.hg/store/fncache`.

    The fncache records every data/meta revlog file present in the store so
    the store content can be enumerated without walking the (name-encoded)
    directory tree.  Loading from disk is lazy; additions are buffered in
    `addls` until `write()` is called from within a transaction.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # filenames that must never be added to the cache
        self._ignores = set()
        # set of known entries, or None while the file has not been read
        self.entries = None
        # True when `entries` diverged from the on-disk content
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    @property
    def is_loaded(self):
        """True when the on-disk fncache content has been read."""
        return self.entries is not None

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if not self.is_loaded:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks so a huge fncache is never held twice in
        # memory; only complete (newline-terminated) lines are decoded
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # fix: message used to read "does not ends with a newline"
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Flush in-memory state to disk within transaction `tr`.

        A dirty cache triggers a full rewrite; pure additions are appended.
        """
        if self._dirty:
            assert self.is_loaded
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory set; it will be lazily re-read if needed
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark `fn` so that later `add(fn)` calls are silently ignored."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record `fn` as part of the store (no-op if ignored or known)."""
        if fn in self._ignores:
            return
        if not self.is_loaded:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget `fn`; unknown filenames are silently ignored."""
        if not self.is_loaded:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if not self.is_loaded:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if not self.is_loaded:
            self._load()
        return iter(self.entries | self.addls)
1118 1118
1119 1119
class _fncachevfs(vfsmod.proxyvfs):
    """A vfs proxy that registers written revlog files in the fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache: fncache = fnc
        self.encode = encode
        # whether filenames go through the "dotencode" path encoding
        self.uses_dotencode = encode is _pathencode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        is_write = mode not in (b'r', b'rb')
        is_tracked = (
            path.startswith(b'data/') or path.startswith(b'meta/')
        ) and is_revlog_file(path)
        if is_write and is_tracked:
            # Opening a tracked revlog file for writing: make sure it is
            # registered in the fncache.  Avoid triggering a fncache load
            # when the file already exists on disk with a non-zero size.
            # Zero-size files are leftovers of truncation by an aborted
            # transaction and must be considered missing.
            skip_register = not self.fncache.is_loaded and (
                self.vfs.exists(encoded)
                and self.vfs.stat(encoded).st_size
            )
            if not skip_register:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path: Optional[bytes], *insidef: bytes) -> bytes:
        encoded_tail = (self.encode(f) for f in insidef)
        if not path:
            return self.vfs.join(path, *encoded_tail)
        return self.vfs.join(self.encode(path), *encoded_tail)

    def register_file(self, path):
        """generic hook point letting the fncache track extra files"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1158 1159
1159 1160
class fncachestore(basicstore):
    """A store whose file list is kept in a `fncache` file.

    Filenames on disk go through `_pathencode` (when `dotencode` is set)
    or `_plainhybridencode`, so enumeration relies on the fncache rather
    than on walking the encoded directory tree.
    """

    def __init__(self, path, vfstype, dotencode):
        encode = _pathencode if dotencode else _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the encoded on-disk path of *f* inside the store."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the size in bytes of the (already encoded) *path*."""
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield one RevlogStoreEntry per revlog known to the fncache."""
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        candidates = ((f, None) for f in self.fncache if is_revlog_file(f))
        for revlog, details in _gather_revlog(candidates):
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tail = revlog.split(b'/', 1)[1]
                revlog_target_id = tail.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """Return the store-relative paths to copy for a hardlink clone."""
        store_files = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        copies = [b'requires', b'00changelog.i']
        copies.extend(b'store/' + f for f in store_files)
        return copies

    def write(self, tr):
        """Flush pending fncache changes within transaction `tr`."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the in-memory fncache state; it will be lazily reloaded."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Forget `fn` from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        # a stat() failure means the (encoded) file is absent
        try:
            self.getsize(self.encode(f))
        except FileNotFoundError:
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        exact = path + b'.i'
        if exact in self.fncache and self._exists(exact):
            return True
        # now check for directories (prefix match)
        prefix = path if path.endswith(b'/') else path + b'/'
        return any(
            e.startswith(prefix) and self._exists(e) for e in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now