##// END OF EJS Templates
store: use the revlog type from revlog's constant instead of custom one...
marmoute -
r51573:e324329f default
parent child Browse files
Show More
@@ -1,1230 +1,1207
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from .revlogutils.constants import (
20 20 INDEX_HEADER,
21 KIND_CHANGELOG,
22 KIND_FILELOG,
23 KIND_MANIFESTLOG,
21 24 )
22 25 from . import (
23 26 changelog,
24 27 error,
25 28 filelog,
26 29 manifest,
27 30 policy,
28 31 pycompat,
29 32 revlog as revlogmod,
30 33 util,
31 34 vfs as vfsmod,
32 35 )
33 36 from .utils import hashutil
34 37
35 38 parsers = policy.importmod('parsers')
36 39 # how much bytes should be read from fncache in one read
37 40 # It is done to prevent loading large fncache files into memory
38 41 fncache_chunksize = 10 ** 6
39 42
40 43
41 44 def _match_tracked_entry(entry, matcher):
42 45 """parses a fncache entry and returns whether the entry is tracking a path
43 46 matched by matcher or not.
44 47
45 48 If matcher is None, returns True"""
46 49
47 50 if matcher is None:
48 51 return True
49 52 if entry.is_filelog:
50 53 return matcher(entry.target_id)
51 54 elif entry.is_manifestlog:
52 55 return matcher.visitdir(entry.target_id.rstrip(b'/'))
53 56 raise error.ProgrammingError(b"cannot process entry %r" % entry)
54 57
55 58
56 59 # This avoids a collision between a file named foo and a dir named
57 60 # foo.i or foo.d
58 61 def _encodedir(path):
59 62 """
60 63 >>> _encodedir(b'data/foo.i')
61 64 'data/foo.i'
62 65 >>> _encodedir(b'data/foo.i/bla.i')
63 66 'data/foo.i.hg/bla.i'
64 67 >>> _encodedir(b'data/foo.i.hg/bla.i')
65 68 'data/foo.i.hg.hg/bla.i'
66 69 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
67 70 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
68 71 """
69 72 return (
70 73 path.replace(b".hg/", b".hg.hg/")
71 74 .replace(b".i/", b".i.hg/")
72 75 .replace(b".d/", b".d.hg/")
73 76 )
74 77
75 78
76 79 encodedir = getattr(parsers, 'encodedir', _encodedir)
77 80
78 81
def decodedir(path):
    """Reverse of _encodedir: strip the ``.hg`` markers added to avoid
    file/directory name collisions.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was encoded
    if b".hg/" not in path:
        return path
    decoded = path.replace(b".d.hg/", b".d/")
    decoded = decoded.replace(b".i.hg/", b".i/")
    return decoded.replace(b".hg.hg/", b".hg/")
95 98
96 99
97 100 def _reserved():
98 101 """characters that are problematic for filesystems
99 102
100 103 * ascii escapes (0..31)
101 104 * ascii hi (126..255)
102 105 * windows specials
103 106
104 107 these characters will be escaped by encodefunctions
105 108 """
106 109 winreserved = [ord(x) for x in u'\\:*?"<>|']
107 110 for x in range(32):
108 111 yield x
109 112 for x in range(126, 256):
110 113 yield x
111 114 for x in winreserved:
112 115 yield x
113 116
114 117
def _buildencodefun():
    """Build the reversible (encode, decode) pair used for store filenames.

    Reserved bytes become ``~XX`` hex escapes; uppercase letters become
    ``_`` + lowercase; ``_`` itself is doubled.

    >>> enc, dec = _buildencodefun()
    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'
    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'
    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # identity map for plain ASCII, then overlay the escaped ranges
    encmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        encmap[xchr(x)] = b"~%02x" % x
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        encmap[xchr(x)] = escape + xchr(x).lower()

    decmap = {v: k for k, v in encmap.items()}

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 to 3 bytes long; try shortest first
            for width in range(1, 4):
                token = s[i : i + width]
                if token in decmap:
                    yield decmap[token]
                    i += width
                    break
            else:
                raise KeyError

    def encode(s):
        return b''.join([encmap[s[c : c + 1]] for c in range(len(s))])

    return (encode, lambda s: b''.join(decode(s)))
171 174
172 175
173 176 _encodefname, _decodefname = _buildencodefun()
174 177
175 178
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # directory-collision encoding first, then per-byte name encoding
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
182 185
183 186
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # inverse of encodefilename: undo byte encoding, then dir encoding
    name = _decodefname(s)
    return decodedir(name)
190 193
191 194
def _buildlowerencodefun():
    """Build the non-reversible lowercasing encoder used by hashed paths.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    table = {xchr(code): xchr(code) for code in range(127)}
    for code in _reserved():
        table[xchr(code)] = b"~%02x" % code
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[xchr(code)] = xchr(code).lower()

    def lowerencode(s):
        return b"".join([table[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
215 218
216 219
217 220 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
218 221
219 222 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
220 223 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
221 224 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
222 225
223 226
224 227 def _auxencode(path, dotencode):
225 228 """
226 229 Encodes filenames containing names reserved by Windows or which end in
227 230 period or space. Does not touch other single reserved characters c.
228 231 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
229 232 Additionally encodes space or period at the beginning, if dotencode is
230 233 True. Parameter path is assumed to be all lowercase.
231 234 A segment only needs encoding if a reserved name appears as a
232 235 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
233 236 doesn't need encoding.
234 237
235 238 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
236 239 >>> _auxencode(s.split(b'/'), True)
237 240 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
238 241 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
239 242 >>> _auxencode(s.split(b'/'), False)
240 243 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
241 244 >>> _auxencode([b'foo. '], True)
242 245 ['foo.~20']
243 246 >>> _auxencode([b' .foo'], True)
244 247 ['~20.foo']
245 248 """
246 249 for i, n in enumerate(path):
247 250 if not n:
248 251 continue
249 252 if dotencode and n[0] in b'. ':
250 253 n = b"~%02x" % ord(n[0:1]) + n[1:]
251 254 path[i] = n
252 255 else:
253 256 l = n.find(b'.')
254 257 if l == -1:
255 258 l = len(n)
256 259 if (l == 3 and n[:3] in _winres3) or (
257 260 l == 4
258 261 and n[3:4] <= b'9'
259 262 and n[3:4] >= b'1'
260 263 and n[:3] in _winres4
261 264 ):
262 265 # encode third letter ('aux' -> 'au~78')
263 266 ec = b"~%02x" % ord(n[2:3])
264 267 n = n[0:2] + ec + n[3:]
265 268 path[i] = n
266 269 if n[-1] in b'. ':
267 270 # encode last period or space ('foo...' -> 'foo..~2e')
268 271 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
269 272 return path
270 273
271 274
272 275 _maxstorepathlen = 120
273 276 _dirprefixlen = 8
274 277 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
275 278
276 279
def _hashencode(path, dotencode):
    """Non-reversible, length-limited encoding for over-long store paths.

    The result is 'dh/' + truncated directory prefixes + a basename filler
    + sha1 digest of the full path + the original extension."""
    digest = hex(hashutil.sha1(path).digest())
    # skip the 'data/' or 'meta/' prefix before lower-encoding
    parts = _auxencode(lowerencode(path[5:]).split(b'/'), dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        t = len(d) if sdirslen == 0 else sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with as much of the basename as fits before the digest
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
307 310
308 311
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(segments, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the default encoding: fall back to the hashed form
        res = _hashencode(path, dotencode)
    return res
346 349
347 350
def _pathencode(path):
    """Python fallback for the C ``parsers.pathencode``: dot-encoded
    hybrid encoding with the _maxstorepathlen limit."""
    de = encodedir(path)
    # an over-long raw path can never produce a short encoded one
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    res = b'/'.join(_auxencode(_encodefname(de).split(b'/'), True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
357 360
358 361
359 362 _pathencode = getattr(parsers, 'pathencode', _pathencode)
360 363
361 364
def _plainhybridencode(f):
    """Hybrid-encode *f* without dot-encoding."""
    return _hybridencode(f, False)
364 367
365 368
366 369 def _calcmode(vfs):
367 370 try:
368 371 # files in .hg/ will be created using this mode
369 372 mode = vfs.stat().st_mode
370 373 # avoid some useless chmods
371 374 if (0o777 & ~util.umask) == (0o777 & mode):
372 375 mode = None
373 376 except OSError:
374 377 mode = None
375 378 return mode
376 379
377 380
378 381 _data = [
379 382 b'bookmarks',
380 383 b'narrowspec',
381 384 b'data',
382 385 b'meta',
383 386 b'00manifest.d',
384 387 b'00manifest.i',
385 388 b'00changelog.d',
386 389 b'00changelog.i',
387 390 b'phaseroots',
388 391 b'obsstore',
389 392 b'requires',
390 393 ]
391 394
REVLOG_FILES_EXT = (
    b'.i',
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


def is_revlog(f, kind, st):
    """Return True if directory entry *f* (stat kind *kind*) is a regular
    file with a revlog extension."""
    return kind == stat.S_IFREG and f.endswith(REVLOG_FILES_EXT)


def is_revlog_file(f):
    """Return True if the filename *f* carries a revlog file extension."""
    return f.endswith(REVLOG_FILES_EXT)
432 435
433 436
434 # the file is part of changelog data
435 FILEFLAGS_CHANGELOG = 1 << 13
436 # the file is part of manifest data
437 FILEFLAGS_MANIFESTLOG = 1 << 12
438 # the file is part of filelog data
439 FILEFLAGS_FILELOG = 1 << 11
440 # file that are not directly part of a revlog
441 FILEFLAGS_OTHER = 1 << 10
442
443 # the main entry point for a revlog
444 FILEFLAGS_REVLOG_MAIN = 1 << 1
445 # a secondary file for a revlog
446 FILEFLAGS_REVLOG_OTHER = 1 << 0
447
448 # files that are "volatile" and might change between listing and streaming
449 FILEFLAGS_VOLATILE = 1 << 20
450
451 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_OTHER = FILEFLAGS_OTHER
458
459
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        # lazily stat the file; a missing file counts as size 0
        if self._file_size is None:
            if vfs is None:
                msg = b"calling vfs-less file_size without prior call: %s"
                msg %= self.unencoded_path
                raise error.ProgrammingError(msg)
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size

    def get_stream(self, vfs, copies):
        """return data "stream" information for this file

        (unencoded_file_path, content_iterator, content_size)
        """
        size = self.file_size(None)

        def stream_content():
            actual_path = copies[vfs.join(self.unencoded_path)]
            with open(actual_path, 'rb') as fp:
                yield None  # ready to stream
                if size <= 65536:
                    yield fp.read(size)
                else:
                    yield from util.filechunkiter(fp, limit=size)

        s = stream_content()
        # advance past the sentinel so the file is open before we return
        next(s)
        return (self.unencoded_path, s, size)
499 476
500 477
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    def files(self) -> List[StoreFile]:
        # subclasses must provide the list of StoreFile backing this entry
        raise NotImplementedError

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """return a list of data stream associated to files for this entry

        return [(unencoded_file_path, content_iterator, content_size), …]
        """
        assert vfs is not None
        return [entry_file.get_stream(vfs, copies) for entry_file in self.files()]
523 500
524 501
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        # computed lazily by files()
        self._files = None

    def files(self) -> List[StoreFile]:
        # build (and cache) the single StoreFile backing this entry
        if self._files is None:
            only_file = StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
            self._files = [only_file]
        return self._files
558 535
559 536
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # one of the KIND_* constants from revlogutils.constants
    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    _path_prefix = attr.ib(default=None)
    _details = attr.ib(default=None)
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog must at least have its main `.i` file
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type == KIND_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type == KIND_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type == KIND_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        if self._files is None:
            self._files = []
            # _ext_key keeps the `.i` suffix last
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                file_size = self._details[ext]
                # files that are "volatile" and might change between
                # listing and streaming
                #
                # note: the ".nd" file are nodemap data and won't "change"
                # but they might be deleted.
                volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
                self._files.append(StoreFile(path, file_size, volatile))
        return self._files

    def get_streams(
        self,
        repo=None,
        vfs=None,
        copies=None,
        max_changeset=None,
    ):
        """Stream this revlog's files, handling the inline-index case.

        Falls back to the generic implementation when no repo/max_changeset
        is given, for revlog-v2 files, or when the revlog is not inline."""
        if (
            repo is None
            or max_changeset is None
            # This use revlog-v2, ignore for now
            or any(k.endswith(b'.idx') for k in self._details.keys())
            # This is not inline, no race expected
            or b'.d' in self._details
        ):
            return super().get_streams(
                repo=repo,
                vfs=vfs,
                copies=copies,
                max_changeset=max_changeset,
            )

        name_to_size = {}
        for sf in self.files():
            name_to_size[sf.unencoded_path] = sf.file_size(None)

        # stream every non-index file up front
        streams = [
            sf.get_stream(vfs, copies)
            for sf in self.files()
            if not sf.unencoded_path.endswith(b'.i')
        ]

        index_path = self._path_prefix + b'.i'

        index_file = None
        try:
            index_file = vfs(index_path)
            header = index_file.read(INDEX_HEADER.size)
            if revlogmod.revlog.is_inline_index(header):
                size = name_to_size[index_path]

                # no split underneath, just return the stream
                def get_stream():
                    fp = index_file
                    try:
                        fp.seek(0)
                        yield None
                        if size <= 65536:
                            yield fp.read(size)
                        else:
                            yield from util.filechunkiter(fp, limit=size)
                    finally:
                        fp.close()

                s = get_stream()
                next(s)
                # the generator's finally clause now owns the file handle
                index_file = None
                streams.append((index_path, s, size))
            else:
                # index is no longer inline: rebuild an inline stream
                # from the revlog itself
                rl = self.get_revlog_instance(repo).get_revlog()
                rl_stream = rl.get_streams(max_changeset, force_inline=True)
                for name, s, size in rl_stream:
                    if name_to_size.get(name, 0) != size:
                        msg = _(b"expected %d bytes but %d provided for %s")
                        msg %= name_to_size.get(name, 0), size, name
                        raise error.Abort(msg)
                streams.extend(rl_stream)
        finally:
            if index_file is not None:
                index_file.close()

        files = self.files()
        assert len(streams) == len(files), (
            streams,
            files,
            self._path_prefix,
            self.target_id,
        )
        return streams

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        if self.is_manifestlog:
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=self.target_id
            )
        return filelog.filelog(repo.svfs, self.target_id)
713 690
714 691
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog prefix
    without extension, the second level is all the file "suffix" that were
    seen for this revlog and arbitrary file data as value.
    """
    revlogs = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, ext = _split_revlog_ext(unencoded)
        revlogs[prefix][ext] = value
    return sorted(revlogs.items())
727 704
728 705
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions are preceded by '-', the others by '.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
737 714
738 715
739 716 def _ext_key(ext):
740 717 """a key to order revlog suffix
741 718
742 719 important to issue .i after other entry."""
743 720 # the only important part of this order is to keep the `.i` last.
744 721 if ext.endswith(b'.n'):
745 722 return (0, ext)
746 723 elif ext.endswith(b'.nd'):
747 724 return (10, ext)
748 725 elif ext.endswith(b'.d'):
749 726 return (20, ext)
750 727 elif ext.endswith(b'.i'):
751 728 return (50, ext)
752 729 else:
753 730 return (40, ext)
754 731
755 732
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # filter the vfs so directory-colliding names get encoded on access
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        # unencoded store-relative path -> on-disk path
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        found = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                current = visit.pop()
                for f, kind, st in readdir(current, stat=True):
                    fp = current + b'/' + f
                    if is_revlog(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        found.append((decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        found.sort()
        return found

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', KIND_FILELOG, False),
            (b'meta', KIND_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            for revlog, details in _gather_revlog(files):
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # manifests target a directory, not a file
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, s in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = s
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = s
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=False,
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, KIND_MANIFESTLOG),
            (changelogs, KIND_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first, then top-level entries
        yield from self.data_entries(matcher)
        yield from self.top_entries(phase=phase, obsolescence=obsolescence)

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
920 897
921 898
class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # this store fully encodes filenames (not just directories)
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        decoded = []
        for encoded_name, value in super()._walk(relpath, recurse):
            try:
                decoded_name = decodefilename(encoded_name)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % encoded_name
                    raise error.StorageError(msg)
                undecodable.append(encoded_name)
                continue
            decoded.append((decoded_name, value))
        return decoded

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super().data_entries(undecodable=undecodable)
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
963 940
964 941
class fncache:
    """In-memory view of the store's ``fncache`` file.

    The file lists one store path per line.  Known entries are loaded
    lazily into ``self.entries``; paths added since the last load or
    write accumulate separately in ``self.addls`` so that, when nothing
    was removed, ``write()`` can simply append them to the file instead
    of rewriting it.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        # vfs used to read/write the b'fncache' file
        self.vfs = vfs
        # paths that add() must silently skip
        self._ignores = set()
        # set of known entries, or None while the file is not loaded yet
        self.entries = None
        # True once an entry was removed, forcing a full rewrite on write()
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in fncache_chunksize pieces to avoid loading a huge file
        # into memory at once; only complete (newline-terminated) lines
        # are decoded and added per iteration.
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file does not end with a newline,
            # i.e. it is truncated/corrupted
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # rescan the file only to report the offending line number
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Flush pending changes to the ``fncache`` file.

        ``tr`` is the active transaction; a backup of the file is
        registered with it before any write.  If entries were removed
        (``self._dirty``), the whole file is rewritten; otherwise any
        pending additions are appended.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            # NOTE(review): this inner check is redundant with the outer
            # `if self.addls:` just above
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory set; the next access reloads it from disk
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark ``fn`` so that later add() calls ignore it."""
        self._ignores.add(fn)

    def add(self, fn):
        """Schedule ``fn`` for addition unless ignored or already known."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; a no-op when it is not known."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk yet, so no rewrite is required
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        """True when ``fn`` is a known or pending entry."""
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        """Iterate over all entries, including pending additions."""
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
1089 1066
1090 1067
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records revlog file creations in the fncache.

    Opening a revlog file under ``data/`` or ``meta/`` for writing
    through this vfs registers its (unencoded) path with the fncache;
    reads pass through untouched.
    """

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        # fncache instance to keep up to date
        self.fncache = fnc
        # path-encoding function applied before hitting the real vfs
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        # Open the encoded form of `path` through the underlying vfs,
        # recording revlog writes in the fncache first.
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and is_revlog_file(path)
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """Join ``path`` against the vfs root, encoding non-empty paths."""
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """Generic hook point letting the fncache track registered files."""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1126 1103
1127 1104
class fncachestore(basicstore):
    """Store flavour that tracks its revlog files in an ``fncache``.

    On-disk paths go through a hybrid encoding (``_pathencode`` when
    ``dotencode`` is set, ``_plainhybridencode`` otherwise) and every
    revlog file created through ``self.vfs`` is recorded in the fncache
    via the ``_fncachevfs`` proxy.
    """

    def __init__(self, path, vfstype, dotencode):
        # pick the on-disk path-encoding scheme
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # direct (already-encoded path) access to the store directory
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # writes through self.vfs keep the fncache up to date
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the absolute, encoded path of store file ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the on-disk size of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield one RevlogStoreEntry per revlog listed in the fncache,
        restricted to those matched by ``matcher`` (all when None)."""
        # Note: all files in the fncache should be revlog-related; however,
        # previous versions of Mercurial might have added other files to it.
        files = ((f, None) for f in self.fncache if is_revlog_file(f))
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = KIND_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = KIND_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """Return ``requires``, the changelog index, and the fixed set of
        store files/directories, each prefixed with ``store/``."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """Flush pending fncache changes within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the cached fncache content; it reloads on next access."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Forget ``fn`` in the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        # stat the encoded path; a missing file means the entry is stale
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now