store: add a `get_revlog_instance` method on revlog entries...
marmoute
r51520:e1ee6910 default
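The changeset below moves revlog-class dispatch onto the store entries themselves: `RevlogStoreEntry` gains a `get_revlog_instance(repo)` method, and the upgrade engine (second hunk) drops its module-level `_revlog_from_store_entry` helper in favor of it. A minimal usage sketch, assuming an already-loaded `repo` object; the loop shape mirrors `_clonerevlogs` in the second hunk:

    for entry in repo.store.walk():
        if not entry.is_revlog:
            continue
        # the entry now knows which revlog class backs it, so callers no
        # longer need an external dispatch helper
        rl = entry.get_revlog_instance(repo)
        print(entry.main_file_path(), len(rl))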
@@ -1,1098 +1,1114 @@
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 filelog,
22 23 manifest,
23 24 policy,
24 25 pycompat,
25 26 util,
26 27 vfs as vfsmod,
27 28 )
28 29 from .utils import hashutil
29 30
30 31 parsers = policy.importmod('parsers')
31 32 # how many bytes should be read from fncache in one read
32 33 # It is done to prevent loading large fncache files into memory
33 34 fncache_chunksize = 10 ** 6
34 35
35 36
36 37 def _match_tracked_entry(entry, matcher):
37 38 """parses a fncache entry and returns whether the entry is tracking a path
38 39 matched by matcher or not.
39 40
40 41 If matcher is None, returns True"""
41 42
42 43 if matcher is None:
43 44 return True
44 45 if entry.is_filelog:
45 46 return matcher(entry.target_id)
46 47 elif entry.is_manifestlog:
47 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 50
50 51
51 52 # This avoids a collision between a file named foo and a dir named
52 53 # foo.i or foo.d
53 54 def _encodedir(path):
54 55 """
55 56 >>> _encodedir(b'data/foo.i')
56 57 'data/foo.i'
57 58 >>> _encodedir(b'data/foo.i/bla.i')
58 59 'data/foo.i.hg/bla.i'
59 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 61 'data/foo.i.hg.hg/bla.i'
61 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 64 """
64 65 return (
65 66 path.replace(b".hg/", b".hg.hg/")
66 67 .replace(b".i/", b".i.hg/")
67 68 .replace(b".d/", b".d.hg/")
68 69 )
69 70
70 71
71 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 73
73 74
74 75 def decodedir(path):
75 76 """
76 77 >>> decodedir(b'data/foo.i')
77 78 'data/foo.i'
78 79 >>> decodedir(b'data/foo.i.hg/bla.i')
79 80 'data/foo.i/bla.i'
80 81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 82 'data/foo.i.hg/bla.i'
82 83 """
83 84 if b".hg/" not in path:
84 85 return path
85 86 return (
86 87 path.replace(b".d.hg/", b".d/")
87 88 .replace(b".i.hg/", b".i/")
88 89 .replace(b".hg.hg/", b".hg/")
89 90 )
90 91
91 92
92 93 def _reserved():
93 94 """characters that are problematic for filesystems
94 95
95 96 * ascii escapes (0..31)
96 97 * ascii hi (126..255)
97 98 * windows specials
98 99
99 100 these characters will be escaped by encodefunctions
100 101 """
101 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 103 for x in range(32):
103 104 yield x
104 105 for x in range(126, 256):
105 106 yield x
106 107 for x in winreserved:
107 108 yield x
108 109
109 110
110 111 def _buildencodefun():
111 112 """
112 113 >>> enc, dec = _buildencodefun()
113 114
114 115 >>> enc(b'nothing/special.txt')
115 116 'nothing/special.txt'
116 117 >>> dec(b'nothing/special.txt')
117 118 'nothing/special.txt'
118 119
119 120 >>> enc(b'HELLO')
120 121 '_h_e_l_l_o'
121 122 >>> dec(b'_h_e_l_l_o')
122 123 'HELLO'
123 124
124 125 >>> enc(b'hello:world?')
125 126 'hello~3aworld~3f'
126 127 >>> dec(b'hello~3aworld~3f')
127 128 'hello:world?'
128 129
129 130 >>> enc(b'the\\x07quick\\xADshot')
130 131 'the~07quick~adshot'
131 132 >>> dec(b'the~07quick~adshot')
132 133 'the\\x07quick\\xadshot'
133 134 """
134 135 e = b'_'
135 136 xchr = pycompat.bytechr
136 137 asciistr = list(map(xchr, range(127)))
137 138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 139
139 140 cmap = {x: x for x in asciistr}
140 141 for x in _reserved():
141 142 cmap[xchr(x)] = b"~%02x" % x
142 143 for x in capitals + [ord(e)]:
143 144 cmap[xchr(x)] = e + xchr(x).lower()
144 145
145 146 dmap = {}
146 147 for k, v in cmap.items():
147 148 dmap[v] = k
148 149
149 150 def decode(s):
150 151 i = 0
151 152 while i < len(s):
152 153 for l in range(1, 4):
153 154 try:
154 155 yield dmap[s[i : i + l]]
155 156 i += l
156 157 break
157 158 except KeyError:
158 159 pass
159 160 else:
160 161 raise KeyError
161 162
162 163 return (
163 164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 165 lambda s: b''.join(list(decode(s))),
165 166 )
166 167
167 168
168 169 _encodefname, _decodefname = _buildencodefun()
169 170
170 171
171 172 def encodefilename(s):
172 173 """
173 174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 176 """
176 177 return _encodefname(encodedir(s))
177 178
178 179
179 180 def decodefilename(s):
180 181 """
181 182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 184 """
184 185 return decodedir(_decodefname(s))
185 186
186 187
187 188 def _buildlowerencodefun():
188 189 """
189 190 >>> f = _buildlowerencodefun()
190 191 >>> f(b'nothing/special.txt')
191 192 'nothing/special.txt'
192 193 >>> f(b'HELLO')
193 194 'hello'
194 195 >>> f(b'hello:world?')
195 196 'hello~3aworld~3f'
196 197 >>> f(b'the\\x07quick\\xADshot')
197 198 'the~07quick~adshot'
198 199 """
199 200 xchr = pycompat.bytechr
200 201 cmap = {xchr(x): xchr(x) for x in range(127)}
201 202 for x in _reserved():
202 203 cmap[xchr(x)] = b"~%02x" % x
203 204 for x in range(ord(b"A"), ord(b"Z") + 1):
204 205 cmap[xchr(x)] = xchr(x).lower()
205 206
206 207 def lowerencode(s):
207 208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 209
209 210 return lowerencode
210 211
211 212
212 213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 214
214 215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 218
218 219
219 220 def _auxencode(path, dotencode):
220 221 """
221 222 Encodes filenames containing names reserved by Windows or which end in
222 223 period or space. Does not touch other single reserved characters c.
223 224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 225 Additionally encodes space or period at the beginning, if dotencode is
225 226 True. Parameter path is assumed to be all lowercase.
226 227 A segment only needs encoding if a reserved name appears as a
227 228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 229 doesn't need encoding.
229 230
230 231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 232 >>> _auxencode(s.split(b'/'), True)
232 233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 235 >>> _auxencode(s.split(b'/'), False)
235 236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 237 >>> _auxencode([b'foo. '], True)
237 238 ['foo.~20']
238 239 >>> _auxencode([b' .foo'], True)
239 240 ['~20.foo']
240 241 """
241 242 for i, n in enumerate(path):
242 243 if not n:
243 244 continue
244 245 if dotencode and n[0] in b'. ':
245 246 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 247 path[i] = n
247 248 else:
248 249 l = n.find(b'.')
249 250 if l == -1:
250 251 l = len(n)
251 252 if (l == 3 and n[:3] in _winres3) or (
252 253 l == 4
253 254 and n[3:4] <= b'9'
254 255 and n[3:4] >= b'1'
255 256 and n[:3] in _winres4
256 257 ):
257 258 # encode third letter ('aux' -> 'au~78')
258 259 ec = b"~%02x" % ord(n[2:3])
259 260 n = n[0:2] + ec + n[3:]
260 261 path[i] = n
261 262 if n[-1] in b'. ':
262 263 # encode last period or space ('foo...' -> 'foo..~2e')
263 264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 265 return path
265 266
266 267
267 268 _maxstorepathlen = 120
268 269 _dirprefixlen = 8
269 270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 271
271 272
272 273 def _hashencode(path, dotencode):
273 274 digest = hex(hashutil.sha1(path).digest())
274 275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 276 parts = _auxencode(le, dotencode)
276 277 basename = parts[-1]
277 278 _root, ext = os.path.splitext(basename)
278 279 sdirs = []
279 280 sdirslen = 0
280 281 for p in parts[:-1]:
281 282 d = p[:_dirprefixlen]
282 283 if d[-1] in b'. ':
283 284 # Windows can't access dirs ending in period or space
284 285 d = d[:-1] + b'_'
285 286 if sdirslen == 0:
286 287 t = len(d)
287 288 else:
288 289 t = sdirslen + 1 + len(d)
289 290 if t > _maxshortdirslen:
290 291 break
291 292 sdirs.append(d)
292 293 sdirslen = t
293 294 dirs = b'/'.join(sdirs)
294 295 if len(dirs) > 0:
295 296 dirs += b'/'
296 297 res = b'dh/' + dirs + digest + ext
297 298 spaceleft = _maxstorepathlen - len(res)
298 299 if spaceleft > 0:
299 300 filler = basename[:spaceleft]
300 301 res = b'dh/' + dirs + filler + digest + ext
301 302 return res
302 303
303 304
304 305 def _hybridencode(path, dotencode):
305 306 """encodes path with a length limit
306 307
307 308 Encodes all paths that begin with 'data/', according to the following.
308 309
309 310 Default encoding (reversible):
310 311
311 312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 313 characters are encoded as '~xx', where xx is the two digit hex code
313 314 of the character (see encodefilename).
314 315 Relevant path components consisting of Windows reserved filenames are
315 316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 317
317 318 Hashed encoding (not reversible):
318 319
319 320 If the default-encoded path is longer than _maxstorepathlen, a
320 321 non-reversible hybrid hashing of the path is done instead.
321 322 This encoding uses up to _dirprefixlen characters of all directory
322 323 levels of the lowerencoded path, but not more levels than can fit into
323 324 _maxshortdirslen.
324 325 Then follows the filler followed by the sha digest of the full path.
325 326 The filler is the beginning of the basename of the lowerencoded path
326 327 (the basename is everything after the last path separator). The filler
327 328 is as long as possible, filling in characters from the basename until
328 329 the encoded path has _maxstorepathlen characters (or all chars of the
329 330 basename have been taken).
330 331 The extension (e.g. '.i' or '.d') is preserved.
331 332
332 333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 334 encoding was used.
334 335 """
335 336 path = encodedir(path)
336 337 ef = _encodefname(path).split(b'/')
337 338 res = b'/'.join(_auxencode(ef, dotencode))
338 339 if len(res) > _maxstorepathlen:
339 340 res = _hashencode(path, dotencode)
340 341 return res
341 342
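To make the two encoding modes described in the `_hybridencode` docstring concrete, here is a small sketch; the example paths are illustrative only, and the length bound follows from `_maxstorepathlen` defined above:

    # short paths stay reversible: uppercase letters become '_x'
    assert _hybridencode(b'data/FOO.i', True) == b'data/_f_o_o.i'
    # over-long paths fall back to the non-reversible hashed 'dh/' form
    long_path = b'data/' + b'x' * 200 + b'.i'
    hashed = _hybridencode(long_path, True)
    assert hashed.startswith(b'dh/') and len(hashed) <= _maxstorepathlen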
342 343
343 344 def _pathencode(path):
344 345 de = encodedir(path)
345 346 if len(path) > _maxstorepathlen:
346 347 return _hashencode(de, True)
347 348 ef = _encodefname(de).split(b'/')
348 349 res = b'/'.join(_auxencode(ef, True))
349 350 if len(res) > _maxstorepathlen:
350 351 return _hashencode(de, True)
351 352 return res
352 353
353 354
354 355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 356
356 357
357 358 def _plainhybridencode(f):
358 359 return _hybridencode(f, False)
359 360
360 361
361 362 def _calcmode(vfs):
362 363 try:
363 364 # files in .hg/ will be created using this mode
364 365 mode = vfs.stat().st_mode
365 366 # avoid some useless chmods
366 367 if (0o777 & ~util.umask) == (0o777 & mode):
367 368 mode = None
368 369 except OSError:
369 370 mode = None
370 371 return mode
371 372
372 373
373 374 _data = [
374 375 b'bookmarks',
375 376 b'narrowspec',
376 377 b'data',
377 378 b'meta',
378 379 b'00manifest.d',
379 380 b'00manifest.i',
380 381 b'00changelog.d',
381 382 b'00changelog.i',
382 383 b'phaseroots',
383 384 b'obsstore',
384 385 b'requires',
385 386 ]
386 387
387 388 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 389 REVLOG_FILES_OTHER_EXT = (
389 390 b'.idx',
390 391 b'.d',
391 392 b'.dat',
392 393 b'.n',
393 394 b'.nd',
394 395 b'.sda',
395 396 )
396 397 # file extensions that also use a `-SOMELONGIDHASH.ext` form
397 398 REVLOG_FILES_LONG_EXT = (
398 399 b'.nd',
399 400 b'.idx',
400 401 b'.dat',
401 402 b'.sda',
402 403 )
403 404 # files that are "volatile" and might change between listing and streaming
404 405 #
405 406 # note: the ".nd" files are nodemap data and won't "change" but they might be
406 407 # deleted.
407 408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 409
409 410 # some exception to the above matching
410 411 #
411 412 # XXX This is currently not in use because of issue6542
412 413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 414
414 415
415 416 def is_revlog(f, kind, st):
416 417 if kind != stat.S_IFREG:
417 418 return None
418 419 return revlog_type(f)
419 420
420 421
421 422 def revlog_type(f):
422 423 # XXX we need to filter out `undo.*` files created by transactions here;
423 424 # however, being naive about it also filters revlogs for `undo.*` files,
424 425 # leading to issue6542. So we no longer use EXCLUDED.
425 426 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 427 return FILEFLAGS_REVLOG_MAIN
427 428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 429 t = FILETYPE_FILELOG_OTHER
429 430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 431 t |= FILEFLAGS_VOLATILE
431 432 return t
432 433 return None
433 434
434 435
435 436 # the file is part of changelog data
436 437 FILEFLAGS_CHANGELOG = 1 << 13
437 438 # the file is part of manifest data
438 439 FILEFLAGS_MANIFESTLOG = 1 << 12
439 440 # the file is part of filelog data
440 441 FILEFLAGS_FILELOG = 1 << 11
441 442 # files that are not directly part of a revlog
442 443 FILEFLAGS_OTHER = 1 << 10
443 444
444 445 # the main entry point for a revlog
445 446 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 447 # a secondary file for a revlog
447 448 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 449
449 450 # files that are "volatile" and might change between listing and streaming
450 451 FILEFLAGS_VOLATILE = 1 << 20
451 452
452 453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 459 FILETYPE_OTHER = FILEFLAGS_OTHER
459 460
460 461
461 462 @attr.s(slots=True, init=False)
462 463 class BaseStoreEntry:
463 464 """An entry in the store
464 465
465 466 This is returned by `store.walk` and represents some data in the store."""
466 467
467 468
468 469 @attr.s(slots=True, init=False)
469 470 class SimpleStoreEntry(BaseStoreEntry):
470 471 """A generic entry in the store"""
471 472
472 473 is_revlog = False
473 474
474 475 _entry_path = attr.ib()
475 476 _is_volatile = attr.ib(default=False)
476 477 _file_size = attr.ib(default=None)
477 478
478 479 def __init__(
479 480 self,
480 481 entry_path,
481 482 is_volatile=False,
482 483 file_size=None,
483 484 ):
484 485 super().__init__()
485 486 self._entry_path = entry_path
486 487 self._is_volatile = is_volatile
487 488 self._file_size = file_size
488 489
489 490 def files(self):
490 491 return [
491 492 StoreFile(
492 493 unencoded_path=self._entry_path,
493 494 file_size=self._file_size,
494 495 is_volatile=self._is_volatile,
495 496 )
496 497 ]
497 498
498 499
499 500 @attr.s(slots=True, init=False)
500 501 class RevlogStoreEntry(BaseStoreEntry):
501 502 """A revlog entry in the store"""
502 503
503 504 is_revlog = True
504 505
505 506 revlog_type = attr.ib(default=None)
506 507 target_id = attr.ib(default=None)
507 508 _path_prefix = attr.ib(default=None)
508 509 _details = attr.ib(default=None)
509 510
510 511 def __init__(
511 512 self,
512 513 revlog_type,
513 514 path_prefix,
514 515 target_id,
515 516 details,
516 517 ):
517 518 super().__init__()
518 519 self.revlog_type = revlog_type
519 520 self.target_id = target_id
520 521 self._path_prefix = path_prefix
521 522 assert b'.i' in details, (path_prefix, details)
522 523 self._details = details
523 524
524 525 @property
525 526 def is_changelog(self):
526 527 return self.revlog_type & FILEFLAGS_CHANGELOG
527 528
528 529 @property
529 530 def is_manifestlog(self):
530 531 return self.revlog_type & FILEFLAGS_MANIFESTLOG
531 532
532 533 @property
533 534 def is_filelog(self):
534 535 return self.revlog_type & FILEFLAGS_FILELOG
535 536
536 537 def main_file_path(self):
537 538 """unencoded path of the main revlog file"""
538 539 return self._path_prefix + b'.i'
539 540
540 541 def files(self):
541 542 files = []
542 543 for ext in sorted(self._details, key=_ext_key):
543 544 path = self._path_prefix + ext
544 545 data = self._details[ext]
545 546 files.append(StoreFile(unencoded_path=path, **data))
546 547 return files
547 548
549 def get_revlog_instance(self, repo):
550 """Obtain a revlog instance from this store entry
551
552 An instance of the appropriate class is returned.
553 """
554 if self.is_changelog:
555 return changelog.changelog(repo.svfs)
556 elif self.is_manifestlog:
557 mandir = self.target_id.rstrip(b'/')
558 return manifest.manifestrevlog(
559 repo.nodeconstants, repo.svfs, tree=mandir
560 )
561 else:
562 return filelog.filelog(repo.svfs, self.target_id)
563
548 564
549 565 @attr.s(slots=True)
550 566 class StoreFile:
551 567 """a file matching an entry"""
552 568
553 569 unencoded_path = attr.ib()
554 570 _file_size = attr.ib(default=None)
555 571 is_volatile = attr.ib(default=False)
556 572
557 573 def file_size(self, vfs):
558 574 if self._file_size is not None:
559 575 return self._file_size
560 576 try:
561 577 return vfs.stat(self.unencoded_path).st_size
562 578 except FileNotFoundError:
563 579 return 0
564 580
565 581
566 582 def _gather_revlog(files_data):
567 583 """group files per revlog prefix
568 584
569 585 This returns a two-level nested dict. The top-level key is the revlog
570 586 prefix without extension; the second level maps each file "suffix" seen
571 587 for this revlog to arbitrary file data.
572 588 """
573 589 revlogs = collections.defaultdict(dict)
574 590 for u, value in files_data:
575 591 name, ext = _split_revlog_ext(u)
576 592 revlogs[name][ext] = value
577 593 return sorted(revlogs.items())
578 594
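A small sketch of the grouping `_gather_revlog` performs; the file names and the integer file-data values are hypothetical:

    files = [(b'data/foo.i', 1), (b'data/foo.d', 2), (b'data/bar.i', 3)]
    assert _gather_revlog(files) == [
        (b'data/bar', {b'.i': 3}),
        (b'data/foo', {b'.i': 1, b'.d': 2}),
    ]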
579 595
580 596 def _split_revlog_ext(filename):
581 597 """split the revlog file prefix from the variable extension"""
582 598 if filename.endswith(REVLOG_FILES_LONG_EXT):
583 599 char = b'-'
584 600 else:
585 601 char = b'.'
586 602 idx = filename.rfind(char)
587 603 return filename[:idx], filename[idx:]
588 604
589 605
590 606 def _ext_key(ext):
591 607 """a key to order revlog suffix
592 608
593 609 important to issue .i after other entry."""
594 610 # the only important part of this order is to keep the `.i` last.
595 611 if ext.endswith(b'.n'):
596 612 return (0, ext)
597 613 elif ext.endswith(b'.nd'):
598 614 return (10, ext)
599 615 elif ext.endswith(b'.d'):
600 616 return (20, ext)
601 617 elif ext.endswith(b'.i'):
602 618 return (50, ext)
603 619 else:
604 620 return (40, ext)
605 621
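For instance, sorting a set of hypothetical suffixes with `_ext_key` keeps `.i` last, as the docstring above requires:

    assert sorted([b'.i', b'.d', b'.n', b'.nd', b'.dat'], key=_ext_key) == [
        b'.n', b'.nd', b'.d', b'.dat', b'.i',
    ]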
606 622
607 623 class basicstore:
608 624 '''base class for local repository stores'''
609 625
610 626 def __init__(self, path, vfstype):
611 627 vfs = vfstype(path)
612 628 self.path = vfs.base
613 629 self.createmode = _calcmode(vfs)
614 630 vfs.createmode = self.createmode
615 631 self.rawvfs = vfs
616 632 self.vfs = vfsmod.filtervfs(vfs, encodedir)
617 633 self.opener = self.vfs
618 634
619 635 def join(self, f):
620 636 return self.path + b'/' + encodedir(f)
621 637
622 638 def _walk(self, relpath, recurse, undecodable=None):
623 639 '''yields (revlog_type, unencoded, size)'''
624 640 path = self.path
625 641 if relpath:
626 642 path += b'/' + relpath
627 643 striplen = len(self.path) + 1
628 644 l = []
629 645 if self.rawvfs.isdir(path):
630 646 visit = [path]
631 647 readdir = self.rawvfs.readdir
632 648 while visit:
633 649 p = visit.pop()
634 650 for f, kind, st in readdir(p, stat=True):
635 651 fp = p + b'/' + f
636 652 rl_type = is_revlog(f, kind, st)
637 653 if rl_type is not None:
638 654 n = util.pconvert(fp[striplen:])
639 655 l.append((decodedir(n), (rl_type, st.st_size)))
640 656 elif kind == stat.S_IFDIR and recurse:
641 657 visit.append(fp)
642 658
643 659 l.sort()
644 660 return l
645 661
646 662 def changelog(self, trypending, concurrencychecker=None):
647 663 return changelog.changelog(
648 664 self.vfs,
649 665 trypending=trypending,
650 666 concurrencychecker=concurrencychecker,
651 667 )
652 668
653 669 def manifestlog(self, repo, storenarrowmatch):
654 670 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
655 671 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
656 672
657 673 def data_entries(
658 674 self, matcher=None, undecodable=None
659 675 ) -> Generator[BaseStoreEntry, None, None]:
660 676 """Like walk, but excluding the changelog and root manifest.
661 677
662 678 When [undecodable] is None, revlog names that can't be
663 679 decoded cause an exception. When it is provided, it should
664 680 be a list and the filenames that can't be decoded are added
665 681 to it instead. This is very rarely needed."""
666 682 dirs = [
667 683 (b'data', FILEFLAGS_FILELOG),
668 684 (b'meta', FILEFLAGS_MANIFESTLOG),
669 685 ]
670 686 for base_dir, rl_type in dirs:
671 687 files = self._walk(base_dir, True, undecodable=undecodable)
672 688 files = (f for f in files if f[1][0] is not None)
673 689 for revlog, details in _gather_revlog(files):
674 690 file_details = {}
675 691 revlog_target_id = revlog.split(b'/', 1)[1]
676 692 for ext, (t, s) in sorted(details.items()):
677 693 file_details[ext] = {
678 694 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
679 695 'file_size': s,
680 696 }
681 697 yield RevlogStoreEntry(
682 698 path_prefix=revlog,
683 699 revlog_type=rl_type,
684 700 target_id=revlog_target_id,
685 701 details=file_details,
686 702 )
687 703
688 704 def top_entries(
689 705 self, phase=False, obsolescence=False
690 706 ) -> Generator[BaseStoreEntry, None, None]:
691 707 if phase and self.vfs.exists(b'phaseroots'):
692 708 yield SimpleStoreEntry(
693 709 entry_path=b'phaseroots',
694 710 is_volatile=True,
695 711 )
696 712
697 713 if obsolescence and self.vfs.exists(b'obsstore'):
698 714 # XXX if we had the file size it could be non-volatile
699 715 yield SimpleStoreEntry(
700 716 entry_path=b'obsstore',
701 717 is_volatile=True,
702 718 )
703 719
704 720 files = reversed(self._walk(b'', False))
705 721
706 722 changelogs = collections.defaultdict(dict)
707 723 manifestlogs = collections.defaultdict(dict)
708 724
709 725 for u, (t, s) in files:
710 726 if u.startswith(b'00changelog'):
711 727 name, ext = _split_revlog_ext(u)
712 728 changelogs[name][ext] = (t, s)
713 729 elif u.startswith(b'00manifest'):
714 730 name, ext = _split_revlog_ext(u)
715 731 manifestlogs[name][ext] = (t, s)
716 732 else:
717 733 yield SimpleStoreEntry(
718 734 entry_path=u,
719 735 is_volatile=bool(t & FILEFLAGS_VOLATILE),
720 736 file_size=s,
721 737 )
722 738 # yield manifest before changelog
723 739 top_rl = [
724 740 (manifestlogs, FILEFLAGS_MANIFESTLOG),
725 741 (changelogs, FILEFLAGS_CHANGELOG),
726 742 ]
727 743 assert len(manifestlogs) <= 1
728 744 assert len(changelogs) <= 1
729 745 for data, revlog_type in top_rl:
730 746 for revlog, details in sorted(data.items()):
731 747 file_details = {}
732 748 for ext, (t, s) in details.items():
733 749 file_details[ext] = {
734 750 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
735 751 'file_size': s,
736 752 }
737 753 yield RevlogStoreEntry(
738 754 path_prefix=revlog,
739 755 revlog_type=revlog_type,
740 756 target_id=b'',
741 757 details=file_details,
742 758 )
743 759
744 760 def walk(
745 761 self, matcher=None, phase=False, obsolescence=False
746 762 ) -> Generator[BaseStoreEntry, None, None]:
747 763 """return files related to data storage (ie: revlogs)
748 764
749 765 yields instance from BaseStoreEntry subclasses
750 766
751 767 if a matcher is passed, only the storage files of tracked paths
752 768 matched by that matcher are yielded
753 769 """
754 770 # yield data files first
755 771 for x in self.data_entries(matcher):
756 772 yield x
757 773 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
758 774 yield x
759 775
760 776 def copylist(self):
761 777 return _data
762 778
763 779 def write(self, tr):
764 780 pass
765 781
766 782 def invalidatecaches(self):
767 783 pass
768 784
769 785 def markremoved(self, fn):
770 786 pass
771 787
772 788 def __contains__(self, path):
773 789 '''Checks if the store contains path'''
774 790 path = b"/".join((b"data", path))
775 791 # file?
776 792 if self.vfs.exists(path + b".i"):
777 793 return True
778 794 # dir?
779 795 if not path.endswith(b"/"):
780 796 path = path + b"/"
781 797 return self.vfs.exists(path)
782 798
783 799
784 800 class encodedstore(basicstore):
785 801 def __init__(self, path, vfstype):
786 802 vfs = vfstype(path + b'/store')
787 803 self.path = vfs.base
788 804 self.createmode = _calcmode(vfs)
789 805 vfs.createmode = self.createmode
790 806 self.rawvfs = vfs
791 807 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
792 808 self.opener = self.vfs
793 809
794 810 def _walk(self, relpath, recurse, undecodable=None):
795 811 old = super()._walk(relpath, recurse)
796 812 new = []
797 813 for f1, value in old:
798 814 try:
799 815 f2 = decodefilename(f1)
800 816 except KeyError:
801 817 if undecodable is None:
802 818 msg = _(b'undecodable revlog name %s') % f1
803 819 raise error.StorageError(msg)
804 820 else:
805 821 undecodable.append(f1)
806 822 continue
807 823 new.append((f2, value))
808 824 return new
809 825
810 826 def data_entries(
811 827 self, matcher=None, undecodable=None
812 828 ) -> Generator[BaseStoreEntry, None, None]:
813 829 entries = super(encodedstore, self).data_entries(
814 830 undecodable=undecodable
815 831 )
816 832 for entry in entries:
817 833 if _match_tracked_entry(entry, matcher):
818 834 yield entry
819 835
820 836 def join(self, f):
821 837 return self.path + b'/' + encodefilename(f)
822 838
823 839 def copylist(self):
824 840 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
825 841
826 842
827 843 class fncache:
828 844 # the filename used to be partially encoded
829 845 # hence the encodedir/decodedir dance
830 846 def __init__(self, vfs):
831 847 self.vfs = vfs
832 848 self._ignores = set()
833 849 self.entries = None
834 850 self._dirty = False
835 851 # set of new additions to fncache
836 852 self.addls = set()
837 853
838 854 def ensureloaded(self, warn=None):
839 855 """read the fncache file if not already read.
840 856
841 857 If the file on disk is corrupted, raise. If warn is provided,
842 858 warn and keep going instead."""
843 859 if self.entries is None:
844 860 self._load(warn)
845 861
846 862 def _load(self, warn=None):
847 863 '''fill the entries from the fncache file'''
848 864 self._dirty = False
849 865 try:
850 866 fp = self.vfs(b'fncache', mode=b'rb')
851 867 except IOError:
852 868 # skip nonexistent file
853 869 self.entries = set()
854 870 return
855 871
856 872 self.entries = set()
857 873 chunk = b''
858 874 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
859 875 chunk += c
860 876 try:
861 877 p = chunk.rindex(b'\n')
862 878 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
863 879 chunk = chunk[p + 1 :]
864 880 except ValueError:
865 881 # substring '\n' not found, maybe the entry is bigger than the
866 882 # chunksize, so let's keep iterating
867 883 pass
868 884
869 885 if chunk:
870 886 msg = _(b"fncache does not ends with a newline")
871 887 if warn:
872 888 warn(msg + b'\n')
873 889 else:
874 890 raise error.Abort(
875 891 msg,
876 892 hint=_(
877 893 b"use 'hg debugrebuildfncache' to "
878 894 b"rebuild the fncache"
879 895 ),
880 896 )
881 897 self._checkentries(fp, warn)
882 898 fp.close()
883 899
884 900 def _checkentries(self, fp, warn):
885 901 """make sure there is no empty string in entries"""
886 902 if b'' in self.entries:
887 903 fp.seek(0)
888 904 for n, line in enumerate(fp):
889 905 if not line.rstrip(b'\n'):
890 906 t = _(b'invalid entry in fncache, line %d') % (n + 1)
891 907 if warn:
892 908 warn(t + b'\n')
893 909 else:
894 910 raise error.Abort(t)
895 911
896 912 def write(self, tr):
897 913 if self._dirty:
898 914 assert self.entries is not None
899 915 self.entries = self.entries | self.addls
900 916 self.addls = set()
901 917 tr.addbackup(b'fncache')
902 918 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
903 919 if self.entries:
904 920 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
905 921 fp.close()
906 922 self._dirty = False
907 923 if self.addls:
908 924 # if we have just new entries, let's append them to the fncache
909 925 tr.addbackup(b'fncache')
910 926 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
911 927 if self.addls:
912 928 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
913 929 fp.close()
914 930 self.entries = None
915 931 self.addls = set()
916 932
917 933 def addignore(self, fn):
918 934 self._ignores.add(fn)
919 935
920 936 def add(self, fn):
921 937 if fn in self._ignores:
922 938 return
923 939 if self.entries is None:
924 940 self._load()
925 941 if fn not in self.entries:
926 942 self.addls.add(fn)
927 943
928 944 def remove(self, fn):
929 945 if self.entries is None:
930 946 self._load()
931 947 if fn in self.addls:
932 948 self.addls.remove(fn)
933 949 return
934 950 try:
935 951 self.entries.remove(fn)
936 952 self._dirty = True
937 953 except KeyError:
938 954 pass
939 955
940 956 def __contains__(self, fn):
941 957 if fn in self.addls:
942 958 return True
943 959 if self.entries is None:
944 960 self._load()
945 961 return fn in self.entries
946 962
947 963 def __iter__(self):
948 964 if self.entries is None:
949 965 self._load()
950 966 return iter(self.entries | self.addls)
951 967
952 968
953 969 class _fncachevfs(vfsmod.proxyvfs):
954 970 def __init__(self, vfs, fnc, encode):
955 971 vfsmod.proxyvfs.__init__(self, vfs)
956 972 self.fncache = fnc
957 973 self.encode = encode
958 974
959 975 def __call__(self, path, mode=b'r', *args, **kw):
960 976 encoded = self.encode(path)
961 977 if (
962 978 mode not in (b'r', b'rb')
963 979 and (path.startswith(b'data/') or path.startswith(b'meta/'))
964 980 and revlog_type(path) is not None
965 981 ):
966 982 # do not trigger a fncache load when adding a file that already is
967 983 # known to exist.
968 984 notload = self.fncache.entries is None and self.vfs.exists(encoded)
969 985 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
970 986 # when appending to an existing file, if the file has size zero,
971 987 # it should be considered as missing. Such zero-size files are
972 988 # the result of truncation when a transaction is aborted.
973 989 notload = False
974 990 if not notload:
975 991 self.fncache.add(path)
976 992 return self.vfs(encoded, mode, *args, **kw)
977 993
978 994 def join(self, path):
979 995 if path:
980 996 return self.vfs.join(self.encode(path))
981 997 else:
982 998 return self.vfs.join(path)
983 999
984 1000 def register_file(self, path):
985 1001 """generic hook point to lets fncache steer its stew"""
986 1002 if path.startswith(b'data/') or path.startswith(b'meta/'):
987 1003 self.fncache.add(path)
988 1004
989 1005
990 1006 class fncachestore(basicstore):
991 1007 def __init__(self, path, vfstype, dotencode):
992 1008 if dotencode:
993 1009 encode = _pathencode
994 1010 else:
995 1011 encode = _plainhybridencode
996 1012 self.encode = encode
997 1013 vfs = vfstype(path + b'/store')
998 1014 self.path = vfs.base
999 1015 self.pathsep = self.path + b'/'
1000 1016 self.createmode = _calcmode(vfs)
1001 1017 vfs.createmode = self.createmode
1002 1018 self.rawvfs = vfs
1003 1019 fnc = fncache(vfs)
1004 1020 self.fncache = fnc
1005 1021 self.vfs = _fncachevfs(vfs, fnc, encode)
1006 1022 self.opener = self.vfs
1007 1023
1008 1024 def join(self, f):
1009 1025 return self.pathsep + self.encode(f)
1010 1026
1011 1027 def getsize(self, path):
1012 1028 return self.rawvfs.stat(path).st_size
1013 1029
1014 1030 def data_entries(
1015 1031 self, matcher=None, undecodable=None
1016 1032 ) -> Generator[BaseStoreEntry, None, None]:
1017 1033 files = ((f, revlog_type(f)) for f in self.fncache)
1018 1034 # Note: all files in fncache should be revlog related; however the
1019 1035 # fncache might contain such files added by previous versions of
1020 1036 # Mercurial.
1021 1037 files = (f for f in files if f[1] is not None)
1022 1038 by_revlog = _gather_revlog(files)
1023 1039 for revlog, details in by_revlog:
1024 1040 file_details = {}
1025 1041 if revlog.startswith(b'data/'):
1026 1042 rl_type = FILEFLAGS_FILELOG
1027 1043 revlog_target_id = revlog.split(b'/', 1)[1]
1028 1044 elif revlog.startswith(b'meta/'):
1029 1045 rl_type = FILEFLAGS_MANIFESTLOG
1030 1046 # drop the initial directory and the `00manifest` file part
1031 1047 tmp = revlog.split(b'/', 1)[1]
1032 1048 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1033 1049 else:
1034 1050 # unreachable
1035 1051 assert False, revlog
1036 1052 for ext, t in details.items():
1037 1053 file_details[ext] = {
1038 1054 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1039 1055 }
1040 1056 entry = RevlogStoreEntry(
1041 1057 path_prefix=revlog,
1042 1058 revlog_type=rl_type,
1043 1059 target_id=revlog_target_id,
1044 1060 details=file_details,
1045 1061 )
1046 1062 if _match_tracked_entry(entry, matcher):
1047 1063 yield entry
1048 1064
1049 1065 def copylist(self):
1050 1066 d = (
1051 1067 b'bookmarks',
1052 1068 b'narrowspec',
1053 1069 b'data',
1054 1070 b'meta',
1055 1071 b'dh',
1056 1072 b'fncache',
1057 1073 b'phaseroots',
1058 1074 b'obsstore',
1059 1075 b'00manifest.d',
1060 1076 b'00manifest.i',
1061 1077 b'00changelog.d',
1062 1078 b'00changelog.i',
1063 1079 b'requires',
1064 1080 )
1065 1081 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1066 1082
1067 1083 def write(self, tr):
1068 1084 self.fncache.write(tr)
1069 1085
1070 1086 def invalidatecaches(self):
1071 1087 self.fncache.entries = None
1072 1088 self.fncache.addls = set()
1073 1089
1074 1090 def markremoved(self, fn):
1075 1091 self.fncache.remove(fn)
1076 1092
1077 1093 def _exists(self, f):
1078 1094 ef = self.encode(f)
1079 1095 try:
1080 1096 self.getsize(ef)
1081 1097 return True
1082 1098 except FileNotFoundError:
1083 1099 return False
1084 1100
1085 1101 def __contains__(self, path):
1086 1102 '''Checks if the store contains path'''
1087 1103 path = b"/".join((b"data", path))
1088 1104 # check for files (exact match)
1089 1105 e = path + b'.i'
1090 1106 if e in self.fncache and self._exists(e):
1091 1107 return True
1092 1108 # now check for directories (prefix match)
1093 1109 if not path.endswith(b'/'):
1094 1110 path += b'/'
1095 1111 for e in self.fncache:
1096 1112 if e.startswith(path) and self._exists(e):
1097 1113 return True
1098 1114 return False
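Before moving to the second file, note how `get_revlog_instance` and the `is_changelog`/`is_manifestlog`/`is_filelog` properties dispatch on the bit flags defined in this hunk. A self-contained sketch of that flag arithmetic; the constants are copied from above, while the `entry_type` value is a hypothetical manifest entry:

    FILEFLAGS_CHANGELOG = 1 << 13
    FILEFLAGS_MANIFESTLOG = 1 << 12
    FILEFLAGS_FILELOG = 1 << 11
    FILEFLAGS_REVLOG_MAIN = 1 << 1

    entry_type = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
    assert entry_type & FILEFLAGS_MANIFESTLOG      # is_manifestlog: truthy
    assert not entry_type & FILEFLAGS_CHANGELOG    # is_changelog: falsy
    assert not entry_type & FILEFLAGS_FILELOG      # is_filelog: falsy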
@@ -1,668 +1,649 @@
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import stat
10 10
11 11 from ..i18n import _
12 12 from ..pycompat import getattr
13 13 from .. import (
14 changelog,
15 14 error,
16 filelog,
17 manifest,
18 15 metadata,
19 16 pycompat,
20 17 requirements,
21 18 scmutil,
22 19 store,
23 20 util,
24 21 vfs as vfsmod,
25 22 )
26 23 from ..revlogutils import (
27 24 constants as revlogconst,
28 25 flagutil,
29 26 nodemap,
30 27 sidedata as sidedatamod,
31 28 )
32 29 from . import actions as upgrade_actions
33 30
34 31
35 32 def get_sidedata_helpers(srcrepo, dstrepo):
36 33 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 34 sequential = pycompat.iswindows or not use_w
38 35 if not sequential:
39 36 srcrepo.register_sidedata_computer(
40 37 revlogconst.KIND_CHANGELOG,
41 38 sidedatamod.SD_FILES,
42 39 (sidedatamod.SD_FILES,),
43 40 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 41 flagutil.REVIDX_HASCOPIESINFO,
45 42 replace=True,
46 43 )
47 44 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48 45
49 46
50 def _revlog_from_store_entry(repo, entry):
51 """Obtain a revlog from a repo store entry.
52
53 An instance of the appropriate class is returned.
54 """
55 if entry.is_changelog:
56 return changelog.changelog(repo.svfs)
57 elif entry.is_manifestlog:
58 mandir = entry.target_id.rstrip(b'/')
59 return manifest.manifestrevlog(
60 repo.nodeconstants, repo.svfs, tree=mandir
61 )
62 else:
63 return filelog.filelog(repo.svfs, entry.target_id)
64
65
66 47 def _copyrevlog(tr, destrepo, oldrl, entry):
67 48 """copy all relevant files for `oldrl` into `destrepo` store
68 49
69 50 Files are copied "as is" without any transformation. The copy is performed
70 51 without extra checks. Callers are responsible for making sure the copied
71 52 content is compatible with format of the destination repository.
72 53 """
73 54 oldrl = getattr(oldrl, '_revlog', oldrl)
74 newrl = _revlog_from_store_entry(destrepo, entry)
55 newrl = entry.get_revlog_instance(destrepo)
75 56 newrl = getattr(newrl, '_revlog', newrl)
76 57
77 58 oldvfs = oldrl.opener
78 59 newvfs = newrl.opener
79 60 oldindex = oldvfs.join(oldrl._indexfile)
80 61 newindex = newvfs.join(newrl._indexfile)
81 62 olddata = oldvfs.join(oldrl._datafile)
82 63 newdata = newvfs.join(newrl._datafile)
83 64
84 65 with newvfs(newrl._indexfile, b'w'):
85 66 pass # create all the directories
86 67
87 68 util.copyfile(oldindex, newindex)
88 69 copydata = oldrl.opener.exists(oldrl._datafile)
89 70 if copydata:
90 71 util.copyfile(olddata, newdata)
91 72
92 73 if entry.is_filelog:
93 74 unencodedname = entry.main_file_path()
94 75 destrepo.svfs.fncache.add(unencodedname)
95 76 if copydata:
96 77 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
97 78
98 79
99 80 UPGRADE_CHANGELOG = b"changelog"
100 81 UPGRADE_MANIFEST = b"manifest"
101 82 UPGRADE_FILELOGS = b"all-filelogs"
102 83
103 84 UPGRADE_ALL_REVLOGS = frozenset(
104 85 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
105 86 )
106 87
107 88
108 89 def matchrevlog(revlogfilter, entry):
109 90 """check if a revlog is selected for cloning.
110 91
111 92 In other words, do any updates need to be done on the revlog,
112 93 or can it be blindly copied?
113 94
114 95 The store entry is checked against the passed filter"""
115 96 if entry.is_changelog:
116 97 return UPGRADE_CHANGELOG in revlogfilter
117 98 elif entry.is_manifestlog:
118 99 return UPGRADE_MANIFEST in revlogfilter
119 100 assert entry.is_filelog
120 101 return UPGRADE_FILELOGS in revlogfilter
121 102
122 103
123 104 def _perform_clone(
124 105 ui,
125 106 dstrepo,
126 107 tr,
127 108 old_revlog,
128 109 entry,
129 110 upgrade_op,
130 111 sidedata_helpers,
131 112 oncopiedrevision,
132 113 ):
133 114 """returns the new revlog object created"""
134 115 newrl = None
135 116 revlog_path = entry.main_file_path()
136 117 if matchrevlog(upgrade_op.revlogs_to_process, entry):
137 118 ui.note(
138 119 _(b'cloning %d revisions from %s\n')
139 120 % (len(old_revlog), revlog_path)
140 121 )
141 newrl = _revlog_from_store_entry(dstrepo, entry)
122 newrl = entry.get_revlog_instance(dstrepo)
142 123 old_revlog.clone(
143 124 tr,
144 125 newrl,
145 126 addrevisioncb=oncopiedrevision,
146 127 deltareuse=upgrade_op.delta_reuse_mode,
147 128 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
148 129 sidedata_helpers=sidedata_helpers,
149 130 )
150 131 else:
151 132 msg = _(b'blindly copying %s containing %i revisions\n')
152 133 ui.note(msg % (revlog_path, len(old_revlog)))
153 134 _copyrevlog(tr, dstrepo, old_revlog, entry)
154 135
155 newrl = _revlog_from_store_entry(dstrepo, entry)
136 newrl = entry.get_revlog_instance(dstrepo)
156 137 return newrl
157 138
158 139
159 140 def _clonerevlogs(
160 141 ui,
161 142 srcrepo,
162 143 dstrepo,
163 144 tr,
164 145 upgrade_op,
165 146 ):
166 147 """Copy revlogs between 2 repos."""
167 148 revcount = 0
168 149 srcsize = 0
169 150 srcrawsize = 0
170 151 dstsize = 0
171 152 fcount = 0
172 153 frevcount = 0
173 154 fsrcsize = 0
174 155 frawsize = 0
175 156 fdstsize = 0
176 157 mcount = 0
177 158 mrevcount = 0
178 159 msrcsize = 0
179 160 mrawsize = 0
180 161 mdstsize = 0
181 162 crevcount = 0
182 163 csrcsize = 0
183 164 crawsize = 0
184 165 cdstsize = 0
185 166
186 167 alldatafiles = list(srcrepo.store.walk())
187 168 # mapping of data files which need to be cloned
188 169 # key is unencoded filename
189 170 # value is revlog_object_from_srcrepo
190 171 manifests = {}
191 172 changelogs = {}
192 173 filelogs = {}
193 174
194 175 # Perform a pass to collect metadata. This validates we can open all
195 176 # source files and allows a unified progress bar to be displayed.
196 177 for entry in alldatafiles:
197 178 if not entry.is_revlog:
198 179 continue
199 180
200 rl = _revlog_from_store_entry(srcrepo, entry)
181 rl = entry.get_revlog_instance(srcrepo)
201 182
202 183 info = rl.storageinfo(
203 184 exclusivefiles=True,
204 185 revisionscount=True,
205 186 trackedsize=True,
206 187 storedsize=True,
207 188 )
208 189
209 190 revcount += info[b'revisionscount'] or 0
210 191 datasize = info[b'storedsize'] or 0
211 192 rawsize = info[b'trackedsize'] or 0
212 193
213 194 srcsize += datasize
214 195 srcrawsize += rawsize
215 196
216 197 # This is for the separate progress bars.
217 198 if entry.is_changelog:
218 199 changelogs[entry.target_id] = entry
219 200 crevcount += len(rl)
220 201 csrcsize += datasize
221 202 crawsize += rawsize
222 203 elif entry.is_manifestlog:
223 204 manifests[entry.target_id] = entry
224 205 mcount += 1
225 206 mrevcount += len(rl)
226 207 msrcsize += datasize
227 208 mrawsize += rawsize
228 209 elif entry.is_filelog:
229 210 filelogs[entry.target_id] = entry
230 211 fcount += 1
231 212 frevcount += len(rl)
232 213 fsrcsize += datasize
233 214 frawsize += rawsize
234 215 else:
235 216 raise error.ProgrammingError(b'unknown revlog type')
236 217
237 218 if not revcount:
238 219 return
239 220
240 221 ui.status(
241 222 _(
242 223 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
243 224 b'%d in changelog)\n'
244 225 )
245 226 % (revcount, frevcount, mrevcount, crevcount)
246 227 )
247 228 ui.status(
248 229 _(b'migrating %s in store; %s tracked data\n')
249 230 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
250 231 )
251 232
252 233 # Used to keep track of progress.
253 234 progress = None
254 235
255 236 def oncopiedrevision(rl, rev, node):
256 237 progress.increment()
257 238
258 239 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
259 240
260 241 # Migrating filelogs
261 242 ui.status(
262 243 _(
263 244 b'migrating %d filelogs containing %d revisions '
264 245 b'(%s in store; %s tracked data)\n'
265 246 )
266 247 % (
267 248 fcount,
268 249 frevcount,
269 250 util.bytecount(fsrcsize),
270 251 util.bytecount(frawsize),
271 252 )
272 253 )
273 254 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
274 255 for target_id, entry in sorted(filelogs.items()):
275 oldrl = _revlog_from_store_entry(srcrepo, entry)
256 oldrl = entry.get_revlog_instance(srcrepo)
276 257
277 258 newrl = _perform_clone(
278 259 ui,
279 260 dstrepo,
280 261 tr,
281 262 oldrl,
282 263 entry,
283 264 upgrade_op,
284 265 sidedata_helpers,
285 266 oncopiedrevision,
286 267 )
287 268 info = newrl.storageinfo(storedsize=True)
288 269 fdstsize += info[b'storedsize'] or 0
289 270 ui.status(
290 271 _(
291 272 b'finished migrating %d filelog revisions across %d '
292 273 b'filelogs; change in size: %s\n'
293 274 )
294 275 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
295 276 )
296 277
297 278 # Migrating manifests
298 279 ui.status(
299 280 _(
300 281 b'migrating %d manifests containing %d revisions '
301 282 b'(%s in store; %s tracked data)\n'
302 283 )
303 284 % (
304 285 mcount,
305 286 mrevcount,
306 287 util.bytecount(msrcsize),
307 288 util.bytecount(mrawsize),
308 289 )
309 290 )
310 291 if progress:
311 292 progress.complete()
312 293 progress = srcrepo.ui.makeprogress(
313 294 _(b'manifest revisions'), total=mrevcount
314 295 )
315 296 for target_id, entry in sorted(manifests.items()):
316 oldrl = _revlog_from_store_entry(srcrepo, entry)
297 oldrl = entry.get_revlog_instance(srcrepo)
317 298 newrl = _perform_clone(
318 299 ui,
319 300 dstrepo,
320 301 tr,
321 302 oldrl,
322 303 entry,
323 304 upgrade_op,
324 305 sidedata_helpers,
325 306 oncopiedrevision,
326 307 )
327 308 info = newrl.storageinfo(storedsize=True)
328 309 mdstsize += info[b'storedsize'] or 0
329 310 ui.status(
330 311 _(
331 312 b'finished migrating %d manifest revisions across %d '
332 313 b'manifests; change in size: %s\n'
333 314 )
334 315 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
335 316 )
336 317
337 318 # Migrating changelog
338 319 ui.status(
339 320 _(
340 321 b'migrating changelog containing %d revisions '
341 322 b'(%s in store; %s tracked data)\n'
342 323 )
343 324 % (
344 325 crevcount,
345 326 util.bytecount(csrcsize),
346 327 util.bytecount(crawsize),
347 328 )
348 329 )
349 330 if progress:
350 331 progress.complete()
351 332 progress = srcrepo.ui.makeprogress(
352 333 _(b'changelog revisions'), total=crevcount
353 334 )
354 335 for target_id, entry in sorted(changelogs.items()):
355 oldrl = _revlog_from_store_entry(srcrepo, entry)
336 oldrl = entry.get_revlog_instance(srcrepo)
356 337 newrl = _perform_clone(
357 338 ui,
358 339 dstrepo,
359 340 tr,
360 341 oldrl,
361 342 entry,
362 343 upgrade_op,
363 344 sidedata_helpers,
364 345 oncopiedrevision,
365 346 )
366 347 info = newrl.storageinfo(storedsize=True)
367 348 cdstsize += info[b'storedsize'] or 0
368 349 progress.complete()
369 350 ui.status(
370 351 _(
371 352 b'finished migrating %d changelog revisions; change in size: '
372 353 b'%s\n'
373 354 )
374 355 % (crevcount, util.bytecount(cdstsize - csrcsize))
375 356 )
376 357
377 358 dstsize = fdstsize + mdstsize + cdstsize
378 359 ui.status(
379 360 _(
380 361 b'finished migrating %d total revisions; total change in store '
381 362 b'size: %s\n'
382 363 )
383 364 % (revcount, util.bytecount(dstsize - srcsize))
384 365 )
385 366
386 367
387 368 def _files_to_copy_post_revlog_clone(srcrepo):
388 369 """yields files which should be copied to destination after revlogs
389 370 are cloned"""
390 371 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
391 372 # don't copy revlogs as they are already cloned
392 373 if store.revlog_type(path) is not None:
393 374 continue
394 375 # Skip transaction related files.
395 376 if path.startswith(b'undo'):
396 377 continue
397 378 # Only copy regular files.
398 379 if kind != stat.S_IFREG:
399 380 continue
400 381 # Skip other skipped files.
401 382 if path in (b'lock', b'fncache'):
402 383 continue
403 384 # TODO: should we skip cache too?
404 385
405 386 yield path
406 387
407 388
408 389 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
409 390 """Replace the stores after current repository is upgraded
410 391
411 392 Creates a backup of current repository store at backup path
412 393 Replaces upgraded store files in current repo from upgraded one
413 394
414 395 Arguments:
415 396 currentrepo: repo object of current repository
416 397 upgradedrepo: repo object of the upgraded data
417 398 backupvfs: vfs object for the backup path
418 399 upgrade_op: upgrade operation object
419 400 to be used to decide what all is upgraded
420 401 """
421 402 # TODO: don't blindly rename everything in store
422 403 # There can be upgrades where store is not touched at all
423 404 if upgrade_op.backup_store:
424 405 util.rename(currentrepo.spath, backupvfs.join(b'store'))
425 406 else:
426 407 currentrepo.vfs.rmtree(b'store', forcibly=True)
427 408 util.rename(upgradedrepo.spath, currentrepo.spath)
428 409
429 410
430 411 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
431 412 """Hook point for extensions to perform additional actions during upgrade.
432 413
433 414 This function is called after revlogs and store files have been copied but
434 415 before the new store is swapped into the original location.
435 416 """
436 417
437 418
438 419 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
439 420 """Do the low-level work of upgrading a repository.
440 421
441 422 The upgrade is effectively performed as a copy between a source
442 423 repository and a temporary destination repository.
443 424
444 425 The source repository is unmodified for as long as possible so the
445 426 upgrade can abort at any time without causing loss of service for
446 427 readers and without corrupting the source repository.
447 428 """
448 429 assert srcrepo.currentwlock()
449 430 assert dstrepo.currentwlock()
450 431 backuppath = None
451 432 backupvfs = None
452 433
453 434 ui.status(
454 435 _(
455 436 b'(it is safe to interrupt this process any time before '
456 437 b'data migration completes)\n'
457 438 )
458 439 )
459 440
460 441 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
461 442 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
462 443 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
463 444 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
464 445
465 446 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
466 447 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
467 448 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
468 449 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
469 450
470 451 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
471 452 ui.status(_(b'create dirstate-tracked-hint file\n'))
472 453 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
473 454 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
474 455 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
475 456 ui.status(_(b'remove dirstate-tracked-hint file\n'))
476 457 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
477 458 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
478 459
479 460 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
480 461 return
481 462
482 463 if upgrade_op.requirements_only:
483 464 ui.status(_(b'upgrading repository requirements\n'))
484 465 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
485 466 # if there is only one action and that is persistent nodemap upgrade
486 467 # directly write the nodemap file and update requirements instead of going
487 468 # through the whole cloning process
488 469 elif (
489 470 len(upgrade_op.upgrade_actions) == 1
490 471 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
491 472 and not upgrade_op.removed_actions
492 473 ):
493 474 ui.status(
494 475 _(b'upgrading repository to use persistent nodemap feature\n')
495 476 )
496 477 with srcrepo.transaction(b'upgrade') as tr:
497 478 unfi = srcrepo.unfiltered()
498 479 cl = unfi.changelog
499 480 nodemap.persist_nodemap(tr, cl, force=True)
500 481 # we want to directly operate on the underlying revlog to force
501 482 # create a nodemap file. This is fine since this is upgrade code
502 483 # and it heavily relies on repository being revlog based
503 484 # hence accessing private attributes can be justified
504 485 nodemap.persist_nodemap(
505 486 tr, unfi.manifestlog._rootstore._revlog, force=True
506 487 )
507 488 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
508 489 elif (
509 490 len(upgrade_op.removed_actions) == 1
510 491 and [
511 492 x
512 493 for x in upgrade_op.removed_actions
513 494 if x.name == b'persistent-nodemap'
514 495 ]
515 496 and not upgrade_op.upgrade_actions
516 497 ):
517 498 ui.status(
518 499 _(b'downgrading repository to not use persistent nodemap feature\n')
519 500 )
520 501 with srcrepo.transaction(b'upgrade') as tr:
521 502 unfi = srcrepo.unfiltered()
522 503 cl = unfi.changelog
523 504 nodemap.delete_nodemap(tr, srcrepo, cl)
524 505 # check comment 20 lines above for accessing private attributes
525 506 nodemap.delete_nodemap(
526 507 tr, srcrepo, unfi.manifestlog._rootstore._revlog
527 508 )
528 509 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
529 510 else:
530 511 with dstrepo.transaction(b'upgrade') as tr:
531 512 _clonerevlogs(
532 513 ui,
533 514 srcrepo,
534 515 dstrepo,
535 516 tr,
536 517 upgrade_op,
537 518 )
538 519
539 520 # Now copy other files in the store directory.
540 521 for p in _files_to_copy_post_revlog_clone(srcrepo):
541 522 srcrepo.ui.status(_(b'copying %s\n') % p)
542 523 src = srcrepo.store.rawvfs.join(p)
543 524 dst = dstrepo.store.rawvfs.join(p)
544 525 util.copyfile(src, dst, copystat=True)
545 526
546 527 finishdatamigration(ui, srcrepo, dstrepo, requirements)
547 528
548 529 ui.status(_(b'data fully upgraded in a temporary repository\n'))
549 530
550 531 if upgrade_op.backup_store:
551 532 backuppath = pycompat.mkdtemp(
552 533 prefix=b'upgradebackup.', dir=srcrepo.path
553 534 )
554 535 backupvfs = vfsmod.vfs(backuppath)
555 536
556 537 # Make a backup of requires file first, as it is the first to be modified.
557 538 util.copyfile(
558 539 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
559 540 )
560 541
561 542 # We install an arbitrary requirement that clients must not support
562 543 # as a mechanism to lock out new clients during the data swap. This is
563 544 # better than allowing a client to continue while the repository is in
564 545 # an inconsistent state.
565 546 ui.status(
566 547 _(
567 548 b'marking source repository as being upgraded; clients will be '
568 549 b'unable to read from repository\n'
569 550 )
570 551 )
571 552 scmutil.writereporequirements(
572 553 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
573 554 )
574 555
575 556 ui.status(_(b'starting in-place swap of repository data\n'))
576 557 if upgrade_op.backup_store:
577 558 ui.status(
578 559 _(b'replaced files will be backed up at %s\n') % backuppath
579 560 )
580 561
581 562 # Now swap in the new store directory. Doing it as a rename should make
582 563 # the operation nearly instantaneous and atomic (at least in well-behaved
583 564 # environments).
584 565 ui.status(_(b'replacing store...\n'))
585 566 tstart = util.timer()
586 567 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
587 568 elapsed = util.timer() - tstart
588 569 ui.status(
589 570 _(
590 571 b'store replacement complete; repository was inconsistent for '
591 572 b'%0.1fs\n'
592 573 )
593 574 % elapsed
594 575 )
595 576
596 577 # We first write the requirements file. Any new requirements will lock
597 578 # out legacy clients.
598 579 ui.status(
599 580 _(
600 581 b'finalizing requirements file and making repository readable '
601 582 b'again\n'
602 583 )
603 584 )
604 585 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
605 586
606 587 if upgrade_op.backup_store:
607 588 # The lock file from the old store won't be removed because nothing has a
608 589 # reference to its new location. So clean it up manually. Alternatively, we
609 590 # could update srcrepo.svfs and other variables to point to the new
610 591 # location. This is simpler.
611 592 assert backupvfs is not None # help pytype
612 593 backupvfs.unlink(b'store/lock')
613 594
614 595 return backuppath
615 596
616 597
617 598 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
618 599 if upgrade_op.backup_store:
619 600 backuppath = pycompat.mkdtemp(
620 601 prefix=b'upgradebackup.', dir=srcrepo.path
621 602 )
622 603 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
623 604 backupvfs = vfsmod.vfs(backuppath)
624 605 util.copyfile(
625 606 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
626 607 )
627 608 try:
628 609 util.copyfile(
629 610 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
630 611 )
631 612 except FileNotFoundError:
632 613 # The dirstate does not exist on an empty repo or a repo with no
633 614 # revision checked out
634 615 pass
635 616
636 617 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
637 618 use_v2 = new == b'v2'
638 619 if use_v2:
639 620 # Write the requirements *before* upgrading
640 621 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
641 622
642 623 srcrepo.dirstate._map.preload()
643 624 srcrepo.dirstate._use_dirstate_v2 = use_v2
644 625 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
645 626 srcrepo.dirstate._dirty = True
646 627 try:
647 628 srcrepo.vfs.unlink(b'dirstate')
648 629 except FileNotFoundError:
649 630 # The dirstate does not exist on an empty repo or a repo with no
650 631 # revision checked out
651 632 pass
652 633
653 634 srcrepo.dirstate.write(None)
654 635 if not use_v2:
655 636 # Remove the v2 requirement *after* downgrading
656 637 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
657 638
658 639
659 640 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
660 641 if add:
661 642 srcrepo.dirstate._use_tracked_hint = True
662 643 srcrepo.dirstate._dirty = True
663 644 srcrepo.dirstate._dirty_tracked_set = True
664 645 srcrepo.dirstate.write(None)
665 646 if not add:
666 647 srcrepo.dirstate.delete_tracked_hint()
667 648
668 649 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
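Taken together, the second hunk is a mechanical call-site migration: the module-level `_revlog_from_store_entry(repo, entry)` helper is deleted and every caller switches to the method on the entry itself, as in this before/after sketch:

    # before this changeset
    rl = _revlog_from_store_entry(srcrepo, entry)
    # after this changeset
    rl = entry.get_revlog_instance(srcrepo)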