store: introduce a main_file_path method for revlog...
Author: marmoute
Changeset: r51385:3473d18c (default branch)
@@ -1,1054 +1,1058 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # This is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
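As an illustration of the dispatch above, a minimal sketch (the matcher here is a hypothetical stand-in for Mercurial's real matcher objects):

# hypothetical matcher tracking everything under "foo/"
matcher = lambda p: p.startswith(b'foo')
matcher.visitdir = lambda d: d.startswith(b'foo')  # stand-in for the real visitdir API
_matchtrackedpath(b'data/foo/bar.txt.i', matcher)     # checks matcher(b'foo/bar.txt')
_matchtrackedpath(b'meta/foo/00manifest.i', matcher)  # checks matcher.visitdir(b'foo')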
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by the encode functions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in cmap.items():
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in range(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
166 166 lambda s: b''.join(list(decode(s))),
167 167 )
168 168
169 169
170 170 _encodefname, _decodefname = _buildencodefun()
171 171
172 172
173 173 def encodefilename(s):
174 174 """
175 175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 177 """
178 178 return _encodefname(encodedir(s))
179 179
180 180
181 181 def decodefilename(s):
182 182 """
183 183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 185 """
186 186 return decodedir(_decodefname(s))
187 187
188 188
189 189 def _buildlowerencodefun():
190 190 """
191 191 >>> f = _buildlowerencodefun()
192 192 >>> f(b'nothing/special.txt')
193 193 'nothing/special.txt'
194 194 >>> f(b'HELLO')
195 195 'hello'
196 196 >>> f(b'hello:world?')
197 197 'hello~3aworld~3f'
198 198 >>> f(b'the\\x07quick\\xADshot')
199 199 'the~07quick~adshot'
200 200 """
201 201 xchr = pycompat.bytechr
202 202 cmap = {xchr(x): xchr(x) for x in range(127)}
203 203 for x in _reserved():
204 204 cmap[xchr(x)] = b"~%02x" % x
205 205 for x in range(ord(b"A"), ord(b"Z") + 1):
206 206 cmap[xchr(x)] = xchr(x).lower()
207 207
208 208 def lowerencode(s):
209 209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 210
211 211 return lowerencode
212 212
213 213
214 214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
269 269 _maxstorepathlen = 120
270 270 _dirprefixlen = 8
271 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
274 274 def _hashencode(path, dotencode):
275 275 digest = hex(hashutil.sha1(path).digest())
276 276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 277 parts = _auxencode(le, dotencode)
278 278 basename = parts[-1]
279 279 _root, ext = os.path.splitext(basename)
280 280 sdirs = []
281 281 sdirslen = 0
282 282 for p in parts[:-1]:
283 283 d = p[:_dirprefixlen]
284 284 if d[-1] in b'. ':
285 285 # Windows can't access dirs ending in period or space
286 286 d = d[:-1] + b'_'
287 287 if sdirslen == 0:
288 288 t = len(d)
289 289 else:
290 290 t = sdirslen + 1 + len(d)
291 291 if t > _maxshortdirslen:
292 292 break
293 293 sdirs.append(d)
294 294 sdirslen = t
295 295 dirs = b'/'.join(sdirs)
296 296 if len(dirs) > 0:
297 297 dirs += b'/'
298 298 res = b'dh/' + dirs + digest + ext
299 299 spaceleft = _maxstorepathlen - len(res)
300 300 if spaceleft > 0:
301 301 filler = basename[:spaceleft]
302 302 res = b'dh/' + dirs + filler + digest + ext
303 303 return res
304 304
305 305
306 306 def _hybridencode(path, dotencode):
307 307 """encodes path with a length limit
308 308
309 309 Encodes all paths that begin with 'data/', according to the following.
310 310
311 311 Default encoding (reversible):
312 312
313 313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 314 characters are encoded as '~xx', where xx is the two digit hex code
315 315 of the character (see encodefilename).
316 316 Relevant path components consisting of Windows reserved filenames are
317 317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 318
319 319 Hashed encoding (not reversible):
320 320
321 321 If the default-encoded path is longer than _maxstorepathlen, a
322 322 non-reversible hybrid hashing of the path is done instead.
323 323 This encoding uses up to _dirprefixlen characters of all directory
324 324 levels of the lowerencoded path, but not more levels than can fit into
325 325 _maxshortdirslen.
326 326 Then follows the filler followed by the sha digest of the full path.
327 327 The filler is the beginning of the basename of the lowerencoded path
328 328 (the basename is everything after the last path separator). The filler
329 329 is as long as possible, filling in characters from the basename until
330 330 the encoded path has _maxstorepathlen characters (or all chars of the
331 331 basename have been taken).
332 332 The extension (e.g. '.i' or '.d') is preserved.
333 333
334 334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 335 encoding was used.
336 336 """
337 337 path = encodedir(path)
338 338 ef = _encodefname(path).split(b'/')
339 339 res = b'/'.join(_auxencode(ef, dotencode))
340 340 if len(res) > _maxstorepathlen:
341 341 res = _hashencode(path, dotencode)
342 342 return res
343 343
344 344
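To make the length-limit behaviour concrete, a small sketch (hypothetical input; the digest itself depends on the path): any default-encoded path longer than _maxstorepathlen falls back to the non-reversible 'dh/' form, padded with filler up to the limit:

long_path = b'data/' + b'x' * 150 + b'.i'  # hypothetical name over the 120-byte limit
res = _hybridencode(long_path, True)
assert res.startswith(b'dh/')              # hashed, non-reversible encoding
assert len(res) == _maxstorepathlen        # filler pads the result up to the limit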
345 345 def _pathencode(path):
346 346 de = encodedir(path)
347 347 if len(path) > _maxstorepathlen:
348 348 return _hashencode(de, True)
349 349 ef = _encodefname(de).split(b'/')
350 350 res = b'/'.join(_auxencode(ef, True))
351 351 if len(res) > _maxstorepathlen:
352 352 return _hashencode(de, True)
353 353 return res
354 354
355 355
356 356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
359 359 def _plainhybridencode(f):
360 360 return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
375 375 _data = [
376 376 b'bookmarks',
377 377 b'narrowspec',
378 378 b'data',
379 379 b'meta',
380 380 b'00manifest.d',
381 381 b'00manifest.i',
382 382 b'00changelog.d',
383 383 b'00changelog.i',
384 384 b'phaseroots',
385 385 b'obsstore',
386 386 b'requires',
387 387 ]
388 388
389 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 390 REVLOG_FILES_OTHER_EXT = (
391 391 b'.idx',
392 392 b'.d',
393 393 b'.dat',
394 394 b'.n',
395 395 b'.nd',
396 396 b'.sda',
397 397 )
398 398 # file extensions that also use a `-SOMELONGIDHASH.ext` form
399 399 REVLOG_FILES_LONG_EXT = (
400 400 b'.nd',
401 401 b'.idx',
402 402 b'.dat',
403 403 b'.sda',
404 404 )
405 405 # files that are "volatile" and might change between listing and streaming
406 406 #
407 407 # note: the ".nd" files are nodemap data and won't "change" but they might be
408 408 # deleted.
409 409 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
410 410
411 411 # some exception to the above matching
412 412 #
413 413 # XXX This is currently not in use because of issue6542
414 414 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415 415
416 416
417 417 def is_revlog(f, kind, st):
418 418 if kind != stat.S_IFREG:
419 419 return None
420 420 return revlog_type(f)
421 421
422 422
423 423 def revlog_type(f):
424 424 # XXX we need to filter out `undo.*` files created by the transaction here;
425 425 # however, being naive about it also filters revlogs for files named `undo.*`, leading to
426 426 # issue6542. So we no longer use EXCLUDED.
427 427 if f.endswith(REVLOG_FILES_MAIN_EXT):
428 428 return FILEFLAGS_REVLOG_MAIN
429 429 elif f.endswith(REVLOG_FILES_OTHER_EXT):
430 430 t = FILETYPE_FILELOG_OTHER
431 431 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
432 432 t |= FILEFLAGS_VOLATILE
433 433 return t
434 434 return None
435 435
436 436
437 437 # the file is part of changelog data
438 438 FILEFLAGS_CHANGELOG = 1 << 13
439 439 # the file is part of manifest data
440 440 FILEFLAGS_MANIFESTLOG = 1 << 12
441 441 # the file is part of filelog data
442 442 FILEFLAGS_FILELOG = 1 << 11
443 443 # files that are not directly part of a revlog
444 444 FILEFLAGS_OTHER = 1 << 10
445 445
446 446 # the main entry point for a revlog
447 447 FILEFLAGS_REVLOG_MAIN = 1 << 1
448 448 # a secondary file for a revlog
449 449 FILEFLAGS_REVLOG_OTHER = 1 << 0
450 450
451 451 # files that are "volatile" and might change between listing and streaming
452 452 FILEFLAGS_VOLATILE = 1 << 20
453 453
454 454 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
457 457 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
458 458 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
459 459 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
460 460 FILETYPE_OTHER = FILEFLAGS_OTHER
461 461
462 462
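A short sketch of how these flags combine with revlog_type() (illustrative only):

assert revlog_type(b'data/foo.i') == FILEFLAGS_REVLOG_MAIN
t = revlog_type(b'00changelog.nd')           # nodemap data file
assert t & FILEFLAGS_REVLOG_OTHER and t & FILEFLAGS_VOLATILE
assert revlog_type(b'data/foo.txt') is None  # not a revlog file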
463 463 @attr.s(slots=True, init=False)
464 464 class BaseStoreEntry:
465 465 """An entry in the store
466 466
467 467 This is returned by `store.walk` and represents some data in the store."""
468 468
469 469 unencoded_path = attr.ib()
470 470 _is_volatile = attr.ib(default=False)
471 471 _file_size = attr.ib(default=None)
472 472
473 473 def __init__(
474 474 self,
475 475 unencoded_path,
476 476 is_volatile=False,
477 477 file_size=None,
478 478 ):
479 479 self.unencoded_path = unencoded_path
480 480 self._is_volatile = is_volatile
481 481 self._file_size = file_size
482 482
483 483 def files(self):
484 484 return [
485 485 StoreFile(
486 486 unencoded_path=self.unencoded_path,
487 487 file_size=self._file_size,
488 488 is_volatile=self._is_volatile,
489 489 )
490 490 ]
491 491
492 492
493 493 @attr.s(slots=True, init=False)
494 494 class SimpleStoreEntry(BaseStoreEntry):
495 495 """A generic entry in the store"""
496 496
497 497 is_revlog = False
498 498
499 499
500 500 @attr.s(slots=True, init=False)
501 501 class RevlogStoreEntry(BaseStoreEntry):
502 502 """A revlog entry in the store"""
503 503
504 504 is_revlog = True
505 505 revlog_type = attr.ib(default=None)
506 506 target_id = attr.ib(default=None)
507 507 is_revlog_main = attr.ib(default=None)
508 508
509 509 def __init__(
510 510 self,
511 511 unencoded_path,
512 512 revlog_type,
513 513 target_id,
514 514 is_revlog_main=False,
515 515 is_volatile=False,
516 516 file_size=None,
517 517 ):
518 518 super().__init__(
519 519 unencoded_path=unencoded_path,
520 520 is_volatile=is_volatile,
521 521 file_size=file_size,
522 522 )
523 523 self.revlog_type = revlog_type
524 524 self.target_id = target_id
525 525 self.is_revlog_main = is_revlog_main
526 526
527 def main_file_path(self):
528 """unencoded path of the main revlog file"""
529 return self.unencoded_path
530
527 531
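The new accessor is the point of this changeset: callers that need the path of a revlog's main file (see the upgrade engine hunks below) now go through main_file_path() instead of reading unencoded_path directly. A minimal sketch:

entry = RevlogStoreEntry(
    unencoded_path=b'data/foo.i',   # hypothetical filelog entry
    revlog_type=FILEFLAGS_FILELOG,
    target_id=b'foo',
    is_revlog_main=True,
)
assert entry.main_file_path() == b'data/foo.i'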
528 532 @attr.s(slots=True)
529 533 class StoreFile:
530 534 """a file matching an entry"""
531 535
532 536 unencoded_path = attr.ib()
533 537 _file_size = attr.ib(default=False)
534 538 is_volatile = attr.ib(default=False)
535 539
536 540 def file_size(self, vfs):
537 541 if self._file_size is not None:
538 542 return self._file_size
539 543 try:
540 544 return vfs.stat(self.unencoded_path).st_size
541 545 except FileNotFoundError:
542 546 return 0
543 547
544 548
545 549 def _gather_revlog(files_data):
546 550 """group files per revlog prefix
547 551
548 552 This returns a two-level nested dict. The top-level key is the revlog
549 553 prefix without its extension; the second level maps each file "suffix"
550 554 seen for that revlog to arbitrary file data.
551 555 """
552 556 revlogs = collections.defaultdict(dict)
553 557 for u, value in files_data:
554 558 name, ext = _split_revlog_ext(u)
555 559 revlogs[name][ext] = value
556 560 return sorted(revlogs.items())
557 561
558 562
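For example, a sketch with made-up file data:

files = [
    (b'data/foo.i', 1),
    (b'data/foo.d', 2),
    (b'data/bar.i', 3),
]
assert _gather_revlog(files) == [
    (b'data/bar', {b'.i': 3}),
    (b'data/foo', {b'.i': 1, b'.d': 2}),
]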
559 563 def _split_revlog_ext(filename):
560 564 """split the revlog file prefix from the variable extension"""
561 565 if filename.endswith(REVLOG_FILES_LONG_EXT):
562 566 char = b'-'
563 567 else:
564 568 char = b'.'
565 569 idx = filename.rfind(char)
566 570 return filename[:idx], filename[idx:]
567 571
568 572
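The split point depends on the extension family: long-hash extensions split on '-', everything else on '.' (sketch):

assert _split_revlog_ext(b'data/foo.i') == (b'data/foo', b'.i')
assert _split_revlog_ext(b'00changelog-1234abcd.nd') == (
    b'00changelog',
    b'-1234abcd.nd',
)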
569 573 def _ext_key(ext):
570 574 """a key to order revlog suffix
571 575
572 576 important to issue .i after other entry."""
573 577 # the only important part of this order is to keep the `.i` last.
574 578 if ext.endswith(b'.n'):
575 579 return (0, ext)
576 580 elif ext.endswith(b'.nd'):
577 581 return (10, ext)
578 582 elif ext.endswith(b'.d'):
579 583 return (20, ext)
580 584 elif ext.endswith(b'.i'):
581 585 return (50, ext)
582 586 else:
583 587 return (40, ext)
584 588
585 589
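Sorting suffixes with this key therefore yields the .i entry last (sketch):

exts = [b'.i', b'.d', b'.nd', b'.n']
assert sorted(exts, key=_ext_key) == [b'.n', b'.nd', b'.d', b'.i']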
586 590 class basicstore:
587 591 '''base class for local repository stores'''
588 592
589 593 def __init__(self, path, vfstype):
590 594 vfs = vfstype(path)
591 595 self.path = vfs.base
592 596 self.createmode = _calcmode(vfs)
593 597 vfs.createmode = self.createmode
594 598 self.rawvfs = vfs
595 599 self.vfs = vfsmod.filtervfs(vfs, encodedir)
596 600 self.opener = self.vfs
597 601
598 602 def join(self, f):
599 603 return self.path + b'/' + encodedir(f)
600 604
601 605 def _walk(self, relpath, recurse):
602 606 '''returns a sorted list of (unencoded, (revlog_type, size))'''
603 607 path = self.path
604 608 if relpath:
605 609 path += b'/' + relpath
606 610 striplen = len(self.path) + 1
607 611 l = []
608 612 if self.rawvfs.isdir(path):
609 613 visit = [path]
610 614 readdir = self.rawvfs.readdir
611 615 while visit:
612 616 p = visit.pop()
613 617 for f, kind, st in readdir(p, stat=True):
614 618 fp = p + b'/' + f
615 619 rl_type = is_revlog(f, kind, st)
616 620 if rl_type is not None:
617 621 n = util.pconvert(fp[striplen:])
618 622 l.append((decodedir(n), (rl_type, st.st_size)))
619 623 elif kind == stat.S_IFDIR and recurse:
620 624 visit.append(fp)
621 625
622 626 l.sort()
623 627 return l
624 628
625 629 def changelog(self, trypending, concurrencychecker=None):
626 630 return changelog.changelog(
627 631 self.vfs,
628 632 trypending=trypending,
629 633 concurrencychecker=concurrencychecker,
630 634 )
631 635
632 636 def manifestlog(self, repo, storenarrowmatch):
633 637 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
634 638 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
635 639
636 640 def datafiles(
637 641 self, matcher=None, undecodable=None
638 642 ) -> Generator[BaseStoreEntry, None, None]:
639 643 """Like walk, but excluding the changelog and root manifest.
640 644
641 645 When [undecodable] is None, revlogs names that can't be
642 646 decoded cause an exception. When it is provided, it should
643 647 be a list and the filenames that can't be decoded are added
644 648 to it instead. This is very rarely needed."""
645 649 dirs = [
646 650 (b'data', FILEFLAGS_FILELOG),
647 651 (b'meta', FILEFLAGS_MANIFESTLOG),
648 652 ]
649 653 for base_dir, rl_type in dirs:
650 654 files = self._walk(base_dir, True)
651 655 files = (f for f in files if f[1][0] is not None)
652 656 for revlog, details in _gather_revlog(files):
653 657 for ext, (t, s) in sorted(details.items()):
654 658 u = revlog + ext
655 659 revlog_target_id = revlog.split(b'/', 1)[1]
656 660 yield RevlogStoreEntry(
657 661 unencoded_path=u,
658 662 revlog_type=rl_type,
659 663 target_id=revlog_target_id,
660 664 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
661 665 is_volatile=bool(t & FILEFLAGS_VOLATILE),
662 666 file_size=s,
663 667 )
664 668
665 669 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
666 670 files = reversed(self._walk(b'', False))
667 671
668 672 changelogs = collections.defaultdict(dict)
669 673 manifestlogs = collections.defaultdict(dict)
670 674
671 675 for u, (t, s) in files:
672 676 if u.startswith(b'00changelog'):
673 677 name, ext = _split_revlog_ext(u)
674 678 changelogs[name][ext] = (t, s)
675 679 elif u.startswith(b'00manifest'):
676 680 name, ext = _split_revlog_ext(u)
677 681 manifestlogs[name][ext] = (t, s)
678 682 else:
679 683 yield SimpleStoreEntry(
680 684 unencoded_path=u,
681 685 is_volatile=bool(t & FILEFLAGS_VOLATILE),
682 686 file_size=s,
683 687 )
684 688 # yield manifest before changelog
685 689 top_rl = [
686 690 (manifestlogs, FILEFLAGS_MANIFESTLOG),
687 691 (changelogs, FILEFLAGS_CHANGELOG),
688 692 ]
689 693 assert len(manifestlogs) <= 1
690 694 assert len(changelogs) <= 1
691 695 for data, revlog_type in top_rl:
692 696 for revlog, details in sorted(data.items()):
693 697 # (keeping ordering so we get 00changelog.i last)
694 698 key = lambda x: _ext_key(x[0])
695 699 for ext, (t, s) in sorted(details.items(), key=key):
696 700 u = revlog + ext
697 701 yield RevlogStoreEntry(
698 702 unencoded_path=u,
699 703 revlog_type=revlog_type,
700 704 target_id=b'',
701 705 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
702 706 is_volatile=bool(t & FILEFLAGS_VOLATILE),
703 707 file_size=s,
704 708 )
705 709
706 710 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
707 711 """return files related to data storage (ie: revlogs)
708 712
709 713 yields BaseStoreEntry objects for those files
710 714
711 715 if a matcher is passed, only storage files for tracked paths that
712 716 match the matcher are yielded
713 717 """
714 718 # yield data files first
715 719 for x in self.datafiles(matcher):
716 720 yield x
717 721 for x in self.topfiles():
718 722 yield x
719 723
720 724 def copylist(self):
721 725 return _data
722 726
723 727 def write(self, tr):
724 728 pass
725 729
726 730 def invalidatecaches(self):
727 731 pass
728 732
729 733 def markremoved(self, fn):
730 734 pass
731 735
732 736 def __contains__(self, path):
733 737 '''Checks if the store contains path'''
734 738 path = b"/".join((b"data", path))
735 739 # file?
736 740 if self.vfs.exists(path + b".i"):
737 741 return True
738 742 # dir?
739 743 if not path.endswith(b"/"):
740 744 path = path + b"/"
741 745 return self.vfs.exists(path)
742 746
743 747
744 748 class encodedstore(basicstore):
745 749 def __init__(self, path, vfstype):
746 750 vfs = vfstype(path + b'/store')
747 751 self.path = vfs.base
748 752 self.createmode = _calcmode(vfs)
749 753 vfs.createmode = self.createmode
750 754 self.rawvfs = vfs
751 755 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
752 756 self.opener = self.vfs
753 757
754 758 # note: topfiles would also need a decode phase. It is just that in
755 759 # practice we do not have any file outside of `data/` that needs encoding.
756 760 # However that might change, so we should probably add a test and encoding/
757 761 # decoding for it too; see issue6548
758 762
759 763 def datafiles(
760 764 self, matcher=None, undecodable=None
761 765 ) -> Generator[BaseStoreEntry, None, None]:
762 766 for entry in super(encodedstore, self).datafiles():
763 767 try:
764 768 f1 = entry.unencoded_path
765 769 f2 = decodefilename(f1)
766 770 except KeyError:
767 771 if undecodable is None:
768 772 msg = _(b'undecodable revlog name %s') % f1
769 773 raise error.StorageError(msg)
770 774 else:
771 775 undecodable.append(f1)
772 776 continue
773 777 if not _matchtrackedpath(f2, matcher):
774 778 continue
775 779 entry.unencoded_path = f2
776 780 yield entry
777 781
778 782 def join(self, f):
779 783 return self.path + b'/' + encodefilename(f)
780 784
781 785 def copylist(self):
782 786 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
783 787
784 788
785 789 class fncache:
786 790 # the filename used to be partially encoded
787 791 # hence the encodedir/decodedir dance
788 792 def __init__(self, vfs):
789 793 self.vfs = vfs
790 794 self._ignores = set()
791 795 self.entries = None
792 796 self._dirty = False
793 797 # set of new additions to fncache
794 798 self.addls = set()
795 799
796 800 def ensureloaded(self, warn=None):
797 801 """read the fncache file if not already read.
798 802
799 803 If the file on disk is corrupted, raise. If warn is provided,
800 804 warn and keep going instead."""
801 805 if self.entries is None:
802 806 self._load(warn)
803 807
804 808 def _load(self, warn=None):
805 809 '''fill the entries from the fncache file'''
806 810 self._dirty = False
807 811 try:
808 812 fp = self.vfs(b'fncache', mode=b'rb')
809 813 except IOError:
810 814 # skip nonexistent file
811 815 self.entries = set()
812 816 return
813 817
814 818 self.entries = set()
815 819 chunk = b''
816 820 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
817 821 chunk += c
818 822 try:
819 823 p = chunk.rindex(b'\n')
820 824 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
821 825 chunk = chunk[p + 1 :]
822 826 except ValueError:
823 827 # substring '\n' not found, maybe the entry is bigger than the
824 828 # chunksize, so let's keep iterating
825 829 pass
826 830
827 831 if chunk:
828 832 msg = _(b"fncache does not end with a newline")
829 833 if warn:
830 834 warn(msg + b'\n')
831 835 else:
832 836 raise error.Abort(
833 837 msg,
834 838 hint=_(
835 839 b"use 'hg debugrebuildfncache' to "
836 840 b"rebuild the fncache"
837 841 ),
838 842 )
839 843 self._checkentries(fp, warn)
840 844 fp.close()
841 845
842 846 def _checkentries(self, fp, warn):
843 847 """make sure there is no empty string in entries"""
844 848 if b'' in self.entries:
845 849 fp.seek(0)
846 850 for n, line in enumerate(fp):
847 851 if not line.rstrip(b'\n'):
848 852 t = _(b'invalid entry in fncache, line %d') % (n + 1)
849 853 if warn:
850 854 warn(t + b'\n')
851 855 else:
852 856 raise error.Abort(t)
853 857
854 858 def write(self, tr):
855 859 if self._dirty:
856 860 assert self.entries is not None
857 861 self.entries = self.entries | self.addls
858 862 self.addls = set()
859 863 tr.addbackup(b'fncache')
860 864 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
861 865 if self.entries:
862 866 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
863 867 fp.close()
864 868 self._dirty = False
865 869 if self.addls:
866 870 # if we have just new entries, let's append them to the fncache
867 871 tr.addbackup(b'fncache')
868 872 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
869 873 if self.addls:
870 874 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
871 875 fp.close()
872 876 self.entries = None
873 877 self.addls = set()
874 878
875 879 def addignore(self, fn):
876 880 self._ignores.add(fn)
877 881
878 882 def add(self, fn):
879 883 if fn in self._ignores:
880 884 return
881 885 if self.entries is None:
882 886 self._load()
883 887 if fn not in self.entries:
884 888 self.addls.add(fn)
885 889
886 890 def remove(self, fn):
887 891 if self.entries is None:
888 892 self._load()
889 893 if fn in self.addls:
890 894 self.addls.remove(fn)
891 895 return
892 896 try:
893 897 self.entries.remove(fn)
894 898 self._dirty = True
895 899 except KeyError:
896 900 pass
897 901
898 902 def __contains__(self, fn):
899 903 if fn in self.addls:
900 904 return True
901 905 if self.entries is None:
902 906 self._load()
903 907 return fn in self.entries
904 908
905 909 def __iter__(self):
906 910 if self.entries is None:
907 911 self._load()
908 912 return iter(self.entries | self.addls)
909 913
910 914
911 915 class _fncachevfs(vfsmod.proxyvfs):
912 916 def __init__(self, vfs, fnc, encode):
913 917 vfsmod.proxyvfs.__init__(self, vfs)
914 918 self.fncache = fnc
915 919 self.encode = encode
916 920
917 921 def __call__(self, path, mode=b'r', *args, **kw):
918 922 encoded = self.encode(path)
919 923 if (
920 924 mode not in (b'r', b'rb')
921 925 and (path.startswith(b'data/') or path.startswith(b'meta/'))
922 926 and revlog_type(path) is not None
923 927 ):
924 928 # do not trigger a fncache load when adding a file that already is
925 929 # known to exist.
926 930 notload = self.fncache.entries is None and self.vfs.exists(encoded)
927 931 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
928 932 # when appending to an existing file, if the file has size zero,
929 933 # it should be considered as missing. Such zero-size files are
930 934 # the result of truncation when a transaction is aborted.
931 935 notload = False
932 936 if not notload:
933 937 self.fncache.add(path)
934 938 return self.vfs(encoded, mode, *args, **kw)
935 939
936 940 def join(self, path):
937 941 if path:
938 942 return self.vfs.join(self.encode(path))
939 943 else:
940 944 return self.vfs.join(path)
941 945
942 946 def register_file(self, path):
943 947 """generic hook point to lets fncache steer its stew"""
944 948 if path.startswith(b'data/') or path.startswith(b'meta/'):
945 949 self.fncache.add(path)
946 950
947 951
948 952 class fncachestore(basicstore):
949 953 def __init__(self, path, vfstype, dotencode):
950 954 if dotencode:
951 955 encode = _pathencode
952 956 else:
953 957 encode = _plainhybridencode
954 958 self.encode = encode
955 959 vfs = vfstype(path + b'/store')
956 960 self.path = vfs.base
957 961 self.pathsep = self.path + b'/'
958 962 self.createmode = _calcmode(vfs)
959 963 vfs.createmode = self.createmode
960 964 self.rawvfs = vfs
961 965 fnc = fncache(vfs)
962 966 self.fncache = fnc
963 967 self.vfs = _fncachevfs(vfs, fnc, encode)
964 968 self.opener = self.vfs
965 969
966 970 def join(self, f):
967 971 return self.pathsep + self.encode(f)
968 972
969 973 def getsize(self, path):
970 974 return self.rawvfs.stat(path).st_size
971 975
972 976 def datafiles(
973 977 self, matcher=None, undecodable=None
974 978 ) -> Generator[BaseStoreEntry, None, None]:
975 979 files = ((f, revlog_type(f)) for f in self.fncache)
976 980 # Note: all files in fncache should be revlog related. However the
977 981 # fncache might contain such files added by previous versions of
978 982 # Mercurial.
979 983 files = (f for f in files if f[1] is not None)
980 984 by_revlog = _gather_revlog(files)
981 985 for revlog, details in by_revlog:
982 986 if revlog.startswith(b'data/'):
983 987 rl_type = FILEFLAGS_FILELOG
984 988 revlog_target_id = revlog.split(b'/', 1)[1]
985 989 elif revlog.startswith(b'meta/'):
986 990 rl_type = FILEFLAGS_MANIFESTLOG
987 991 # drop the initial directory and the `00manifest` file part
988 992 tmp = revlog.split(b'/', 1)[1]
989 993 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
990 994 else:
991 995 # unreachable
992 996 assert False, revlog
993 997 for ext, t in sorted(details.items()):
994 998 f = revlog + ext
995 999 if not _matchtrackedpath(f, matcher):
996 1000 continue
997 1001 yield RevlogStoreEntry(
998 1002 unencoded_path=f,
999 1003 revlog_type=rl_type,
1000 1004 target_id=revlog_target_id,
1001 1005 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
1002 1006 is_volatile=bool(t & FILEFLAGS_VOLATILE),
1003 1007 )
1004 1008
1005 1009 def copylist(self):
1006 1010 d = (
1007 1011 b'bookmarks',
1008 1012 b'narrowspec',
1009 1013 b'data',
1010 1014 b'meta',
1011 1015 b'dh',
1012 1016 b'fncache',
1013 1017 b'phaseroots',
1014 1018 b'obsstore',
1015 1019 b'00manifest.d',
1016 1020 b'00manifest.i',
1017 1021 b'00changelog.d',
1018 1022 b'00changelog.i',
1019 1023 b'requires',
1020 1024 )
1021 1025 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1022 1026
1023 1027 def write(self, tr):
1024 1028 self.fncache.write(tr)
1025 1029
1026 1030 def invalidatecaches(self):
1027 1031 self.fncache.entries = None
1028 1032 self.fncache.addls = set()
1029 1033
1030 1034 def markremoved(self, fn):
1031 1035 self.fncache.remove(fn)
1032 1036
1033 1037 def _exists(self, f):
1034 1038 ef = self.encode(f)
1035 1039 try:
1036 1040 self.getsize(ef)
1037 1041 return True
1038 1042 except FileNotFoundError:
1039 1043 return False
1040 1044
1041 1045 def __contains__(self, path):
1042 1046 '''Checks if the store contains path'''
1043 1047 path = b"/".join((b"data", path))
1044 1048 # check for files (exact match)
1045 1049 e = path + b'.i'
1046 1050 if e in self.fncache and self._exists(e):
1047 1051 return True
1048 1052 # now check for directories (prefix match)
1049 1053 if not path.endswith(b'/'):
1050 1054 path += b'/'
1051 1055 for e in self.fncache:
1052 1056 if e.startswith(path) and self._exists(e):
1053 1057 return True
1054 1058 return False
@@ -1,668 +1,668 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import stat
10 10
11 11 from ..i18n import _
12 12 from ..pycompat import getattr
13 13 from .. import (
14 14 changelog,
15 15 error,
16 16 filelog,
17 17 manifest,
18 18 metadata,
19 19 pycompat,
20 20 requirements,
21 21 scmutil,
22 22 store,
23 23 util,
24 24 vfs as vfsmod,
25 25 )
26 26 from ..revlogutils import (
27 27 constants as revlogconst,
28 28 flagutil,
29 29 nodemap,
30 30 sidedata as sidedatamod,
31 31 )
32 32 from . import actions as upgrade_actions
33 33
34 34
35 35 def get_sidedata_helpers(srcrepo, dstrepo):
36 36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 37 sequential = pycompat.iswindows or not use_w
38 38 if not sequential:
39 39 srcrepo.register_sidedata_computer(
40 40 revlogconst.KIND_CHANGELOG,
41 41 sidedatamod.SD_FILES,
42 42 (sidedatamod.SD_FILES,),
43 43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 44 flagutil.REVIDX_HASCOPIESINFO,
45 45 replace=True,
46 46 )
47 47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48 48
49 49
50 50 def _revlog_from_store_entry(repo, entry):
51 51 """Obtain a revlog from a repo store entry.
52 52
53 53 An instance of the appropriate class is returned.
54 54 """
55 55 if entry.revlog_type == store.FILEFLAGS_CHANGELOG:
56 56 return changelog.changelog(repo.svfs)
57 57 elif entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
58 58 mandir = entry.target_id.rstrip(b'/')
59 59 return manifest.manifestrevlog(
60 60 repo.nodeconstants, repo.svfs, tree=mandir
61 61 )
62 62 else:
63 63 return filelog.filelog(repo.svfs, entry.target_id)
64 64
65 65
66 66 def _copyrevlog(tr, destrepo, oldrl, entry):
67 67 """copy all relevant files for `oldrl` into `destrepo` store
68 68
69 69 Files are copied "as is" without any transformation. The copy is performed
70 70 without extra checks. Callers are responsible for making sure the copied
71 71 content is compatible with format of the destination repository.
72 72 """
73 73 oldrl = getattr(oldrl, '_revlog', oldrl)
74 74 newrl = _revlog_from_store_entry(destrepo, entry)
75 75 newrl = getattr(newrl, '_revlog', newrl)
76 76
77 77 oldvfs = oldrl.opener
78 78 newvfs = newrl.opener
79 79 oldindex = oldvfs.join(oldrl._indexfile)
80 80 newindex = newvfs.join(newrl._indexfile)
81 81 olddata = oldvfs.join(oldrl._datafile)
82 82 newdata = newvfs.join(newrl._datafile)
83 83
84 84 with newvfs(newrl._indexfile, b'w'):
85 85 pass # create all the directories
86 86
87 87 util.copyfile(oldindex, newindex)
88 88 copydata = oldrl.opener.exists(oldrl._datafile)
89 89 if copydata:
90 90 util.copyfile(olddata, newdata)
91 91
92 92 if entry.revlog_type & store.FILEFLAGS_FILELOG:
93 unencodedname = entry.unencoded_path
93 unencodedname = entry.main_file_path()
94 94 destrepo.svfs.fncache.add(unencodedname)
95 95 if copydata:
96 96 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
97 97
98 98
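The fncache registration above depends on the filelog's main file path ending in '.i'; the optional data file is derived by swapping that two-byte suffix. A sketch of the derivation:

main = b'data/foo.i'                  # what entry.main_file_path() returns here
assert main[:-2] + b'.d' == b'data/foo.d'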
99 99 UPGRADE_CHANGELOG = b"changelog"
100 100 UPGRADE_MANIFEST = b"manifest"
101 101 UPGRADE_FILELOGS = b"all-filelogs"
102 102
103 103 UPGRADE_ALL_REVLOGS = frozenset(
104 104 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
105 105 )
106 106
107 107
108 108 def matchrevlog(revlogfilter, rl_type):
109 109 """check if a revlog is selected for cloning.
110 110
111 111 In other words, are there any updates which need to be done on the revlog,
112 112 or can it be blindly copied.
113 113
114 114 The store entry is checked against the passed filter"""
115 115 if rl_type & store.FILEFLAGS_CHANGELOG:
116 116 return UPGRADE_CHANGELOG in revlogfilter
117 117 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
118 118 return UPGRADE_MANIFEST in revlogfilter
119 119 assert rl_type & store.FILEFLAGS_FILELOG
120 120 return UPGRADE_FILELOGS in revlogfilter
121 121
122 122
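A quick sketch of the filter check, using the flag constants from store:

assert matchrevlog({UPGRADE_CHANGELOG}, store.FILETYPE_CHANGELOG_MAIN)
assert not matchrevlog({UPGRADE_CHANGELOG}, store.FILETYPE_FILELOG_MAIN)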
123 123 def _perform_clone(
124 124 ui,
125 125 dstrepo,
126 126 tr,
127 127 old_revlog,
128 128 entry,
129 129 upgrade_op,
130 130 sidedata_helpers,
131 131 oncopiedrevision,
132 132 ):
133 133 """returns the new revlog object created"""
134 134 newrl = None
135 revlog_path = entry.unencoded_path
135 revlog_path = entry.main_file_path()
136 136 if matchrevlog(upgrade_op.revlogs_to_process, entry.revlog_type):
137 137 ui.note(
138 138 _(b'cloning %d revisions from %s\n')
139 139 % (len(old_revlog), revlog_path)
140 140 )
141 141 newrl = _revlog_from_store_entry(dstrepo, entry)
142 142 old_revlog.clone(
143 143 tr,
144 144 newrl,
145 145 addrevisioncb=oncopiedrevision,
146 146 deltareuse=upgrade_op.delta_reuse_mode,
147 147 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
148 148 sidedata_helpers=sidedata_helpers,
149 149 )
150 150 else:
151 151 msg = _(b'blindly copying %s containing %i revisions\n')
152 152 ui.note(msg % (revlog_path, len(old_revlog)))
153 153 _copyrevlog(tr, dstrepo, old_revlog, entry)
154 154
155 155 newrl = _revlog_from_store_entry(dstrepo, entry)
156 156 return newrl
157 157
158 158
159 159 def _clonerevlogs(
160 160 ui,
161 161 srcrepo,
162 162 dstrepo,
163 163 tr,
164 164 upgrade_op,
165 165 ):
166 166 """Copy revlogs between 2 repos."""
167 167 revcount = 0
168 168 srcsize = 0
169 169 srcrawsize = 0
170 170 dstsize = 0
171 171 fcount = 0
172 172 frevcount = 0
173 173 fsrcsize = 0
174 174 frawsize = 0
175 175 fdstsize = 0
176 176 mcount = 0
177 177 mrevcount = 0
178 178 msrcsize = 0
179 179 mrawsize = 0
180 180 mdstsize = 0
181 181 crevcount = 0
182 182 csrcsize = 0
183 183 crawsize = 0
184 184 cdstsize = 0
185 185
186 186 alldatafiles = list(srcrepo.store.walk())
187 187 # mapping of data files which need to be cloned
188 188 # key is unencoded filename
189 189 # value is revlog_object_from_srcrepo
190 190 manifests = {}
191 191 changelogs = {}
192 192 filelogs = {}
193 193
194 194 # Perform a pass to collect metadata. This validates we can open all
195 195 # source files and allows a unified progress bar to be displayed.
196 196 for entry in alldatafiles:
197 197 if not (entry.is_revlog and entry.is_revlog_main):
198 198 continue
199 199
200 200 rl = _revlog_from_store_entry(srcrepo, entry)
201 201
202 202 info = rl.storageinfo(
203 203 exclusivefiles=True,
204 204 revisionscount=True,
205 205 trackedsize=True,
206 206 storedsize=True,
207 207 )
208 208
209 209 revcount += info[b'revisionscount'] or 0
210 210 datasize = info[b'storedsize'] or 0
211 211 rawsize = info[b'trackedsize'] or 0
212 212
213 213 srcsize += datasize
214 214 srcrawsize += rawsize
215 215
216 216 # This is for the separate progress bars.
217 217 if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
218 218 changelogs[entry.target_id] = entry
219 219 crevcount += len(rl)
220 220 csrcsize += datasize
221 221 crawsize += rawsize
222 222 elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
223 223 manifests[entry.target_id] = entry
224 224 mcount += 1
225 225 mrevcount += len(rl)
226 226 msrcsize += datasize
227 227 mrawsize += rawsize
228 228 elif entry.revlog_type & store.FILEFLAGS_FILELOG:
229 229 filelogs[entry.target_id] = entry
230 230 fcount += 1
231 231 frevcount += len(rl)
232 232 fsrcsize += datasize
233 233 frawsize += rawsize
234 234 else:
235 235 raise error.ProgrammingError(b'unknown revlog type')
236 236
237 237 if not revcount:
238 238 return
239 239
240 240 ui.status(
241 241 _(
242 242 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
243 243 b'%d in changelog)\n'
244 244 )
245 245 % (revcount, frevcount, mrevcount, crevcount)
246 246 )
247 247 ui.status(
248 248 _(b'migrating %s in store; %s tracked data\n')
249 249 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
250 250 )
251 251
252 252 # Used to keep track of progress.
253 253 progress = None
254 254
255 255 def oncopiedrevision(rl, rev, node):
256 256 progress.increment()
257 257
258 258 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
259 259
260 260 # Migrating filelogs
261 261 ui.status(
262 262 _(
263 263 b'migrating %d filelogs containing %d revisions '
264 264 b'(%s in store; %s tracked data)\n'
265 265 )
266 266 % (
267 267 fcount,
268 268 frevcount,
269 269 util.bytecount(fsrcsize),
270 270 util.bytecount(frawsize),
271 271 )
272 272 )
273 273 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
274 274 for target_id, entry in sorted(filelogs.items()):
275 275 oldrl = _revlog_from_store_entry(srcrepo, entry)
276 276
277 277 newrl = _perform_clone(
278 278 ui,
279 279 dstrepo,
280 280 tr,
281 281 oldrl,
282 282 entry,
283 283 upgrade_op,
284 284 sidedata_helpers,
285 285 oncopiedrevision,
286 286 )
287 287 info = newrl.storageinfo(storedsize=True)
288 288 fdstsize += info[b'storedsize'] or 0
289 289 ui.status(
290 290 _(
291 291 b'finished migrating %d filelog revisions across %d '
292 292 b'filelogs; change in size: %s\n'
293 293 )
294 294 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
295 295 )
296 296
297 297 # Migrating manifests
298 298 ui.status(
299 299 _(
300 300 b'migrating %d manifests containing %d revisions '
301 301 b'(%s in store; %s tracked data)\n'
302 302 )
303 303 % (
304 304 mcount,
305 305 mrevcount,
306 306 util.bytecount(msrcsize),
307 307 util.bytecount(mrawsize),
308 308 )
309 309 )
310 310 if progress:
311 311 progress.complete()
312 312 progress = srcrepo.ui.makeprogress(
313 313 _(b'manifest revisions'), total=mrevcount
314 314 )
315 315 for target_id, entry in sorted(manifests.items()):
316 316 oldrl = _revlog_from_store_entry(srcrepo, entry)
317 317 newrl = _perform_clone(
318 318 ui,
319 319 dstrepo,
320 320 tr,
321 321 oldrl,
322 322 entry,
323 323 upgrade_op,
324 324 sidedata_helpers,
325 325 oncopiedrevision,
326 326 )
327 327 info = newrl.storageinfo(storedsize=True)
328 328 mdstsize += info[b'storedsize'] or 0
329 329 ui.status(
330 330 _(
331 331 b'finished migrating %d manifest revisions across %d '
332 332 b'manifests; change in size: %s\n'
333 333 )
334 334 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
335 335 )
336 336
337 337 # Migrating changelog
338 338 ui.status(
339 339 _(
340 340 b'migrating changelog containing %d revisions '
341 341 b'(%s in store; %s tracked data)\n'
342 342 )
343 343 % (
344 344 crevcount,
345 345 util.bytecount(csrcsize),
346 346 util.bytecount(crawsize),
347 347 )
348 348 )
349 349 if progress:
350 350 progress.complete()
351 351 progress = srcrepo.ui.makeprogress(
352 352 _(b'changelog revisions'), total=crevcount
353 353 )
354 354 for target_id, entry in sorted(changelogs.items()):
355 355 oldrl = _revlog_from_store_entry(srcrepo, entry)
356 356 newrl = _perform_clone(
357 357 ui,
358 358 dstrepo,
359 359 tr,
360 360 oldrl,
361 361 entry,
362 362 upgrade_op,
363 363 sidedata_helpers,
364 364 oncopiedrevision,
365 365 )
366 366 info = newrl.storageinfo(storedsize=True)
367 367 cdstsize += info[b'storedsize'] or 0
368 368 progress.complete()
369 369 ui.status(
370 370 _(
371 371 b'finished migrating %d changelog revisions; change in size: '
372 372 b'%s\n'
373 373 )
374 374 % (crevcount, util.bytecount(cdstsize - csrcsize))
375 375 )
376 376
377 377 dstsize = fdstsize + mdstsize + cdstsize
378 378 ui.status(
379 379 _(
380 380 b'finished migrating %d total revisions; total change in store '
381 381 b'size: %s\n'
382 382 )
383 383 % (revcount, util.bytecount(dstsize - srcsize))
384 384 )
385 385
386 386
387 387 def _files_to_copy_post_revlog_clone(srcrepo):
388 388 """yields files which should be copied to destination after revlogs
389 389 are cloned"""
390 390 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
391 391 # don't copy revlogs as they are already cloned
392 392 if store.revlog_type(path) is not None:
393 393 continue
394 394 # Skip transaction related files.
395 395 if path.startswith(b'undo'):
396 396 continue
397 397 # Only copy regular files.
398 398 if kind != stat.S_IFREG:
399 399 continue
400 400 # Skip other skipped files.
401 401 if path in (b'lock', b'fncache'):
402 402 continue
403 403 # TODO: should we skip cache too?
404 404
405 405 yield path
406 406
407 407
408 408 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
409 409 """Replace the stores after current repository is upgraded
410 410
411 411 Creates a backup of current repository store at backup path
412 412 Replaces upgraded store files in current repo from upgraded one
413 413
414 414 Arguments:
415 415 currentrepo: repo object of current repository
416 416 upgradedrepo: repo object of the upgraded data
417 417 backupvfs: vfs object for the backup path
418 418 upgrade_op: upgrade operation object
419 419 to be used to decide what all is upgraded
420 420 """
421 421 # TODO: don't blindly rename everything in store
422 422 # There can be upgrades where store is not touched at all
423 423 if upgrade_op.backup_store:
424 424 util.rename(currentrepo.spath, backupvfs.join(b'store'))
425 425 else:
426 426 currentrepo.vfs.rmtree(b'store', forcibly=True)
427 427 util.rename(upgradedrepo.spath, currentrepo.spath)
428 428
429 429
430 430 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
431 431 """Hook point for extensions to perform additional actions during upgrade.
432 432
433 433 This function is called after revlogs and store files have been copied but
434 434 before the new store is swapped into the original location.
435 435 """
436 436
437 437
438 438 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
439 439 """Do the low-level work of upgrading a repository.
440 440
441 441 The upgrade is effectively performed as a copy between a source
442 442 repository and a temporary destination repository.
443 443
444 444 The source repository is unmodified for as long as possible so the
445 445 upgrade can abort at any time without causing loss of service for
446 446 readers and without corrupting the source repository.
447 447 """
448 448 assert srcrepo.currentwlock()
449 449 assert dstrepo.currentwlock()
450 450 backuppath = None
451 451 backupvfs = None
452 452
453 453 ui.status(
454 454 _(
455 455 b'(it is safe to interrupt this process any time before '
456 456 b'data migration completes)\n'
457 457 )
458 458 )
459 459
460 460 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
461 461 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
462 462 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
463 463 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
464 464
465 465 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
466 466 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
467 467 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
468 468 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
469 469
470 470 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
471 471 ui.status(_(b'create dirstate-tracked-hint file\n'))
472 472 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
473 473 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
474 474 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
475 475 ui.status(_(b'remove dirstate-tracked-hint file\n'))
476 476 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
477 477 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
478 478
479 479 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
480 480 return
481 481
482 482 if upgrade_op.requirements_only:
483 483 ui.status(_(b'upgrading repository requirements\n'))
484 484 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
485 485 # if there is only one action and that is persistent nodemap upgrade
486 486 # directly write the nodemap file and update requirements instead of going
487 487 # through the whole cloning process
488 488 elif (
489 489 len(upgrade_op.upgrade_actions) == 1
490 490 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
491 491 and not upgrade_op.removed_actions
492 492 ):
493 493 ui.status(
494 494 _(b'upgrading repository to use persistent nodemap feature\n')
495 495 )
496 496 with srcrepo.transaction(b'upgrade') as tr:
497 497 unfi = srcrepo.unfiltered()
498 498 cl = unfi.changelog
499 499 nodemap.persist_nodemap(tr, cl, force=True)
500 500 # we want to directly operate on the underlying revlog to force
501 501 # create a nodemap file. This is fine since this is upgrade code
502 502 # and it heavily relies on repository being revlog based
503 503 # hence accessing private attributes can be justified
504 504 nodemap.persist_nodemap(
505 505 tr, unfi.manifestlog._rootstore._revlog, force=True
506 506 )
507 507 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
508 508 elif (
509 509 len(upgrade_op.removed_actions) == 1
510 510 and [
511 511 x
512 512 for x in upgrade_op.removed_actions
513 513 if x.name == b'persistent-nodemap'
514 514 ]
515 515 and not upgrade_op.upgrade_actions
516 516 ):
517 517 ui.status(
518 518 _(b'downgrading repository to not use persistent nodemap feature\n')
519 519 )
520 520 with srcrepo.transaction(b'upgrade') as tr:
521 521 unfi = srcrepo.unfiltered()
522 522 cl = unfi.changelog
523 523 nodemap.delete_nodemap(tr, srcrepo, cl)
524 524 # check comment 20 lines above for accessing private attributes
525 525 nodemap.delete_nodemap(
526 526 tr, srcrepo, unfi.manifestlog._rootstore._revlog
527 527 )
528 528 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
529 529 else:
530 530 with dstrepo.transaction(b'upgrade') as tr:
531 531 _clonerevlogs(
532 532 ui,
533 533 srcrepo,
534 534 dstrepo,
535 535 tr,
536 536 upgrade_op,
537 537 )
538 538
539 539 # Now copy other files in the store directory.
540 540 for p in _files_to_copy_post_revlog_clone(srcrepo):
541 541 srcrepo.ui.status(_(b'copying %s\n') % p)
542 542 src = srcrepo.store.rawvfs.join(p)
543 543 dst = dstrepo.store.rawvfs.join(p)
544 544 util.copyfile(src, dst, copystat=True)
545 545
546 546 finishdatamigration(ui, srcrepo, dstrepo, requirements)
547 547
548 548 ui.status(_(b'data fully upgraded in a temporary repository\n'))
549 549
550 550 if upgrade_op.backup_store:
551 551 backuppath = pycompat.mkdtemp(
552 552 prefix=b'upgradebackup.', dir=srcrepo.path
553 553 )
554 554 backupvfs = vfsmod.vfs(backuppath)
555 555
556 556 # Make a backup of requires file first, as it is the first to be modified.
557 557 util.copyfile(
558 558 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
559 559 )
560 560
561 561 # We install an arbitrary requirement that clients must not support
562 562 # as a mechanism to lock out new clients during the data swap. This is
563 563 # better than allowing a client to continue while the repository is in
564 564 # an inconsistent state.
565 565 ui.status(
566 566 _(
567 567 b'marking source repository as being upgraded; clients will be '
568 568 b'unable to read from repository\n'
569 569 )
570 570 )
571 571 scmutil.writereporequirements(
572 572 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
573 573 )
574 574
575 575 ui.status(_(b'starting in-place swap of repository data\n'))
576 576 if upgrade_op.backup_store:
577 577 ui.status(
578 578 _(b'replaced files will be backed up at %s\n') % backuppath
579 579 )
580 580
581 581 # Now swap in the new store directory. Doing it as a rename should make
582 582 # the operation nearly instantaneous and atomic (at least in well-behaved
583 583 # environments).
584 584 ui.status(_(b'replacing store...\n'))
585 585 tstart = util.timer()
586 586 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
587 587 elapsed = util.timer() - tstart
588 588 ui.status(
589 589 _(
590 590 b'store replacement complete; repository was inconsistent for '
591 591 b'%0.1fs\n'
592 592 )
593 593 % elapsed
594 594 )
595 595
596 596 # We first write the requirements file. Any new requirements will lock
597 597 # out legacy clients.
598 598 ui.status(
599 599 _(
600 600 b'finalizing requirements file and making repository readable '
601 601 b'again\n'
602 602 )
603 603 )
604 604 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
605 605
606 606 if upgrade_op.backup_store:
607 607 # The lock file from the old store won't be removed because nothing has a
608 608 # reference to its new location. So clean it up manually. Alternatively, we
609 609 # could update srcrepo.svfs and other variables to point to the new
610 610 # location. This is simpler.
611 611 assert backupvfs is not None # help pytype
612 612 backupvfs.unlink(b'store/lock')
613 613
614 614 return backuppath
615 615
616 616
617 617 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
618 618 if upgrade_op.backup_store:
619 619 backuppath = pycompat.mkdtemp(
620 620 prefix=b'upgradebackup.', dir=srcrepo.path
621 621 )
622 622 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
623 623 backupvfs = vfsmod.vfs(backuppath)
624 624 util.copyfile(
625 625 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
626 626 )
627 627 try:
628 628 util.copyfile(
629 629 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
630 630 )
631 631 except FileNotFoundError:
632 632 # The dirstate does not exist on an empty repo or a repo with no
633 633 # revision checked out
634 634 pass
635 635
636 636 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
637 637 use_v2 = new == b'v2'
638 638 if use_v2:
639 639 # Write the requirements *before* upgrading
640 640 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
641 641
642 642 srcrepo.dirstate._map.preload()
643 643 srcrepo.dirstate._use_dirstate_v2 = use_v2
644 644 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
645 645 srcrepo.dirstate._dirty = True
646 646 try:
647 647 srcrepo.vfs.unlink(b'dirstate')
648 648 except FileNotFoundError:
649 649 # The dirstate does not exist on an empty repo or a repo with no
650 650 # revision checked out
651 651 pass
652 652
653 653 srcrepo.dirstate.write(None)
654 654 if not use_v2:
655 655 # Remove the v2 requirement *after* downgrading
656 656 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
657 657
658 658
659 659 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
660 660 if add:
661 661 srcrepo.dirstate._use_tracked_hint = True
662 662 srcrepo.dirstate._dirty = True
663 663 srcrepo.dirstate._dirty_tracked_set = True
664 664 srcrepo.dirstate.write(None)
665 665 if not add:
666 666 srcrepo.dirstate.delete_tracked_hint()
667 667
668 668 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)