store: stop relying on a `revlog_type` property...
marmoute -
r51564:e06d1a77 default
@@ -1,1218 +1,1200 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 filelog,
23 23 manifest,
24 24 policy,
25 25 pycompat,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import hashutil
30 30
31 31 parsers = policy.importmod('parsers')
32 32 # how many bytes should be read from fncache in one read
33 33 # It is done to prevent loading large fncache files into memory
34 34 fncache_chunksize = 10 ** 6
35 35
36 36
37 37 def _match_tracked_entry(entry, matcher):
38 38 """parses a fncache entry and returns whether the entry is tracking a path
39 39 matched by matcher or not.
40 40
41 41 If matcher is None, returns True"""
42 42
43 43 if matcher is None:
44 44 return True
45 45 if entry.is_filelog:
46 46 return matcher(entry.target_id)
47 47 elif entry.is_manifestlog:
48 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 50
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
72 72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
75 75 def decodedir(path):
76 76 """
77 77 >>> decodedir(b'data/foo.i')
78 78 'data/foo.i'
79 79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 80 'data/foo.i/bla.i'
81 81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 82 'data/foo.i.hg/bla.i'
83 83 """
84 84 if b".hg/" not in path:
85 85 return path
86 86 return (
87 87 path.replace(b".d.hg/", b".d/")
88 88 .replace(b".i.hg/", b".i/")
89 89 .replace(b".hg.hg/", b".hg/")
90 90 )
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by the encode functions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
111 111 def _buildencodefun():
112 112 """
113 113 >>> enc, dec = _buildencodefun()
114 114
115 115 >>> enc(b'nothing/special.txt')
116 116 'nothing/special.txt'
117 117 >>> dec(b'nothing/special.txt')
118 118 'nothing/special.txt'
119 119
120 120 >>> enc(b'HELLO')
121 121 '_h_e_l_l_o'
122 122 >>> dec(b'_h_e_l_l_o')
123 123 'HELLO'
124 124
125 125 >>> enc(b'hello:world?')
126 126 'hello~3aworld~3f'
127 127 >>> dec(b'hello~3aworld~3f')
128 128 'hello:world?'
129 129
130 130 >>> enc(b'the\\x07quick\\xADshot')
131 131 'the~07quick~adshot'
132 132 >>> dec(b'the~07quick~adshot')
133 133 'the\\x07quick\\xadshot'
134 134 """
135 135 e = b'_'
136 136 xchr = pycompat.bytechr
137 137 asciistr = list(map(xchr, range(127)))
138 138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139 139
140 140 cmap = {x: x for x in asciistr}
141 141 for x in _reserved():
142 142 cmap[xchr(x)] = b"~%02x" % x
143 143 for x in capitals + [ord(e)]:
144 144 cmap[xchr(x)] = e + xchr(x).lower()
145 145
146 146 dmap = {}
147 147 for k, v in cmap.items():
148 148 dmap[v] = k
149 149
150 150 def decode(s):
151 151 i = 0
152 152 while i < len(s):
153 153 for l in range(1, 4):
154 154 try:
155 155 yield dmap[s[i : i + l]]
156 156 i += l
157 157 break
158 158 except KeyError:
159 159 pass
160 160 else:
161 161 raise KeyError
162 162
163 163 return (
164 164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 165 lambda s: b''.join(list(decode(s))),
166 166 )
167 167
168 168
169 169 _encodefname, _decodefname = _buildencodefun()
170 170
171 171
172 172 def encodefilename(s):
173 173 """
174 174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 176 """
177 177 return _encodefname(encodedir(s))
178 178
179 179
180 180 def decodefilename(s):
181 181 """
182 182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 184 """
185 185 return decodedir(_decodefname(s))
186 186
187 187
188 188 def _buildlowerencodefun():
189 189 """
190 190 >>> f = _buildlowerencodefun()
191 191 >>> f(b'nothing/special.txt')
192 192 'nothing/special.txt'
193 193 >>> f(b'HELLO')
194 194 'hello'
195 195 >>> f(b'hello:world?')
196 196 'hello~3aworld~3f'
197 197 >>> f(b'the\\x07quick\\xADshot')
198 198 'the~07quick~adshot'
199 199 """
200 200 xchr = pycompat.bytechr
201 201 cmap = {xchr(x): xchr(x) for x in range(127)}
202 202 for x in _reserved():
203 203 cmap[xchr(x)] = b"~%02x" % x
204 204 for x in range(ord(b"A"), ord(b"Z") + 1):
205 205 cmap[xchr(x)] = xchr(x).lower()
206 206
207 207 def lowerencode(s):
208 208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209 209
210 210 return lowerencode
211 211
212 212
213 213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 214
215 215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 218
219 219
220 220 def _auxencode(path, dotencode):
221 221 """
222 222 Encodes filenames containing names reserved by Windows or which end in
223 223 period or space. Does not touch other single reserved characters c.
224 224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 225 Additionally encodes space or period at the beginning, if dotencode is
226 226 True. Parameter path is assumed to be all lowercase.
227 227 A segment only needs encoding if a reserved name appears as a
228 228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 229 doesn't need encoding.
230 230
231 231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 232 >>> _auxencode(s.split(b'/'), True)
233 233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 235 >>> _auxencode(s.split(b'/'), False)
236 236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 237 >>> _auxencode([b'foo. '], True)
238 238 ['foo.~20']
239 239 >>> _auxencode([b' .foo'], True)
240 240 ['~20.foo']
241 241 """
242 242 for i, n in enumerate(path):
243 243 if not n:
244 244 continue
245 245 if dotencode and n[0] in b'. ':
246 246 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 247 path[i] = n
248 248 else:
249 249 l = n.find(b'.')
250 250 if l == -1:
251 251 l = len(n)
252 252 if (l == 3 and n[:3] in _winres3) or (
253 253 l == 4
254 254 and n[3:4] <= b'9'
255 255 and n[3:4] >= b'1'
256 256 and n[:3] in _winres4
257 257 ):
258 258 # encode third letter ('aux' -> 'au~78')
259 259 ec = b"~%02x" % ord(n[2:3])
260 260 n = n[0:2] + ec + n[3:]
261 261 path[i] = n
262 262 if n[-1] in b'. ':
263 263 # encode last period or space ('foo...' -> 'foo..~2e')
264 264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 265 return path
266 266
267 267
268 268 _maxstorepathlen = 120
269 269 _dirprefixlen = 8
270 270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 271
272 272
273 273 def _hashencode(path, dotencode):
274 274 digest = hex(hashutil.sha1(path).digest())
275 275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 276 parts = _auxencode(le, dotencode)
277 277 basename = parts[-1]
278 278 _root, ext = os.path.splitext(basename)
279 279 sdirs = []
280 280 sdirslen = 0
281 281 for p in parts[:-1]:
282 282 d = p[:_dirprefixlen]
283 283 if d[-1] in b'. ':
284 284 # Windows can't access dirs ending in period or space
285 285 d = d[:-1] + b'_'
286 286 if sdirslen == 0:
287 287 t = len(d)
288 288 else:
289 289 t = sdirslen + 1 + len(d)
290 290 if t > _maxshortdirslen:
291 291 break
292 292 sdirs.append(d)
293 293 sdirslen = t
294 294 dirs = b'/'.join(sdirs)
295 295 if len(dirs) > 0:
296 296 dirs += b'/'
297 297 res = b'dh/' + dirs + digest + ext
298 298 spaceleft = _maxstorepathlen - len(res)
299 299 if spaceleft > 0:
300 300 filler = basename[:spaceleft]
301 301 res = b'dh/' + dirs + filler + digest + ext
302 302 return res
303 303
304 304
305 305 def _hybridencode(path, dotencode):
306 306 """encodes path with a length limit
307 307
308 308 Encodes all paths that begin with 'data/', according to the following.
309 309
310 310 Default encoding (reversible):
311 311
312 312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 313 characters are encoded as '~xx', where xx is the two digit hex code
314 314 of the character (see encodefilename).
315 315 Relevant path components consisting of Windows reserved filenames are
316 316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317 317
318 318 Hashed encoding (not reversible):
319 319
320 320 If the default-encoded path is longer than _maxstorepathlen, a
321 321 non-reversible hybrid hashing of the path is done instead.
322 322 This encoding uses up to _dirprefixlen characters of all directory
323 323 levels of the lowerencoded path, but not more levels than can fit into
324 324 _maxshortdirslen.
325 325 Then follows the filler followed by the sha digest of the full path.
326 326 The filler is the beginning of the basename of the lowerencoded path
327 327 (the basename is everything after the last path separator). The filler
328 328 is as long as possible, filling in characters from the basename until
329 329 the encoded path has _maxstorepathlen characters (or all chars of the
330 330 basename have been taken).
331 331 The extension (e.g. '.i' or '.d') is preserved.
332 332
333 333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 334 encoding was used.
335 335 """
336 336 path = encodedir(path)
337 337 ef = _encodefname(path).split(b'/')
338 338 res = b'/'.join(_auxencode(ef, dotencode))
339 339 if len(res) > _maxstorepathlen:
340 340 res = _hashencode(path, dotencode)
341 341 return res
342 342
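A minimal sketch of the length-limit behaviour described in the docstring above (not part of the commit; it assumes the helpers are importable from mercurial.store):

    from mercurial.store import _hybridencode

    # reversible default encoding: uppercase letters become '_x' pairs
    short = _hybridencode(b'data/FOO.txt.i', True)   # b'data/_f_o_o.txt.i'
    # a default-encoded path longer than _maxstorepathlen (120) falls
    # back to the non-reversible hashed form under 'dh/'
    hashed = _hybridencode(b'data/' + b'x' * 200 + b'.i', True)
    assert hashed.startswith(b'dh/') and not short.startswith(b'dh/')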
343 343
344 344 def _pathencode(path):
345 345 de = encodedir(path)
346 346 if len(path) > _maxstorepathlen:
347 347 return _hashencode(de, True)
348 348 ef = _encodefname(de).split(b'/')
349 349 res = b'/'.join(_auxencode(ef, True))
350 350 if len(res) > _maxstorepathlen:
351 351 return _hashencode(de, True)
352 352 return res
353 353
354 354
355 355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 356
357 357
358 358 def _plainhybridencode(f):
359 359 return _hybridencode(f, False)
360 360
361 361
362 362 def _calcmode(vfs):
363 363 try:
364 364 # files in .hg/ will be created using this mode
365 365 mode = vfs.stat().st_mode
366 366 # avoid some useless chmods
367 367 if (0o777 & ~util.umask) == (0o777 & mode):
368 368 mode = None
369 369 except OSError:
370 370 mode = None
371 371 return mode
372 372
373 373
374 374 _data = [
375 375 b'bookmarks',
376 376 b'narrowspec',
377 377 b'data',
378 378 b'meta',
379 379 b'00manifest.d',
380 380 b'00manifest.i',
381 381 b'00changelog.d',
382 382 b'00changelog.i',
383 383 b'phaseroots',
384 384 b'obsstore',
385 385 b'requires',
386 386 ]
387 387
388 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 REVLOG_FILES_OTHER_EXT = (
388 REVLOG_FILES_EXT = (
389 b'.i',
390 390 b'.idx',
391 391 b'.d',
392 392 b'.dat',
393 393 b'.n',
394 394 b'.nd',
395 395 b'.sda',
396 396 )
397 397 # file extension that also use a `-SOMELONGIDHASH.ext` form
398 398 REVLOG_FILES_LONG_EXT = (
399 399 b'.nd',
400 400 b'.idx',
401 401 b'.dat',
402 402 b'.sda',
403 403 )
404 404 # files that are "volatile" and might change between listing and streaming
405 405 #
406 406 # note: the ".nd" files are nodemap data and won't "change" but they might be
407 407 # deleted.
408 408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409 409
410 410 # some exceptions to the above matching
411 411 #
412 412 # XXX This is currently not in use because of issue6542
413 413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 414
415 415
416 416 def is_revlog(f, kind, st):
417 417 if kind != stat.S_IFREG:
418 return None
419 return revlog_type(f)
418 return False
419 if f.endswith(REVLOG_FILES_EXT):
420 return True
421 return False
420 422
421 423
422 def revlog_type(f):
423 # XXX we need to filter `undo.` created by the transaction here, however
424 # being naive about it also filter revlog for `undo.*` files, leading to
425 # issue6542. So we no longer use EXCLUDED.
426 if f.endswith(REVLOG_FILES_MAIN_EXT):
427 return FILEFLAGS_REVLOG_MAIN
428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
429 t = FILETYPE_FILELOG_OTHER
430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
431 t |= FILEFLAGS_VOLATILE
432 return t
433 return None
424 def is_revlog_file(f):
425 if f.endswith(REVLOG_FILES_EXT):
426 return True
427 return False
434 428
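The replacement helper classifies store files purely by extension instead of computing a type value; a small sketch of the expected behaviour (assuming mercurial.store is importable):

    from mercurial.store import is_revlog_file

    assert is_revlog_file(b'data/foo.i')      # revlog index
    assert is_revlog_file(b'00changelog.d')   # revlog data
    assert is_revlog_file(b'00changelog.n')   # nodemap docket
    assert not is_revlog_file(b'fncache')     # not revlog related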
435 429
436 430 # the file is part of changelog data
437 431 FILEFLAGS_CHANGELOG = 1 << 13
438 432 # the file is part of manifest data
439 433 FILEFLAGS_MANIFESTLOG = 1 << 12
440 434 # the file is part of filelog data
441 435 FILEFLAGS_FILELOG = 1 << 11
442 436 # file that are not directly part of a revlog
443 437 FILEFLAGS_OTHER = 1 << 10
444 438
445 439 # the main entry point for a revlog
446 440 FILEFLAGS_REVLOG_MAIN = 1 << 1
447 441 # a secondary file for a revlog
448 442 FILEFLAGS_REVLOG_OTHER = 1 << 0
449 443
450 444 # files that are "volatile" and might change between listing and streaming
451 445 FILEFLAGS_VOLATILE = 1 << 20
452 446
453 447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
454 448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
455 449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
456 450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
457 451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
458 452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
459 453 FILETYPE_OTHER = FILEFLAGS_OTHER
460 454
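Each FILETYPE_* constant combines one payload flag (what the data is) with one role flag (main index versus secondary file), so membership tests reduce to bitwise ANDs, exactly as the RevlogStoreEntry properties further down do. A quick illustration:

    t = FILETYPE_CHANGELOG_MAIN
    assert t & FILEFLAGS_CHANGELOG       # payload: changelog data
    assert t & FILEFLAGS_REVLOG_MAIN     # role: the '.i' entry point
    assert not (t & FILEFLAGS_FILELOG)   # no other payload bit is set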
461 455
462 456 @attr.s(slots=True)
463 457 class StoreFile:
464 458 """a file matching a store entry"""
465 459
466 460 unencoded_path = attr.ib()
467 461 _file_size = attr.ib(default=None)
468 462 is_volatile = attr.ib(default=False)
469 463
470 464 def file_size(self, vfs):
471 465 if self._file_size is None:
472 466 if vfs is None:
473 467 msg = b"calling vfs-less file_size without prior call: %s"
474 468 msg %= self.unencoded_path
475 469 raise error.ProgrammingError(msg)
476 470 try:
477 471 self._file_size = vfs.stat(self.unencoded_path).st_size
478 472 except FileNotFoundError:
479 473 self._file_size = 0
480 474 return self._file_size
481 475
482 476 def get_stream(self, vfs, copies):
483 477 """return data "stream" information for this file
484 478
485 479 (unencoded_file_path, content_iterator, content_size)
486 480 """
487 481 size = self.file_size(None)
488 482
489 483 def get_stream():
490 484 actual_path = copies[vfs.join(self.unencoded_path)]
491 485 with open(actual_path, 'rb') as fp:
492 486 yield None # ready to stream
493 487 if size <= 65536:
494 488 yield fp.read(size)
495 489 else:
496 490 yield from util.filechunkiter(fp, limit=size)
497 491
498 492 s = get_stream()
499 493 next(s)
500 494 return (self.unencoded_path, s, size)
501 495
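Note that get_stream primes the generator with next(s), so the initial `yield None` has already opened the file before the tuple is returned; a hypothetical consumer (entry_file, vfs and copies are assumptions here) then only sees content chunks:

    name, chunks, size = entry_file.get_stream(vfs, copies)
    data = b''.join(chunks)
    assert len(data) == size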
502 496
503 497 @attr.s(slots=True, init=False)
504 498 class BaseStoreEntry:
505 499 """An entry in the store
506 500
507 501 This is returned by `store.walk` and represent some data in the store."""
508 502
509 503 def files(self) -> List[StoreFile]:
510 504 raise NotImplementedError
511 505
512 506 def get_streams(
513 507 self,
514 508 repo=None,
515 509 vfs=None,
516 510 copies=None,
517 511 max_changeset=None,
518 512 ):
519 513 """return a list of data stream associated to files for this entry
520 514
521 515 return [(unencoded_file_path, content_iterator, content_size), …]
522 516 """
523 517 assert vfs is not None
524 518 return [f.get_stream(vfs, copies) for f in self.files()]
525 519
526 520
527 521 @attr.s(slots=True, init=False)
528 522 class SimpleStoreEntry(BaseStoreEntry):
529 523 """A generic entry in the store"""
530 524
531 525 is_revlog = False
532 526
533 527 _entry_path = attr.ib()
534 528 _is_volatile = attr.ib(default=False)
535 529 _file_size = attr.ib(default=None)
536 530 _files = attr.ib(default=None)
537 531
538 532 def __init__(
539 533 self,
540 534 entry_path,
541 535 is_volatile=False,
542 536 file_size=None,
543 537 ):
544 538 super().__init__()
545 539 self._entry_path = entry_path
546 540 self._is_volatile = is_volatile
547 541 self._file_size = file_size
548 542 self._files = None
549 543
550 544 def files(self) -> List[StoreFile]:
551 545 if self._files is None:
552 546 self._files = [
553 547 StoreFile(
554 548 unencoded_path=self._entry_path,
555 549 file_size=self._file_size,
556 550 is_volatile=self._is_volatile,
557 551 )
558 552 ]
559 553 return self._files
560 554
561 555
562 556 @attr.s(slots=True, init=False)
563 557 class RevlogStoreEntry(BaseStoreEntry):
564 558 """A revlog entry in the store"""
565 559
566 560 is_revlog = True
567 561
568 562 revlog_type = attr.ib(default=None)
569 563 target_id = attr.ib(default=None)
570 564 _path_prefix = attr.ib(default=None)
571 565 _details = attr.ib(default=None)
572 566 _files = attr.ib(default=None)
573 567
574 568 def __init__(
575 569 self,
576 570 revlog_type,
577 571 path_prefix,
578 572 target_id,
579 573 details,
580 574 ):
581 575 super().__init__()
582 576 self.revlog_type = revlog_type
583 577 self.target_id = target_id
584 578 self._path_prefix = path_prefix
585 579 assert b'.i' in details, (path_prefix, details)
586 580 self._details = details
587 581 self._files = None
588 582
589 583 @property
590 584 def is_changelog(self):
591 585 return self.revlog_type & FILEFLAGS_CHANGELOG
592 586
593 587 @property
594 588 def is_manifestlog(self):
595 589 return self.revlog_type & FILEFLAGS_MANIFESTLOG
596 590
597 591 @property
598 592 def is_filelog(self):
599 593 return self.revlog_type & FILEFLAGS_FILELOG
600 594
601 595 def main_file_path(self):
602 596 """unencoded path of the main revlog file"""
603 597 return self._path_prefix + b'.i'
604 598
605 599 def files(self) -> List[StoreFile]:
606 600 if self._files is None:
607 601 self._files = []
608 602 for ext in sorted(self._details, key=_ext_key):
609 603 path = self._path_prefix + ext
610 604 file_size = self._details[ext]
611 605 # files that are "volatile" and might change between
612 606 # listing and streaming
613 607 #
614 608 # note: the ".nd" files are nodemap data and won't "change"
615 609 # but they might be deleted.
616 610 volatile = ext.endswith(REVLOG_FILES_VOLATILE_EXT)
617 611 f = StoreFile(path, file_size, volatile)
618 612 self._files.append(f)
619 613 return self._files
620 614
621 615 def get_streams(
622 616 self,
623 617 repo=None,
624 618 vfs=None,
625 619 copies=None,
626 620 max_changeset=None,
627 621 ):
628 622 if repo is None or max_changeset is None:
629 623 return super().get_streams(
630 624 repo=repo,
631 625 vfs=vfs,
632 626 copies=copies,
633 627 max_changeset=max_changeset,
634 628 )
635 629 if any(k.endswith(b'.idx') for k in self._details.keys()):
636 630 # This uses revlog-v2; ignore it for now
637 631 return super().get_streams(
638 632 repo=repo,
639 633 vfs=vfs,
640 634 copies=copies,
641 635 max_changeset=max_changeset,
642 636 )
643 637 name_to_ext = {}
644 638 for ext in self._details.keys():
645 639 name_to_ext[self._path_prefix + ext] = ext
646 640 name_to_size = {}
647 641 for f in self.files():
648 642 name_to_size[f.unencoded_path] = f.file_size(None)
649 643 stream = [
650 644 f.get_stream(vfs, copies)
651 645 for f in self.files()
652 646 if name_to_ext[f.unencoded_path] not in (b'.d', b'.i')
653 647 ]
654 648
655 649 is_inline = b'.d' not in self._details
656 650
657 651 rl = self.get_revlog_instance(repo).get_revlog()
658 652 rl_stream = rl.get_streams(max_changeset, force_inline=is_inline)
659 653
660 654 for name, s, size in rl_stream:
661 655 if name_to_size.get(name, 0) != size:
662 656 msg = _(b"expected %d bytes but %d provided for %s")
663 657 msg %= name_to_size.get(name, 0), size, name
664 658 raise error.Abort(msg)
665 659 stream.extend(rl_stream)
666 660 files = self.files()
667 661 assert len(stream) == len(files), (
668 662 stream,
669 663 files,
670 664 self._path_prefix,
671 665 self.target_id,
672 666 )
673 667 return stream
674 668
675 669 def get_revlog_instance(self, repo):
676 670 """Obtain a revlog instance from this store entry
677 671
678 672 An instance of the appropriate class is returned.
679 673 """
680 674 if self.is_changelog:
681 675 return changelog.changelog(repo.svfs)
682 676 elif self.is_manifestlog:
683 677 mandir = self.target_id
684 678 return manifest.manifestrevlog(
685 679 repo.nodeconstants, repo.svfs, tree=mandir
686 680 )
687 681 else:
688 682 return filelog.filelog(repo.svfs, self.target_id)
689 683
690 684
691 685 def _gather_revlog(files_data):
692 686 """group files per revlog prefix
693 687
694 688 This returns a two-level nested dict. The top-level key is the revlog
695 689 prefix without extension; the second level maps each file "suffix" seen
696 690 for this revlog to arbitrary file data.
697 691 """
698 692 revlogs = collections.defaultdict(dict)
699 693 for u, value in files_data:
700 694 name, ext = _split_revlog_ext(u)
701 695 revlogs[name][ext] = value
702 696 return sorted(revlogs.items())
703 697
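A hypothetical input/output pair showing the grouping (the values are arbitrary file data, here sizes):

    files = [(b'data/foo.i', 12), (b'data/foo.d', 345), (b'data/bar.i', 6)]
    assert _gather_revlog(files) == [
        (b'data/bar', {b'.i': 6}),
        (b'data/foo', {b'.i': 12, b'.d': 345}),
    ]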
704 698
705 699 def _split_revlog_ext(filename):
706 700 """split the revlog file prefix from the variable extension"""
707 701 if filename.endswith(REVLOG_FILES_LONG_EXT):
708 702 char = b'-'
709 703 else:
710 704 char = b'.'
711 705 idx = filename.rfind(char)
712 706 return filename[:idx], filename[idx:]
713 707
714 708
715 709 def _ext_key(ext):
716 710 """a key to order revlog suffix
717 711
718 712 important to issue .i after other entry."""
719 713 # the only important part of this order is to keep the `.i` last.
720 714 if ext.endswith(b'.n'):
721 715 return (0, ext)
722 716 elif ext.endswith(b'.nd'):
723 717 return (10, ext)
724 718 elif ext.endswith(b'.d'):
725 719 return (20, ext)
726 720 elif ext.endswith(b'.i'):
727 721 return (50, ext)
728 722 else:
729 723 return (40, ext)
730 724
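A quick sanity check of the resulting order (not part of the commit):

    exts = [b'.i', b'.d', b'.nd', b'.n', b'.sda']
    assert sorted(exts, key=_ext_key) == [b'.n', b'.nd', b'.d', b'.sda', b'.i']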
731 725
732 726 class basicstore:
733 727 '''base class for local repository stores'''
734 728
735 729 def __init__(self, path, vfstype):
736 730 vfs = vfstype(path)
737 731 self.path = vfs.base
738 732 self.createmode = _calcmode(vfs)
739 733 vfs.createmode = self.createmode
740 734 self.rawvfs = vfs
741 735 self.vfs = vfsmod.filtervfs(vfs, encodedir)
742 736 self.opener = self.vfs
743 737
744 738 def join(self, f):
745 739 return self.path + b'/' + encodedir(f)
746 740
747 741 def _walk(self, relpath, recurse, undecodable=None):
748 742 '''yields (unencoded, size)'''
749 743 path = self.path
750 744 if relpath:
751 745 path += b'/' + relpath
752 746 striplen = len(self.path) + 1
753 747 l = []
754 748 if self.rawvfs.isdir(path):
755 749 visit = [path]
756 750 readdir = self.rawvfs.readdir
757 751 while visit:
758 752 p = visit.pop()
759 753 for f, kind, st in readdir(p, stat=True):
760 754 fp = p + b'/' + f
761 rl_type = is_revlog(f, kind, st)
762 if rl_type is not None:
755 if is_revlog(f, kind, st):
763 756 n = util.pconvert(fp[striplen:])
764 l.append((decodedir(n), (rl_type, st.st_size)))
757 l.append((decodedir(n), st.st_size))
765 758 elif kind == stat.S_IFDIR and recurse:
766 759 visit.append(fp)
767 760
768 761 l.sort()
769 762 return l
770 763
771 764 def changelog(self, trypending, concurrencychecker=None):
772 765 return changelog.changelog(
773 766 self.vfs,
774 767 trypending=trypending,
775 768 concurrencychecker=concurrencychecker,
776 769 )
777 770
778 771 def manifestlog(self, repo, storenarrowmatch):
779 772 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
780 773 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
781 774
782 775 def data_entries(
783 776 self, matcher=None, undecodable=None
784 777 ) -> Generator[BaseStoreEntry, None, None]:
785 778 """Like walk, but excluding the changelog and root manifest.
786 779
787 780 When [undecodable] is None, revlog names that can't be
788 781 decoded cause an exception. When it is provided, it should
789 782 be a list and the filenames that can't be decoded are added
790 783 to it instead. This is very rarely needed."""
791 784 dirs = [
792 785 (b'data', FILEFLAGS_FILELOG, False),
793 786 (b'meta', FILEFLAGS_MANIFESTLOG, True),
794 787 ]
795 788 for base_dir, rl_type, strip_filename in dirs:
796 789 files = self._walk(base_dir, True, undecodable=undecodable)
797 files = (f for f in files if f[1][0] is not None)
798 790 for revlog, details in _gather_revlog(files):
799 file_details = {}
800 791 revlog_target_id = revlog.split(b'/', 1)[1]
801 792 if strip_filename and b'/' in revlog:
802 793 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
803 794 revlog_target_id += b'/'
804 for ext, (t, size) in sorted(details.items()):
805 file_details[ext] = size
806 795 yield RevlogStoreEntry(
807 796 path_prefix=revlog,
808 797 revlog_type=rl_type,
809 798 target_id=revlog_target_id,
810 details=file_details,
799 details=details,
811 800 )
812 801
813 802 def top_entries(
814 803 self, phase=False, obsolescence=False
815 804 ) -> Generator[BaseStoreEntry, None, None]:
816 805 if phase and self.vfs.exists(b'phaseroots'):
817 806 yield SimpleStoreEntry(
818 807 entry_path=b'phaseroots',
819 808 is_volatile=True,
820 809 )
821 810
822 811 if obsolescence and self.vfs.exists(b'obsstore'):
823 812 # XXX if we had the file size it could be non-volatile
824 813 yield SimpleStoreEntry(
825 814 entry_path=b'obsstore',
826 815 is_volatile=True,
827 816 )
828 817
829 818 files = reversed(self._walk(b'', False))
830 819
831 820 changelogs = collections.defaultdict(dict)
832 821 manifestlogs = collections.defaultdict(dict)
833 822
834 for u, (t, s) in files:
823 for u, s in files:
835 824 if u.startswith(b'00changelog'):
836 825 name, ext = _split_revlog_ext(u)
837 changelogs[name][ext] = (t, s)
826 changelogs[name][ext] = s
838 827 elif u.startswith(b'00manifest'):
839 828 name, ext = _split_revlog_ext(u)
840 manifestlogs[name][ext] = (t, s)
829 manifestlogs[name][ext] = s
841 830 else:
842 831 yield SimpleStoreEntry(
843 832 entry_path=u,
844 is_volatile=bool(t & FILEFLAGS_VOLATILE),
833 is_volatile=False,
845 834 file_size=s,
846 835 )
847 836 # yield manifest before changelog
848 837 top_rl = [
849 838 (manifestlogs, FILEFLAGS_MANIFESTLOG),
850 839 (changelogs, FILEFLAGS_CHANGELOG),
851 840 ]
852 841 assert len(manifestlogs) <= 1
853 842 assert len(changelogs) <= 1
854 843 for data, revlog_type in top_rl:
855 844 for revlog, details in sorted(data.items()):
856 file_details = {}
857 for ext, (t, size) in details.items():
858 file_details[ext] = size
859 845 yield RevlogStoreEntry(
860 846 path_prefix=revlog,
861 847 revlog_type=revlog_type,
862 848 target_id=b'',
863 details=file_details,
849 details=details,
864 850 )
865 851
866 852 def walk(
867 853 self, matcher=None, phase=False, obsolescence=False
868 854 ) -> Generator[BaseStoreEntry, None, None]:
869 855 """return files related to data storage (ie: revlogs)
870 856
871 857 yields instance from BaseStoreEntry subclasses
872 858
873 859 if a matcher is passed, only storage files of the tracked paths
874 860 matched by the matcher are yielded
875 861 """
876 862 # yield data files first
877 863 for x in self.data_entries(matcher):
878 864 yield x
879 865 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
880 866 yield x
881 867
882 868 def copylist(self):
883 869 return _data
884 870
885 871 def write(self, tr):
886 872 pass
887 873
888 874 def invalidatecaches(self):
889 875 pass
890 876
891 877 def markremoved(self, fn):
892 878 pass
893 879
894 880 def __contains__(self, path):
895 881 '''Checks if the store contains path'''
896 882 path = b"/".join((b"data", path))
897 883 # file?
898 884 if self.vfs.exists(path + b".i"):
899 885 return True
900 886 # dir?
901 887 if not path.endswith(b"/"):
902 888 path = path + b"/"
903 889 return self.vfs.exists(path)
904 890
905 891
906 892 class encodedstore(basicstore):
907 893 def __init__(self, path, vfstype):
908 894 vfs = vfstype(path + b'/store')
909 895 self.path = vfs.base
910 896 self.createmode = _calcmode(vfs)
911 897 vfs.createmode = self.createmode
912 898 self.rawvfs = vfs
913 899 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
914 900 self.opener = self.vfs
915 901
916 902 def _walk(self, relpath, recurse, undecodable=None):
917 903 old = super()._walk(relpath, recurse)
918 904 new = []
919 905 for f1, value in old:
920 906 try:
921 907 f2 = decodefilename(f1)
922 908 except KeyError:
923 909 if undecodable is None:
924 910 msg = _(b'undecodable revlog name %s') % f1
925 911 raise error.StorageError(msg)
926 912 else:
927 913 undecodable.append(f1)
928 914 continue
929 915 new.append((f2, value))
930 916 return new
931 917
932 918 def data_entries(
933 919 self, matcher=None, undecodable=None
934 920 ) -> Generator[BaseStoreEntry, None, None]:
935 921 entries = super(encodedstore, self).data_entries(
936 922 undecodable=undecodable
937 923 )
938 924 for entry in entries:
939 925 if _match_tracked_entry(entry, matcher):
940 926 yield entry
941 927
942 928 def join(self, f):
943 929 return self.path + b'/' + encodefilename(f)
944 930
945 931 def copylist(self):
946 932 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
947 933
948 934
949 935 class fncache:
950 936 # the filename used to be partially encoded
951 937 # hence the encodedir/decodedir dance
952 938 def __init__(self, vfs):
953 939 self.vfs = vfs
954 940 self._ignores = set()
955 941 self.entries = None
956 942 self._dirty = False
957 943 # set of new additions to fncache
958 944 self.addls = set()
959 945
960 946 def ensureloaded(self, warn=None):
961 947 """read the fncache file if not already read.
962 948
963 949 If the file on disk is corrupted, raise. If warn is provided,
964 950 warn and keep going instead."""
965 951 if self.entries is None:
966 952 self._load(warn)
967 953
968 954 def _load(self, warn=None):
969 955 '''fill the entries from the fncache file'''
970 956 self._dirty = False
971 957 try:
972 958 fp = self.vfs(b'fncache', mode=b'rb')
973 959 except IOError:
974 960 # skip nonexistent file
975 961 self.entries = set()
976 962 return
977 963
978 964 self.entries = set()
979 965 chunk = b''
980 966 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
981 967 chunk += c
982 968 try:
983 969 p = chunk.rindex(b'\n')
984 970 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
985 971 chunk = chunk[p + 1 :]
986 972 except ValueError:
987 973 # substring '\n' not found, maybe the entry is bigger than the
988 974 # chunksize, so let's keep iterating
989 975 pass
990 976
991 977 if chunk:
992 978 msg = _(b"fncache does not end with a newline")
993 979 if warn:
994 980 warn(msg + b'\n')
995 981 else:
996 982 raise error.Abort(
997 983 msg,
998 984 hint=_(
999 985 b"use 'hg debugrebuildfncache' to "
1000 986 b"rebuild the fncache"
1001 987 ),
1002 988 )
1003 989 self._checkentries(fp, warn)
1004 990 fp.close()
1005 991
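The chunked loop above keeps memory bounded even for very large fncache files: bytes accumulate until a newline is found, then all complete lines are flushed at once. A self-contained sketch of the same splitting logic with a tiny chunk size (decodedir is omitted for brevity):

    import functools
    import io

    fp = io.BytesIO(b'data/a.i\ndata/some-longer-name.i\n')
    entries, chunk = set(), b''
    for c in iter(functools.partial(fp.read, 8), b''):  # 8-byte chunks
        chunk += c
        try:
            p = chunk.rindex(b'\n')
            entries.update(chunk[: p + 1].splitlines())
            chunk = chunk[p + 1 :]
        except ValueError:
            pass  # no newline in this chunk yet; keep accumulating
    assert chunk == b''  # a well-formed fncache ends with a newline
    assert b'data/a.i' in entries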
1006 992 def _checkentries(self, fp, warn):
1007 993 """make sure there is no empty string in entries"""
1008 994 if b'' in self.entries:
1009 995 fp.seek(0)
1010 996 for n, line in enumerate(fp):
1011 997 if not line.rstrip(b'\n'):
1012 998 t = _(b'invalid entry in fncache, line %d') % (n + 1)
1013 999 if warn:
1014 1000 warn(t + b'\n')
1015 1001 else:
1016 1002 raise error.Abort(t)
1017 1003
1018 1004 def write(self, tr):
1019 1005 if self._dirty:
1020 1006 assert self.entries is not None
1021 1007 self.entries = self.entries | self.addls
1022 1008 self.addls = set()
1023 1009 tr.addbackup(b'fncache')
1024 1010 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
1025 1011 if self.entries:
1026 1012 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
1027 1013 fp.close()
1028 1014 self._dirty = False
1029 1015 if self.addls:
1030 1016 # if we have just new entries, let's append them to the fncache
1031 1017 tr.addbackup(b'fncache')
1032 1018 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
1033 1019 if self.addls:
1034 1020 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
1035 1021 fp.close()
1036 1022 self.entries = None
1037 1023 self.addls = set()
1038 1024
1039 1025 def addignore(self, fn):
1040 1026 self._ignores.add(fn)
1041 1027
1042 1028 def add(self, fn):
1043 1029 if fn in self._ignores:
1044 1030 return
1045 1031 if self.entries is None:
1046 1032 self._load()
1047 1033 if fn not in self.entries:
1048 1034 self.addls.add(fn)
1049 1035
1050 1036 def remove(self, fn):
1051 1037 if self.entries is None:
1052 1038 self._load()
1053 1039 if fn in self.addls:
1054 1040 self.addls.remove(fn)
1055 1041 return
1056 1042 try:
1057 1043 self.entries.remove(fn)
1058 1044 self._dirty = True
1059 1045 except KeyError:
1060 1046 pass
1061 1047
1062 1048 def __contains__(self, fn):
1063 1049 if fn in self.addls:
1064 1050 return True
1065 1051 if self.entries is None:
1066 1052 self._load()
1067 1053 return fn in self.entries
1068 1054
1069 1055 def __iter__(self):
1070 1056 if self.entries is None:
1071 1057 self._load()
1072 1058 return iter(self.entries | self.addls)
1073 1059
1074 1060
1075 1061 class _fncachevfs(vfsmod.proxyvfs):
1076 1062 def __init__(self, vfs, fnc, encode):
1077 1063 vfsmod.proxyvfs.__init__(self, vfs)
1078 1064 self.fncache = fnc
1079 1065 self.encode = encode
1080 1066
1081 1067 def __call__(self, path, mode=b'r', *args, **kw):
1082 1068 encoded = self.encode(path)
1083 1069 if (
1084 1070 mode not in (b'r', b'rb')
1085 1071 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1086 and revlog_type(path) is not None
1072 and is_revlog_file(path)
1087 1073 ):
1088 1074 # do not trigger a fncache load when adding a file that already is
1089 1075 # known to exist.
1090 1076 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1091 1077 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1092 1078 # when appending to an existing file, if the file has size zero,
1093 1079 # it should be considered as missing. Such zero-size files are
1094 1080 # the result of truncation when a transaction is aborted.
1095 1081 notload = False
1096 1082 if not notload:
1097 1083 self.fncache.add(path)
1098 1084 return self.vfs(encoded, mode, *args, **kw)
1099 1085
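In effect, only write-mode access to revlog files under data/ or meta/ registers a path with the fncache; a hypothetical trace (vfs stands for a _fncachevfs instance):

    vfs(b'data/foo.i', mode=b'w')   # registered: write mode + revlog extension
    vfs(b'data/foo.i', mode=b'r')   # not registered: read-only access
    vfs(b'phaseroots', mode=b'w')   # not registered: outside data/ and meta/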
1100 1086 def join(self, path):
1101 1087 if path:
1102 1088 return self.vfs.join(self.encode(path))
1103 1089 else:
1104 1090 return self.vfs.join(path)
1105 1091
1106 1092 def register_file(self, path):
1107 1093 """generic hook point to lets fncache steer its stew"""
1108 1094 if path.startswith(b'data/') or path.startswith(b'meta/'):
1109 1095 self.fncache.add(path)
1110 1096
1111 1097
1112 1098 class fncachestore(basicstore):
1113 1099 def __init__(self, path, vfstype, dotencode):
1114 1100 if dotencode:
1115 1101 encode = _pathencode
1116 1102 else:
1117 1103 encode = _plainhybridencode
1118 1104 self.encode = encode
1119 1105 vfs = vfstype(path + b'/store')
1120 1106 self.path = vfs.base
1121 1107 self.pathsep = self.path + b'/'
1122 1108 self.createmode = _calcmode(vfs)
1123 1109 vfs.createmode = self.createmode
1124 1110 self.rawvfs = vfs
1125 1111 fnc = fncache(vfs)
1126 1112 self.fncache = fnc
1127 1113 self.vfs = _fncachevfs(vfs, fnc, encode)
1128 1114 self.opener = self.vfs
1129 1115
1130 1116 def join(self, f):
1131 1117 return self.pathsep + self.encode(f)
1132 1118
1133 1119 def getsize(self, path):
1134 1120 return self.rawvfs.stat(path).st_size
1135 1121
1136 1122 def data_entries(
1137 1123 self, matcher=None, undecodable=None
1138 1124 ) -> Generator[BaseStoreEntry, None, None]:
1139 files = ((f, revlog_type(f)) for f in self.fncache)
1140 1125 # Note: all files in fncache should be revlog related. However, the
1141 1126 # fncache might contain such files added by a previous version of
1142 1127 # Mercurial.
1143 files = (f for f in files if f[1] is not None)
1128 files = ((f, None) for f in self.fncache if is_revlog_file(f))
1144 1129 by_revlog = _gather_revlog(files)
1145 1130 for revlog, details in by_revlog:
1146 file_details = {}
1147 1131 if revlog.startswith(b'data/'):
1148 1132 rl_type = FILEFLAGS_FILELOG
1149 1133 revlog_target_id = revlog.split(b'/', 1)[1]
1150 1134 elif revlog.startswith(b'meta/'):
1151 1135 rl_type = FILEFLAGS_MANIFESTLOG
1152 1136 # drop the initial directory and the `00manifest` file part
1153 1137 tmp = revlog.split(b'/', 1)[1]
1154 1138 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1155 1139 else:
1156 1140 # unreachable
1157 1141 assert False, revlog
1158 for ext in details:
1159 file_details[ext] = None
1160 1142 entry = RevlogStoreEntry(
1161 1143 path_prefix=revlog,
1162 1144 revlog_type=rl_type,
1163 1145 target_id=revlog_target_id,
1164 details=file_details,
1146 details=details,
1165 1147 )
1166 1148 if _match_tracked_entry(entry, matcher):
1167 1149 yield entry
1168 1150
1169 1151 def copylist(self):
1170 1152 d = (
1171 1153 b'bookmarks',
1172 1154 b'narrowspec',
1173 1155 b'data',
1174 1156 b'meta',
1175 1157 b'dh',
1176 1158 b'fncache',
1177 1159 b'phaseroots',
1178 1160 b'obsstore',
1179 1161 b'00manifest.d',
1180 1162 b'00manifest.i',
1181 1163 b'00changelog.d',
1182 1164 b'00changelog.i',
1183 1165 b'requires',
1184 1166 )
1185 1167 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1186 1168
1187 1169 def write(self, tr):
1188 1170 self.fncache.write(tr)
1189 1171
1190 1172 def invalidatecaches(self):
1191 1173 self.fncache.entries = None
1192 1174 self.fncache.addls = set()
1193 1175
1194 1176 def markremoved(self, fn):
1195 1177 self.fncache.remove(fn)
1196 1178
1197 1179 def _exists(self, f):
1198 1180 ef = self.encode(f)
1199 1181 try:
1200 1182 self.getsize(ef)
1201 1183 return True
1202 1184 except FileNotFoundError:
1203 1185 return False
1204 1186
1205 1187 def __contains__(self, path):
1206 1188 '''Checks if the store contains path'''
1207 1189 path = b"/".join((b"data", path))
1208 1190 # check for files (exact match)
1209 1191 e = path + b'.i'
1210 1192 if e in self.fncache and self._exists(e):
1211 1193 return True
1212 1194 # now check for directories (prefix match)
1213 1195 if not path.endswith(b'/'):
1214 1196 path += b'/'
1215 1197 for e in self.fncache:
1216 1198 if e.startswith(path) and self._exists(e):
1217 1199 return True
1218 1200 return False
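Hypothetical usage of the containment check (the repo object and paths are assumptions):

    b'foo/bar.txt' in repo.store   # exact match: data/foo/bar.txt.i in fncache
    b'foo' in repo.store           # prefix match: any tracked file under data/foo/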
@@ -1,649 +1,649 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import stat
10 10
11 11 from ..i18n import _
12 12 from ..pycompat import getattr
13 13 from .. import (
14 14 error,
15 15 metadata,
16 16 pycompat,
17 17 requirements,
18 18 scmutil,
19 19 store,
20 20 util,
21 21 vfs as vfsmod,
22 22 )
23 23 from ..revlogutils import (
24 24 constants as revlogconst,
25 25 flagutil,
26 26 nodemap,
27 27 sidedata as sidedatamod,
28 28 )
29 29 from . import actions as upgrade_actions
30 30
31 31
32 32 def get_sidedata_helpers(srcrepo, dstrepo):
33 33 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
34 34 sequential = pycompat.iswindows or not use_w
35 35 if not sequential:
36 36 srcrepo.register_sidedata_computer(
37 37 revlogconst.KIND_CHANGELOG,
38 38 sidedatamod.SD_FILES,
39 39 (sidedatamod.SD_FILES,),
40 40 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
41 41 flagutil.REVIDX_HASCOPIESINFO,
42 42 replace=True,
43 43 )
44 44 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
45 45
46 46
47 47 def _copyrevlog(tr, destrepo, oldrl, entry):
48 48 """copy all relevant files for `oldrl` into `destrepo` store
49 49
50 50 Files are copied "as is" without any transformation. The copy is performed
51 51 without extra checks. Callers are responsible for making sure the copied
52 52 content is compatible with format of the destination repository.
53 53 """
54 54 oldrl = getattr(oldrl, '_revlog', oldrl)
55 55 newrl = entry.get_revlog_instance(destrepo)
56 56 newrl = getattr(newrl, '_revlog', newrl)
57 57
58 58 oldvfs = oldrl.opener
59 59 newvfs = newrl.opener
60 60 oldindex = oldvfs.join(oldrl._indexfile)
61 61 newindex = newvfs.join(newrl._indexfile)
62 62 olddata = oldvfs.join(oldrl._datafile)
63 63 newdata = newvfs.join(newrl._datafile)
64 64
65 65 with newvfs(newrl._indexfile, b'w'):
66 66 pass # create all the directories
67 67
68 68 util.copyfile(oldindex, newindex)
69 69 copydata = oldrl.opener.exists(oldrl._datafile)
70 70 if copydata:
71 71 util.copyfile(olddata, newdata)
72 72
73 73 if entry.is_filelog:
74 74 unencodedname = entry.main_file_path()
75 75 destrepo.svfs.fncache.add(unencodedname)
76 76 if copydata:
77 77 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
78 78
79 79
80 80 UPGRADE_CHANGELOG = b"changelog"
81 81 UPGRADE_MANIFEST = b"manifest"
82 82 UPGRADE_FILELOGS = b"all-filelogs"
83 83
84 84 UPGRADE_ALL_REVLOGS = frozenset(
85 85 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
86 86 )
87 87
88 88
89 89 def matchrevlog(revlogfilter, entry):
90 90 """check if a revlog is selected for cloning.
91 91
92 92 In other words, whether there are any updates which need to be done on the
93 93 revlog, or whether it can be blindly copied.
94 94
95 95 The store entry is checked against the passed filter"""
96 96 if entry.is_changelog:
97 97 return UPGRADE_CHANGELOG in revlogfilter
98 98 elif entry.is_manifestlog:
99 99 return UPGRADE_MANIFEST in revlogfilter
100 100 assert entry.is_filelog
101 101 return UPGRADE_FILELOGS in revlogfilter
102 102
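A hedged sketch of how a filter drives the clone-versus-copy decision in _perform_clone below (the repo object is an assumption):

    revlogfilter = {UPGRADE_CHANGELOG, UPGRADE_MANIFEST}
    for entry in srcrepo.store.walk():
        if not entry.is_revlog:
            continue
        if matchrevlog(revlogfilter, entry):
            pass  # re-clone revision by revision (deltas may be recomputed)
        else:
            pass  # blindly copy the revlog files as-is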
103 103
104 104 def _perform_clone(
105 105 ui,
106 106 dstrepo,
107 107 tr,
108 108 old_revlog,
109 109 entry,
110 110 upgrade_op,
111 111 sidedata_helpers,
112 112 oncopiedrevision,
113 113 ):
114 114 """returns the new revlog object created"""
115 115 newrl = None
116 116 revlog_path = entry.main_file_path()
117 117 if matchrevlog(upgrade_op.revlogs_to_process, entry):
118 118 ui.note(
119 119 _(b'cloning %d revisions from %s\n')
120 120 % (len(old_revlog), revlog_path)
121 121 )
122 122 newrl = entry.get_revlog_instance(dstrepo)
123 123 old_revlog.clone(
124 124 tr,
125 125 newrl,
126 126 addrevisioncb=oncopiedrevision,
127 127 deltareuse=upgrade_op.delta_reuse_mode,
128 128 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
129 129 sidedata_helpers=sidedata_helpers,
130 130 )
131 131 else:
132 132 msg = _(b'blindly copying %s containing %i revisions\n')
133 133 ui.note(msg % (revlog_path, len(old_revlog)))
134 134 _copyrevlog(tr, dstrepo, old_revlog, entry)
135 135
136 136 newrl = entry.get_revlog_instance(dstrepo)
137 137 return newrl
138 138
139 139
140 140 def _clonerevlogs(
141 141 ui,
142 142 srcrepo,
143 143 dstrepo,
144 144 tr,
145 145 upgrade_op,
146 146 ):
147 147 """Copy revlogs between 2 repos."""
148 148 revcount = 0
149 149 srcsize = 0
150 150 srcrawsize = 0
151 151 dstsize = 0
152 152 fcount = 0
153 153 frevcount = 0
154 154 fsrcsize = 0
155 155 frawsize = 0
156 156 fdstsize = 0
157 157 mcount = 0
158 158 mrevcount = 0
159 159 msrcsize = 0
160 160 mrawsize = 0
161 161 mdstsize = 0
162 162 crevcount = 0
163 163 csrcsize = 0
164 164 crawsize = 0
165 165 cdstsize = 0
166 166
167 167 alldatafiles = list(srcrepo.store.walk())
168 168 # mapping of data files which needs to be cloned
169 169 # key is unencoded filename
170 170 # value is revlog_object_from_srcrepo
171 171 manifests = {}
172 172 changelogs = {}
173 173 filelogs = {}
174 174
175 175 # Perform a pass to collect metadata. This validates we can open all
176 176 # source files and allows a unified progress bar to be displayed.
177 177 for entry in alldatafiles:
178 178 if not entry.is_revlog:
179 179 continue
180 180
181 181 rl = entry.get_revlog_instance(srcrepo)
182 182
183 183 info = rl.storageinfo(
184 184 exclusivefiles=True,
185 185 revisionscount=True,
186 186 trackedsize=True,
187 187 storedsize=True,
188 188 )
189 189
190 190 revcount += info[b'revisionscount'] or 0
191 191 datasize = info[b'storedsize'] or 0
192 192 rawsize = info[b'trackedsize'] or 0
193 193
194 194 srcsize += datasize
195 195 srcrawsize += rawsize
196 196
197 197 # This is for the separate progress bars.
198 198 if entry.is_changelog:
199 199 changelogs[entry.target_id] = entry
200 200 crevcount += len(rl)
201 201 csrcsize += datasize
202 202 crawsize += rawsize
203 203 elif entry.is_manifestlog:
204 204 manifests[entry.target_id] = entry
205 205 mcount += 1
206 206 mrevcount += len(rl)
207 207 msrcsize += datasize
208 208 mrawsize += rawsize
209 209 elif entry.is_filelog:
210 210 filelogs[entry.target_id] = entry
211 211 fcount += 1
212 212 frevcount += len(rl)
213 213 fsrcsize += datasize
214 214 frawsize += rawsize
215 215 else:
216 216 raise error.ProgrammingError(b'unknown revlog type')
217 217
218 218 if not revcount:
219 219 return
220 220
221 221 ui.status(
222 222 _(
223 223 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
224 224 b'%d in changelog)\n'
225 225 )
226 226 % (revcount, frevcount, mrevcount, crevcount)
227 227 )
228 228 ui.status(
229 229 _(b'migrating %s in store; %s tracked data\n')
230 230 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
231 231 )
232 232
233 233 # Used to keep track of progress.
234 234 progress = None
235 235
236 236 def oncopiedrevision(rl, rev, node):
237 237 progress.increment()
238 238
239 239 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
240 240
241 241 # Migrating filelogs
242 242 ui.status(
243 243 _(
244 244 b'migrating %d filelogs containing %d revisions '
245 245 b'(%s in store; %s tracked data)\n'
246 246 )
247 247 % (
248 248 fcount,
249 249 frevcount,
250 250 util.bytecount(fsrcsize),
251 251 util.bytecount(frawsize),
252 252 )
253 253 )
254 254 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
255 255 for target_id, entry in sorted(filelogs.items()):
256 256 oldrl = entry.get_revlog_instance(srcrepo)
257 257
258 258 newrl = _perform_clone(
259 259 ui,
260 260 dstrepo,
261 261 tr,
262 262 oldrl,
263 263 entry,
264 264 upgrade_op,
265 265 sidedata_helpers,
266 266 oncopiedrevision,
267 267 )
268 268 info = newrl.storageinfo(storedsize=True)
269 269 fdstsize += info[b'storedsize'] or 0
270 270 ui.status(
271 271 _(
272 272 b'finished migrating %d filelog revisions across %d '
273 273 b'filelogs; change in size: %s\n'
274 274 )
275 275 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
276 276 )
277 277
278 278 # Migrating manifests
279 279 ui.status(
280 280 _(
281 281 b'migrating %d manifests containing %d revisions '
282 282 b'(%s in store; %s tracked data)\n'
283 283 )
284 284 % (
285 285 mcount,
286 286 mrevcount,
287 287 util.bytecount(msrcsize),
288 288 util.bytecount(mrawsize),
289 289 )
290 290 )
291 291 if progress:
292 292 progress.complete()
293 293 progress = srcrepo.ui.makeprogress(
294 294 _(b'manifest revisions'), total=mrevcount
295 295 )
296 296 for target_id, entry in sorted(manifests.items()):
297 297 oldrl = entry.get_revlog_instance(srcrepo)
298 298 newrl = _perform_clone(
299 299 ui,
300 300 dstrepo,
301 301 tr,
302 302 oldrl,
303 303 entry,
304 304 upgrade_op,
305 305 sidedata_helpers,
306 306 oncopiedrevision,
307 307 )
308 308 info = newrl.storageinfo(storedsize=True)
309 309 mdstsize += info[b'storedsize'] or 0
310 310 ui.status(
311 311 _(
312 312 b'finished migrating %d manifest revisions across %d '
313 313 b'manifests; change in size: %s\n'
314 314 )
315 315 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
316 316 )
317 317
318 318 # Migrating changelog
319 319 ui.status(
320 320 _(
321 321 b'migrating changelog containing %d revisions '
322 322 b'(%s in store; %s tracked data)\n'
323 323 )
324 324 % (
325 325 crevcount,
326 326 util.bytecount(csrcsize),
327 327 util.bytecount(crawsize),
328 328 )
329 329 )
330 330 if progress:
331 331 progress.complete()
332 332 progress = srcrepo.ui.makeprogress(
333 333 _(b'changelog revisions'), total=crevcount
334 334 )
335 335 for target_id, entry in sorted(changelogs.items()):
336 336 oldrl = entry.get_revlog_instance(srcrepo)
337 337 newrl = _perform_clone(
338 338 ui,
339 339 dstrepo,
340 340 tr,
341 341 oldrl,
342 342 entry,
343 343 upgrade_op,
344 344 sidedata_helpers,
345 345 oncopiedrevision,
346 346 )
347 347 info = newrl.storageinfo(storedsize=True)
348 348 cdstsize += info[b'storedsize'] or 0
349 349 progress.complete()
350 350 ui.status(
351 351 _(
352 352 b'finished migrating %d changelog revisions; change in size: '
353 353 b'%s\n'
354 354 )
355 355 % (crevcount, util.bytecount(cdstsize - csrcsize))
356 356 )
357 357
358 358 dstsize = fdstsize + mdstsize + cdstsize
359 359 ui.status(
360 360 _(
361 361 b'finished migrating %d total revisions; total change in store '
362 362 b'size: %s\n'
363 363 )
364 364 % (revcount, util.bytecount(dstsize - srcsize))
365 365 )
366 366
367 367
368 368 def _files_to_copy_post_revlog_clone(srcrepo):
369 369 """yields files which should be copied to destination after revlogs
370 370 are cloned"""
371 371 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
372 372 # don't copy revlogs as they are already cloned
373 if store.revlog_type(path) is not None:
373 if store.is_revlog_file(path):
374 374 continue
375 375 # Skip transaction related files.
376 376 if path.startswith(b'undo'):
377 377 continue
378 378 # Only copy regular files.
379 379 if kind != stat.S_IFREG:
380 380 continue
381 381 # Skip other skipped files.
382 382 if path in (b'lock', b'fncache'):
383 383 continue
384 384 # TODO: should we skip cache too?
385 385
386 386 yield path
387 387
388 388
389 389 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
390 390 """Replace the stores after current repository is upgraded
391 391
392 392 Creates a backup of current repository store at backup path
393 393 Replaces upgraded store files in current repo from upgraded one
394 394
395 395 Arguments:
396 396 currentrepo: repo object of current repository
397 397 upgradedrepo: repo object of the upgraded data
398 398 backupvfs: vfs object for the backup path
399 399 upgrade_op: upgrade operation object
400 400 to be used to decide what all is upgraded
401 401 """
402 402 # TODO: don't blindly rename everything in store
403 403 # There can be upgrades where store is not touched at all
404 404 if upgrade_op.backup_store:
405 405 util.rename(currentrepo.spath, backupvfs.join(b'store'))
406 406 else:
407 407 currentrepo.vfs.rmtree(b'store', forcibly=True)
408 408 util.rename(upgradedrepo.spath, currentrepo.spath)
409 409
410 410
411 411 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
412 412 """Hook point for extensions to perform additional actions during upgrade.
413 413
414 414 This function is called after revlogs and store files have been copied but
415 415 before the new store is swapped into the original location.
416 416 """
417 417
418 418
419 419 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
420 420 """Do the low-level work of upgrading a repository.
421 421
422 422 The upgrade is effectively performed as a copy between a source
423 423 repository and a temporary destination repository.
424 424
425 425 The source repository is unmodified for as long as possible so the
426 426 upgrade can abort at any time without causing loss of service for
427 427 readers and without corrupting the source repository.
428 428 """
429 429 assert srcrepo.currentwlock()
430 430 assert dstrepo.currentwlock()
431 431 backuppath = None
432 432 backupvfs = None
433 433
434 434 ui.status(
435 435 _(
436 436 b'(it is safe to interrupt this process any time before '
437 437 b'data migration completes)\n'
438 438 )
439 439 )
440 440
441 441 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
442 442 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
443 443 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
444 444 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
445 445
446 446 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
447 447 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
448 448 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
449 449 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
450 450
451 451 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
452 452 ui.status(_(b'create dirstate-tracked-hint file\n'))
453 453 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
454 454 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
455 455 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
456 456 ui.status(_(b'remove dirstate-tracked-hint file\n'))
457 457 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
458 458 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
459 459
460 460 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
461 461 return
462 462
463 463 if upgrade_op.requirements_only:
464 464 ui.status(_(b'upgrading repository requirements\n'))
465 465 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
466 466 # if there is only one action and that is persistent nodemap upgrade
467 467 # directly write the nodemap file and update requirements instead of going
468 468 # through the whole cloning process
469 469 elif (
470 470 len(upgrade_op.upgrade_actions) == 1
471 471 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
472 472 and not upgrade_op.removed_actions
473 473 ):
474 474 ui.status(
475 475 _(b'upgrading repository to use persistent nodemap feature\n')
476 476 )
477 477 with srcrepo.transaction(b'upgrade') as tr:
478 478 unfi = srcrepo.unfiltered()
479 479 cl = unfi.changelog
480 480 nodemap.persist_nodemap(tr, cl, force=True)
481 481 # we want to directly operate on the underlying revlog to force
482 482 # create a nodemap file. This is fine since this is upgrade code
483 483 # and it heavily relies on repository being revlog based
484 484 # hence accessing private attributes can be justified
485 485 nodemap.persist_nodemap(
486 486 tr, unfi.manifestlog._rootstore._revlog, force=True
487 487 )
488 488 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
489 489 elif (
490 490 len(upgrade_op.removed_actions) == 1
491 491 and [
492 492 x
493 493 for x in upgrade_op.removed_actions
494 494 if x.name == b'persistent-nodemap'
495 495 ]
496 496 and not upgrade_op.upgrade_actions
497 497 ):
498 498 ui.status(
499 499 _(b'downgrading repository to not use persistent nodemap feature\n')
500 500 )
501 501 with srcrepo.transaction(b'upgrade') as tr:
502 502 unfi = srcrepo.unfiltered()
503 503 cl = unfi.changelog
504 504 nodemap.delete_nodemap(tr, srcrepo, cl)
505 505 # check comment 20 lines above for accessing private attributes
506 506 nodemap.delete_nodemap(
507 507 tr, srcrepo, unfi.manifestlog._rootstore._revlog
508 508 )
509 509 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
510 510 else:
511 511 with dstrepo.transaction(b'upgrade') as tr:
512 512 _clonerevlogs(
513 513 ui,
514 514 srcrepo,
515 515 dstrepo,
516 516 tr,
517 517 upgrade_op,
518 518 )
519 519
520 520 # Now copy other files in the store directory.
521 521 for p in _files_to_copy_post_revlog_clone(srcrepo):
522 522 srcrepo.ui.status(_(b'copying %s\n') % p)
523 523 src = srcrepo.store.rawvfs.join(p)
524 524 dst = dstrepo.store.rawvfs.join(p)
525 525 util.copyfile(src, dst, copystat=True)
526 526
527 527 finishdatamigration(ui, srcrepo, dstrepo, requirements)
528 528
529 529 ui.status(_(b'data fully upgraded in a temporary repository\n'))
530 530
531 531 if upgrade_op.backup_store:
532 532 backuppath = pycompat.mkdtemp(
533 533 prefix=b'upgradebackup.', dir=srcrepo.path
534 534 )
535 535 backupvfs = vfsmod.vfs(backuppath)
536 536
537 537 # Make a backup of requires file first, as it is the first to be modified.
538 538 util.copyfile(
539 539 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
540 540 )
541 541
542 542 # We install an arbitrary requirement that clients must not support
543 543 # as a mechanism to lock out new clients during the data swap. This is
544 544 # better than allowing a client to continue while the repository is in
545 545 # an inconsistent state.
546 546 ui.status(
547 547 _(
548 548 b'marking source repository as being upgraded; clients will be '
549 549 b'unable to read from repository\n'
550 550 )
551 551 )
552 552 scmutil.writereporequirements(
553 553 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
554 554 )
555 555
556 556 ui.status(_(b'starting in-place swap of repository data\n'))
557 557 if upgrade_op.backup_store:
558 558 ui.status(
559 559 _(b'replaced files will be backed up at %s\n') % backuppath
560 560 )
561 561
562 562 # Now swap in the new store directory. Doing it as a rename should make
563 563 # the operation nearly instantaneous and atomic (at least in well-behaved
564 564 # environments).
565 565 ui.status(_(b'replacing store...\n'))
566 566 tstart = util.timer()
567 567 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
568 568 elapsed = util.timer() - tstart
569 569 ui.status(
570 570 _(
571 571 b'store replacement complete; repository was inconsistent for '
572 572 b'%0.1fs\n'
573 573 )
574 574 % elapsed
575 575 )
576 576
577 577 # We first write the requirements file. Any new requirements will lock
578 578 # out legacy clients.
579 579 ui.status(
580 580 _(
581 581 b'finalizing requirements file and making repository readable '
582 582 b'again\n'
583 583 )
584 584 )
585 585 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
586 586
587 587 if upgrade_op.backup_store:
588 588 # The lock file from the old store won't be removed because nothing has a
589 589 # reference to its new location. So clean it up manually. Alternatively, we
590 590 # could update srcrepo.svfs and other variables to point to the new
591 591 # location. This is simpler.
592 592 assert backupvfs is not None # help pytype
593 593 backupvfs.unlink(b'store/lock')
594 594
595 595 return backuppath
596 596
597 597
598 598 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
599 599 if upgrade_op.backup_store:
600 600 backuppath = pycompat.mkdtemp(
601 601 prefix=b'upgradebackup.', dir=srcrepo.path
602 602 )
603 603 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
604 604 backupvfs = vfsmod.vfs(backuppath)
605 605 util.copyfile(
606 606 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
607 607 )
608 608 try:
609 609 util.copyfile(
610 610 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
611 611 )
612 612 except FileNotFoundError:
613 613 # The dirstate does not exist on an empty repo or a repo with no
614 614 # revision checked out
615 615 pass
616 616
617 617 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
618 618 use_v2 = new == b'v2'
619 619 if use_v2:
620 620 # Write the requirements *before* upgrading
621 621 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
622 622
623 623 srcrepo.dirstate._map.preload()
624 624 srcrepo.dirstate._use_dirstate_v2 = use_v2
625 625 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
626 626 srcrepo.dirstate._dirty = True
627 627 try:
628 628 srcrepo.vfs.unlink(b'dirstate')
629 629 except FileNotFoundError:
630 630 # The dirstate does not exist on an empty repo or a repo with no
631 631 # revision checked out
632 632 pass
633 633
634 634 srcrepo.dirstate.write(None)
635 635 if not use_v2:
636 636 # Remove the v2 requirement *after* downgrading
637 637 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
638 638
639 639
640 640 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
641 641 if add:
642 642 srcrepo.dirstate._use_tracked_hint = True
643 643 srcrepo.dirstate._dirty = True
644 644 srcrepo.dirstate._dirty_tracked_set = True
645 645 srcrepo.dirstate.write(None)
646 646 if not add:
647 647 srcrepo.dirstate.delete_tracked_hint()
648 648
649 649 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)