store: use a StoreEntry object instead of tuple for store files...
marmoute
r51364:521fec11 default
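
The change is mechanical but far-reaching: repo.store.datafiles() used to yield bare (type, unencoded_path, size) tuples that every caller unpacked or indexed, and it now yields entry objects whose attributes are read instead. As an illustrative sketch only (the real class is defined in Mercurial's store module and carries more state than this), the shape the hunks below rely on is roughly:

    # Illustrative stand-in, not the real mercurial/store.py class.
    # The only attribute the hunks below depend on is unencoded_path,
    # which replaces element [1] of the old (type, unencoded_path, size) tuple.
    from dataclasses import dataclass

    @dataclass(frozen=True)
    class StoreEntrySketch:
        unencoded_path: bytes  # store-relative path, e.g. b'data/foo.txt.i'
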
@@ -1,823 +1,824 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import hex
18 18 from mercurial.pycompat import open
19 19
20 20 from mercurial import (
21 21 dirstate,
22 22 encoding,
23 23 error,
24 24 httpconnection,
25 25 match as matchmod,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 sparse,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33 from mercurial.utils import hashutil
34 34 from mercurial.dirstateutils import timestamp
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 """Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space."""
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95
96 96 hint = None
97 97
98 98 if pycompat.iswindows:
99 99 appdata = encoding.environ.get(
100 100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 101 )
102 102 if appdata:
103 103 return os.path.join(appdata, name)
104 104
105 105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 106 b"LOCALAPPDATA",
107 107 b"APPDATA",
108 108 name,
109 109 )
110 110 elif pycompat.isdarwin:
111 111 home = encoding.environ.get(b'HOME')
112 112 if home:
113 113 return os.path.join(home, b'Library', b'Caches', name)
114 114
115 115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 116 b"HOME",
117 117 name,
118 118 )
119 119 elif pycompat.isposix:
120 120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 121 if path:
122 122 return os.path.join(path, name)
123 123 home = encoding.environ.get(b'HOME')
124 124 if home:
125 125 return os.path.join(home, b'.cache', name)
126 126
127 127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 128 b"XDG_CACHE_HOME",
129 129 b"HOME",
130 130 name,
131 131 )
132 132 else:
133 133 raise error.Abort(
134 134 _(b'unknown operating system: %s\n') % pycompat.osname
135 135 )
136 136
137 137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138 138
139 139
140 140 def inusercache(ui, hash):
141 141 path = usercachepath(ui, hash)
142 142 return os.path.exists(path)
143 143
144 144
145 145 def findfile(repo, hash):
146 146 """Return store path of the largefile with the specified hash.
147 147 As a side effect, the file might be linked from user cache.
148 148 Return None if the file can't be found locally."""
149 149 path, exists = findstorepath(repo, hash)
150 150 if exists:
151 151 repo.ui.note(_(b'found %s in store\n') % hash)
152 152 return path
153 153 elif inusercache(repo.ui, hash):
154 154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 155 path = storepath(repo, hash)
156 156 link(usercachepath(repo.ui, hash), path)
157 157 return path
158 158 return None
159 159
160 160
161 161 class largefilesdirstate(dirstate.dirstate):
162 162 _large_file_dirstate = True
163 163 _tr_key_suffix = b'-large-files'
164 164
165 165 def __getitem__(self, key):
166 166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
167 167
168 168 def set_tracked(self, f):
169 169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
170 170
171 171 def set_untracked(self, f):
172 172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
173 173
174 174 def normal(self, f, parentfiledata=None):
175 175 # not sure if we should pass the `parentfiledata` down or throw it
176 176 # away. So throwing it away to stay on the safe side.
177 177 return super(largefilesdirstate, self).normal(unixpath(f))
178 178
179 179 def remove(self, f):
180 180 return super(largefilesdirstate, self).remove(unixpath(f))
181 181
182 182 def add(self, f):
183 183 return super(largefilesdirstate, self).add(unixpath(f))
184 184
185 185 def drop(self, f):
186 186 return super(largefilesdirstate, self).drop(unixpath(f))
187 187
188 188 def forget(self, f):
189 189 return super(largefilesdirstate, self).forget(unixpath(f))
190 190
191 191 def normallookup(self, f):
192 192 return super(largefilesdirstate, self).normallookup(unixpath(f))
193 193
194 194 def _ignore(self, f):
195 195 return False
196 196
197 197 def write(self, tr):
198 198 # (1) disable PENDING mode always
199 199 # (lfdirstate isn't yet managed as a part of the transaction)
200 200 # (2) avoid develwarn 'use dirstate.write with ....'
201 201 if tr:
202 202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
203 203 super(largefilesdirstate, self).write(None)
204 204
205 205
206 206 def openlfdirstate(ui, repo, create=True):
207 207 """
208 208 Return a dirstate object that tracks largefiles: i.e. its root is
209 209 the repo root, but it is saved in .hg/largefiles/dirstate.
210 210
211 211 If a dirstate object already exists and is being used for a 'changing_*'
212 212 context, it will be returned.
213 213 """
214 214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
215 215 if sub_dirstate is not None:
216 216 return sub_dirstate
217 217 vfs = repo.vfs
218 218 lfstoredir = longname
219 219 opener = vfsmod.vfs(vfs.join(lfstoredir))
220 220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
221 221 lfdirstate = largefilesdirstate(
222 222 opener,
223 223 ui,
224 224 repo.root,
225 225 repo.dirstate._validate,
226 226 lambda: sparse.matcher(repo),
227 227 repo.nodeconstants,
228 228 use_dirstate_v2,
229 229 )
230 230
231 231 # If the largefiles dirstate does not exist, populate and create
232 232 # it. This ensures that we create it on the first meaningful
233 233 # largefiles operation in a new clone.
234 234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
235 235 try:
236 236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
237 237 matcher = getstandinmatcher(repo)
238 238 standins = repo.dirstate.walk(
239 239 matcher, subrepos=[], unknown=False, ignored=False
240 240 )
241 241
242 242 if len(standins) > 0:
243 243 vfs.makedirs(lfstoredir)
244 244
245 245 for standin in standins:
246 246 lfile = splitstandin(standin)
247 247 lfdirstate.hacky_extension_update_file(
248 248 lfile,
249 249 p1_tracked=True,
250 250 wc_tracked=True,
251 251 possibly_dirty=True,
252 252 )
253 253 except error.LockError:
254 254 # Assume that whatever was holding the lock was important.
255 255 # If we were doing something important, we would already have
256 256 # either the lock or a largefile dirstate.
257 257 pass
258 258 return lfdirstate
259 259
260 260
261 261 def lfdirstatestatus(lfdirstate, repo):
262 262 pctx = repo[b'.']
263 263 match = matchmod.always()
264 264 unsure, s, mtime_boundary = lfdirstate.status(
265 265 match, subrepos=[], ignored=False, clean=False, unknown=False
266 266 )
267 267 modified, clean = s.modified, s.clean
268 268 wctx = repo[None]
269 269 for lfile in unsure:
270 270 try:
271 271 fctx = pctx[standin(lfile)]
272 272 except LookupError:
273 273 fctx = None
274 274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
275 275 modified.append(lfile)
276 276 else:
277 277 clean.append(lfile)
278 278 st = wctx[lfile].lstat()
279 279 mode = st.st_mode
280 280 size = st.st_size
281 281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
282 282 if mtime is not None:
283 283 cache_data = (mode, size, mtime)
284 284 lfdirstate.set_clean(lfile, cache_data)
285 285 return s
286 286
287 287
288 288 def listlfiles(repo, rev=None, matcher=None):
289 289 """return a list of largefiles in the working copy or the
290 290 specified changeset"""
291 291
292 292 if matcher is None:
293 293 matcher = getstandinmatcher(repo)
294 294
295 295 # ignore unknown files in working directory
296 296 return [
297 297 splitstandin(f)
298 298 for f in repo[rev].walk(matcher)
299 299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
300 300 ]
301 301
302 302
303 303 def instore(repo, hash, forcelocal=False):
304 304 '''Return true if a largefile with the given hash exists in the store'''
305 305 return os.path.exists(storepath(repo, hash, forcelocal))
306 306
307 307
308 308 def storepath(repo, hash, forcelocal=False):
309 309 """Return the correct location in the repository largefiles store for a
310 310 file with the given hash."""
311 311 if not forcelocal and repo.shared():
312 312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
313 313 return repo.vfs.join(longname, hash)
314 314
315 315
316 316 def findstorepath(repo, hash):
317 317 """Search through the local store path(s) to find the file for the given
318 318 hash. If the file is not found, its path in the primary store is returned.
319 319 The return value is a tuple of (path, exists(path)).
320 320 """
321 321 # For shared repos, the primary store is in the share source. But for
322 322 # backward compatibility, force a lookup in the local store if it wasn't
323 323 # found in the share source.
324 324 path = storepath(repo, hash, False)
325 325
326 326 if instore(repo, hash):
327 327 return (path, True)
328 328 elif repo.shared() and instore(repo, hash, True):
329 329 return storepath(repo, hash, True), True
330 330
331 331 return (path, False)
332 332
333 333
334 334 def copyfromcache(repo, hash, filename):
335 335 """Copy the specified largefile from the repo or system cache to
336 336 filename in the repository. Return true on success or false if the
337 337 file was not found in either cache (which should not happen:
338 338 this is meant to be called only after ensuring that the needed
339 339 largefile exists in the cache)."""
340 340 wvfs = repo.wvfs
341 341 path = findfile(repo, hash)
342 342 if path is None:
343 343 return False
344 344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
345 345 # The write may fail before the file is fully written, but we
346 346 # don't use atomic writes in the working copy.
347 347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
348 348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
349 349 if gothash != hash:
350 350 repo.ui.warn(
351 351 _(b'%s: data corruption in %s with hash %s\n')
352 352 % (filename, path, gothash)
353 353 )
354 354 wvfs.unlink(filename)
355 355 return False
356 356 return True
357 357
358 358
359 359 def copytostore(repo, ctx, file, fstandin):
360 360 wvfs = repo.wvfs
361 361 hash = readasstandin(ctx[fstandin])
362 362 if instore(repo, hash):
363 363 return
364 364 if wvfs.exists(file):
365 365 copytostoreabsolute(repo, wvfs.join(file), hash)
366 366 else:
367 367 repo.ui.warn(
368 368 _(b"%s: largefile %s not available from local store\n")
369 369 % (file, hash)
370 370 )
371 371
372 372
373 373 def copyalltostore(repo, node):
374 374 '''Copy all largefiles in a given revision to the store'''
375 375
376 376 ctx = repo[node]
377 377 for filename in ctx.files():
378 378 realfile = splitstandin(filename)
379 379 if realfile is not None and filename in ctx.manifest():
380 380 copytostore(repo, ctx, realfile, filename)
381 381
382 382
383 383 def copytostoreabsolute(repo, file, hash):
384 384 if inusercache(repo.ui, hash):
385 385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
386 386 else:
387 387 util.makedirs(os.path.dirname(storepath(repo, hash)))
388 388 with open(file, b'rb') as srcf:
389 389 with util.atomictempfile(
390 390 storepath(repo, hash), createmode=repo.store.createmode
391 391 ) as dstf:
392 392 for chunk in util.filechunkiter(srcf):
393 393 dstf.write(chunk)
394 394 linktousercache(repo, hash)
395 395
396 396
397 397 def linktousercache(repo, hash):
398 398 """Link / copy the largefile with the specified hash from the store
399 399 to the cache."""
400 400 path = usercachepath(repo.ui, hash)
401 401 link(storepath(repo, hash), path)
402 402
403 403
404 404 def getstandinmatcher(repo, rmatcher=None):
405 405 '''Return a match object that applies rmatcher to the standin directory'''
406 406 wvfs = repo.wvfs
407 407 standindir = shortname
408 408
409 409 # no warnings about missing files or directories
410 410 badfn = lambda f, msg: None
411 411
412 412 if rmatcher and not rmatcher.always():
413 413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
414 414 if not pats:
415 415 pats = [wvfs.join(standindir)]
416 416 match = scmutil.match(repo[None], pats, badfn=badfn)
417 417 else:
418 418 # no patterns: relative to repo root
419 419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
420 420 return match
421 421
422 422
423 423 def composestandinmatcher(repo, rmatcher):
424 424 """Return a matcher that accepts standins corresponding to the
425 425 files accepted by rmatcher. Pass the list of files in the matcher
426 426 as the paths specified by the user."""
427 427 smatcher = getstandinmatcher(repo, rmatcher)
428 428 isstandin = smatcher.matchfn
429 429
430 430 def composedmatchfn(f):
431 431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
432 432
433 433 smatcher.matchfn = composedmatchfn
434 434
435 435 return smatcher
436 436
437 437
438 438 def standin(filename):
439 439 """Return the repo-relative path to the standin for the specified big
440 440 file."""
441 441 # Notes:
442 442 # 1) Some callers want an absolute path, but for instance addlargefiles
443 443 # needs it repo-relative so it can be passed to repo[None].add(). So
444 444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
445 445 # 2) Join with '/' because that's what dirstate always uses, even on
446 446 # Windows. Change existing separator to '/' first in case we are
447 447 # passed filenames from an external source (like the command line).
448 448 return shortnameslash + util.pconvert(filename)
449 449
450 450
451 451 def isstandin(filename):
452 452 """Return true if filename is a big file standin. filename must be
453 453 in Mercurial's internal form (slash-separated)."""
454 454 return filename.startswith(shortnameslash)
455 455
456 456
457 457 def splitstandin(filename):
458 458 # Split on / because that's what dirstate always uses, even on Windows.
459 459 # Change local separator to / first just in case we are passed filenames
460 460 # from an external source (like the command line).
461 461 bits = util.pconvert(filename).split(b'/', 1)
462 462 if len(bits) == 2 and bits[0] == shortname:
463 463 return bits[1]
464 464 else:
465 465 return None
466 466
467 467
468 468 def updatestandin(repo, lfile, standin):
469 469 """Re-calculate hash value of lfile and write it into standin
470 470
471 471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
472 472 """
473 473 file = repo.wjoin(lfile)
474 474 if repo.wvfs.exists(lfile):
475 475 hash = hashfile(file)
476 476 executable = getexecutable(file)
477 477 writestandin(repo, standin, hash, executable)
478 478 else:
479 479 raise error.Abort(_(b'%s: file not found!') % lfile)
480 480
481 481
482 482 def readasstandin(fctx):
483 483 """read hex hash from given filectx of standin file
484 484
485 485 This encapsulates how "standin" data is stored into storage layer."""
486 486 return fctx.data().strip()
487 487
488 488
489 489 def writestandin(repo, standin, hash, executable):
490 490 '''write hash to <repo.root>/<standin>'''
491 491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
492 492
493 493
494 494 def copyandhash(instream, outfile):
495 495 """Read bytes from instream (iterable) and write them to outfile,
496 496 computing the SHA-1 hash of the data along the way. Return the hash."""
497 497 hasher = hashutil.sha1(b'')
498 498 for data in instream:
499 499 hasher.update(data)
500 500 outfile.write(data)
501 501 return hex(hasher.digest())
502 502
503 503
504 504 def hashfile(file):
505 505 if not os.path.exists(file):
506 506 return b''
507 507 with open(file, b'rb') as fd:
508 508 return hexsha1(fd)
509 509
510 510
511 511 def getexecutable(filename):
512 512 mode = os.stat(filename).st_mode
513 513 return (
514 514 (mode & stat.S_IXUSR)
515 515 and (mode & stat.S_IXGRP)
516 516 and (mode & stat.S_IXOTH)
517 517 )
518 518
519 519
520 520 def urljoin(first, second, *arg):
521 521 def join(left, right):
522 522 if not left.endswith(b'/'):
523 523 left += b'/'
524 524 if right.startswith(b'/'):
525 525 right = right[1:]
526 526 return left + right
527 527
528 528 url = join(first, second)
529 529 for a in arg:
530 530 url = join(url, a)
531 531 return url
532 532
533 533
534 534 def hexsha1(fileobj):
535 535 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
536 536 object data"""
537 537 h = hashutil.sha1()
538 538 for chunk in util.filechunkiter(fileobj):
539 539 h.update(chunk)
540 540 return hex(h.digest())
541 541
542 542
543 543 def httpsendfile(ui, filename):
544 544 return httpconnection.httpsendfile(ui, filename, b'rb')
545 545
546 546
547 547 def unixpath(path):
548 548 '''Return a version of path normalized for use with the lfdirstate.'''
549 549 return util.pconvert(os.path.normpath(path))
550 550
551 551
552 552 def islfilesrepo(repo):
553 553 '''Return true if the repo is a largefile repo.'''
554 554 if b'largefiles' in repo.requirements and any(
555 shortnameslash in f[1] for f in repo.store.datafiles()
555 shortnameslash in entry.unencoded_path
556 for entry in repo.store.datafiles()
556 557 ):
557 558 return True
558 559
559 560 return any(openlfdirstate(repo.ui, repo, False))
560 561
561 562
562 563 class storeprotonotcapable(Exception):
563 564 def __init__(self, storetypes):
564 565 self.storetypes = storetypes
565 566
566 567
567 568 def getstandinsstate(repo):
568 569 standins = []
569 570 matcher = getstandinmatcher(repo)
570 571 wctx = repo[None]
571 572 for standin in repo.dirstate.walk(
572 573 matcher, subrepos=[], unknown=False, ignored=False
573 574 ):
574 575 lfile = splitstandin(standin)
575 576 try:
576 577 hash = readasstandin(wctx[standin])
577 578 except IOError:
578 579 hash = None
579 580 standins.append((lfile, hash))
580 581 return standins
581 582
582 583
583 584 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
584 585 lfstandin = standin(lfile)
585 586 if lfstandin not in repo.dirstate:
586 587 lfdirstate.hacky_extension_update_file(
587 588 lfile,
588 589 p1_tracked=False,
589 590 wc_tracked=False,
590 591 )
591 592 else:
592 593 entry = repo.dirstate.get_entry(lfstandin)
593 594 lfdirstate.hacky_extension_update_file(
594 595 lfile,
595 596 wc_tracked=entry.tracked,
596 597 p1_tracked=entry.p1_tracked,
597 598 p2_info=entry.p2_info,
598 599 possibly_dirty=True,
599 600 )
600 601
601 602
602 603 def markcommitted(orig, ctx, node):
603 604 repo = ctx.repo()
604 605
605 606 with repo.dirstate.changing_parents(repo):
606 607 orig(node)
607 608
608 609 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
609 610 # because files coming from the 2nd parent are omitted in the latter.
610 611 #
611 612 # The former should be used to get targets of "synclfdirstate",
612 613 # because such files:
613 614 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
614 615 # - have to be marked as "n" after commit, but
615 616 # - aren't listed in "repo[node].files()"
616 617
617 618 lfdirstate = openlfdirstate(repo.ui, repo)
618 619 for f in ctx.files():
619 620 lfile = splitstandin(f)
620 621 if lfile is not None:
621 622 synclfdirstate(repo, lfdirstate, lfile, False)
622 623
623 624 # As part of committing, copy all of the largefiles into the cache.
624 625 #
625 626 # Using "node" instead of "ctx" implies additional "repo[node]"
626 627 # lookup while copyalltostore(), but can omit redundant check for
627 628 # files coming from the 2nd parent, which should exist in store
628 629 # at merging.
629 630 copyalltostore(repo, node)
630 631
631 632
632 633 def getlfilestoupdate(oldstandins, newstandins):
633 634 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
634 635 filelist = []
635 636 for f in changedstandins:
636 637 if f[0] not in filelist:
637 638 filelist.append(f[0])
638 639 return filelist
639 640
640 641
641 642 def getlfilestoupload(repo, missing, addfunc):
642 643 makeprogress = repo.ui.makeprogress
643 644 with makeprogress(
644 645 _(b'finding outgoing largefiles'),
645 646 unit=_(b'revisions'),
646 647 total=len(missing),
647 648 ) as progress:
648 649 for i, n in enumerate(missing):
649 650 progress.update(i)
650 651 parents = [p for p in repo[n].parents() if p != repo.nullid]
651 652
652 653 with lfstatus(repo, value=False):
653 654 ctx = repo[n]
654 655
655 656 files = set(ctx.files())
656 657 if len(parents) == 2:
657 658 mc = ctx.manifest()
658 659 mp1 = ctx.p1().manifest()
659 660 mp2 = ctx.p2().manifest()
660 661 for f in mp1:
661 662 if f not in mc:
662 663 files.add(f)
663 664 for f in mp2:
664 665 if f not in mc:
665 666 files.add(f)
666 667 for f in mc:
667 668 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
668 669 files.add(f)
669 670 for fn in files:
670 671 if isstandin(fn) and fn in ctx:
671 672 addfunc(fn, readasstandin(ctx[fn]))
672 673
673 674
674 675 def updatestandinsbymatch(repo, match):
675 676 """Update standins in the working directory according to specified match
676 677
677 678 This returns (possibly modified) ``match`` object to be used for
678 679 subsequent commit process.
679 680 """
680 681
681 682 ui = repo.ui
682 683
683 684 # Case 1: user calls commit with no specific files or
684 685 # include/exclude patterns: refresh and commit all files that
685 686 # are "dirty".
686 687 if match is None or match.always():
687 688 # Spend a bit of time here to get a list of files we know
688 689 # are modified so we can compare only against those.
689 690 # It can cost a lot of time (several seconds)
690 691 # otherwise to update all standins if the largefiles are
691 692 # large.
692 693 dirtymatch = matchmod.always()
693 694 with repo.dirstate.running_status(repo):
694 695 lfdirstate = openlfdirstate(ui, repo)
695 696 unsure, s, mtime_boundary = lfdirstate.status(
696 697 dirtymatch,
697 698 subrepos=[],
698 699 ignored=False,
699 700 clean=False,
700 701 unknown=False,
701 702 )
702 703 modifiedfiles = unsure + s.modified + s.added + s.removed
703 704 lfiles = listlfiles(repo)
704 705 # this only loops through largefiles that exist (not
705 706 # removed/renamed)
706 707 for lfile in lfiles:
707 708 if lfile in modifiedfiles:
708 709 fstandin = standin(lfile)
709 710 if repo.wvfs.exists(fstandin):
710 711 # this handles the case where a rebase is being
711 712 # performed and the working copy is not updated
712 713 # yet.
713 714 if repo.wvfs.exists(lfile):
714 715 updatestandin(repo, lfile, fstandin)
715 716
716 717 return match
717 718
718 719 lfiles = listlfiles(repo)
719 720 match._files = repo._subdirlfs(match.files(), lfiles)
720 721
721 722 # Case 2: user calls commit with specified patterns: refresh
722 723 # any matching big files.
723 724 smatcher = composestandinmatcher(repo, match)
724 725 standins = repo.dirstate.walk(
725 726 smatcher, subrepos=[], unknown=False, ignored=False
726 727 )
727 728
728 729 # No matching big files: get out of the way and pass control to
729 730 # the usual commit() method.
730 731 if not standins:
731 732 return match
732 733
733 734 # Refresh all matching big files. It's possible that the
734 735 # commit will end up failing, in which case the big files will
735 736 # stay refreshed. No harm done: the user modified them and
736 737 # asked to commit them, so sooner or later we're going to
737 738 # refresh the standins. Might as well leave them refreshed.
738 739 lfdirstate = openlfdirstate(ui, repo)
739 740 for fstandin in standins:
740 741 lfile = splitstandin(fstandin)
741 742 if lfdirstate.get_entry(lfile).tracked:
742 743 updatestandin(repo, lfile, fstandin)
743 744
744 745 # Cook up a new matcher that only matches regular files or
745 746 # standins corresponding to the big files requested by the
746 747 # user. Have to modify _files to prevent commit() from
747 748 # complaining "not tracked" for big files.
748 749 match = copy.copy(match)
749 750 origmatchfn = match.matchfn
750 751
751 752 # Check both the list of largefiles and the list of
752 753 # standins because if a largefile was removed, it
753 754 # won't be in the list of largefiles at this point
754 755 match._files += sorted(standins)
755 756
756 757 actualfiles = []
757 758 for f in match._files:
758 759 fstandin = standin(f)
759 760
760 761 # For largefiles, only one of the normal and standin should be
761 762 # committed (except if one of them is a remove). In the case of a
762 763 # standin removal, drop the normal file if it is unknown to dirstate.
763 764 # Thus, skip plain largefile names but keep the standin.
764 765 if f in lfiles or fstandin in standins:
765 766 if not repo.dirstate.get_entry(fstandin).removed:
766 767 if not repo.dirstate.get_entry(f).removed:
767 768 continue
768 769 elif not repo.dirstate.get_entry(f).any_tracked:
769 770 continue
770 771
771 772 actualfiles.append(f)
772 773 match._files = actualfiles
773 774
774 775 def matchfn(f):
775 776 if origmatchfn(f):
776 777 return f not in lfiles
777 778 else:
778 779 return f in standins
779 780
780 781 match.matchfn = matchfn
781 782
782 783 return match
783 784
784 785
785 786 class automatedcommithook:
786 787 """Stateful hook to update standins at the 1st commit of resuming
787 788
788 789 For efficiency, updating standins in the working directory should
789 790 be avoided while automated committing (like rebase, transplant and
790 791 so on), because they should be updated before committing.
791 792
792 793 But the 1st commit of resuming automated committing (e.g. ``rebase
793 794 --continue``) should update them, because largefiles may be
794 795 modified manually.
795 796 """
796 797
797 798 def __init__(self, resuming):
798 799 self.resuming = resuming
799 800
800 801 def __call__(self, repo, match):
801 802 if self.resuming:
802 803 self.resuming = False # avoids updating at subsequent commits
803 804 return updatestandinsbymatch(repo, match)
804 805 else:
805 806 return match
806 807
807 808
808 809 def getstatuswriter(ui, repo, forcibly=None):
809 810 """Return the function to write largefiles specific status out
810 811
811 812 If ``forcibly`` is ``None``, this returns the last element of
812 813 ``repo._lfstatuswriters`` as "default" writer function.
813 814
814 815 Otherwise, this returns the function to always write out (or
815 816 ignore if ``not forcibly``) status.
816 817 """
817 818 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
818 819 return repo._lfstatuswriters[-1]
819 820 else:
820 821 if forcibly:
821 822 return ui.status # forcibly WRITE OUT
822 823 else:
823 824 return lambda *msg, **opts: None # forcibly IGNORE
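
For callers such as islfilesrepo() in the hunk above (and checkrequireslfiles() in the next file), the migration simply swaps tuple indexing for attribute access. A hedged before/after sketch of that pattern, written against a hypothetical store object with the new-style datafiles():

    # Hypothetical helper mirroring the islfilesrepo() change above: detect
    # whether any store file lives under the largefiles standin directory.
    def has_largefile_standins(store, shortnameslash=b'.hglf/'):
        # Before this changeset, datafiles() yielded tuples, so callers wrote:
        #     any(shortnameslash in f[1] for f in store.datafiles())
        # Afterwards each item is an entry object exposing unencoded_path:
        return any(
            shortnameslash in entry.unencoded_path
            for entry in store.datafiles()
        )
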
@@ -1,469 +1,470 b''
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''setup for largefiles repositories: reposetup'''
10 10
11 11 import copy
12 12
13 13 from mercurial.i18n import _
14 14
15 15 from mercurial import (
16 16 error,
17 17 extensions,
18 18 localrepo,
19 19 match as matchmod,
20 20 scmutil,
21 21 util,
22 22 )
23 23
24 24 from mercurial.dirstateutils import timestamp
25 25
26 26 from . import (
27 27 lfcommands,
28 28 lfutil,
29 29 )
30 30
31 31
32 32 def reposetup(ui, repo):
33 33 # wire repositories should be given new wireproto functions
34 34 # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
35 35 if not repo.local():
36 36 return
37 37
38 38 class lfilesrepo(repo.__class__):
39 39 # the mark to examine whether "repo" object enables largefiles or not
40 40 _largefilesenabled = True
41 41
42 42 lfstatus = False
43 43
44 44 # When lfstatus is set, return a context that gives the names
45 45 # of largefiles instead of their corresponding standins and
46 46 # identifies the largefiles as always binary, regardless of
47 47 # their actual contents.
48 48 def __getitem__(self, changeid):
49 49 ctx = super(lfilesrepo, self).__getitem__(changeid)
50 50 if self.lfstatus:
51 51
52 52 def files(orig):
53 53 filenames = orig()
54 54 return [lfutil.splitstandin(f) or f for f in filenames]
55 55
56 56 extensions.wrapfunction(ctx, 'files', files)
57 57
58 58 def manifest(orig):
59 59 man1 = orig()
60 60
61 61 class lfilesmanifest(man1.__class__):
62 62 def __contains__(self, filename):
63 63 orig = super(lfilesmanifest, self).__contains__
64 64 return orig(filename) or orig(
65 65 lfutil.standin(filename)
66 66 )
67 67
68 68 man1.__class__ = lfilesmanifest
69 69 return man1
70 70
71 71 extensions.wrapfunction(ctx, 'manifest', manifest)
72 72
73 73 def filectx(orig, path, fileid=None, filelog=None):
74 74 try:
75 75 if filelog is not None:
76 76 result = orig(path, fileid, filelog)
77 77 else:
78 78 result = orig(path, fileid)
79 79 except error.LookupError:
80 80 # Adding a null character will cause Mercurial to
81 81 # identify this as a binary file.
82 82 if filelog is not None:
83 83 result = orig(lfutil.standin(path), fileid, filelog)
84 84 else:
85 85 result = orig(lfutil.standin(path), fileid)
86 86 olddata = result.data
87 87 result.data = lambda: olddata() + b'\0'
88 88 return result
89 89
90 90 extensions.wrapfunction(ctx, 'filectx', filectx)
91 91
92 92 return ctx
93 93
94 94 # Figure out the status of big files and insert them into the
95 95 # appropriate list in the result. Also removes standin files
96 96 # from the listing. Revert to the original status if
97 97 # self.lfstatus is False.
98 98 # XXX large file status is buggy when used on repo proxy.
99 99 # XXX this needs to be investigated.
100 100 @localrepo.unfilteredmethod
101 101 def status(
102 102 self,
103 103 node1=b'.',
104 104 node2=None,
105 105 match=None,
106 106 ignored=False,
107 107 clean=False,
108 108 unknown=False,
109 109 listsubrepos=False,
110 110 ):
111 111 listignored, listclean, listunknown = ignored, clean, unknown
112 112 orig = super(lfilesrepo, self).status
113 113 if not self.lfstatus:
114 114 return orig(
115 115 node1,
116 116 node2,
117 117 match,
118 118 listignored,
119 119 listclean,
120 120 listunknown,
121 121 listsubrepos,
122 122 )
123 123
124 124 # some calls in this function rely on the old version of status
125 125 self.lfstatus = False
126 126 ctx1 = self[node1]
127 127 ctx2 = self[node2]
128 128 working = ctx2.rev() is None
129 129 parentworking = working and ctx1 == self[b'.']
130 130
131 131 if match is None:
132 132 match = matchmod.always()
133 133
134 134 try:
135 135 # updating the dirstate is optional
136 136 # so we don't wait on the lock
137 137 wlock = self.wlock(False)
138 138 gotlock = True
139 139 except error.LockError:
140 140 wlock = util.nullcontextmanager()
141 141 gotlock = False
142 142 with wlock, self.dirstate.running_status(self):
143 143
144 144 # First check if paths or patterns were specified on the
145 145 # command line. If there were, and they don't match any
146 146 # largefiles, we should just bail here and let super
147 147 # handle it -- thus gaining a big performance boost.
148 148 lfdirstate = lfutil.openlfdirstate(ui, self)
149 149 if not match.always():
150 150 for f in lfdirstate:
151 151 if match(f):
152 152 break
153 153 else:
154 154 return orig(
155 155 node1,
156 156 node2,
157 157 match,
158 158 listignored,
159 159 listclean,
160 160 listunknown,
161 161 listsubrepos,
162 162 )
163 163
164 164 # Create a copy of match that matches standins instead
165 165 # of largefiles.
166 166 def tostandins(files):
167 167 if not working:
168 168 return files
169 169 newfiles = []
170 170 dirstate = self.dirstate
171 171 for f in files:
172 172 sf = lfutil.standin(f)
173 173 if sf in dirstate:
174 174 newfiles.append(sf)
175 175 elif dirstate.hasdir(sf):
176 176 # Directory entries could be regular or
177 177 # standin, check both
178 178 newfiles.extend((f, sf))
179 179 else:
180 180 newfiles.append(f)
181 181 return newfiles
182 182
183 183 m = copy.copy(match)
184 184 m._files = tostandins(m._files)
185 185
186 186 result = orig(
187 187 node1, node2, m, ignored, clean, unknown, listsubrepos
188 188 )
189 189 if working:
190 190
191 191 def sfindirstate(f):
192 192 sf = lfutil.standin(f)
193 193 dirstate = self.dirstate
194 194 return sf in dirstate or dirstate.hasdir(sf)
195 195
196 196 match._files = [f for f in match._files if sfindirstate(f)]
197 197 # Don't waste time getting the ignored and unknown
198 198 # files from lfdirstate
199 199 unsure, s, mtime_boundary = lfdirstate.status(
200 200 match,
201 201 subrepos=[],
202 202 ignored=False,
203 203 clean=listclean,
204 204 unknown=False,
205 205 )
206 206 (modified, added, removed, deleted, clean) = (
207 207 s.modified,
208 208 s.added,
209 209 s.removed,
210 210 s.deleted,
211 211 s.clean,
212 212 )
213 213 if parentworking:
214 214 wctx = repo[None]
215 215 for lfile in unsure:
216 216 standin = lfutil.standin(lfile)
217 217 if standin not in ctx1:
218 218 # from second parent
219 219 modified.append(lfile)
220 220 elif lfutil.readasstandin(
221 221 ctx1[standin]
222 222 ) != lfutil.hashfile(self.wjoin(lfile)):
223 223 modified.append(lfile)
224 224 else:
225 225 if listclean:
226 226 clean.append(lfile)
227 227 s = wctx[lfile].lstat()
228 228 mode = s.st_mode
229 229 size = s.st_size
230 230 mtime = timestamp.reliable_mtime_of(
231 231 s, mtime_boundary
232 232 )
233 233 if mtime is not None:
234 234 cache_data = (mode, size, mtime)
235 235 lfdirstate.set_clean(lfile, cache_data)
236 236 else:
237 237 tocheck = unsure + modified + added + clean
238 238 modified, added, clean = [], [], []
239 239 checkexec = self.dirstate._checkexec
240 240
241 241 for lfile in tocheck:
242 242 standin = lfutil.standin(lfile)
243 243 if standin in ctx1:
244 244 abslfile = self.wjoin(lfile)
245 245 if (
246 246 lfutil.readasstandin(ctx1[standin])
247 247 != lfutil.hashfile(abslfile)
248 248 ) or (
249 249 checkexec
250 250 and (b'x' in ctx1.flags(standin))
251 251 != bool(lfutil.getexecutable(abslfile))
252 252 ):
253 253 modified.append(lfile)
254 254 elif listclean:
255 255 clean.append(lfile)
256 256 else:
257 257 added.append(lfile)
258 258
259 259 # at this point, 'removed' contains largefiles
260 260 # marked as 'R' in the working context.
261 261 # then, largefiles not managed also in the target
262 262 # context should be excluded from 'removed'.
263 263 removed = [
264 264 lfile
265 265 for lfile in removed
266 266 if lfutil.standin(lfile) in ctx1
267 267 ]
268 268
269 269 # Standins no longer found in lfdirstate have been deleted
270 270 for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
271 271 lfile = lfutil.splitstandin(standin)
272 272 if not match(lfile):
273 273 continue
274 274 if lfile not in lfdirstate:
275 275 deleted.append(lfile)
276 276 # Sync "largefile has been removed" back to the
277 277 # standin. Removing a file as a side effect of
278 278 # running status is gross, but the alternatives (if
279 279 # any) are worse.
280 280 self.wvfs.unlinkpath(standin, ignoremissing=True)
281 281
282 282 # Filter result lists
283 283 result = list(result)
284 284
285 285 # Largefiles are not really removed when they're
286 286 # still in the normal dirstate. Likewise, normal
287 287 # files are not really removed if they are still in
288 288 # lfdirstate. This happens in merges where files
289 289 # change type.
290 290 removed = [f for f in removed if f not in self.dirstate]
291 291 result[2] = [f for f in result[2] if f not in lfdirstate]
292 292
293 293 lfiles = set(lfdirstate)
294 294 # Unknown files
295 295 result[4] = set(result[4]).difference(lfiles)
296 296 # Ignored files
297 297 result[5] = set(result[5]).difference(lfiles)
298 298 # combine normal files and largefiles
299 299 normals = [
300 300 [fn for fn in filelist if not lfutil.isstandin(fn)]
301 301 for filelist in result
302 302 ]
303 303 lfstatus = (
304 304 modified,
305 305 added,
306 306 removed,
307 307 deleted,
308 308 [],
309 309 [],
310 310 clean,
311 311 )
312 312 result = [
313 313 sorted(list1 + list2)
314 314 for (list1, list2) in zip(normals, lfstatus)
315 315 ]
316 316 else: # not against working directory
317 317 result = [
318 318 [lfutil.splitstandin(f) or f for f in items]
319 319 for items in result
320 320 ]
321 321
322 322 if gotlock:
323 323 lfdirstate.write(self.currenttransaction())
324 324 else:
325 325 lfdirstate.invalidate()
326 326
327 327 self.lfstatus = True
328 328 return scmutil.status(*result)
329 329
330 330 def commitctx(self, ctx, *args, **kwargs):
331 331 node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
332 332
333 333 class lfilesctx(ctx.__class__):
334 334 def markcommitted(self, node):
335 335 orig = super(lfilesctx, self).markcommitted
336 336 return lfutil.markcommitted(orig, self, node)
337 337
338 338 ctx.__class__ = lfilesctx
339 339 return node
340 340
341 341 # Before commit, largefile standins have not had their
342 342 # contents updated to reflect the hash of their largefile.
343 343 # Do that here.
344 344 def commit(
345 345 self,
346 346 text=b"",
347 347 user=None,
348 348 date=None,
349 349 match=None,
350 350 force=False,
351 351 editor=False,
352 352 extra=None,
353 353 ):
354 354 if extra is None:
355 355 extra = {}
356 356 orig = super(lfilesrepo, self).commit
357 357
358 358 with self.wlock():
359 359 lfcommithook = self._lfcommithooks[-1]
360 360 match = lfcommithook(self, match)
361 361 result = orig(
362 362 text=text,
363 363 user=user,
364 364 date=date,
365 365 match=match,
366 366 force=force,
367 367 editor=editor,
368 368 extra=extra,
369 369 )
370 370 return result
371 371
372 372 # TODO: _subdirlfs should be moved into "lfutil.py", because
373 373 # it is referred only from "lfutil.updatestandinsbymatch"
374 374 def _subdirlfs(self, files, lfiles):
375 375 """
376 376 Adjust matched file list
377 377 If we pass a directory to commit whose only committable files
378 378 are largefiles, the core commit code aborts before finding
379 379 the largefiles.
380 380 So we do the following:
381 381 For directories that only have largefiles as matches,
382 382 we explicitly add the largefiles to the match list and remove
383 383 the directory.
384 384 In other cases, we leave the match list unmodified.
385 385 """
386 386 actualfiles = []
387 387 dirs = []
388 388 regulars = []
389 389
390 390 for f in files:
391 391 if lfutil.isstandin(f + b'/'):
392 392 raise error.Abort(
393 393 _(b'file "%s" is a largefile standin') % f,
394 394 hint=b'commit the largefile itself instead',
395 395 )
396 396 # Scan directories
397 397 if self.wvfs.isdir(f):
398 398 dirs.append(f)
399 399 else:
400 400 regulars.append(f)
401 401
402 402 for f in dirs:
403 403 matcheddir = False
404 404 d = self.dirstate.normalize(f) + b'/'
405 405 # Check for matched normal files
406 406 for mf in regulars:
407 407 if self.dirstate.normalize(mf).startswith(d):
408 408 actualfiles.append(f)
409 409 matcheddir = True
410 410 break
411 411 if not matcheddir:
412 412 # If no normal match, manually append
413 413 # any matching largefiles
414 414 for lf in lfiles:
415 415 if self.dirstate.normalize(lf).startswith(d):
416 416 actualfiles.append(lf)
417 417 if not matcheddir:
418 418 # There may still be normal files in the dir, so
419 419 # add a directory to the list, which
420 420 # forces status/dirstate to walk all files and
421 421 # call the match function on the matcher, even
422 422 # on case sensitive filesystems.
423 423 actualfiles.append(b'.')
424 424 matcheddir = True
425 425 # Nothing in dir, so readd it
426 426 # and let commit reject it
427 427 if not matcheddir:
428 428 actualfiles.append(f)
429 429
430 430 # Always add normal files
431 431 actualfiles += regulars
432 432 return actualfiles
433 433
434 434 repo.__class__ = lfilesrepo
435 435
436 436 # stack of hooks being executed before committing.
437 437 # only last element ("_lfcommithooks[-1]") is used for each committing.
438 438 repo._lfcommithooks = [lfutil.updatestandinsbymatch]
439 439
440 440 # Stack of status writer functions taking "*msg, **opts" arguments
441 441 # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
442 442 # is used to write status out.
443 443 repo._lfstatuswriters = [ui.status]
444 444
445 445 def prepushoutgoinghook(pushop):
446 446 """Push largefiles for pushop before pushing revisions."""
447 447 lfrevs = pushop.lfrevs
448 448 if lfrevs is None:
449 449 lfrevs = pushop.outgoing.missing
450 450 if lfrevs:
451 451 toupload = set()
452 452 addfunc = lambda fn, lfhash: toupload.add(lfhash)
453 453 lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
454 454 lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)
455 455
456 456 repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)
457 457
458 458 def checkrequireslfiles(ui, repo, **kwargs):
459 459 with repo.lock():
460 460 if b'largefiles' not in repo.requirements and any(
461 lfutil.shortname + b'/' in f[1] for f in repo.store.datafiles()
461 lfutil.shortname + b'/' in entry.unencoded_path
462 for entry in repo.store.datafiles()
462 463 ):
463 464 repo.requirements.add(b'largefiles')
464 465 scmutil.writereporequirements(repo)
465 466
466 467 ui.setconfig(
467 468 b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
468 469 )
469 470 ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')
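
The narrowcommands.py hunk below shows the same migration where a three-way unpack disappears entirely: only the path is consumed, so the unused type and size values are no longer destructured. A simplified, illustrative sketch of that loop, limited to the data/ branch (the real code also walks meta/ entries):

    # Simplified sketch of the _narrow() deletion loop in the hunk below.
    def revlogs_to_delete(store, newmatch):
        todelete = []
        for entry in store.datafiles():  # previously: for t, f, size in ...
            f = entry.unencoded_path
            if f.startswith(b'data/'):
                filename = f[5:-2]  # drop the data/ prefix and revlog suffix
                if not newmatch(filename):
                    todelete.append(f)
        return todelete
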
@@ -1,695 +1,696 b''
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import itertools
9 9 import os
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial.node import (
13 13 hex,
14 14 short,
15 15 )
16 16 from mercurial import (
17 17 bundle2,
18 18 cmdutil,
19 19 commands,
20 20 discovery,
21 21 encoding,
22 22 error,
23 23 exchange,
24 24 extensions,
25 25 hg,
26 26 narrowspec,
27 27 pathutil,
28 28 pycompat,
29 29 registrar,
30 30 repair,
31 31 repoview,
32 32 requirements,
33 33 sparse,
34 34 util,
35 35 wireprototypes,
36 36 )
37 37 from mercurial.utils import (
38 38 urlutil,
39 39 )
40 40
41 41 table = {}
42 42 command = registrar.command(table)
43 43
44 44
45 45 def setup():
46 46 """Wraps user-facing mercurial commands with narrow-aware versions."""
47 47
48 48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 49 entry[1].append(
50 50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 51 )
52 52 entry[1].append(
53 53 (
54 54 b'',
55 55 b'depth',
56 56 b'',
57 57 _(b"limit the history fetched by distance from heads"),
58 58 )
59 59 )
60 60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 62 if b'sparse' not in extensions.enabled():
63 63 entry[1].append(
64 64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 65 )
66 66 entry[1].append(
67 67 (
68 68 b'',
69 69 b'exclude',
70 70 [],
71 71 _(b"do not fetch this file/directory, even if included"),
72 72 )
73 73 )
74 74
75 75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 76 entry[1].append(
77 77 (
78 78 b'',
79 79 b'depth',
80 80 b'',
81 81 _(b"limit the history fetched by distance from heads"),
82 82 )
83 83 )
84 84
85 85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86 86
87 87
88 88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 90 opts = pycompat.byteskwargs(opts)
91 91 wrappedextraprepare = util.nullcontextmanager()
92 92 narrowspecfile = opts[b'narrowspec']
93 93
94 94 if narrowspecfile:
95 95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 97 try:
98 98 fdata = util.readfile(filepath)
99 99 except IOError as inst:
100 100 raise error.Abort(
101 101 _(b"cannot read narrowspecs from '%s': %s")
102 102 % (filepath, encoding.strtolocal(inst.strerror))
103 103 )
104 104
105 105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 106 if profiles:
107 107 raise error.ConfigError(
108 108 _(
109 109 b"cannot specify other files using '%include' in"
110 110 b" narrowspec"
111 111 )
112 112 )
113 113
114 114 narrowspec.validatepatterns(includes)
115 115 narrowspec.validatepatterns(excludes)
116 116
117 117 # narrowspec is passed so we should assume that user wants narrow clone
118 118 opts[b'narrow'] = True
119 119 opts[b'include'].extend(includes)
120 120 opts[b'exclude'].extend(excludes)
121 121
122 122 if opts[b'narrow']:
123 123
124 124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 125 orig(pullop, kwargs)
126 126
127 127 if opts.get(b'depth'):
128 128 kwargs[b'depth'] = opts[b'depth']
129 129
130 130 wrappedextraprepare = extensions.wrappedfunction(
131 131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 132 )
133 133
134 134 with wrappedextraprepare:
135 135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136 136
137 137
138 138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 139 """Wraps pull command to allow modifying narrow spec."""
140 140 wrappedextraprepare = util.nullcontextmanager()
141 141 if requirements.NARROW_REQUIREMENT in repo.requirements:
142 142
143 143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 144 orig(pullop, kwargs)
145 145 if opts.get('depth'):
146 146 kwargs[b'depth'] = opts['depth']
147 147
148 148 wrappedextraprepare = extensions.wrappedfunction(
149 149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 150 )
151 151
152 152 with wrappedextraprepare:
153 153 return orig(ui, repo, *args, **opts)
154 154
155 155
156 156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 157 """Wraps archive command to narrow the default includes."""
158 158 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 159 repo_includes, repo_excludes = repo.narrowpats
160 160 includes = set(opts.get('include', []))
161 161 excludes = set(opts.get('exclude', []))
162 162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 163 includes, excludes, repo_includes, repo_excludes
164 164 )
165 165 if includes:
166 166 opts['include'] = includes
167 167 if excludes:
168 168 opts['exclude'] = excludes
169 169 return orig(ui, repo, *args, **opts)
170 170
171 171
172 172 def pullbundle2extraprepare(orig, pullop, kwargs):
173 173 repo = pullop.repo
174 174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 175 return orig(pullop, kwargs)
176 176
177 177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 178 raise error.Abort(_(b"server does not support narrow clones"))
179 179 orig(pullop, kwargs)
180 180 kwargs[b'narrow'] = True
181 181 include, exclude = repo.narrowpats
182 182 kwargs[b'oldincludepats'] = include
183 183 kwargs[b'oldexcludepats'] = exclude
184 184 if include:
185 185 kwargs[b'includepats'] = include
186 186 if exclude:
187 187 kwargs[b'excludepats'] = exclude
188 188 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 189 # we have all the nodes
190 190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 191 kwargs[b'known'] = [
192 192 hex(ctx.node())
193 193 for ctx in repo.set(b'::%ln', pullop.common)
194 194 if ctx.node() != repo.nullid
195 195 ]
196 196 if not kwargs[b'known']:
197 197 # Mercurial serializes an empty list as '' and deserializes it as
198 198 # [''], so delete it instead to avoid handling the empty string on
199 199 # the server.
200 200 del kwargs[b'known']
201 201
202 202
203 203 extensions.wrapfunction(
204 204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 205 )
206 206
207 207
208 208 def _narrow(
209 209 ui,
210 210 repo,
211 211 remote,
212 212 commoninc,
213 213 oldincludes,
214 214 oldexcludes,
215 215 newincludes,
216 216 newexcludes,
217 217 force,
218 218 backup,
219 219 ):
220 220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222 222
223 223 # This is essentially doing "hg outgoing" to find all local-only
224 224 # commits. We will then check that the local-only commits don't
225 225 # have any changes to files that will be untracked.
226 226 unfi = repo.unfiltered()
227 227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 228 ui.status(_(b'looking for local changes to affected paths\n'))
229 229 progress = ui.makeprogress(
230 230 topic=_(b'changesets'),
231 231 unit=_(b'changesets'),
232 232 total=len(outgoing.missing) + len(outgoing.excluded),
233 233 )
234 234 localnodes = []
235 235 with progress:
236 236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 237 progress.increment()
238 238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 239 localnodes.append(n)
240 240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 241 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 242 visibletostrip = list(
243 243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 244 )
245 245 if visibletostrip:
246 246 ui.status(
247 247 _(
248 248 b'The following changeset(s) or their ancestors have '
249 249 b'local changes not on the remote:\n'
250 250 )
251 251 )
252 252 maxnodes = 10
253 253 if ui.verbose or len(visibletostrip) <= maxnodes:
254 254 for n in visibletostrip:
255 255 ui.status(b'%s\n' % short(n))
256 256 else:
257 257 for n in visibletostrip[:maxnodes]:
258 258 ui.status(b'%s\n' % short(n))
259 259 ui.status(
260 260 _(b'...and %d more, use --verbose to list all\n')
261 261 % (len(visibletostrip) - maxnodes)
262 262 )
263 263 if not force:
264 264 raise error.StateError(
265 265 _(b'local changes found'),
266 266 hint=_(b'use --force-delete-local-changes to ignore'),
267 267 )
268 268
269 269 with ui.uninterruptible():
270 270 if revstostrip:
271 271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 272 if repo[b'.'].node() in tostrip:
273 273 # stripping working copy, so move to a different commit first
274 274 urev = max(
275 275 repo.revs(
276 276 b'(::%n) - %ln + null',
277 277 repo[b'.'].node(),
278 278 visibletostrip,
279 279 )
280 280 )
281 281 hg.clean(repo, urev)
282 282 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 283 if backup:
284 284 ui.status(_(b'moving unwanted changesets to backup\n'))
285 285 else:
286 286 ui.status(_(b'deleting unwanted changesets\n'))
287 287 with ui.configoverride(overrides, b'narrow'):
288 288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289 289
290 290 todelete = []
291 for t, f, size in repo.store.datafiles():
291 for entry in repo.store.datafiles():
292 f = entry.unencoded_path
292 293 if f.startswith(b'data/'):
293 294 file = f[5:-2]
294 295 if not newmatch(file):
295 296 todelete.append(f)
296 297 elif f.startswith(b'meta/'):
297 298 dir = f[5:-13]
298 299 dirs = sorted(pathutil.dirs({dir})) + [dir]
299 300 include = True
300 301 for d in dirs:
301 302 visit = newmatch.visitdir(d)
302 303 if not visit:
303 304 include = False
304 305 break
305 306 if visit == b'all':
306 307 break
307 308 if not include:
308 309 todelete.append(f)
309 310
310 311 repo.destroying()
311 312
312 313 with repo.transaction(b'narrowing'):
313 314 # Update narrowspec before removing revlogs, so repo won't be
314 315 # corrupt in case of crash
315 316 repo.setnarrowpats(newincludes, newexcludes)
316 317
317 318 for f in todelete:
318 319 ui.status(_(b'deleting %s\n') % f)
319 320 util.unlinkpath(repo.svfs.join(f))
320 321 repo.store.markremoved(f)
321 322
322 323 ui.status(_(b'deleting unwanted files from working copy\n'))
323 324 with repo.dirstate.changing_parents(repo):
324 325 narrowspec.updateworkingcopy(repo, assumeclean=True)
325 326 narrowspec.copytoworkingcopy(repo)
326 327
327 328 repo.destroyed()
328 329
329 330
330 331 def _widen(
331 332 ui,
332 333 repo,
333 334 remote,
334 335 commoninc,
335 336 oldincludes,
336 337 oldexcludes,
337 338 newincludes,
338 339 newexcludes,
339 340 ):
340 341 # for now we assume that if a server has ellipses enabled, we will be
341 342 # exchanging ellipses nodes. In future we should add ellipses as a client
342 343 # side requirement (maybe) to distinguish a client is shallow or not and
343 344 # then send that information to server whether we want ellipses or not.
344 345 # Theoretically a non-ellipses repo should be able to use narrow
345 346 # functionality from an ellipses enabled server
346 347 remotecap = remote.capabilities()
347 348 ellipsesremote = any(
348 349 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
349 350 )
350 351
351 352 # check whether we are talking to a server which supports old version of
352 353 # ellipses capabilities
353 354 isoldellipses = (
354 355 ellipsesremote
355 356 and wireprototypes.ELLIPSESCAP1 in remotecap
356 357 and wireprototypes.ELLIPSESCAP not in remotecap
357 358 )
358 359
359 360 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
360 361 orig(pullop, kwargs)
361 362 # The old{in,ex}cludepats have already been set by orig()
362 363 kwargs[b'includepats'] = newincludes
363 364 kwargs[b'excludepats'] = newexcludes
364 365
365 366 wrappedextraprepare = extensions.wrappedfunction(
366 367 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
367 368 )
368 369
369 370 # define a function that narrowbundle2 can call after creating the
370 371 # backup bundle, but before applying the bundle from the server
371 372 def setnewnarrowpats():
372 373 repo.setnarrowpats(newincludes, newexcludes)
373 374
374 375 repo.setnewnarrowpats = setnewnarrowpats
375 376 # silence the devel-warning of applying an empty changegroup
376 377 overrides = {(b'devel', b'all-warnings'): False}
377 378
378 379 common = commoninc[0]
379 380 with ui.uninterruptible():
380 381 if ellipsesremote:
381 382 ds = repo.dirstate
382 383 p1, p2 = ds.p1(), ds.p2()
383 384 with ds.changing_parents(repo):
384 385 ds.setparents(repo.nullid, repo.nullid)
385 386 if isoldellipses:
386 387 with wrappedextraprepare:
387 388 exchange.pull(repo, remote, heads=common)
388 389 else:
389 390 known = []
390 391 if ellipsesremote:
391 392 known = [
392 393 ctx.node()
393 394 for ctx in repo.set(b'::%ln', common)
394 395 if ctx.node() != repo.nullid
395 396 ]
396 397 with remote.commandexecutor() as e:
397 398 bundle = e.callcommand(
398 399 b'narrow_widen',
399 400 {
400 401 b'oldincludes': oldincludes,
401 402 b'oldexcludes': oldexcludes,
402 403 b'newincludes': newincludes,
403 404 b'newexcludes': newexcludes,
404 405 b'cgversion': b'03',
405 406 b'commonheads': common,
406 407 b'known': known,
407 408 b'ellipses': ellipsesremote,
408 409 },
409 410 ).result()
410 411
411 412 trmanager = exchange.transactionmanager(
412 413 repo, b'widen', remote.url()
413 414 )
414 415 with trmanager, repo.ui.configoverride(overrides, b'widen'):
415 416 op = bundle2.bundleoperation(
416 417 repo, trmanager.transaction, source=b'widen'
417 418 )
418 419 # TODO: we should catch error.Abort here
419 420 bundle2.processbundle(repo, bundle, op=op, remote=remote)
420 421
421 422 if ellipsesremote:
422 423 with ds.changing_parents(repo):
423 424 ds.setparents(p1, p2)
424 425
425 426 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
426 427 repo
427 428 ):
428 429 repo.setnewnarrowpats()
429 430 narrowspec.updateworkingcopy(repo)
430 431 narrowspec.copytoworkingcopy(repo)
431 432
432 433
433 434 # TODO(rdamazio): Make new matcher format and update description
434 435 @command(
435 436 b'tracked',
436 437 [
437 438 (b'', b'addinclude', [], _(b'new paths to include')),
438 439 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
439 440 (
440 441 b'',
441 442 b'auto-remove-includes',
442 443 False,
443 444 _(b'automatically choose unused includes to remove'),
444 445 ),
445 446 (b'', b'addexclude', [], _(b'new paths to exclude')),
446 447 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
447 448 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
448 449 (
449 450 b'',
450 451 b'clear',
451 452 False,
452 453 _(b'whether to replace the existing narrowspec'),
453 454 ),
454 455 (
455 456 b'',
456 457 b'force-delete-local-changes',
457 458 False,
458 459 _(b'forces deletion of local changes when narrowing'),
459 460 ),
460 461 (
461 462 b'',
462 463 b'backup',
463 464 True,
464 465 _(b'back up local changes when narrowing'),
465 466 ),
466 467 (
467 468 b'',
468 469 b'update-working-copy',
469 470 False,
470 471 _(b'update working copy when the store has changed'),
471 472 ),
472 473 ]
473 474 + commands.remoteopts,
474 475 _(b'[OPTIONS]... [REMOTE]'),
475 476 inferrepo=True,
476 477 helpcategory=command.CATEGORY_MAINTENANCE,
477 478 )
478 479 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
479 480 """show or change the current narrowspec
480 481
481 482 With no argument, shows the current narrowspec entries, one per line. Each
482 483 line will be prefixed with 'I' or 'X' for included or excluded patterns,
483 484 respectively.
484 485
485 486 The narrowspec is comprised of expressions to match remote files and/or
486 487 directories that should be pulled into your client.
487 488 The narrowspec has *include* and *exclude* expressions, with excludes always
488 489 trumping includes: that is, if a file matches an exclude expression, it will
489 490 be excluded even if it also matches an include expression.
490 491 Excluding files that were never included has no effect.
491 492
492 493 Each included or excluded entry is in the format described by
493 494 'hg help patterns'.
494 495
495 496 The options allow you to add or remove included and excluded expressions.
496 497
497 498 If --clear is specified, then all previous includes and excludes are DROPPED
498 499 and replaced by the new ones specified to --addinclude and --addexclude.
499 500 If --clear is specified without any further options, the narrowspec will be
500 501 empty and will not match any files.
501 502
502 503 If --auto-remove-includes is specified, then those includes that don't match
503 504 any files modified by currently visible local commits (those not shared by
504 505 the remote) will be added to the set of explicitly specified includes to
505 506 remove.
506 507
507 508 --import-rules accepts a path to a file containing rules, allowing you to
508 509 add --addinclude, --addexclude rules in bulk. Like the other include and
509 510 exclude switches, the changes are applied immediately.
510 511 """
511 512 opts = pycompat.byteskwargs(opts)
512 513 if requirements.NARROW_REQUIREMENT not in repo.requirements:
513 514 raise error.InputError(
514 515 _(
515 516 b'the tracked command is only supported on '
516 517 b'repositories cloned with --narrow'
517 518 )
518 519 )
519 520
520 521 # Before supporting it, decide whether "hg tracked --clear" should mean
521 522 # tracking no paths or all paths.
522 523 if opts[b'clear']:
523 524 raise error.InputError(_(b'the --clear option is not yet supported'))
524 525
525 526 # import rules from a file
526 527 newrules = opts.get(b'import_rules')
527 528 if newrules:
528 529 try:
529 530 filepath = os.path.join(encoding.getcwd(), newrules)
530 531 fdata = util.readfile(filepath)
531 532 except IOError as inst:
532 533 raise error.StorageError(
533 534 _(b"cannot read narrowspecs from '%s': %s")
534 535 % (filepath, encoding.strtolocal(inst.strerror))
535 536 )
536 537 includepats, excludepats, profiles = sparse.parseconfig(
537 538 ui, fdata, b'narrow'
538 539 )
539 540 if profiles:
540 541 raise error.InputError(
541 542 _(
542 543 b"including other spec files using '%include' "
543 544 b"is not supported in narrowspec"
544 545 )
545 546 )
546 547 opts[b'addinclude'].extend(includepats)
547 548 opts[b'addexclude'].extend(excludepats)
548 549
549 550 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
550 551 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
551 552 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
552 553 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
553 554 autoremoveincludes = opts[b'auto_remove_includes']
554 555
555 556 update_working_copy = opts[b'update_working_copy']
556 557 only_show = not (
557 558 addedincludes
558 559 or removedincludes
559 560 or addedexcludes
560 561 or removedexcludes
561 562 or newrules
562 563 or autoremoveincludes
563 564 or update_working_copy
564 565 )
565 566
566 567 # Only print the current narrowspec.
567 568 if only_show:
568 569 oldincludes, oldexcludes = repo.narrowpats
569 570 ui.pager(b'tracked')
570 571 fm = ui.formatter(b'narrow', opts)
571 572 for i in sorted(oldincludes):
572 573 fm.startitem()
573 574 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
574 575 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
575 576 for i in sorted(oldexcludes):
576 577 fm.startitem()
577 578 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
578 579 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
579 580 fm.end()
580 581 return 0
581 582
582 583 with repo.wlock(), repo.lock():
583 584 oldincludes, oldexcludes = repo.narrowpats
584 585
585 586 # filter the user-passed additions and deletions into actual additions and
586 587 # deletions of excludes and includes
587 588 addedincludes -= oldincludes
588 589 removedincludes &= oldincludes
589 590 addedexcludes -= oldexcludes
590 591 removedexcludes &= oldexcludes
591 592
592 593 widening = addedincludes or removedexcludes
593 594 narrowing = removedincludes or addedexcludes
594 595
595 596 if update_working_copy:
596 597 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
597 598 repo
598 599 ):
599 600 narrowspec.updateworkingcopy(repo)
600 601 narrowspec.copytoworkingcopy(repo)
601 602 return 0
602 603
603 604 if not (widening or narrowing or autoremoveincludes):
604 605 ui.status(_(b"nothing to widen or narrow\n"))
605 606 return 0
606 607
607 608 cmdutil.bailifchanged(repo)
608 609
609 610 # Find the revisions we have in common with the remote. These will
610 611 # be used for finding local-only changes for narrowing. They will
611 612 # also define the set of revisions to update for widening.
612 613 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
613 614 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
614 615 remote = hg.peer(repo, opts, path)
615 616
616 617 try:
617 618 # check narrow support before doing anything if widening needs to be
618 619 # performed. In the future we should also abort if the client is ellipses-enabled
619 620 # and the server does not support ellipses
620 621 if (
621 622 widening
622 623 and wireprototypes.NARROWCAP not in remote.capabilities()
623 624 ):
624 625 raise error.Abort(_(b"server does not support narrow clones"))
625 626
626 627 commoninc = discovery.findcommonincoming(repo, remote)
627 628
628 629 if autoremoveincludes:
629 630 outgoing = discovery.findcommonoutgoing(
630 631 repo, remote, commoninc=commoninc
631 632 )
632 633 ui.status(_(b'looking for unused includes to remove\n'))
633 634 localfiles = set()
634 635 for n in itertools.chain(outgoing.missing, outgoing.excluded):
635 636 localfiles.update(repo[n].files())
636 637 suggestedremovals = []
637 638 for include in sorted(oldincludes):
638 639 match = narrowspec.match(repo.root, [include], oldexcludes)
639 640 if not any(match(f) for f in localfiles):
640 641 suggestedremovals.append(include)
641 642 if suggestedremovals:
642 643 for s in suggestedremovals:
643 644 ui.status(b'%s\n' % s)
644 645 if (
645 646 ui.promptchoice(
646 647 _(
647 648 b'remove these unused includes (yn)?'
648 649 b'$$ &Yes $$ &No'
649 650 )
650 651 )
651 652 == 0
652 653 ):
653 654 removedincludes.update(suggestedremovals)
654 655 narrowing = True
655 656 else:
656 657 ui.status(_(b'found no unused includes\n'))
657 658
658 659 if narrowing:
659 660 newincludes = oldincludes - removedincludes
660 661 newexcludes = oldexcludes | addedexcludes
661 662 _narrow(
662 663 ui,
663 664 repo,
664 665 remote,
665 666 commoninc,
666 667 oldincludes,
667 668 oldexcludes,
668 669 newincludes,
669 670 newexcludes,
670 671 opts[b'force_delete_local_changes'],
671 672 opts[b'backup'],
672 673 )
673 674 # _narrow() updated the narrowspec and _widen() below needs to
674 675 # use the updated values as its base (otherwise removed includes
675 676 # and addedexcludes will be lost in the resulting narrowspec)
676 677 oldincludes = newincludes
677 678 oldexcludes = newexcludes
678 679
679 680 if widening:
680 681 newincludes = oldincludes | addedincludes
681 682 newexcludes = oldexcludes - removedexcludes
682 683 _widen(
683 684 ui,
684 685 repo,
685 686 remote,
686 687 commoninc,
687 688 oldincludes,
688 689 oldexcludes,
689 690 newincludes,
690 691 newexcludes,
691 692 )
692 693 finally:
693 694 remote.close()
694 695
695 696 return 0
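The widening/narrowing decision in trackedcmd boils down to set arithmetic on the parsed patterns. Below is a minimal sketch of that logic; the variable names mirror the ones above, but the pattern sets themselves are made up for illustration.

# Hypothetical inputs, mirroring the set arithmetic in trackedcmd above.
oldincludes = {b'path:src', b'path:docs'}
oldexcludes = {b'path:src/generated'}

addedincludes = {b'path:src', b'path:tests'}   # from --addinclude
removedincludes = set()                        # from --removeinclude
addedexcludes = set()                          # from --addexclude
removedexcludes = {b'path:src/generated'}      # from --removeexclude

# keep only the changes that actually alter the current narrowspec
addedincludes -= oldincludes       # {b'path:tests'}
removedincludes &= oldincludes     # set()
addedexcludes -= oldexcludes       # set()
removedexcludes &= oldexcludes     # {b'path:src/generated'}

widening = bool(addedincludes or removedexcludes)    # True: more data must be pulled
narrowing = bool(removedincludes or addedexcludes)   # False: nothing is dropped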
@@ -1,437 +1,446 b''
1 1 # remotefilelogserver.py - server logic for a remotefilelog server
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import os
9 9 import stat
10 10 import time
11 11 import zlib
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.node import bin, hex
15 15 from mercurial.pycompat import open
16 16 from mercurial import (
17 17 changegroup,
18 18 changelog,
19 19 context,
20 20 error,
21 21 extensions,
22 22 match,
23 23 scmutil,
24 24 store,
25 25 streamclone,
26 26 util,
27 27 wireprotoserver,
28 28 wireprototypes,
29 29 wireprotov1server,
30 30 )
31 31 from . import (
32 32 constants,
33 33 shallowutil,
34 34 )
35 35
36 36 _sshv1server = wireprotoserver.sshv1protocolhandler
37 37
38 38
39 39 def setupserver(ui, repo):
40 40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
41 41 onetimesetup(ui)
42 42
43 43 # don't send files to shallow clients during pulls
44 44 def generatefiles(
45 45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
46 46 ):
47 47 caps = self._bundlecaps or []
48 48 if constants.BUNDLE2_CAPABLITY in caps:
49 49 # only send files that don't match the specified patterns
50 50 includepattern = None
51 51 excludepattern = None
52 52 for cap in self._bundlecaps or []:
53 53 if cap.startswith(b"includepattern="):
54 54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
55 55 elif cap.startswith(b"excludepattern="):
56 56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
57 57
58 58 m = match.always()
59 59 if includepattern or excludepattern:
60 60 m = match.match(
61 61 repo.root, b'', None, includepattern, excludepattern
62 62 )
63 63
64 64 changedfiles = list([f for f in changedfiles if not m(f)])
65 65 return orig(
66 66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
67 67 )
68 68
69 69 extensions.wrapfunction(
70 70 changegroup.cgpacker, b'generatefiles', generatefiles
71 71 )
72 72
73 73
74 74 onetime = False
75 75
76 76
77 77 def onetimesetup(ui):
78 78 """Configures the wireprotocol for both clients and servers."""
79 79 global onetime
80 80 if onetime:
81 81 return
82 82 onetime = True
83 83
84 84 # support file content requests
85 85 wireprotov1server.wireprotocommand(
86 86 b'x_rfl_getflogheads', b'path', permission=b'pull'
87 87 )(getflogheads)
88 88 wireprotov1server.wireprotocommand(
89 89 b'x_rfl_getfiles', b'', permission=b'pull'
90 90 )(getfiles)
91 91 wireprotov1server.wireprotocommand(
92 92 b'x_rfl_getfile', b'file node', permission=b'pull'
93 93 )(getfile)
94 94
95 95 class streamstate:
96 96 match = None
97 97 shallowremote = False
98 98 noflatmf = False
99 99
100 100 state = streamstate()
101 101
102 102 def stream_out_shallow(repo, proto, other):
103 103 includepattern = None
104 104 excludepattern = None
105 105 raw = other.get(b'includepattern')
106 106 if raw:
107 107 includepattern = raw.split(b'\0')
108 108 raw = other.get(b'excludepattern')
109 109 if raw:
110 110 excludepattern = raw.split(b'\0')
111 111
112 112 oldshallow = state.shallowremote
113 113 oldmatch = state.match
114 114 oldnoflatmf = state.noflatmf
115 115 try:
116 116 state.shallowremote = True
117 117 state.match = match.always()
118 118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
119 119 if includepattern or excludepattern:
120 120 state.match = match.match(
121 121 repo.root, b'', None, includepattern, excludepattern
122 122 )
123 123 streamres = wireprotov1server.stream(repo, proto)
124 124
125 125 # Force the first value to execute, so the file list is computed
126 126 # within the try/finally scope
127 127 first = next(streamres.gen)
128 128 second = next(streamres.gen)
129 129
130 130 def gen():
131 131 yield first
132 132 yield second
133 133 for value in streamres.gen:
134 134 yield value
135 135
136 136 return wireprototypes.streamres(gen())
137 137 finally:
138 138 state.shallowremote = oldshallow
139 139 state.match = oldmatch
140 140 state.noflatmf = oldnoflatmf
141 141
142 142 wireprotov1server.commands[b'stream_out_shallow'] = (
143 143 stream_out_shallow,
144 144 b'*',
145 145 )
146 146
147 147 # don't clone filelogs to shallow clients
148 148 def _walkstreamfiles(orig, repo, matcher=None):
149 149 if state.shallowremote:
150 150 # if we are shallow ourselves, stream our local commits
151 151 if shallowutil.isenabled(repo):
152 152 striplen = len(repo.store.path) + 1
153 153 readdir = repo.store.rawvfs.readdir
154 154 visit = [os.path.join(repo.store.path, b'data')]
155 155 while visit:
156 156 p = visit.pop()
157 157 for f, kind, st in readdir(p, stat=True):
158 158 fp = p + b'/' + f
159 159 if kind == stat.S_IFREG:
160 160 if not fp.endswith(b'.i') and not fp.endswith(
161 161 b'.d'
162 162 ):
163 163 n = util.pconvert(fp[striplen:])
164 164 d = store.decodedir(n)
165 t = store.FILETYPE_OTHER
166 yield (t, d, st.st_size)
165 yield store.StoreEntry(
166 unencoded_path=d,
167 is_revlog=True,
168 revlog_type=None,
169 is_revlog_main=False,
170 is_volatile=False,
171 file_size=st.st_size,
172 )
173
167 174 if kind == stat.S_IFDIR:
168 175 visit.append(fp)
169 176
170 177 if scmutil.istreemanifest(repo):
171 for (t, u, s) in repo.store.datafiles():
178 for entry in repo.store.datafiles():
179 u = entry.unencoded_path
172 180 if u.startswith(b'meta/') and (
173 181 u.endswith(b'.i') or u.endswith(b'.d')
174 182 ):
175 yield (t, u, s)
183 yield entry
176 184
177 185 # Return .d and .i files that do not match the shallow pattern
178 186 match = state.match
179 187 if match and not match.always():
180 for (t, u, s) in repo.store.datafiles():
188 for entry in repo.store.datafiles():
189 u = entry.unencoded_path
181 190 f = u[5:-2] # trim data/... and .i/.d
182 191 if not state.match(f):
183 yield (t, u, s)
192 yield entry
184 193
185 194 for x in repo.store.topfiles():
186 195 if state.noflatmf and x[1][:11] == b'00manifest.':
187 196 continue
188 197 yield x
189 198
190 199 elif shallowutil.isenabled(repo):
191 200 # don't allow cloning from a shallow repo to a full repo
192 201 # since it would require fetching every version of every
193 202 # file in order to create the revlogs.
194 203 raise error.Abort(
195 204 _(b"Cannot clone from a shallow repo to a full repo.")
196 205 )
197 206 else:
198 207 for x in orig(repo, matcher):
199 208 yield x
200 209
201 210 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
202 211
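The hunk above is the core of this change as seen from remotefilelog: repo.store.datafiles() now yields StoreEntry objects instead of (filetype, unencoded path, size) tuples. A minimal sketch of consuming the new API, relying only on the attributes that appear in this diff (unencoded_path and file_size):

# Sketch only: walk the store using the new StoreEntry-based datafiles() API.
def list_treemanifest_revlogs(repo):
    """Yield (path, size) for treemanifest revlog files in the store."""
    for entry in repo.store.datafiles():
        path = entry.unencoded_path          # previously the second tuple element
        if path.startswith(b'meta/') and path.endswith((b'.i', b'.d')):
            yield path, entry.file_size      # previously the third tuple element
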
203 212 # expose remotefilelog capabilities
204 213 def _capabilities(orig, repo, proto):
205 214 caps = orig(repo, proto)
206 215 if shallowutil.isenabled(repo) or ui.configbool(
207 216 b'remotefilelog', b'server'
208 217 ):
209 218 if isinstance(proto, _sshv1server):
210 219 # legacy getfiles method which only works over ssh
211 220 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
212 221 caps.append(b'x_rfl_getflogheads')
213 222 caps.append(b'x_rfl_getfile')
214 223 return caps
215 224
216 225 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
217 226
218 227 def _adjustlinkrev(orig, self, *args, **kwargs):
219 228 # When generating file blobs, taking the real path is too slow on large
220 229 # repos, so force it to just return the linkrev directly.
221 230 repo = self._repo
222 231 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
223 232 return self._filelog.linkrev(self._filelog.rev(self._filenode))
224 233 return orig(self, *args, **kwargs)
225 234
226 235 extensions.wrapfunction(
227 236 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
228 237 )
229 238
230 239 def _iscmd(orig, cmd):
231 240 if cmd == b'x_rfl_getfiles':
232 241 return False
233 242 return orig(cmd)
234 243
235 244 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
236 245
237 246
238 247 def _loadfileblob(repo, cachepath, path, node):
239 248 filecachepath = os.path.join(cachepath, path, hex(node))
240 249 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
241 250 filectx = repo.filectx(path, fileid=node)
242 251 if filectx.node() == repo.nullid:
243 252 repo.changelog = changelog.changelog(repo.svfs)
244 253 filectx = repo.filectx(path, fileid=node)
245 254
246 255 text = createfileblob(filectx)
247 256 # TODO configurable compression engines
248 257 text = zlib.compress(text)
249 258
250 259 # everything should be user & group read/writable
251 260 oldumask = os.umask(0o002)
252 261 try:
253 262 dirname = os.path.dirname(filecachepath)
254 263 if not os.path.exists(dirname):
255 264 try:
256 265 os.makedirs(dirname)
257 266 except FileExistsError:
258 267 pass
259 268
260 269 f = None
261 270 try:
262 271 f = util.atomictempfile(filecachepath, b"wb")
263 272 f.write(text)
264 273 except (IOError, OSError):
265 274 # Don't abort if the user only has permission to read,
266 275 # and not write.
267 276 pass
268 277 finally:
269 278 if f:
270 279 f.close()
271 280 finally:
272 281 os.umask(oldumask)
273 282 else:
274 283 with open(filecachepath, b"rb") as f:
275 284 text = f.read()
276 285 return text
277 286
278 287
279 288 def getflogheads(repo, proto, path):
280 289 """A server api for requesting a filelog's heads"""
281 290 flog = repo.file(path)
282 291 heads = flog.heads()
283 292 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
284 293
285 294
286 295 def getfile(repo, proto, file, node):
287 296 """A server api for requesting a particular version of a file. Can be used
288 297 in batches to request many files at once. The return protocol is:
289 298 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
290 299 non-zero for an error.
291 300
292 301 data is a compressed blob with revlog flag and ancestors information. See
293 302 createfileblob for its content.
294 303 """
295 304 if shallowutil.isenabled(repo):
296 305 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
297 306 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
298 307 if not cachepath:
299 308 cachepath = os.path.join(repo.path, b"remotefilelogcache")
300 309 node = bin(node.strip())
301 310 if node == repo.nullid:
302 311 return b'0\0'
303 312 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
304 313
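For context, here is a rough client-side sketch of the getfile reply format described in the docstring above. The real client plumbing lives elsewhere in remotefilelog, so treat this purely as an illustration of the wire format (error code, NUL byte, zlib-compressed blob).

import zlib

def parse_getfile_reply(reply):
    # reply is b'<errorcode>\0<data or error message>'
    code, _, payload = reply.partition(b'\0')
    if code != b'0':
        raise RuntimeError(payload)
    # the payload is the zlib-compressed file blob built by createfileblob();
    # it is empty when the requested node is the null node
    return zlib.decompress(payload) if payload else b''
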
305 314
306 315 def getfiles(repo, proto):
307 316 """A server api for requesting particular versions of particular files."""
308 317 if shallowutil.isenabled(repo):
309 318 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
310 319 if not isinstance(proto, _sshv1server):
311 320 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
312 321
313 322 def streamer():
314 323 fin = proto._fin
315 324
316 325 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
317 326 if not cachepath:
318 327 cachepath = os.path.join(repo.path, b"remotefilelogcache")
319 328
320 329 while True:
321 330 request = fin.readline()[:-1]
322 331 if not request:
323 332 break
324 333
325 334 node = bin(request[:40])
326 335 if node == repo.nullid:
327 336 yield b'0\n'
328 337 continue
329 338
330 339 path = request[40:]
331 340
332 341 text = _loadfileblob(repo, cachepath, path, node)
333 342
334 343 yield b'%d\n%s' % (len(text), text)
335 344
336 345 # it would be better to only flush after processing a whole batch
337 346 # but currently we don't know if there are more requests coming
338 347 proto._fout.flush()
339 348
340 349 return wireprototypes.streamres(streamer())
341 350
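The streamer above implies a simple line-based framing for x_rfl_getfiles: each request line is 40 hex characters of file node followed by the path, and each reply is a decimal length, a newline, then that many bytes. A hedged client-side sketch of that framing, where pipe is a hypothetical file-like object wrapping the ssh connection:

def request_one_file(pipe, hexnode, path):
    """Send one x_rfl_getfiles request and read the framed reply."""
    assert len(hexnode) == 40        # hex-encoded file node, as parsed by the server
    pipe.write(hexnode + path + b'\n')
    pipe.flush()
    size = int(pipe.readline()[:-1])     # b'0\n' is sent for the null node
    return pipe.read(size) if size else b''
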
342 351
343 352 def createfileblob(filectx):
344 353 """
345 354 format:
346 355 v0:
347 356 str(len(rawtext)) + '\0' + rawtext + ancestortext
348 357 v1:
349 358 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
350 359 metalist := metalist + '\n' + meta | meta
351 360 meta := sizemeta | flagmeta
352 361 sizemeta := METAKEYSIZE + str(len(rawtext))
353 362 flagmeta := METAKEYFLAG + str(flag)
354 363
355 364 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
356 365 length of 1.
357 366 """
358 367 flog = filectx.filelog()
359 368 frev = filectx.filerev()
360 369 revlogflags = flog._revlog.flags(frev)
361 370 if revlogflags == 0:
362 371 # normal files
363 372 text = filectx.data()
364 373 else:
365 374 # lfs, read raw revision data
366 375 text = flog.rawdata(frev)
367 376
368 377 repo = filectx._repo
369 378
370 379 ancestors = [filectx]
371 380
372 381 try:
373 382 repo.forcelinkrev = True
374 383 ancestors.extend([f for f in filectx.ancestors()])
375 384
376 385 ancestortext = b""
377 386 for ancestorctx in ancestors:
378 387 parents = ancestorctx.parents()
379 388 p1 = repo.nullid
380 389 p2 = repo.nullid
381 390 if len(parents) > 0:
382 391 p1 = parents[0].filenode()
383 392 if len(parents) > 1:
384 393 p2 = parents[1].filenode()
385 394
386 395 copyname = b""
387 396 rename = ancestorctx.renamed()
388 397 if rename:
389 398 copyname = rename[0]
390 399 linknode = ancestorctx.node()
391 400 ancestortext += b"%s%s%s%s%s\0" % (
392 401 ancestorctx.filenode(),
393 402 p1,
394 403 p2,
395 404 linknode,
396 405 copyname,
397 406 )
398 407 finally:
399 408 repo.forcelinkrev = False
400 409
401 410 header = shallowutil.buildfileblobheader(len(text), revlogflags)
402 411
403 412 return b"%s\0%s%s" % (header, text, ancestortext)
404 413
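A small sketch of taking apart the v0 blob layout documented in the createfileblob docstring below it was built from (str(len(rawtext)) + '\0' + rawtext + ancestortext). v1 blobs carry an extra metadata header handled by shallowutil, so this covers only the simpler v0 shape.

def split_v0_fileblob(blob):
    """Return (rawtext, ancestortext) for a v0 file blob."""
    sizetext, _, rest = blob.partition(b'\0')
    size = int(sizetext)                # length of the raw file text
    return rest[:size], rest[size:]     # the remaining bytes are the ancestor records
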
405 414
406 415 def gcserver(ui, repo):
407 416 if not repo.ui.configbool(b"remotefilelog", b"server"):
408 417 return
409 418
410 419 neededfiles = set()
411 420 heads = repo.revs(b"heads(tip~25000:) - null")
412 421
413 422 cachepath = repo.vfs.join(b"remotefilelogcache")
414 423 for head in heads:
415 424 mf = repo[head].manifest()
416 425 for filename, filenode in mf.items():
417 426 filecachepath = os.path.join(cachepath, filename, hex(filenode))
418 427 neededfiles.add(filecachepath)
419 428
420 429 # delete unneeded older files
421 430 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
422 431 expiration = time.time() - (days * 24 * 60 * 60)
423 432
424 433 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
425 434 progress.update(0)
426 435 for root, dirs, files in os.walk(cachepath):
427 436 for file in files:
428 437 filepath = os.path.join(root, file)
429 438 progress.increment()
430 439 if filepath in neededfiles:
431 440 continue
432 441
433 442 stat = os.stat(filepath)
434 443 if stat.st_mtime < expiration:
435 444 os.remove(filepath)
436 445
437 446 progress.complete()
@@ -1,574 +1,576 b''
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Olivia Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 hex,
13 13 short,
14 14 )
15 15 from . import (
16 16 bundle2,
17 17 changegroup,
18 18 discovery,
19 19 error,
20 20 exchange,
21 21 obsolete,
22 22 obsutil,
23 23 pathutil,
24 24 phases,
25 25 requirements,
26 26 scmutil,
27 27 transaction,
28 28 util,
29 29 )
30 30 from .utils import (
31 31 hashutil,
32 32 urlutil,
33 33 )
34 34
35 35
36 36 def backupbundle(
37 37 repo,
38 38 bases,
39 39 heads,
40 40 node,
41 41 suffix,
42 42 compress=True,
43 43 obsolescence=True,
44 44 tmp_backup=False,
45 45 ):
46 46 """create a bundle with the specified revisions as a backup"""
47 47
48 48 backupdir = b"strip-backup"
49 49 vfs = repo.vfs
50 50 if not vfs.isdir(backupdir):
51 51 vfs.mkdir(backupdir)
52 52
53 53 # Include a hash of all the nodes in the filename for uniqueness
54 54 allcommits = repo.set(b'%ln::%ln', bases, heads)
55 55 allhashes = sorted(c.hex() for c in allcommits)
56 56 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
57 57 name = b"%s/%s-%s-%s.hg" % (
58 58 backupdir,
59 59 short(node),
60 60 hex(totalhash[:4]),
61 61 suffix,
62 62 )
63 63
64 64 cgversion = changegroup.localversion(repo)
65 65 comp = None
66 66 if cgversion != b'01':
67 67 bundletype = b"HG20"
68 68 if compress:
69 69 comp = b'BZ'
70 70 elif compress:
71 71 bundletype = b"HG10BZ"
72 72 else:
73 73 bundletype = b"HG10UN"
74 74
75 75 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
76 76 contentopts = {
77 77 b'cg.version': cgversion,
78 78 b'obsolescence': obsolescence,
79 79 b'phases': True,
80 80 }
81 81 return bundle2.writenewbundle(
82 82 repo.ui,
83 83 repo,
84 84 b'strip',
85 85 name,
86 86 bundletype,
87 87 outgoing,
88 88 contentopts,
89 89 vfs,
90 90 compression=comp,
91 91 allow_internal=tmp_backup,
92 92 )
93 93
94 94
95 95 def _collectfiles(repo, striprev):
96 96 """find out the filelogs affected by the strip"""
97 97 files = set()
98 98
99 99 for x in range(striprev, len(repo)):
100 100 files.update(repo[x].files())
101 101
102 102 return sorted(files)
103 103
104 104
105 105 def _collectrevlog(revlog, striprev):
106 106 _, brokenset = revlog.getstrippoint(striprev)
107 107 return [revlog.linkrev(r) for r in brokenset]
108 108
109 109
110 110 def _collectbrokencsets(repo, files, striprev):
111 111 """return the changesets which will be broken by the truncation"""
112 112 s = set()
113 113
114 114 for revlog in manifestrevlogs(repo):
115 115 s.update(_collectrevlog(revlog, striprev))
116 116 for fname in files:
117 117 s.update(_collectrevlog(repo.file(fname), striprev))
118 118
119 119 return s
120 120
121 121
122 122 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
123 123 # This function requires the caller to lock the repo, but it operates
124 124 # within a transaction of its own, and thus requires there to be no current
125 125 # transaction when it is called.
126 126 if repo.currenttransaction() is not None:
127 127 raise error.ProgrammingError(b'cannot strip from inside a transaction')
128 128
129 129 # Simple way to maintain backwards compatibility for this
130 130 # argument.
131 131 if backup in [b'none', b'strip']:
132 132 backup = False
133 133
134 134 repo = repo.unfiltered()
135 135 repo.destroying()
136 136 vfs = repo.vfs
137 137 # load bookmark before changelog to avoid side effect from outdated
138 138 # changelog (see repo._refreshchangelog)
139 139 repo._bookmarks
140 140 cl = repo.changelog
141 141
142 142 # TODO handle undo of merge sets
143 143 if isinstance(nodelist, bytes):
144 144 nodelist = [nodelist]
145 145 striplist = [cl.rev(node) for node in nodelist]
146 146 striprev = min(striplist)
147 147
148 148 files = _collectfiles(repo, striprev)
149 149 saverevs = _collectbrokencsets(repo, files, striprev)
150 150
151 151 # Some revisions with rev > striprev may not be descendants of striprev.
152 152 # We have to find these revisions and put them in a bundle, so that
153 153 # we can restore them after the truncations.
154 154 # To create the bundle we use repo.changegroupsubset which requires
155 155 # the list of heads and bases of the set of interesting revisions.
156 156 # (head = revision in the set that has no descendant in the set;
157 157 # base = revision in the set that has no ancestor in the set)
158 158 tostrip = set(striplist)
159 159 saveheads = set(saverevs)
160 160 for r in cl.revs(start=striprev + 1):
161 161 if any(p in tostrip for p in cl.parentrevs(r)):
162 162 tostrip.add(r)
163 163
164 164 if r not in tostrip:
165 165 saverevs.add(r)
166 166 saveheads.difference_update(cl.parentrevs(r))
167 167 saveheads.add(r)
168 168 saveheads = [cl.node(r) for r in saveheads]
169 169
170 170 # compute base nodes
171 171 if saverevs:
172 172 descendants = set(cl.descendants(saverevs))
173 173 saverevs.difference_update(descendants)
174 174 savebases = [cl.node(r) for r in saverevs]
175 175 stripbases = [cl.node(r) for r in tostrip]
176 176
177 177 stripobsidx = obsmarkers = ()
178 178 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
179 179 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
180 180 if obsmarkers:
181 181 stripobsidx = [
182 182 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
183 183 ]
184 184
185 185 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
186 186
187 187 backupfile = None
188 188 node = nodelist[-1]
189 189 if backup:
190 190 backupfile = _createstripbackup(repo, stripbases, node, topic)
191 191 # create a changegroup for all the branches we need to keep
192 192 tmpbundlefile = None
193 193 if saveheads:
194 194 # do not compress temporary bundle if we remove it from disk later
195 195 #
196 196 # We do not include obsolescence, it might re-introduce prune markers
197 197 # we are trying to strip. This is harmless since the stripped markers
198 198 # are already backed up and we did not touch the markers for the
199 199 # saved changesets.
200 200 tmpbundlefile = backupbundle(
201 201 repo,
202 202 savebases,
203 203 saveheads,
204 204 node,
205 205 b'temp',
206 206 compress=False,
207 207 obsolescence=False,
208 208 tmp_backup=True,
209 209 )
210 210
211 211 with ui.uninterruptible():
212 212 try:
213 213 with repo.transaction(b"strip") as tr:
214 214 # TODO this code violates the interface abstraction of the
215 215 # transaction and makes assumptions that file storage is
216 216 # using append-only files. We'll need some kind of storage
217 217 # API to handle stripping for us.
218 218 oldfiles = set(tr._offsetmap.keys())
219 219 oldfiles.update(tr._newfiles)
220 220
221 221 tr.startgroup()
222 222 cl.strip(striprev, tr)
223 223 stripmanifest(repo, striprev, tr, files)
224 224
225 225 for fn in files:
226 226 repo.file(fn).strip(striprev, tr)
227 227 tr.endgroup()
228 228
229 229 entries = tr.readjournal()
230 230
231 231 for file, troffset in entries:
232 232 if file in oldfiles:
233 233 continue
234 234 with repo.svfs(file, b'a', checkambig=True) as fp:
235 235 fp.truncate(troffset)
236 236 if troffset == 0:
237 237 repo.store.markremoved(file)
238 238
239 239 deleteobsmarkers(repo.obsstore, stripobsidx)
240 240 del repo.obsstore
241 241 repo.invalidatevolatilesets()
242 242 repo._phasecache.filterunknown(repo)
243 243
244 244 if tmpbundlefile:
245 245 ui.note(_(b"adding branch\n"))
246 246 f = vfs.open(tmpbundlefile, b"rb")
247 247 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
248 248 # silence internal shuffling chatter
249 249 maybe_silent = (
250 250 repo.ui.silent()
251 251 if not repo.ui.verbose
252 252 else util.nullcontextmanager()
253 253 )
254 254 with maybe_silent:
255 255 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
256 256 txnname = b'strip'
257 257 if not isinstance(gen, bundle2.unbundle20):
258 258 txnname = b"strip\n%s" % urlutil.hidepassword(
259 259 tmpbundleurl
260 260 )
261 261 with repo.transaction(txnname) as tr:
262 262 bundle2.applybundle(
263 263 repo, gen, tr, source=b'strip', url=tmpbundleurl
264 264 )
265 265 f.close()
266 266
267 267 with repo.transaction(b'repair') as tr:
268 268 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
269 269 repo._bookmarks.applychanges(repo, tr, bmchanges)
270 270
271 271 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
272 272
273 273 except: # re-raises
274 274 if backupfile:
275 275 ui.warn(
276 276 _(b"strip failed, backup bundle stored in '%s'\n")
277 277 % vfs.join(backupfile)
278 278 )
279 279 if tmpbundlefile:
280 280 ui.warn(
281 281 _(b"strip failed, unrecovered changes stored in '%s'\n")
282 282 % vfs.join(tmpbundlefile)
283 283 )
284 284 ui.warn(
285 285 _(
286 286 b"(fix the problem, then recover the changesets with "
287 287 b"\"hg unbundle '%s'\")\n"
288 288 )
289 289 % vfs.join(tmpbundlefile)
290 290 )
291 291 raise
292 292 else:
293 293 if tmpbundlefile:
294 294 # Remove temporary bundle only if there were no exceptions
295 295 vfs.unlink(tmpbundlefile)
296 296
297 297 repo.destroyed()
298 298 # return the backup file path (or None if 'backup' was False) so
299 299 # extensions can use it
300 300 return backupfile
301 301
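As the comments at the top of strip() note, the caller must hold the repository lock and there must be no transaction in progress. A minimal, hypothetical invocation; the node selection and topic are made up for illustration:

# Sketch only: strip one changeset, typically under both wlock and lock.
with repo.wlock(), repo.lock():
    node = repo[b'tip'].node()           # hypothetical target changeset
    backupfile = strip(ui, repo, [node], backup=True, topic=b'example')
    if backupfile:
        ui.status(b'backup bundle written to %s\n' % backupfile)
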
302 302
303 303 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
304 304 """perform a "soft" strip using the archived phase"""
305 305 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
306 306 if not tostrip:
307 307 return None
308 308
309 309 backupfile = None
310 310 if backup:
311 311 node = tostrip[0]
312 312 backupfile = _createstripbackup(repo, tostrip, node, topic)
313 313
314 314 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
315 315 with repo.transaction(b'strip') as tr:
316 316 phases.retractboundary(repo, tr, phases.archived, tostrip)
317 317 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
318 318 repo._bookmarks.applychanges(repo, tr, bmchanges)
319 319 return backupfile
320 320
321 321
322 322 def _bookmarkmovements(repo, tostrip):
323 323 # compute necessary bookmark movement
324 324 bm = repo._bookmarks
325 325 updatebm = []
326 326 for m in bm:
327 327 rev = repo[bm[m]].rev()
328 328 if rev in tostrip:
329 329 updatebm.append(m)
330 330 newbmtarget = None
331 331 # If we need to move bookmarks, compute bookmark
332 332 # targets. Otherwise we can skip doing this logic.
333 333 if updatebm:
334 334 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
335 335 # but is much faster
336 336 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
337 337 if newbmtarget:
338 338 newbmtarget = repo[newbmtarget.first()].node()
339 339 else:
340 340 newbmtarget = b'.'
341 341 return newbmtarget, updatebm
342 342
343 343
344 344 def _createstripbackup(repo, stripbases, node, topic):
345 345 # backup the changeset we are about to strip
346 346 vfs = repo.vfs
347 347 unfi = repo.unfiltered()
348 348 to_node = unfi.changelog.node
349 349 # internal changesets are internal implementation details that should not
350 350 # leave the repository and not be exposed to the users. In addition, features
351 351 # using them are required to be resistant to strip. See test case for more
352 352 # details.
353 353 all_backup = unfi.revs(
354 354 b"(%ln)::(%ld) and not _internal()",
355 355 stripbases,
356 356 unfi.changelog.headrevs(),
357 357 )
358 358 if not all_backup:
359 359 return None
360 360
361 361 def to_nodes(revs):
362 362 return [to_node(r) for r in revs]
363 363
364 364 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
365 365 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
366 366 backupfile = backupbundle(repo, bases, heads, node, topic)
367 367 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
368 368 repo.ui.log(
369 369 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
370 370 )
371 371 return backupfile
372 372
373 373
374 374 def safestriproots(ui, repo, nodes):
375 375 """return list of roots of nodes where descendants are covered by nodes"""
376 376 torev = repo.unfiltered().changelog.rev
377 377 revs = {torev(n) for n in nodes}
378 378 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
379 379 # orphaned = affected - wanted
380 380 # affected = descendants(roots(wanted))
381 381 # wanted = revs
382 382 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
383 383 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
384 384 notstrip = revs - tostrip
385 385 if notstrip:
386 386 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
387 387 ui.warn(
388 388 _(b'warning: orphaned descendants detected, not stripping %s\n')
389 389 % nodestr
390 390 )
391 391 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
392 392
393 393
394 394 class stripcallback:
395 395 """used as a transaction postclose callback"""
396 396
397 397 def __init__(self, ui, repo, backup, topic):
398 398 self.ui = ui
399 399 self.repo = repo
400 400 self.backup = backup
401 401 self.topic = topic or b'backup'
402 402 self.nodelist = []
403 403
404 404 def addnodes(self, nodes):
405 405 self.nodelist.extend(nodes)
406 406
407 407 def __call__(self, tr):
408 408 roots = safestriproots(self.ui, self.repo, self.nodelist)
409 409 if roots:
410 410 strip(self.ui, self.repo, roots, self.backup, self.topic)
411 411
412 412
413 413 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
414 414 """like strip, but works inside transaction and won't strip irreverent revs
415 415
416 416 nodelist must explicitly contain all descendants. Otherwise a warning will
417 417 be printed that some nodes are not stripped.
418 418
419 419 Will do a backup if `backup` is True. The last non-None "topic" will be
420 420 used as the backup topic name. The default backup topic name is "backup".
421 421 """
422 422 tr = repo.currenttransaction()
423 423 if not tr:
424 424 nodes = safestriproots(ui, repo, nodelist)
425 425 return strip(ui, repo, nodes, backup=backup, topic=topic)
426 426 # transaction postclose callbacks are called in alphabet order.
427 427 # use '\xff' as prefix so we are likely to be called last.
428 428 callback = tr.getpostclose(b'\xffstrip')
429 429 if callback is None:
430 430 callback = stripcallback(ui, repo, backup=backup, topic=topic)
431 431 tr.addpostclose(b'\xffstrip', callback)
432 432 if topic:
433 433 callback.topic = topic
434 434 callback.addnodes(nodelist)
435 435
436 436
437 437 def stripmanifest(repo, striprev, tr, files):
438 438 for revlog in manifestrevlogs(repo):
439 439 revlog.strip(striprev, tr)
440 440
441 441
442 442 def manifestrevlogs(repo):
443 443 yield repo.manifestlog.getstorage(b'')
444 444 if scmutil.istreemanifest(repo):
445 445 # This logic is safe if treemanifest isn't enabled, but also
446 446 # pointless, so we skip it if treemanifest isn't enabled.
447 for t, unencoded, size in repo.store.datafiles():
447 for entry in repo.store.datafiles():
448 unencoded = entry.unencoded_path
449 # XXX use the entry.revlog_type instead
448 450 if unencoded.startswith(b'meta/') and unencoded.endswith(
449 451 b'00manifest.i'
450 452 ):
451 453 dir = unencoded[5:-12]
452 454 yield repo.manifestlog.getstorage(dir)
453 455
454 456
455 457 def rebuildfncache(ui, repo, only_data=False):
456 458 """Rebuilds the fncache file from repo history.
457 459
458 460 Missing entries will be added. Extra entries will be removed.
459 461 """
460 462 repo = repo.unfiltered()
461 463
462 464 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
463 465 ui.warn(
464 466 _(
465 467 b'(not rebuilding fncache because repository does not '
466 468 b'support fncache)\n'
467 469 )
468 470 )
469 471 return
470 472
471 473 with repo.lock():
472 474 fnc = repo.store.fncache
473 475 fnc.ensureloaded(warn=ui.warn)
474 476
475 477 oldentries = set(fnc.entries)
476 478 newentries = set()
477 479 seenfiles = set()
478 480
479 481 if only_data:
480 482 # Trust the listing of .i from the fncache, but not the .d. This is
481 483 # much faster, because we only need to stat every possible .d file,
482 484 # instead of reading the full changelog
483 485 for f in fnc:
484 486 if f[:5] == b'data/' and f[-2:] == b'.i':
485 487 seenfiles.add(f[5:-2])
486 488 newentries.add(f)
487 489 dataf = f[:-2] + b'.d'
488 490 if repo.store._exists(dataf):
489 491 newentries.add(dataf)
490 492 else:
491 493 progress = ui.makeprogress(
492 494 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
493 495 )
494 496 for rev in repo:
495 497 progress.update(rev)
496 498
497 499 ctx = repo[rev]
498 500 for f in ctx.files():
499 501 # This is to minimize I/O.
500 502 if f in seenfiles:
501 503 continue
502 504 seenfiles.add(f)
503 505
504 506 i = b'data/%s.i' % f
505 507 d = b'data/%s.d' % f
506 508
507 509 if repo.store._exists(i):
508 510 newentries.add(i)
509 511 if repo.store._exists(d):
510 512 newentries.add(d)
511 513
512 514 progress.complete()
513 515
514 516 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
515 517 # This logic is safe if treemanifest isn't enabled, but also
516 518 # pointless, so we skip it if treemanifest isn't enabled.
517 519 for dir in pathutil.dirs(seenfiles):
518 520 i = b'meta/%s/00manifest.i' % dir
519 521 d = b'meta/%s/00manifest.d' % dir
520 522
521 523 if repo.store._exists(i):
522 524 newentries.add(i)
523 525 if repo.store._exists(d):
524 526 newentries.add(d)
525 527
526 528 addcount = len(newentries - oldentries)
527 529 removecount = len(oldentries - newentries)
528 530 for p in sorted(oldentries - newentries):
529 531 ui.write(_(b'removing %s\n') % p)
530 532 for p in sorted(newentries - oldentries):
531 533 ui.write(_(b'adding %s\n') % p)
532 534
533 535 if addcount or removecount:
534 536 ui.write(
535 537 _(b'%d items added, %d removed from fncache\n')
536 538 % (addcount, removecount)
537 539 )
538 540 fnc.entries = newentries
539 541 fnc._dirty = True
540 542
541 543 with repo.transaction(b'fncache') as tr:
542 544 fnc.write(tr)
543 545 else:
544 546 ui.write(_(b'fncache already up to date\n'))
545 547
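The fncache entries rebuilt above follow a fixed naming scheme derived from the tracked file names. A small sketch of the paths the code expects, for a hypothetical file and tree-manifest directory:

# Hypothetical file and directory, to show the entry naming used above.
f = b'src/app.py'
dir = b'src'

filelog_index = b'data/%s.i' % f        # data/src/app.py.i
filelog_data = b'data/%s.d' % f         # data/src/app.py.d (non-inline revlogs only)
treemanifest_index = b'meta/%s/00manifest.i' % dir   # meta/src/00manifest.i
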
546 548
547 549 def deleteobsmarkers(obsstore, indices):
548 550 """Delete some obsmarkers from obsstore and return how many were deleted
549 551
550 552 'indices' is a list of ints which are the indices
551 553 of the markers to be deleted.
552 554
553 555 Every invocation of this function completely rewrites the obsstore file,
554 556 skipping the markers we want to be removed. A new temporary file is
555 557 created, the remaining markers are written there, and on .close() this file
556 558 gets atomically renamed to obsstore, thus guaranteeing consistency."""
557 559 if not indices:
558 560 # we don't want to rewrite the obsstore with the same content
559 561 return
560 562
561 563 left = []
562 564 current = obsstore._all
563 565 n = 0
564 566 for i, m in enumerate(current):
565 567 if i in indices:
566 568 n += 1
567 569 continue
568 570 left.append(m)
569 571
570 572 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
571 573 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
572 574 newobsstorefile.write(bytes)
573 575 newobsstorefile.close()
574 576 return n
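A quick, hypothetical illustration of deleteobsmarkers(); the indices are made up, and the caller is expected to hold the repository lock since the obsstore file is rewritten in place:

# Sketch only: drop the obsolescence markers stored at positions 0 and 3.
with repo.lock():
    removed = deleteobsmarkers(repo.obsstore, [0, 3])
    if removed:
        repo.ui.status(b'%d obsolescence markers removed\n' % removed)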
@@ -1,883 +1,888 b''
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl._generaldelta = rl._generaldelta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 rl._loadindex()
130 130
131 131
132 132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 133 """censors a revision in a "version 2" revlog"""
134 134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 136
137 137 censor_revs = {revlog.rev(censornode)}
138 138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 139
140 140
141 141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 142 """rewrite a revlog to censor some of its content
143 143
144 144 General principle
145 145
146 146 We create new revlog files (index/data/sidedata) to copy the content of
147 147 the existing data without the censored data.
148 148
149 149 We need to recompute a new delta for any revision that used the censored
150 150 revision as its delta base. As the cumulative size of the new deltas may be
151 151 large, we store them in a temporary file until they are stored in their
152 152 final destination.
153 153
154 154 All data before the censored data can be blindly copied. The rest needs
155 155 to be copied as we go and the associated index entry needs adjustment.
156 156 """
157 157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 159
160 160 old_index = revlog.index
161 161 docket = revlog._docket
162 162
163 163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 164
165 165 first_excl_rev = min(censor_revs)
166 166
167 167 first_excl_entry = revlog.index[first_excl_rev]
168 168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 171
172 172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 173 # rev β†’ (new_base, data_start, data_end, compression_mode)
174 174 rewritten_entries = _precompute_rewritten_delta(
175 175 revlog,
176 176 old_index,
177 177 censor_revs,
178 178 tmp_storage,
179 179 )
180 180
181 181 all_files = _setup_new_files(
182 182 revlog,
183 183 index_cutoff,
184 184 data_cutoff,
185 185 sidedata_cutoff,
186 186 )
187 187
188 188 # we don't need to open the old index file since its content already
189 189 # exists in a usable form in `old_index`.
190 190 with all_files() as open_files:
191 191 (
192 192 old_data_file,
193 193 old_sidedata_file,
194 194 new_index_file,
195 195 new_data_file,
196 196 new_sidedata_file,
197 197 ) = open_files
198 198
199 199 # writing the censored revision
200 200
201 201 # Writing all subsequent revisions
202 202 for rev in range(first_excl_rev, len(old_index)):
203 203 if rev in censor_revs:
204 204 _rewrite_censor(
205 205 revlog,
206 206 old_index,
207 207 open_files,
208 208 rev,
209 209 tombstone,
210 210 )
211 211 else:
212 212 _rewrite_simple(
213 213 revlog,
214 214 old_index,
215 215 open_files,
216 216 rev,
217 217 rewritten_entries,
218 218 tmp_storage,
219 219 )
220 220 docket.write(transaction=None, stripping=True)
221 221
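To make the "blindly copied" part of the strategy concrete: everything stored before the first censored revision is copied byte for byte, and only the tail is rewritten. A toy computation of the index cutoff with made-up numbers (the real values come from the revlog index and docket):

# Hypothetical figures for illustration only.
entry_size = 96          # assumed size of one index entry, in bytes
first_excl_rev = 10      # smallest censored revision number

index_cutoff = entry_size * first_excl_rev   # 960 bytes of index copied verbatim
# data_cutoff and sidedata_cutoff are derived similarly from the offsets stored
# for first_excl_rev; revisions >= first_excl_rev are then re-appended one by
# one by _rewrite_simple() or _rewrite_censor().
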
222 222
223 223 def _precompute_rewritten_delta(
224 224 revlog,
225 225 old_index,
226 226 excluded_revs,
227 227 tmp_storage,
228 228 ):
229 229 """Compute new delta for revisions whose delta is based on revision that
230 230 will not survive as is.
231 231
232 232 Return a mapping: {rev β†’ (new_base, data_start, data_end, compression_mode)}
233 233 """
234 234 dc = deltas.deltacomputer(revlog)
235 235 rewritten_entries = {}
236 236 first_excl_rev = min(excluded_revs)
237 237 with revlog._segmentfile._open_read() as dfh:
238 238 for rev in range(first_excl_rev, len(old_index)):
239 239 if rev in excluded_revs:
240 240 # this revision is being censored and will be rewritten as a
241 241 # tombstone, so there is no need to recompute a delta for it.
242 242 continue
243 243 entry = old_index[rev]
244 244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 245 continue
246 246 # This is a revision that uses the censored revision as the base
247 247 # for its delta. We need to compute a new delta for it.
248 248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 249 # this revision is empty, we can delta against nullrev
250 250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 251 else:
252 252
253 253 text = revlog.rawdata(rev, _df=dfh)
254 254 info = revlogutils.revisioninfo(
255 255 node=entry[ENTRY_NODE_ID],
256 256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 258 btext=[text],
259 259 textlen=len(text),
260 260 cachedelta=None,
261 261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 262 )
263 263 d = dc.finddeltainfo(
264 264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 265 )
266 266 default_comp = revlog._docket.default_compression_header
267 267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 268 # using `tell` is a bit lazy, but we are not here for speed
269 269 start = tmp_storage.tell()
270 270 tmp_storage.write(d.data[1])
271 271 end = tmp_storage.tell()
272 272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 273 return rewritten_entries
274 274
275 275
276 276 def _setup_new_files(
277 277 revlog,
278 278 index_cutoff,
279 279 data_cutoff,
280 280 sidedata_cutoff,
281 281 ):
282 282 """
283 283
284 284 return a context manager to open all the relevant files:
285 285 - old_data_file,
286 286 - old_sidedata_file,
287 287 - new_index_file,
288 288 - new_data_file,
289 289 - new_sidedata_file,
290 290
291 291 The old_index_file is not here because it is accessed through the
292 292 `old_index` object of the caller function.
293 293 """
294 294 docket = revlog._docket
295 295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 298
299 299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 302
303 303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 305 util.copyfile(
306 306 old_sidedata_filepath,
307 307 new_sidedata_filepath,
308 308 nb_bytes=sidedata_cutoff,
309 309 )
310 310 revlog.opener.register_file(docket.index_filepath())
311 311 revlog.opener.register_file(docket.data_filepath())
312 312 revlog.opener.register_file(docket.sidedata_filepath())
313 313
314 314 docket.index_end = index_cutoff
315 315 docket.data_end = data_cutoff
316 316 docket.sidedata_end = sidedata_cutoff
317 317
318 318 # reload the revlog internal information
319 319 revlog.clearcaches()
320 320 revlog._loadindex(docket=docket)
321 321
322 322 @contextlib.contextmanager
323 323 def all_files_opener():
324 324 # hide opening in a helper function to please check-code, black
325 325 # and various python version at the same time
326 326 with open(old_data_filepath, 'rb') as old_data_file:
327 327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 328 with open(new_index_filepath, 'r+b') as new_index_file:
329 329 with open(new_data_filepath, 'r+b') as new_data_file:
330 330 with open(
331 331 new_sidedata_filepath, 'r+b'
332 332 ) as new_sidedata_file:
333 333 new_index_file.seek(0, os.SEEK_END)
334 334 assert new_index_file.tell() == index_cutoff
335 335 new_data_file.seek(0, os.SEEK_END)
336 336 assert new_data_file.tell() == data_cutoff
337 337 new_sidedata_file.seek(0, os.SEEK_END)
338 338 assert new_sidedata_file.tell() == sidedata_cutoff
339 339 yield (
340 340 old_data_file,
341 341 old_sidedata_file,
342 342 new_index_file,
343 343 new_data_file,
344 344 new_sidedata_file,
345 345 )
346 346
347 347 return all_files_opener
348 348
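The returned all_files_opener is consumed by _rewrite_v2 above; a condensed sketch of the expected usage pattern:

# Sketch: how the context manager returned by _setup_new_files() is used.
all_files = _setup_new_files(revlog, index_cutoff, data_cutoff, sidedata_cutoff)
with all_files() as open_files:
    (
        old_data_file,
        old_sidedata_file,
        new_index_file,
        new_data_file,
        new_sidedata_file,
    ) = open_files
    # the old_* handles are read-only sources; the new_* handles are already
    # positioned at the cutoffs and ready to be appended to
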
349 349
350 350 def _rewrite_simple(
351 351 revlog,
352 352 old_index,
353 353 all_files,
354 354 rev,
355 355 rewritten_entries,
356 356 tmp_storage,
357 357 ):
358 358 """append a normal revision to the index after the rewritten one(s)"""
359 359 (
360 360 old_data_file,
361 361 old_sidedata_file,
362 362 new_index_file,
363 363 new_data_file,
364 364 new_sidedata_file,
365 365 ) = all_files
366 366 entry = old_index[rev]
367 367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 369
370 370 if rev not in rewritten_entries:
371 371 old_data_file.seek(old_data_offset)
372 372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 373 new_data = old_data_file.read(new_data_size)
374 374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 376 else:
377 377 (
378 378 data_delta_base,
379 379 start,
380 380 end,
381 381 d_comp_mode,
382 382 ) = rewritten_entries[rev]
383 383 new_data_size = end - start
384 384 tmp_storage.seek(start)
385 385 new_data = tmp_storage.read(new_data_size)
386 386
387 387 # It might be faster to group contiguous read/write operations,
388 388 # however, this is censor, an operation that is not focused
389 389 # on stellar performance. So this optimisation has not been
390 390 # written yet.
391 391 new_data_offset = new_data_file.tell()
392 392 new_data_file.write(new_data)
393 393
394 394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 395 new_sidedata_offset = new_sidedata_file.tell()
396 396 if 0 < sidedata_size:
397 397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 398 old_sidedata_file.seek(old_sidedata_offset)
399 399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 400 new_sidedata_file.write(new_sidedata)
401 401
402 402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 404 assert data_delta_base <= rev, (data_delta_base, rev)
405 405
406 406 new_entry = revlogutils.entry(
407 407 flags=flags,
408 408 data_offset=new_data_offset,
409 409 data_compressed_length=new_data_size,
410 410 data_uncompressed_length=data_uncompressed_length,
411 411 data_delta_base=data_delta_base,
412 412 link_rev=entry[ENTRY_LINK_REV],
413 413 parent_rev_1=entry[ENTRY_PARENT_1],
414 414 parent_rev_2=entry[ENTRY_PARENT_2],
415 415 node_id=entry[ENTRY_NODE_ID],
416 416 sidedata_offset=new_sidedata_offset,
417 417 sidedata_compressed_length=sidedata_size,
418 418 data_compression_mode=d_comp_mode,
419 419 sidedata_compression_mode=sd_com_mode,
420 420 )
421 421 revlog.index.append(new_entry)
422 422 entry_bin = revlog.index.entry_binary(rev)
423 423 new_index_file.write(entry_bin)
424 424
425 425 revlog._docket.index_end = new_index_file.tell()
426 426 revlog._docket.data_end = new_data_file.tell()
427 427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
430 430 def _rewrite_censor(
431 431 revlog,
432 432 old_index,
433 433 all_files,
434 434 rev,
435 435 tombstone,
436 436 ):
437 437 """rewrite and append a censored revision"""
438 438 (
439 439 old_data_file,
440 440 old_sidedata_file,
441 441 new_index_file,
442 442 new_data_file,
443 443 new_sidedata_file,
444 444 ) = all_files
445 445 entry = old_index[rev]
446 446
447 447 # XXX consider trying the default compression too
448 448 new_data_size = len(tombstone)
449 449 new_data_offset = new_data_file.tell()
450 450 new_data_file.write(tombstone)
451 451
452 452 # we are not adding any sidedata as they might leak info about the censored version
453 453
454 454 link_rev = entry[ENTRY_LINK_REV]
455 455
456 456 p1 = entry[ENTRY_PARENT_1]
457 457 p2 = entry[ENTRY_PARENT_2]
458 458
459 459 new_entry = revlogutils.entry(
460 460 flags=constants.REVIDX_ISCENSORED,
461 461 data_offset=new_data_offset,
462 462 data_compressed_length=new_data_size,
463 463 data_uncompressed_length=new_data_size,
464 464 data_delta_base=rev,
465 465 link_rev=link_rev,
466 466 parent_rev_1=p1,
467 467 parent_rev_2=p2,
468 468 node_id=entry[ENTRY_NODE_ID],
469 469 sidedata_offset=0,
470 470 sidedata_compressed_length=0,
471 471 data_compression_mode=COMP_MODE_PLAIN,
472 472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 473 )
474 474 revlog.index.append(new_entry)
475 475 entry_bin = revlog.index.entry_binary(rev)
476 476 new_index_file.write(entry_bin)
477 477 revlog._docket.index_end = new_index_file.tell()
478 478 revlog._docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
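As a quick illustration of what `_get_filename_from_filelog_index` extracts (a hedged example, not taken from the test suite): the `data/` prefix and the revlog extension are dropped, leaving the tracked filename.

path = b'data/some/dir/file.txt.i'
path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
assert path_part == [b'data', b'some/dir/file.txt']
# path_part[1] is the filename the filelog tracks: b'some/dir/file.txt'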
491 491
492 492 def _filelog_from_filename(repo, path):
493 493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 494
495 495 from .. import filelog # avoid cycle
496 496
497 497 fl = filelog.filelog(repo.svfs, path)
498 498 return fl
499 499
500 500
501 501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 503 from ..pure import parsers # avoid cycle
504 504
505 505 if repo._currentlock(repo._lockref) is None:
506 506 # Let's be paranoid about it
507 507 msg = "repo needs to be locked to rewrite parents"
508 508 raise error.ProgrammingError(msg)
509 509
510 510 index_format = parsers.IndexObject.index_format
511 511 entry = rl.index[rev]
512 512 new_entry = list(entry)
513 513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 514 packed = index_format.pack(*new_entry[:8])
515 515 fp.seek(offset)
516 516 fp.write(packed)
517 517
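The repair itself is only an exchange of fields 5 and 6 (p1 and p2) of the index entry before re-packing it. A toy illustration with a plain tuple standing in for a revlogv1 index entry (the other field values are made up):

entry = (0, 30, 30, -1, 2, 7, -1, b'\x00' * 20)  # ..., linkrev, p1=7, p2=-1, node
new_entry = list(entry)
new_entry[5], new_entry[6] = entry[6], entry[5]
assert (new_entry[5], new_entry[6]) == (-1, 7)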
518 518
519 519 def _reorder_filelog_parents(repo, fl, to_fix):
520 520 """
521 521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 522 new version to disk, overwriting the old one with a rename.
523 523 """
524 524 from ..pure import parsers # avoid cycle
525 525
526 526 ui = repo.ui
527 527 assert len(to_fix) > 0
528 528 rl = fl._revlog
529 529 if rl._format_version != constants.REVLOGV1:
530 530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 531 raise error.ProgrammingError(msg)
532 532
533 533 index_file = rl._indexfile
534 534 new_file_path = index_file + b'.tmp-parents-fix'
535 535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 536
537 537 with ui.uninterruptible():
538 538 try:
539 539 util.copyfile(
540 540 rl.opener.join(index_file),
541 541 rl.opener.join(new_file_path),
542 542 checkambig=rl._checkambig,
543 543 )
544 544
545 545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 546 if rl._inline:
547 547 index = parsers.InlinedIndexObject(fp.read())
548 548 for rev in fl.revs():
549 549 if rev in to_fix:
550 550 offset = index._calculate_index(rev)
551 551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 552 ui.write(repaired_msg % (rev, index_file))
553 553 else:
554 554 index_format = parsers.IndexObject.index_format
555 555 for rev in to_fix:
556 556 offset = rev * index_format.size
557 557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 558 ui.write(repaired_msg % (rev, index_file))
559 559
560 560 rl.opener.rename(new_file_path, index_file)
561 561 rl.clearcaches()
562 562 rl._loadindex()
563 563 finally:
564 564 util.tryunlink(new_file_path)
565 565
566 566
567 567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 568 full_text = lambda: fl._revlog.rawdata(filerev)
569 569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 570 return _is_revision_affected_inner(
571 571 full_text, parent_revs, filerev, metadata_cache
572 572 )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581 """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a
582 582 special meaning, compared to the reverse ordering, in the context of
583 583 filelog-based copytracing. issue6528 exists because new code assumed that
584 584 parent ordering didn't matter, so this detects whether the revision
585 585 contains metadata (since it is only used for filelog-based copytracing)
586 586 and whether its parents are in the "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 592
593 593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 595 if metadata_cache is not None:
596 596 metadata_cache[filerev] = has_meta
597 597 if has_meta:
598 598 (p1, p2) = parents_revs()
599 599 if p1 != nullrev and p2 == nullrev:
600 600 return True
601 601 return False
602 602
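A filelog revision carries copy metadata when its raw text starts with the two-byte marker b'\x01\n'; the helper above combines that check with the parent ordering. A minimal standalone version of the marker check:

with_meta = b'\x01\ncopy: old-name\n\x01\nreal file content'
without_meta = b'plain file content'
assert bytes(with_meta[:2]) == b'\x01\n'
assert bytes(without_meta[:2]) != b'\x01\n'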
603 603
604 604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 605 rl = fl._revlog
606 606 is_censored = lambda: rl.iscensored(filerev)
607 607 delta_base = lambda: rl.deltaparent(filerev)
608 608 delta = lambda: rl._chunk(filerev)
609 609 full_text = lambda: rl.rawdata(filerev)
610 610 parent_revs = lambda: rl.parentrevs(filerev)
611 611 return _is_revision_affected_fast_inner(
612 612 is_censored,
613 613 delta_base,
614 614 delta,
615 615 full_text,
616 616 parent_revs,
617 617 filerev,
618 618 metadata_cache,
619 619 )
620 620
621 621
622 622 def _is_revision_affected_fast_inner(
623 623 is_censored,
624 624 delta_base,
625 625 delta,
626 626 full_text,
627 627 parent_revs,
628 628 filerev,
629 629 metadata_cache,
630 630 ):
631 631 """Optimization fast-path for `_is_revision_affected`.
632 632
633 633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 634 revision to check whether its delta base has metadata, avoiding the
635 635 computation of the full text by looking only at the current delta.
636 636
637 637 This optimization only works if the revisions are looked at in order."""
638 638
639 639 if is_censored():
640 640 # Censored revisions don't contain metadata, so they cannot be affected
641 641 metadata_cache[filerev] = False
642 642 return False
643 643
644 644 p1, p2 = parent_revs()
645 645 if p1 == nullrev or p2 != nullrev:
646 646 return False
647 647
648 648 delta_parent = delta_base()
649 649 parent_has_metadata = metadata_cache.get(delta_parent)
650 650 if parent_has_metadata is None:
651 651 return _is_revision_affected_inner(
652 652 full_text,
653 653 parent_revs,
654 654 filerev,
655 655 metadata_cache,
656 656 )
657 657
658 658 chunk = delta()
659 659 if not len(chunk):
660 660 # No diff for this revision
661 661 return parent_has_metadata
662 662
663 663 header_length = 12
664 664 if len(chunk) < header_length:
665 665 raise error.Abort(_(b"patch cannot be decoded"))
666 666
667 667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668 668
669 669 if start < 2: # len(b'\x01\n') == 2
670 670 # This delta does *something* to the metadata marker (if any).
671 671 # Check it the slow way
672 672 is_affected = _is_revision_affected_inner(
673 673 full_text,
674 674 parent_revs,
675 675 filerev,
676 676 metadata_cache,
677 677 )
678 678 return is_affected
679 679
680 680 # The diff did not remove or add the metadata header, it's then in the same
681 681 # situation as its parent
682 682 metadata_cache[filerev] = parent_has_metadata
683 683 return parent_has_metadata
684 684
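The fast path only peeks at the first hunk header of the stored delta: if the hunk starts at offset 2 or later, the b'\x01\n' marker region is untouched and the revision inherits the has-metadata status of its delta base. A hedged sketch of that check on a hand-built delta chunk:

import struct

# a bdiff-style hunk header: replace bytes [start, end) with `length` new bytes
start, end, length = 40, 50, 4
chunk = struct.pack(b'>lll', start, end, length) + b'newX'
h_start, _h_end, _h_len = struct.unpack(b'>lll', chunk[:12])
assert h_start >= 2  # metadata marker untouched, reuse the cached answer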
685 685
686 686 def _from_report(ui, repo, context, from_report, dry_run):
687 687 """
688 688 Fix the revisions given in the `from_report` file, but still check whether
689 689 the revisions are indeed affected, to prevent an unfortunate cyclic
690 690 situation where we would swap well-ordered parents again.
691 691
692 692 See the doc for `debug_fix_issue6528` for the format documentation.
693 693 """
694 694 ui.write(_(b"loading report file '%s'\n") % from_report)
695 695
696 696 with context(), open(from_report, mode='rb') as f:
697 697 for line in f.read().split(b'\n'):
698 698 if not line:
699 699 continue
700 700 filenodes, filename = line.split(b' ', 1)
701 701 fl = _filelog_from_filename(repo, filename)
702 702 to_fix = set(
703 703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 704 )
705 705 excluded = set()
706 706
707 707 for filerev in to_fix:
708 708 if _is_revision_affected(fl, filerev):
709 709 msg = b"found affected revision %d for filelog '%s'\n"
710 710 ui.warn(msg % (filerev, filename))
711 711 else:
712 712 msg = _(b"revision %s of file '%s' is not affected\n")
713 713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 714 ui.warn(msg)
715 715 excluded.add(filerev)
716 716
717 717 to_fix = to_fix - excluded
718 718 if not to_fix:
719 719 msg = _(b"no affected revisions were found for '%s'\n")
720 720 ui.write(msg % filename)
721 721 continue
722 722 if not dry_run:
723 723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724 724
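For reference, each line of the report file parsed above is a comma-separated list of hex filenodes, a single space, then the tracked filename, matching how the report is written further down. A small sketch with a made-up node value:

import binascii

fake_node = b'0123456789abcdef0123456789abcdef01234567'  # 40 hex chars, illustrative
line = fake_node + b',' + fake_node + b' path/to/file.txt'
filenodes, filename = line.split(b' ', 1)
nodes = [binascii.unhexlify(n) for n in filenodes.split(b',')]
assert filename == b'path/to/file.txt' and len(nodes) == 2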
725 725
726 726 def filter_delta_issue6528(revlog, deltas_iter):
727 727 """filter incoming deltas to repair issue 6528 on the fly"""
728 728 metadata_cache = {}
729 729
730 730 deltacomputer = deltas.deltacomputer(revlog)
731 731
732 732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 733 (
734 734 node,
735 735 p1_node,
736 736 p2_node,
737 737 linknode,
738 738 deltabase,
739 739 delta,
740 740 flags,
741 741 sidedata,
742 742 ) = d
743 743
744 744 if not revlog.index.has_node(deltabase):
745 745 raise error.LookupError(
746 746 deltabase, revlog.radix, _(b'unknown parent')
747 747 )
748 748 base_rev = revlog.rev(deltabase)
749 749 if not revlog.index.has_node(p1_node):
750 750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 751 p1_rev = revlog.rev(p1_node)
752 752 if not revlog.index.has_node(p2_node):
753 753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 754 p2_rev = revlog.rev(p2_node)
755 755
756 756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 757 delta_base = lambda: revlog.rev(delta_base)
758 758 delta_base = lambda: base_rev
759 759 parent_revs = lambda: (p1_rev, p2_rev)
760 760
761 761 def full_text():
762 762 # note: being able to reuse the full text computation in the
763 763 # underlying addrevision would be useful, however this is a bit too
764 764 # intrusive for the "quick" issue6528 fix we are writing before the
765 765 # 5.8 release
766 766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767 767
768 768 revinfo = revlogutils.revisioninfo(
769 769 node,
770 770 p1_node,
771 771 p2_node,
772 772 [None],
773 773 textlen,
774 774 (base_rev, delta),
775 775 flags,
776 776 )
777 777 # cached by the global "writing" context
778 778 assert revlog._writinghandles is not None
779 779 if revlog._inline:
780 780 fh = revlog._writinghandles[0]
781 781 else:
782 782 fh = revlog._writinghandles[1]
783 783 return deltacomputer.buildtext(revinfo, fh)
784 784
785 785 is_affected = _is_revision_affected_fast_inner(
786 786 is_censored,
787 787 delta_base,
788 788 lambda: delta,
789 789 full_text,
790 790 parent_revs,
791 791 rev,
792 792 metadata_cache,
793 793 )
794 794 if is_affected:
795 795 d = (
796 796 node,
797 797 p2_node,
798 798 p1_node,
799 799 linknode,
800 800 deltabase,
801 801 delta,
802 802 flags,
803 803 sidedata,
804 804 )
805 805 yield d
806 806
807 807
808 808 def repair_issue6528(
809 809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
810 810 ):
811 811 from .. import store # avoid cycle
812 812
813 813 @contextlib.contextmanager
814 814 def context():
815 815 if dry_run or to_report: # No need for locking
816 816 yield
817 817 else:
818 818 with repo.wlock(), repo.lock():
819 819 yield
820 820
821 821 if from_report:
822 822 return _from_report(ui, repo, context, from_report, dry_run)
823 823
824 824 report_entries = []
825 825
826 826 with context():
827 827 files = list(
828 (file_type, path)
829 for (file_type, path, _s) in repo.store.datafiles()
830 if path.endswith(b'.i') and file_type & store.FILEFLAGS_FILELOG
828 entry
829 for entry in repo.store.datafiles()
830 if (
831 entry.unencoded_path.endswith(b'.i')
832 and entry.is_revlog
833 and entry.revlog_type == store.FILEFLAGS_FILELOG
834 )
831 835 )
832 836
833 837 progress = ui.makeprogress(
834 838 _(b"looking for affected revisions"),
835 839 unit=_(b"filelogs"),
836 840 total=len(files),
837 841 )
838 842 found_nothing = True
839 843
840 for file_type, path in files:
844 for entry in files:
845 path = entry.unencoded_path
841 846 progress.increment()
842 847 filename = _get_filename_from_filelog_index(path)
843 848 fl = _filelog_from_filename(repo, filename)
844 849
845 850 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
846 851 to_fix = set()
847 852 metadata_cache = {}
848 853 for filerev in fl.revs():
849 854 affected = _is_revision_affected_fast(
850 855 repo, fl, filerev, metadata_cache
851 856 )
852 857 if paranoid:
853 858 slow = _is_revision_affected(fl, filerev)
854 859 if slow != affected:
855 860 msg = _(b"paranoid check failed for '%s' at node %s")
856 861 node = binascii.hexlify(fl.node(filerev))
857 862 raise error.Abort(msg % (filename, node))
858 863 if affected:
859 864 msg = b"found affected revision %d for filelog '%s'\n"
860 865 ui.warn(msg % (filerev, path))
861 866 found_nothing = False
862 867 if not dry_run:
863 868 if to_report:
864 869 to_fix.add(binascii.hexlify(fl.node(filerev)))
865 870 else:
866 871 to_fix.add(filerev)
867 872
868 873 if to_fix:
869 874 to_fix = sorted(to_fix)
870 875 if to_report:
871 876 report_entries.append((filename, to_fix))
872 877 else:
873 878 _reorder_filelog_parents(repo, fl, to_fix)
874 879
875 880 if found_nothing:
876 881 ui.write(_(b"no affected revisions were found\n"))
877 882
878 883 if to_report and report_entries:
879 884 with open(to_report, mode="wb") as f:
880 885 for path, to_fix in report_entries:
881 886 f.write(b"%s %s\n" % (b",".join(to_fix), path))
882 887
883 888 progress.complete()
@@ -1,855 +1,901 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 from typing import Generator
13 14
14 15 from .i18n import _
15 16 from .pycompat import getattr
17 from .thirdparty import attr
16 18 from .node import hex
17 19 from . import (
18 20 changelog,
19 21 error,
20 22 manifest,
21 23 policy,
22 24 pycompat,
23 25 util,
24 26 vfs as vfsmod,
25 27 )
26 28 from .utils import hashutil
27 29
28 30 parsers = policy.importmod('parsers')
29 31 # how much bytes should be read from fncache in one read
30 32 # It is done to prevent loading large fncache files into memory
31 33 fncache_chunksize = 10 ** 6
32 34
33 35
34 36 def _matchtrackedpath(path, matcher):
35 37 """parses a fncache entry and returns whether the entry is tracking a path
36 38 matched by matcher or not.
37 39
38 40 If matcher is None, returns True"""
39 41
40 42 if matcher is None:
41 43 return True
42 44 path = decodedir(path)
43 45 if path.startswith(b'data/'):
44 46 return matcher(path[len(b'data/') : -len(b'.i')])
45 47 elif path.startswith(b'meta/'):
46 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 49
48 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 51
50 52
51 53 # This avoids a collision between a file named foo and a dir named
52 54 # foo.i or foo.d
53 55 def _encodedir(path):
54 56 """
55 57 >>> _encodedir(b'data/foo.i')
56 58 'data/foo.i'
57 59 >>> _encodedir(b'data/foo.i/bla.i')
58 60 'data/foo.i.hg/bla.i'
59 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 62 'data/foo.i.hg.hg/bla.i'
61 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 65 """
64 66 return (
65 67 path.replace(b".hg/", b".hg.hg/")
66 68 .replace(b".i/", b".i.hg/")
67 69 .replace(b".d/", b".d.hg/")
68 70 )
69 71
70 72
71 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 74
73 75
74 76 def decodedir(path):
75 77 """
76 78 >>> decodedir(b'data/foo.i')
77 79 'data/foo.i'
78 80 >>> decodedir(b'data/foo.i.hg/bla.i')
79 81 'data/foo.i/bla.i'
80 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 83 'data/foo.i.hg/bla.i'
82 84 """
83 85 if b".hg/" not in path:
84 86 return path
85 87 return (
86 88 path.replace(b".d.hg/", b".d/")
87 89 .replace(b".i.hg/", b".i/")
88 90 .replace(b".hg.hg/", b".hg/")
89 91 )
90 92
91 93
92 94 def _reserved():
93 95 """characters that are problematic for filesystems
94 96
95 97 * ascii escapes (0..31)
96 98 * ascii hi (126..255)
97 99 * windows specials
98 100
99 101 these characters will be escaped by encodefunctions
100 102 """
101 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 104 for x in range(32):
103 105 yield x
104 106 for x in range(126, 256):
105 107 yield x
106 108 for x in winreserved:
107 109 yield x
108 110
109 111
110 112 def _buildencodefun():
111 113 """
112 114 >>> enc, dec = _buildencodefun()
113 115
114 116 >>> enc(b'nothing/special.txt')
115 117 'nothing/special.txt'
116 118 >>> dec(b'nothing/special.txt')
117 119 'nothing/special.txt'
118 120
119 121 >>> enc(b'HELLO')
120 122 '_h_e_l_l_o'
121 123 >>> dec(b'_h_e_l_l_o')
122 124 'HELLO'
123 125
124 126 >>> enc(b'hello:world?')
125 127 'hello~3aworld~3f'
126 128 >>> dec(b'hello~3aworld~3f')
127 129 'hello:world?'
128 130
129 131 >>> enc(b'the\\x07quick\\xADshot')
130 132 'the~07quick~adshot'
131 133 >>> dec(b'the~07quick~adshot')
132 134 'the\\x07quick\\xadshot'
133 135 """
134 136 e = b'_'
135 137 xchr = pycompat.bytechr
136 138 asciistr = list(map(xchr, range(127)))
137 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 140
139 141 cmap = {x: x for x in asciistr}
140 142 for x in _reserved():
141 143 cmap[xchr(x)] = b"~%02x" % x
142 144 for x in capitals + [ord(e)]:
143 145 cmap[xchr(x)] = e + xchr(x).lower()
144 146
145 147 dmap = {}
146 148 for k, v in cmap.items():
147 149 dmap[v] = k
148 150
149 151 def decode(s):
150 152 i = 0
151 153 while i < len(s):
152 154 for l in range(1, 4):
153 155 try:
154 156 yield dmap[s[i : i + l]]
155 157 i += l
156 158 break
157 159 except KeyError:
158 160 pass
159 161 else:
160 162 raise KeyError
161 163
162 164 return (
163 165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 166 lambda s: b''.join(list(decode(s))),
165 167 )
166 168
167 169
168 170 _encodefname, _decodefname = _buildencodefun()
169 171
170 172
171 173 def encodefilename(s):
172 174 """
173 175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 177 """
176 178 return _encodefname(encodedir(s))
177 179
178 180
179 181 def decodefilename(s):
180 182 """
181 183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 185 """
184 186 return decodedir(_decodefname(s))
185 187
186 188
187 189 def _buildlowerencodefun():
188 190 """
189 191 >>> f = _buildlowerencodefun()
190 192 >>> f(b'nothing/special.txt')
191 193 'nothing/special.txt'
192 194 >>> f(b'HELLO')
193 195 'hello'
194 196 >>> f(b'hello:world?')
195 197 'hello~3aworld~3f'
196 198 >>> f(b'the\\x07quick\\xADshot')
197 199 'the~07quick~adshot'
198 200 """
199 201 xchr = pycompat.bytechr
200 202 cmap = {xchr(x): xchr(x) for x in range(127)}
201 203 for x in _reserved():
202 204 cmap[xchr(x)] = b"~%02x" % x
203 205 for x in range(ord(b"A"), ord(b"Z") + 1):
204 206 cmap[xchr(x)] = xchr(x).lower()
205 207
206 208 def lowerencode(s):
207 209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 210
209 211 return lowerencode
210 212
211 213
212 214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 215
214 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 219
218 220
219 221 def _auxencode(path, dotencode):
220 222 """
221 223 Encodes filenames containing names reserved by Windows or which end in
222 224 period or space. Does not touch other single reserved characters c.
223 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 226 Additionally encodes space or period at the beginning, if dotencode is
225 227 True. Parameter path is assumed to be all lowercase.
226 228 A segment only needs encoding if a reserved name appears as a
227 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 230 doesn't need encoding.
229 231
230 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 233 >>> _auxencode(s.split(b'/'), True)
232 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 236 >>> _auxencode(s.split(b'/'), False)
235 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 238 >>> _auxencode([b'foo. '], True)
237 239 ['foo.~20']
238 240 >>> _auxencode([b' .foo'], True)
239 241 ['~20.foo']
240 242 """
241 243 for i, n in enumerate(path):
242 244 if not n:
243 245 continue
244 246 if dotencode and n[0] in b'. ':
245 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 248 path[i] = n
247 249 else:
248 250 l = n.find(b'.')
249 251 if l == -1:
250 252 l = len(n)
251 253 if (l == 3 and n[:3] in _winres3) or (
252 254 l == 4
253 255 and n[3:4] <= b'9'
254 256 and n[3:4] >= b'1'
255 257 and n[:3] in _winres4
256 258 ):
257 259 # encode third letter ('aux' -> 'au~78')
258 260 ec = b"~%02x" % ord(n[2:3])
259 261 n = n[0:2] + ec + n[3:]
260 262 path[i] = n
261 263 if n[-1] in b'. ':
262 264 # encode last period or space ('foo...' -> 'foo..~2e')
263 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 266 return path
265 267
266 268
267 269 _maxstorepathlen = 120
268 270 _dirprefixlen = 8
269 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 272
271 273
272 274 def _hashencode(path, dotencode):
273 275 digest = hex(hashutil.sha1(path).digest())
274 276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 277 parts = _auxencode(le, dotencode)
276 278 basename = parts[-1]
277 279 _root, ext = os.path.splitext(basename)
278 280 sdirs = []
279 281 sdirslen = 0
280 282 for p in parts[:-1]:
281 283 d = p[:_dirprefixlen]
282 284 if d[-1] in b'. ':
283 285 # Windows can't access dirs ending in period or space
284 286 d = d[:-1] + b'_'
285 287 if sdirslen == 0:
286 288 t = len(d)
287 289 else:
288 290 t = sdirslen + 1 + len(d)
289 291 if t > _maxshortdirslen:
290 292 break
291 293 sdirs.append(d)
292 294 sdirslen = t
293 295 dirs = b'/'.join(sdirs)
294 296 if len(dirs) > 0:
295 297 dirs += b'/'
296 298 res = b'dh/' + dirs + digest + ext
297 299 spaceleft = _maxstorepathlen - len(res)
298 300 if spaceleft > 0:
299 301 filler = basename[:spaceleft]
300 302 res = b'dh/' + dirs + filler + digest + ext
301 303 return res
302 304
303 305
304 306 def _hybridencode(path, dotencode):
305 307 """encodes path with a length limit
306 308
307 309 Encodes all paths that begin with 'data/', according to the following.
308 310
309 311 Default encoding (reversible):
310 312
311 313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 314 characters are encoded as '~xx', where xx is the two digit hex code
313 315 of the character (see encodefilename).
314 316 Relevant path components consisting of Windows reserved filenames are
315 317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 318
317 319 Hashed encoding (not reversible):
318 320
319 321 If the default-encoded path is longer than _maxstorepathlen, a
320 322 non-reversible hybrid hashing of the path is done instead.
321 323 This encoding uses up to _dirprefixlen characters of all directory
322 324 levels of the lowerencoded path, but not more levels than can fit into
323 325 _maxshortdirslen.
324 326 Then follows the filler followed by the sha digest of the full path.
325 327 The filler is the beginning of the basename of the lowerencoded path
326 328 (the basename is everything after the last path separator). The filler
327 329 is as long as possible, filling in characters from the basename until
328 330 the encoded path has _maxstorepathlen characters (or all chars of the
329 331 basename have been taken).
330 332 The extension (e.g. '.i' or '.d') is preserved.
331 333
332 334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 335 encoding was used.
334 336 """
335 337 path = encodedir(path)
336 338 ef = _encodefname(path).split(b'/')
337 339 res = b'/'.join(_auxencode(ef, dotencode))
338 340 if len(res) > _maxstorepathlen:
339 341 res = _hashencode(path, dotencode)
340 342 return res
341 343
342 344
343 345 def _pathencode(path):
344 346 de = encodedir(path)
345 347 if len(path) > _maxstorepathlen:
346 348 return _hashencode(de, True)
347 349 ef = _encodefname(de).split(b'/')
348 350 res = b'/'.join(_auxencode(ef, True))
349 351 if len(res) > _maxstorepathlen:
350 352 return _hashencode(de, True)
351 353 return res
352 354
353 355
354 356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 357
356 358
357 359 def _plainhybridencode(f):
358 360 return _hybridencode(f, False)
359 361
360 362
361 363 def _calcmode(vfs):
362 364 try:
363 365 # files in .hg/ will be created using this mode
364 366 mode = vfs.stat().st_mode
365 367 # avoid some useless chmods
366 368 if (0o777 & ~util.umask) == (0o777 & mode):
367 369 mode = None
368 370 except OSError:
369 371 mode = None
370 372 return mode
371 373
372 374
373 375 _data = [
374 376 b'bookmarks',
375 377 b'narrowspec',
376 378 b'data',
377 379 b'meta',
378 380 b'00manifest.d',
379 381 b'00manifest.i',
380 382 b'00changelog.d',
381 383 b'00changelog.i',
382 384 b'phaseroots',
383 385 b'obsstore',
384 386 b'requires',
385 387 ]
386 388
387 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 390 REVLOG_FILES_OTHER_EXT = (
389 391 b'.idx',
390 392 b'.d',
391 393 b'.dat',
392 394 b'.n',
393 395 b'.nd',
394 396 b'.sda',
395 397 )
396 398 # files that are "volatile" and might change between listing and streaming
397 399 #
398 400 # note: the ".nd" files are nodemap data and won't "change", but they might be
399 401 # deleted.
400 402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
401 403
402 404 # some exception to the above matching
403 405 #
404 406 # XXX This is currently not in use because of issue6542
405 407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
406 408
407 409
408 410 def is_revlog(f, kind, st):
409 411 if kind != stat.S_IFREG:
410 412 return None
411 413 return revlog_type(f)
412 414
413 415
414 416 def revlog_type(f):
415 417 # XXX we need to filter `undo.` files created by the transaction here; however,
416 418 # being naive about it also filters revlogs for `undo.*` files, leading to
417 419 # issue6542. So we no longer use EXCLUDED.
418 420 if f.endswith(REVLOG_FILES_MAIN_EXT):
419 421 return FILEFLAGS_REVLOG_MAIN
420 422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
421 423 t = FILETYPE_FILELOG_OTHER
422 424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
423 425 t |= FILEFLAGS_VOLATILE
424 426 return t
425 427 return None
426 428
427 429
428 430 # the file is part of changelog data
429 431 FILEFLAGS_CHANGELOG = 1 << 13
430 432 # the file is part of manifest data
431 433 FILEFLAGS_MANIFESTLOG = 1 << 12
432 434 # the file is part of filelog data
433 435 FILEFLAGS_FILELOG = 1 << 11
434 436 # files that are not directly part of a revlog
435 437 FILEFLAGS_OTHER = 1 << 10
436 438
437 439 # the main entry point for a revlog
438 440 FILEFLAGS_REVLOG_MAIN = 1 << 1
439 441 # a secondary file for a revlog
440 442 FILEFLAGS_REVLOG_OTHER = 1 << 0
441 443
442 444 # files that are "volatile" and might change between listing and streaming
443 445 FILEFLAGS_VOLATILE = 1 << 20
444 446
445 447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
446 448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
447 449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
448 450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
449 451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
450 452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
451 453 FILETYPE_OTHER = FILEFLAGS_OTHER
452 454
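A short illustration of how the category and role bits above combine and are queried (the values are copied from the constants just defined):

FILEFLAGS_FILELOG = 1 << 11
FILEFLAGS_REVLOG_MAIN = 1 << 1
FILEFLAGS_VOLATILE = 1 << 20

file_type = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
assert file_type & FILEFLAGS_FILELOG        # belongs to a filelog
assert file_type & FILEFLAGS_REVLOG_MAIN    # it is the '.i' entry point
assert not file_type & FILEFLAGS_VOLATILE   # '.i' files are not volatile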
453 455
456 @attr.s(slots=True)
457 class StoreEntry:
458 """An entry in the store
459
460 This is returned by `store.walk` and represents some data in the store."""
461
462 unencoded_path = attr.ib()
463 is_revlog = attr.ib(default=False)
464 revlog_type = attr.ib(default=None)
465 is_revlog_main = attr.ib(default=None)
466 is_volatile = attr.ib(default=False)
467 file_size = attr.ib(default=None)
468
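Callers that previously unpacked `(file_type, name, size)` tuples now read named attributes instead. A hedged sketch, assuming the surrounding module's `StoreEntry` and `FILEFLAGS_FILELOG` names (the path and size are made up):

entry = StoreEntry(
    unencoded_path=b'data/foo.txt.i',
    is_revlog=True,
    revlog_type=FILEFLAGS_FILELOG,
    is_revlog_main=True,
    is_volatile=False,
    file_size=1234,
)
assert entry.is_revlog and entry.unencoded_path.endswith(b'.i')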
469
454 470 class basicstore:
455 471 '''base class for local repository stores'''
456 472
457 473 def __init__(self, path, vfstype):
458 474 vfs = vfstype(path)
459 475 self.path = vfs.base
460 476 self.createmode = _calcmode(vfs)
461 477 vfs.createmode = self.createmode
462 478 self.rawvfs = vfs
463 479 self.vfs = vfsmod.filtervfs(vfs, encodedir)
464 480 self.opener = self.vfs
465 481
466 482 def join(self, f):
467 483 return self.path + b'/' + encodedir(f)
468 484
469 485 def _walk(self, relpath, recurse):
470 486 '''yields (revlog_type, unencoded, size)'''
471 487 path = self.path
472 488 if relpath:
473 489 path += b'/' + relpath
474 490 striplen = len(self.path) + 1
475 491 l = []
476 492 if self.rawvfs.isdir(path):
477 493 visit = [path]
478 494 readdir = self.rawvfs.readdir
479 495 while visit:
480 496 p = visit.pop()
481 497 for f, kind, st in readdir(p, stat=True):
482 498 fp = p + b'/' + f
483 499 rl_type = is_revlog(f, kind, st)
484 500 if rl_type is not None:
485 501 n = util.pconvert(fp[striplen:])
486 502 l.append((rl_type, decodedir(n), st.st_size))
487 503 elif kind == stat.S_IFDIR and recurse:
488 504 visit.append(fp)
489 505 l.sort()
490 506 return l
491 507
492 508 def changelog(self, trypending, concurrencychecker=None):
493 509 return changelog.changelog(
494 510 self.vfs,
495 511 trypending=trypending,
496 512 concurrencychecker=concurrencychecker,
497 513 )
498 514
499 515 def manifestlog(self, repo, storenarrowmatch):
500 516 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
501 517 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
502 518
503 def datafiles(self, matcher=None, undecodable=None):
519 def datafiles(
520 self, matcher=None, undecodable=None
521 ) -> Generator[StoreEntry, None, None]:
504 522 """Like walk, but excluding the changelog and root manifest.
505 523
506 524 When [undecodable] is None, revlogs names that can't be
507 525 decoded cause an exception. When it is provided, it should
508 526 be a list and the filenames that can't be decoded are added
509 527 to it instead. This is very rarely needed."""
510 528 files = self._walk(b'data', True) + self._walk(b'meta', True)
511 529 for (t, u, s) in files:
512 530 if t is not None:
513 yield (FILEFLAGS_FILELOG | t, u, s)
531 yield StoreEntry(
532 unencoded_path=u,
533 is_revlog=True,
534 revlog_type=FILEFLAGS_FILELOG,
535 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
536 is_volatile=bool(t & FILEFLAGS_VOLATILE),
537 file_size=s,
538 )
514 539
515 def topfiles(self):
540 def topfiles(self) -> Generator[StoreEntry, None, None]:
516 541 # yield manifest before changelog
517 542 files = reversed(self._walk(b'', False))
518 543 for (t, u, s) in files:
519 544 if u.startswith(b'00changelog'):
520 yield (FILEFLAGS_CHANGELOG | t, u, s)
545 revlog_type = FILEFLAGS_CHANGELOG
521 546 elif u.startswith(b'00manifest'):
522 yield (FILEFLAGS_MANIFESTLOG | t, u, s)
547 revlog_type = FILEFLAGS_MANIFESTLOG
523 548 else:
524 yield (FILETYPE_OTHER | t, u, s)
549 revlog_type = None
550 yield StoreEntry(
551 unencoded_path=u,
552 is_revlog=revlog_type is not None,
553 revlog_type=revlog_type,
554 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
555 is_volatile=bool(t & FILEFLAGS_VOLATILE),
556 file_size=s,
557 )
525 558
526 def walk(self, matcher=None):
559 def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
527 560 """return files related to data storage (ie: revlogs)
528 561
529 562 yields `StoreEntry` objects
530 563
531 564 if a matcher is passed, only the storage files of tracked paths matching
532 565 the matcher are yielded
533 566 """
534 567 # yield data files first
535 568 for x in self.datafiles(matcher):
536 569 yield x
537 570 for x in self.topfiles():
538 571 yield x
539 572
540 573 def copylist(self):
541 574 return _data
542 575
543 576 def write(self, tr):
544 577 pass
545 578
546 579 def invalidatecaches(self):
547 580 pass
548 581
549 582 def markremoved(self, fn):
550 583 pass
551 584
552 585 def __contains__(self, path):
553 586 '''Checks if the store contains path'''
554 587 path = b"/".join((b"data", path))
555 588 # file?
556 589 if self.vfs.exists(path + b".i"):
557 590 return True
558 591 # dir?
559 592 if not path.endswith(b"/"):
560 593 path = path + b"/"
561 594 return self.vfs.exists(path)
562 595
563 596
564 597 class encodedstore(basicstore):
565 598 def __init__(self, path, vfstype):
566 599 vfs = vfstype(path + b'/store')
567 600 self.path = vfs.base
568 601 self.createmode = _calcmode(vfs)
569 602 vfs.createmode = self.createmode
570 603 self.rawvfs = vfs
571 604 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
572 605 self.opener = self.vfs
573 606
574 607 # note: topfiles would also need a decode phase. It is just that in
575 608 # practice we do not have any file outside of `data/` that needs encoding.
576 609 # However, that might change, so we should probably add a test and
577 610 # encoding/decoding for it too. See issue6548.
578 611
579 def datafiles(self, matcher=None, undecodable=None):
580 for t, f1, size in super(encodedstore, self).datafiles():
612 def datafiles(
613 self, matcher=None, undecodable=None
614 ) -> Generator[StoreEntry, None, None]:
615 for entry in super(encodedstore, self).datafiles():
581 616 try:
617 f1 = entry.unencoded_path
582 618 f2 = decodefilename(f1)
583 619 except KeyError:
584 620 if undecodable is None:
585 621 msg = _(b'undecodable revlog name %s') % f1
586 622 raise error.StorageError(msg)
587 623 else:
588 624 undecodable.append(f1)
589 625 continue
590 626 if not _matchtrackedpath(f2, matcher):
591 627 continue
592 yield t, f2, size
628 entry.unencoded_path = f2
629 yield entry
593 630
594 631 def join(self, f):
595 632 return self.path + b'/' + encodefilename(f)
596 633
597 634 def copylist(self):
598 635 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
599 636
600 637
601 638 class fncache:
602 639 # the filename used to be partially encoded
603 640 # hence the encodedir/decodedir dance
604 641 def __init__(self, vfs):
605 642 self.vfs = vfs
606 643 self._ignores = set()
607 644 self.entries = None
608 645 self._dirty = False
609 646 # set of new additions to fncache
610 647 self.addls = set()
611 648
612 649 def ensureloaded(self, warn=None):
613 650 """read the fncache file if not already read.
614 651
615 652 If the file on disk is corrupted, raise. If warn is provided,
616 653 warn and keep going instead."""
617 654 if self.entries is None:
618 655 self._load(warn)
619 656
620 657 def _load(self, warn=None):
621 658 '''fill the entries from the fncache file'''
622 659 self._dirty = False
623 660 try:
624 661 fp = self.vfs(b'fncache', mode=b'rb')
625 662 except IOError:
626 663 # skip nonexistent file
627 664 self.entries = set()
628 665 return
629 666
630 667 self.entries = set()
631 668 chunk = b''
632 669 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
633 670 chunk += c
634 671 try:
635 672 p = chunk.rindex(b'\n')
636 673 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
637 674 chunk = chunk[p + 1 :]
638 675 except ValueError:
639 676 # substring '\n' not found, maybe the entry is bigger than the
640 677 # chunksize, so let's keep iterating
641 678 pass
642 679
643 680 if chunk:
644 681 msg = _(b"fncache does not end with a newline")
645 682 if warn:
646 683 warn(msg + b'\n')
647 684 else:
648 685 raise error.Abort(
649 686 msg,
650 687 hint=_(
651 688 b"use 'hg debugrebuildfncache' to "
652 689 b"rebuild the fncache"
653 690 ),
654 691 )
655 692 self._checkentries(fp, warn)
656 693 fp.close()
657 694
658 695 def _checkentries(self, fp, warn):
659 696 """make sure there is no empty string in entries"""
660 697 if b'' in self.entries:
661 698 fp.seek(0)
662 699 for n, line in enumerate(fp):
663 700 if not line.rstrip(b'\n'):
664 701 t = _(b'invalid entry in fncache, line %d') % (n + 1)
665 702 if warn:
666 703 warn(t + b'\n')
667 704 else:
668 705 raise error.Abort(t)
669 706
670 707 def write(self, tr):
671 708 if self._dirty:
672 709 assert self.entries is not None
673 710 self.entries = self.entries | self.addls
674 711 self.addls = set()
675 712 tr.addbackup(b'fncache')
676 713 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
677 714 if self.entries:
678 715 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
679 716 fp.close()
680 717 self._dirty = False
681 718 if self.addls:
682 719 # if we have just new entries, let's append them to the fncache
683 720 tr.addbackup(b'fncache')
684 721 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
685 722 if self.addls:
686 723 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
687 724 fp.close()
688 725 self.entries = None
689 726 self.addls = set()
690 727
691 728 def addignore(self, fn):
692 729 self._ignores.add(fn)
693 730
694 731 def add(self, fn):
695 732 if fn in self._ignores:
696 733 return
697 734 if self.entries is None:
698 735 self._load()
699 736 if fn not in self.entries:
700 737 self.addls.add(fn)
701 738
702 739 def remove(self, fn):
703 740 if self.entries is None:
704 741 self._load()
705 742 if fn in self.addls:
706 743 self.addls.remove(fn)
707 744 return
708 745 try:
709 746 self.entries.remove(fn)
710 747 self._dirty = True
711 748 except KeyError:
712 749 pass
713 750
714 751 def __contains__(self, fn):
715 752 if fn in self.addls:
716 753 return True
717 754 if self.entries is None:
718 755 self._load()
719 756 return fn in self.entries
720 757
721 758 def __iter__(self):
722 759 if self.entries is None:
723 760 self._load()
724 761 return iter(self.entries | self.addls)
725 762
726 763
727 764 class _fncachevfs(vfsmod.proxyvfs):
728 765 def __init__(self, vfs, fnc, encode):
729 766 vfsmod.proxyvfs.__init__(self, vfs)
730 767 self.fncache = fnc
731 768 self.encode = encode
732 769
733 770 def __call__(self, path, mode=b'r', *args, **kw):
734 771 encoded = self.encode(path)
735 772 if (
736 773 mode not in (b'r', b'rb')
737 774 and (path.startswith(b'data/') or path.startswith(b'meta/'))
738 775 and revlog_type(path) is not None
739 776 ):
740 777 # do not trigger a fncache load when adding a file that already is
741 778 # known to exist.
742 779 notload = self.fncache.entries is None and self.vfs.exists(encoded)
743 780 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
744 781 # when appending to an existing file, if the file has size zero,
745 782 # it should be considered as missing. Such zero-size files are
746 783 # the result of truncation when a transaction is aborted.
747 784 notload = False
748 785 if not notload:
749 786 self.fncache.add(path)
750 787 return self.vfs(encoded, mode, *args, **kw)
751 788
752 789 def join(self, path):
753 790 if path:
754 791 return self.vfs.join(self.encode(path))
755 792 else:
756 793 return self.vfs.join(path)
757 794
758 795 def register_file(self, path):
759 796 """generic hook point to let fncache steer its stew"""
760 797 if path.startswith(b'data/') or path.startswith(b'meta/'):
761 798 self.fncache.add(path)
762 799
763 800
764 801 class fncachestore(basicstore):
765 802 def __init__(self, path, vfstype, dotencode):
766 803 if dotencode:
767 804 encode = _pathencode
768 805 else:
769 806 encode = _plainhybridencode
770 807 self.encode = encode
771 808 vfs = vfstype(path + b'/store')
772 809 self.path = vfs.base
773 810 self.pathsep = self.path + b'/'
774 811 self.createmode = _calcmode(vfs)
775 812 vfs.createmode = self.createmode
776 813 self.rawvfs = vfs
777 814 fnc = fncache(vfs)
778 815 self.fncache = fnc
779 816 self.vfs = _fncachevfs(vfs, fnc, encode)
780 817 self.opener = self.vfs
781 818
782 819 def join(self, f):
783 820 return self.pathsep + self.encode(f)
784 821
785 822 def getsize(self, path):
786 823 return self.rawvfs.stat(path).st_size
787 824
788 def datafiles(self, matcher=None, undecodable=None):
825 def datafiles(
826 self, matcher=None, undecodable=None
827 ) -> Generator[StoreEntry, None, None]:
789 828 for f in sorted(self.fncache):
790 829 if not _matchtrackedpath(f, matcher):
791 830 continue
792 831 ef = self.encode(f)
793 832 t = revlog_type(f)
794 833 if t is None:
795 834 # Note: this should not be in the fncache then…
796 835 #
797 836 # However the fncache might contain such files added by
798 837 # previous versions of Mercurial.
799 838 continue
800 839 t |= FILEFLAGS_FILELOG
801 840 try:
802 yield t, f, self.getsize(ef)
841 yield StoreEntry(
842 unencoded_path=f,
843 is_revlog=True,
844 revlog_type=FILEFLAGS_FILELOG,
845 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
846 is_volatile=bool(t & FILEFLAGS_VOLATILE),
847 file_size=self.getsize(ef),
848 )
803 849 except FileNotFoundError:
804 850 pass
805 851
806 852 def copylist(self):
807 853 d = (
808 854 b'bookmarks',
809 855 b'narrowspec',
810 856 b'data',
811 857 b'meta',
812 858 b'dh',
813 859 b'fncache',
814 860 b'phaseroots',
815 861 b'obsstore',
816 862 b'00manifest.d',
817 863 b'00manifest.i',
818 864 b'00changelog.d',
819 865 b'00changelog.i',
820 866 b'requires',
821 867 )
822 868 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
823 869
824 870 def write(self, tr):
825 871 self.fncache.write(tr)
826 872
827 873 def invalidatecaches(self):
828 874 self.fncache.entries = None
829 875 self.fncache.addls = set()
830 876
831 877 def markremoved(self, fn):
832 878 self.fncache.remove(fn)
833 879
834 880 def _exists(self, f):
835 881 ef = self.encode(f)
836 882 try:
837 883 self.getsize(ef)
838 884 return True
839 885 except FileNotFoundError:
840 886 return False
841 887
842 888 def __contains__(self, path):
843 889 '''Checks if the store contains path'''
844 890 path = b"/".join((b"data", path))
845 891 # check for files (exact match)
846 892 e = path + b'.i'
847 893 if e in self.fncache and self._exists(e):
848 894 return True
849 895 # now check for directories (prefix match)
850 896 if not path.endswith(b'/'):
851 897 path += b'/'
852 898 for e in self.fncache:
853 899 if e.startswith(path) and self._exists(e):
854 900 return True
855 901 return False
@@ -1,935 +1,937 b''
1 1 # streamclone.py - producing and consuming streaming repository data
2 2 #
3 3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import contextlib
10 10 import os
11 11 import struct
12 12
13 13 from .i18n import _
14 14 from .pycompat import open
15 15 from .interfaces import repository
16 16 from . import (
17 17 bookmarks,
18 18 cacheutil,
19 19 error,
20 20 narrowspec,
21 21 phases,
22 22 pycompat,
23 23 requirements as requirementsmod,
24 24 scmutil,
25 25 store,
26 26 transaction,
27 27 util,
28 28 )
29 29 from .revlogutils import (
30 30 nodemap,
31 31 )
32 32
33 33
34 34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 35 """determine the final set of requirements for a new stream clone
36 36
37 37 this method combines the "default" requirements that a new repository would
38 38 use with the constraints we get from the stream clone content. We keep
39 39 local configuration choices when possible.
40 40 """
41 41 requirements = set(default_requirements)
42 42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 43 requirements.update(streamed_requirements)
44 44 return requirements
45 45
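The merge above is plain set arithmetic; a toy example with stand-in requirement names (not the real STREAM_FIXED_REQUIREMENTS contents):

stream_fixed = {b'revlogv1', b'store', b'fncache'}
default = {b'revlogv1', b'store', b'fncache', b'dotencode', b'share-safe'}
streamed = {b'revlogv1', b'store', b'fncache', b'generaldelta'}

requirements = set(default)
requirements -= stream_fixed   # drop the storage requirements the stream will dictate
requirements |= streamed       # adopt whatever the streamed store actually uses
assert b'generaldelta' in requirements and b'share-safe' in requirements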
46 46
47 47 def streamed_requirements(repo):
48 48 """the set of requirements the new clone will have to support
49 49
50 50 This is used for advertising the stream options and to generate the actual
51 51 stream content."""
52 52 requiredformats = (
53 53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 54 )
55 55 return requiredformats
56 56
57 57
58 58 def canperformstreamclone(pullop, bundle2=False):
59 59 """Whether it is possible to perform a streaming clone as part of pull.
60 60
61 61 ``bundle2`` will cause the function to consider stream clone through
62 62 bundle2 and only through bundle2.
63 63
64 64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 65 streaming clone is supported and False otherwise. ``requirements`` is
66 66 a set of repo requirements from the remote, or ``None`` if stream clone
67 67 isn't supported.
68 68 """
69 69 repo = pullop.repo
70 70 remote = pullop.remote
71 71
72 72 bundle2supported = False
73 73 if pullop.canusebundle2:
74 74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
75 75 bundle2supported = True
76 76 # else
77 77 # Server doesn't support bundle2 stream clone or doesn't support
78 78 # the versions we support. Fall back and possibly allow legacy.
79 79
80 80 # Ensures legacy code path uses available bundle2.
81 81 if bundle2supported and not bundle2:
82 82 return False, None
83 83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
84 84 elif bundle2 and not bundle2supported:
85 85 return False, None
86 86
87 87 # Streaming clone only works on empty repositories.
88 88 if len(repo):
89 89 return False, None
90 90
91 91 # Streaming clone only works if all data is being requested.
92 92 if pullop.heads:
93 93 return False, None
94 94
95 95 streamrequested = pullop.streamclonerequested
96 96
97 97 # If we don't have a preference, let the server decide for us. This
98 98 # likely only comes into play in LANs.
99 99 if streamrequested is None:
100 100 # The server can advertise whether to prefer streaming clone.
101 101 streamrequested = remote.capable(b'stream-preferred')
102 102
103 103 if not streamrequested:
104 104 return False, None
105 105
106 106 # In order for stream clone to work, the client has to support all the
107 107 # requirements advertised by the server.
108 108 #
109 109 # The server advertises its requirements via the "stream" and "streamreqs"
110 110 # capability. "stream" (a value-less capability) is advertised if and only
111 111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 112 # is advertised and contains a comma-delimited list of requirements.
113 113 requirements = set()
114 114 if remote.capable(b'stream'):
115 115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 116 else:
117 117 streamreqs = remote.capable(b'streamreqs')
118 118 # This is weird and shouldn't happen with modern servers.
119 119 if not streamreqs:
120 120 pullop.repo.ui.warn(
121 121 _(
122 122 b'warning: stream clone requested but server has them '
123 123 b'disabled\n'
124 124 )
125 125 )
126 126 return False, None
127 127
128 128 streamreqs = set(streamreqs.split(b','))
129 129 # Server requires something we don't support. Bail.
130 130 missingreqs = streamreqs - repo.supported
131 131 if missingreqs:
132 132 pullop.repo.ui.warn(
133 133 _(
134 134 b'warning: stream clone requested but client is missing '
135 135 b'requirements: %s\n'
136 136 )
137 137 % b', '.join(sorted(missingreqs))
138 138 )
139 139 pullop.repo.ui.warn(
140 140 _(
141 141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 142 b'for more information)\n'
143 143 )
144 144 )
145 145 return False, None
146 146 requirements = streamreqs
147 147
148 148 return True, requirements
149 149
150 150
151 151 def maybeperformlegacystreamclone(pullop):
152 152 """Possibly perform a legacy stream clone operation.
153 153
154 154 Legacy stream clones are performed as part of pull but before all other
155 155 operations.
156 156
157 157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 158 supported.
159 159 """
160 160 from . import localrepo
161 161
162 162 supported, requirements = canperformstreamclone(pullop)
163 163
164 164 if not supported:
165 165 return
166 166
167 167 repo = pullop.repo
168 168 remote = pullop.remote
169 169
170 170 # Save remote branchmap. We will use it later to speed up branchcache
171 171 # creation.
172 172 rbranchmap = None
173 173 if remote.capable(b'branchmap'):
174 174 with remote.commandexecutor() as e:
175 175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176 176
177 177 repo.ui.status(_(b'streaming all changes\n'))
178 178
179 179 with remote.commandexecutor() as e:
180 180 fp = e.callcommand(b'stream_out', {}).result()
181 181
182 182 # TODO strictly speaking, this code should all be inside the context
183 183 # manager because the context manager is supposed to ensure all wire state
184 184 # is flushed when exiting. But the legacy peers don't do this, so it
185 185 # doesn't matter.
186 186 l = fp.readline()
187 187 try:
188 188 resp = int(l)
189 189 except ValueError:
190 190 raise error.ResponseError(
191 191 _(b'unexpected response from remote server:'), l
192 192 )
193 193 if resp == 1:
194 194 raise error.Abort(_(b'operation forbidden by server'))
195 195 elif resp == 2:
196 196 raise error.Abort(_(b'locking the remote repository failed'))
197 197 elif resp != 0:
198 198 raise error.Abort(_(b'the server sent an unknown error code'))
199 199
200 200 l = fp.readline()
201 201 try:
202 202 filecount, bytecount = map(int, l.split(b' ', 1))
203 203 except (ValueError, TypeError):
204 204 raise error.ResponseError(
205 205 _(b'unexpected response from remote server:'), l
206 206 )
207 207
208 208 with repo.lock():
209 209 consumev1(repo, fp, filecount, bytecount)
210 210 repo.requirements = new_stream_clone_requirements(
211 211 repo.requirements,
212 212 requirements,
213 213 )
214 214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 215 repo.ui, repo.requirements, repo.features
216 216 )
217 217 scmutil.writereporequirements(repo)
218 218 nodemap.post_stream_cleanup(repo)
219 219
220 220 if rbranchmap:
221 221 repo._branchcaches.replace(repo, rbranchmap)
222 222
223 223 repo.invalidate()
224 224
225 225
226 226 def allowservergeneration(repo):
227 227 """Whether streaming clones are allowed from the server."""
228 228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 229 return False
230 230
231 231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 232 return False
233 233
234 234 # The way stream clone works makes it impossible to hide secret changesets.
235 235 # So don't allow this by default.
236 236 secret = phases.hassecret(repo)
237 237 if secret:
238 238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239 239
240 240 return True
241 241
242 242
243 243 # This is its own function so extensions can override it.
244 244 def _walkstreamfiles(repo, matcher=None):
245 245 return repo.store.walk(matcher)
246 246
247 247
248 248 def generatev1(repo):
249 249 """Emit content for version 1 of a streaming clone.
250 250
251 251 This returns a 3-tuple of (file count, byte size, data iterator).
252 252
253 253 The data iterator consists of N entries for each file being transferred.
254 254 Each file entry starts as a line with the file name and integer size
255 255 delimited by a null byte.
256 256
257 257 The raw file data follows. Following the raw file data is the next file
258 258 entry, or EOF.
259 259
260 260 When used on the wire protocol, an additional line indicating protocol
261 261 success will be prepended to the stream. This function is not responsible
262 262 for adding it.
263 263
264 264 This function will obtain a repository lock to ensure a consistent view of
265 265 the store is captured. It therefore may raise LockError.
266 266 """
267 267 entries = []
268 268 total_bytes = 0
269 269 # Get consistent snapshot of repo, lock during scan.
270 270 with repo.lock():
271 271 repo.ui.debug(b'scanning\n')
272 for file_type, name, size in _walkstreamfiles(repo):
273 if size:
274 entries.append((name, size))
275 total_bytes += size
272 for entry in _walkstreamfiles(repo):
273 if entry.file_size:
274 entries.append((entry.unencoded_path, entry.file_size))
275 total_bytes += entry.file_size
276 276 _test_sync_point_walk_1(repo)
277 277 _test_sync_point_walk_2(repo)
278 278
279 279 repo.ui.debug(
280 280 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
281 281 )
282 282
283 283 svfs = repo.svfs
284 284 debugflag = repo.ui.debugflag
285 285
286 286 def emitrevlogdata():
287 287 for name, size in entries:
288 288 if debugflag:
289 289 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
290 290 # partially encode name over the wire for backwards compat
291 291 yield b'%s\0%d\n' % (store.encodedir(name), size)
292 292 # auditing at this stage is both pointless (paths are already
293 293 # trusted by the local repo) and expensive
294 294 with svfs(name, b'rb', auditpath=False) as fp:
295 295 if size <= 65536:
296 296 yield fp.read(size)
297 297 else:
298 298 for chunk in util.filechunkiter(fp, limit=size):
299 299 yield chunk
300 300
301 301 return len(entries), total_bytes, emitrevlogdata()
302 302
303 303
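As an aside (illustration only, not part of this change): the v1 format documented above is simple enough that a standalone consumer can be sketched in a few lines. The helper below is hypothetical, not Mercurial API.

import io


def parse_v1_stream(fp, filecount):
    """Yield (name, data) pairs from a v1 stream holding `filecount` files.

    Each entry is a header line of name and size separated by a NUL and
    terminated by a newline, followed by exactly `size` bytes of raw content.
    """
    for _ in range(filecount):
        header = fp.readline()
        name, size = header.rstrip(b'\n').split(b'\0', 1)
        yield name, fp.read(int(size))


# usage with an in-memory stream containing a single 3-byte file
assert list(parse_v1_stream(io.BytesIO(b'data/foo.i\x003\nabc'), 1)) == [
    (b'data/foo.i', b'abc')
]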
304 304 def generatev1wireproto(repo):
305 305 """Emit content for version 1 of streaming clone suitable for the wire.
306 306
307 307 This is the data output from ``generatev1()`` with 2 header lines. The
308 308 first line indicates overall success. The 2nd contains the file count and
309 309 byte size of payload.
310 310
311 311 The success line contains "0" for success, "1" for stream generation not
312 312 allowed, and "2" for error locking the repository (possibly indicating
313 313 a permissions error for the server process).
314 314 """
315 315 if not allowservergeneration(repo):
316 316 yield b'1\n'
317 317 return
318 318
319 319 try:
320 320 filecount, bytecount, it = generatev1(repo)
321 321 except error.LockError:
322 322 yield b'2\n'
323 323 return
324 324
325 325 # Indicates successful response.
326 326 yield b'0\n'
327 327 yield b'%d %d\n' % (filecount, bytecount)
328 328 for chunk in it:
329 329 yield chunk
330 330
331 331
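A hypothetical client-side counterpart (sketch only, not part of the patch): reading the two header lines documented above before handing the rest of the stream to a v1 consumer.

import io


def read_wireproto_v1_header(fp):
    """Interpret the status and size lines emitted by generatev1wireproto()."""
    status = fp.readline().strip()
    if status == b'1':
        raise RuntimeError('stream generation not allowed by the server')
    if status == b'2':
        raise RuntimeError('server failed to lock the repository')
    if status != b'0':
        raise RuntimeError('unexpected status line: %r' % status)
    filecount, bytecount = map(int, fp.readline().split())
    return filecount, bytecount


assert read_wireproto_v1_header(io.BytesIO(b'0\n3 4096\n')) == (3, 4096)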
332 332 def generatebundlev1(repo, compression=b'UN'):
333 333 """Emit content for version 1 of a stream clone bundle.
334 334
335 335 The first 4 bytes of the output ("HGS1") denote this as stream clone
336 336 bundle version 1.
337 337
338 338 The next 2 bytes indicate the compression type. Only "UN" is currently
339 339 supported.
340 340
341 341 The next 16 bytes are two 64-bit big endian unsigned integers indicating
342 342 file count and byte count, respectively.
343 343
344 344 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
345 345 of the requirements string, including a trailing \0. The following N bytes
346 346 are the requirements string, which is ASCII containing a comma-delimited
347 347 list of repo requirements that are needed to support the data.
348 348
349 349 The remaining content is the output of ``generatev1()`` (which may be
350 350 compressed in the future).
351 351
352 352 Returns a tuple of (requirements, data generator).
353 353 """
354 354 if compression != b'UN':
355 355 raise ValueError(b'we do not support the compression argument yet')
356 356
357 357 requirements = streamed_requirements(repo)
358 358 requires = b','.join(sorted(requirements))
359 359
360 360 def gen():
361 361 yield b'HGS1'
362 362 yield compression
363 363
364 364 filecount, bytecount, it = generatev1(repo)
365 365 repo.ui.status(
366 366 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
367 367 )
368 368
369 369 yield struct.pack(b'>QQ', filecount, bytecount)
370 370 yield struct.pack(b'>H', len(requires) + 1)
371 371 yield requires + b'\0'
372 372
373 373 # This is where we'll add compression in the future.
374 374 assert compression == b'UN'
375 375
376 376 progress = repo.ui.makeprogress(
377 377 _(b'bundle'), total=bytecount, unit=_(b'bytes')
378 378 )
379 379 progress.update(0)
380 380
381 381 for chunk in it:
382 382 progress.increment(step=len(chunk))
383 383 yield chunk
384 384
385 385 progress.complete()
386 386
387 387 return requirements, gen()
388 388
389 389
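For illustration (a standalone sketch under the layout described in the docstring above, not Mercurial code): the stream clone bundle v1 header can be assembled with the struct module; readbundle1header() further below is its inverse, starting after the 4-byte magic.

import struct


def pack_bundle1_header(filecount, bytecount, requirements):
    """Build the 'HGS1' header laid out by generatebundlev1()."""
    requires = b','.join(sorted(requirements)) + b'\0'
    return (
        b'HGS1'  # magic
        + b'UN'  # compression: uncompressed
        + struct.pack(b'>QQ', filecount, bytecount)
        + struct.pack(b'>H', len(requires))
        + requires
    )


header = pack_bundle1_header(2, 1024, {b'revlogv1', b'store'})
assert header[:6] == b'HGS1UN'
assert struct.unpack(b'>QQ', header[6:22]) == (2, 1024)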
390 390 def consumev1(repo, fp, filecount, bytecount):
391 391 """Apply the contents from version 1 of a streaming clone file handle.
392 392
393 393 This takes the output from "stream_out" and applies it to the specified
394 394 repository.
395 395
396 396 Like "stream_out," the status line added by the wire protocol is not
397 397 handled by this function.
398 398 """
399 399 with repo.lock():
400 400 repo.ui.status(
401 401 _(b'%d files to transfer, %s of data\n')
402 402 % (filecount, util.bytecount(bytecount))
403 403 )
404 404 progress = repo.ui.makeprogress(
405 405 _(b'clone'), total=bytecount, unit=_(b'bytes')
406 406 )
407 407 progress.update(0)
408 408 start = util.timer()
409 409
410 410 # TODO: get rid of (potential) inconsistency
411 411 #
412 412 # If transaction is started and any @filecache property is
413 413 # changed at this point, it causes inconsistency between
414 414 # in-memory cached property and streamclone-ed file on the
415 415 # disk. Nested transaction prevents transaction scope "clone"
416 416 # below from writing in-memory changes out at the end of it,
417 417 # even though in-memory changes are discarded at the end of it
418 418 # regardless of transaction nesting.
419 419 #
420 420 # But transaction nesting can't be simply prohibited, because
421 421 # nesting occurs also in ordinary case (e.g. enabling
422 422 # clonebundles).
423 423
424 424 with repo.transaction(b'clone'):
425 425 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
426 426 for i in range(filecount):
427 427 # XXX doesn't support '\n' or '\r' in filenames
428 428 l = fp.readline()
429 429 try:
430 430 name, size = l.split(b'\0', 1)
431 431 size = int(size)
432 432 except (ValueError, TypeError):
433 433 raise error.ResponseError(
434 434 _(b'unexpected response from remote server:'), l
435 435 )
436 436 if repo.ui.debugflag:
437 437 repo.ui.debug(
438 438 b'adding %s (%s)\n' % (name, util.bytecount(size))
439 439 )
440 440 # for backwards compat, name was partially encoded
441 441 path = store.decodedir(name)
442 442 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
443 443 for chunk in util.filechunkiter(fp, limit=size):
444 444 progress.increment(step=len(chunk))
445 445 ofp.write(chunk)
446 446
447 447 # force @filecache properties to be reloaded from
448 448 # streamclone-ed file at next access
449 449 repo.invalidate(clearfilecache=True)
450 450
451 451 elapsed = util.timer() - start
452 452 if elapsed <= 0:
453 453 elapsed = 0.001
454 454 progress.complete()
455 455 repo.ui.status(
456 456 _(b'transferred %s in %.1f seconds (%s/sec)\n')
457 457 % (
458 458 util.bytecount(bytecount),
459 459 elapsed,
460 460 util.bytecount(bytecount / elapsed),
461 461 )
462 462 )
463 463
464 464
465 465 def readbundle1header(fp):
466 466 compression = fp.read(2)
467 467 if compression != b'UN':
468 468 raise error.Abort(
469 469 _(
470 470 b'only uncompressed stream clone bundles are '
471 471 b'supported; got %s'
472 472 )
473 473 % compression
474 474 )
475 475
476 476 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
477 477 requireslen = struct.unpack(b'>H', fp.read(2))[0]
478 478 requires = fp.read(requireslen)
479 479
480 480 if not requires.endswith(b'\0'):
481 481 raise error.Abort(
482 482 _(
483 483 b'malformed stream clone bundle: '
484 484 b'requirements not properly encoded'
485 485 )
486 486 )
487 487
488 488 requirements = set(requires.rstrip(b'\0').split(b','))
489 489
490 490 return filecount, bytecount, requirements
491 491
492 492
493 493 def applybundlev1(repo, fp):
494 494 """Apply the content from a stream clone bundle version 1.
495 495
496 496 We assume the 4 byte header has been read and validated and the file handle
497 497 is at the 2 byte compression identifier.
498 498 """
499 499 if len(repo):
500 500 raise error.Abort(
501 501 _(b'cannot apply stream clone bundle on non-empty repo')
502 502 )
503 503
504 504 filecount, bytecount, requirements = readbundle1header(fp)
505 505 missingreqs = requirements - repo.supported
506 506 if missingreqs:
507 507 raise error.Abort(
508 508 _(b'unable to apply stream clone: unsupported format: %s')
509 509 % b', '.join(sorted(missingreqs))
510 510 )
511 511
512 512 consumev1(repo, fp, filecount, bytecount)
513 513 nodemap.post_stream_cleanup(repo)
514 514
515 515
516 516 class streamcloneapplier:
517 517 """Class to manage applying streaming clone bundles.
518 518
519 519 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
520 520 readers to perform bundle type-specific functionality.
521 521 """
522 522
523 523 def __init__(self, fh):
524 524 self._fh = fh
525 525
526 526 def apply(self, repo):
527 527 return applybundlev1(repo, self._fh)
528 528
529 529
530 530 # type of file to stream
531 531 _fileappend = 0 # append only file
532 532 _filefull = 1 # full snapshot file
533 533
534 534 # Source of the file
535 535 _srcstore = b's' # store (svfs)
536 536 _srccache = b'c' # cache (cache)
537 537
538 538 # This is its own function so extensions can override it.
539 539 def _walkstreamfullstorefiles(repo):
540 540 """list snapshot files from the store"""
541 541 fnames = []
542 542 if not repo.publishing():
543 543 fnames.append(b'phaseroots')
544 544 return fnames
545 545
546 546
547 547 def _filterfull(entry, copy, vfsmap):
548 548 """actually copy the snapshot files"""
549 549 src, name, ftype, data = entry
550 550 if ftype != _filefull:
551 551 return entry
552 552 return (src, name, ftype, copy(vfsmap[src].join(name)))
553 553
554 554
555 555 @contextlib.contextmanager
556 556 def maketempcopies():
557 557 """return a function to temporarily copy files"""
558 558
559 559 files = []
560 560 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
561 561 try:
562 562
563 563 def copy(src):
564 564 fd, dst = pycompat.mkstemp(
565 565 prefix=os.path.basename(src), dir=dst_dir
566 566 )
567 567 os.close(fd)
568 568 files.append(dst)
569 569 util.copyfiles(src, dst, hardlink=True)
570 570 return dst
571 571
572 572 yield copy
573 573 finally:
574 574 for tmp in files:
575 575 util.tryunlink(tmp)
576 576 util.tryrmdir(dst_dir)
577 577
578 578
579 579 def _makemap(repo):
580 580 """make a (src -> vfs) map for the repo"""
581 581 vfsmap = {
582 582 _srcstore: repo.svfs,
583 583 _srccache: repo.cachevfs,
584 584 }
585 585 # we keep repo.vfs out of the map on purpose, there are too many dangers there
586 586 # (eg: .hg/hgrc)
587 587 assert repo.vfs not in vfsmap.values()
588 588
589 589 return vfsmap
590 590
591 591
592 592 def _emit2(repo, entries, totalfilesize):
593 593 """actually emit the stream bundle"""
594 594 vfsmap = _makemap(repo)
595 595 # we keep repo.vfs out of the map on purpose, there are too many dangers there
596 596 # (eg: .hg/hgrc),
597 597 #
598 598 # this assert is duplicated (from _makemap) as an author might think this is
599 599 # fine, while it really is not.
600 600 if repo.vfs in vfsmap.values():
601 601 raise error.ProgrammingError(
602 602 b'repo.vfs must not be added to vfsmap for security reasons'
603 603 )
604 604
605 605 progress = repo.ui.makeprogress(
606 606 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
607 607 )
608 608 progress.update(0)
609 609 with maketempcopies() as copy, progress:
610 610 # copy is delayed until we are in the try
611 611 entries = [_filterfull(e, copy, vfsmap) for e in entries]
612 612 yield None # this releases the lock on the repository
613 613 totalbytecount = 0
614 614
615 615 for src, name, ftype, data in entries:
616 616 vfs = vfsmap[src]
617 617 yield src
618 618 yield util.uvarintencode(len(name))
619 619 if ftype == _fileappend:
620 620 fp = vfs(name)
621 621 size = data
622 622 elif ftype == _filefull:
623 623 fp = open(data, b'rb')
624 624 size = util.fstat(fp).st_size
625 625 bytecount = 0
626 626 try:
627 627 yield util.uvarintencode(size)
628 628 yield name
629 629 if size <= 65536:
630 630 chunks = (fp.read(size),)
631 631 else:
632 632 chunks = util.filechunkiter(fp, limit=size)
633 633 for chunk in chunks:
634 634 bytecount += len(chunk)
635 635 totalbytecount += len(chunk)
636 636 progress.update(totalbytecount)
637 637 yield chunk
638 638 if bytecount != size:
639 639 # Would most likely be caused by a race due to `hg strip` or
640 640 # a revlog split
641 641 raise error.Abort(
642 642 _(
643 643 b'clone could only read %d bytes from %s, but '
644 644 b'expected %d bytes'
645 645 )
646 646 % (bytecount, name, size)
647 647 )
648 648 finally:
649 649 fp.close()
650 650
651 651
652 652 def _test_sync_point_walk_1(repo):
653 653 """a function for synchronisation during tests"""
654 654
655 655
656 656 def _test_sync_point_walk_2(repo):
657 657 """a function for synchronisation during tests"""
658 658
659 659
660 660 def _v2_walk(repo, includes, excludes, includeobsmarkers):
661 661 """emit a series of file information useful to clone a repo
662 662
663 663 return (entries, totalfilesize)
664 664
665 665 entries is a list of tuple (vfs-key, file-path, file-type, size)
666 666
667 667 - `vfs-key`: is a key to the right vfs to write the file (see _makemap)
668 668 - `name`: file path of the file to copy (to be fed to the vfs)
669 669 - `file-type`: does this file need to be copied with the source lock?
670 670 - `size`: the size of the file (or None)
671 671 """
672 672 assert repo._currentlock(repo._lockref) is not None
673 673 entries = []
674 674 totalfilesize = 0
675 675
676 676 matcher = None
677 677 if includes or excludes:
678 678 matcher = narrowspec.match(repo.root, includes, excludes)
679 679
680 for rl_type, name, size in _walkstreamfiles(repo, matcher):
681 if size:
680 for entry in _walkstreamfiles(repo, matcher):
681 if entry.file_size:
682 682 ft = _fileappend
683 if rl_type & store.FILEFLAGS_VOLATILE:
683 if entry.is_volatile:
684 684 ft = _filefull
685 entries.append((_srcstore, name, ft, size))
686 totalfilesize += size
685 entries.append(
686 (_srcstore, entry.unencoded_path, ft, entry.file_size)
687 )
688 totalfilesize += entry.file_size
687 689 for name in _walkstreamfullstorefiles(repo):
688 690 if repo.svfs.exists(name):
689 691 totalfilesize += repo.svfs.lstat(name).st_size
690 692 entries.append((_srcstore, name, _filefull, None))
691 693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
692 694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
693 695 entries.append((_srcstore, b'obsstore', _filefull, None))
694 696 for name in cacheutil.cachetocopy(repo):
695 697 if repo.cachevfs.exists(name):
696 698 totalfilesize += repo.cachevfs.lstat(name).st_size
697 699 entries.append((_srccache, name, _filefull, None))
698 700 return entries, totalfilesize
699 701
700 702
701 703 def generatev2(repo, includes, excludes, includeobsmarkers):
702 704 """Emit content for version 2 of a streaming clone.
703 705
704 706 The data stream consists of the following entries:
705 707 1) A char representing the file destination (eg: store or cache)
706 708 2) A varint containing the length of the filename
707 709 3) A varint containing the length of file data
708 710 4) N bytes containing the filename (the internal, store-agnostic form)
709 711 5) N bytes containing the file data
710 712
711 713 Returns a 3-tuple of (file count, file size, data iterator).
712 714 """
713 715
714 716 with repo.lock():
715 717
716 718 repo.ui.debug(b'scanning\n')
717 719
718 720 entries, totalfilesize = _v2_walk(
719 721 repo,
720 722 includes=includes,
721 723 excludes=excludes,
722 724 includeobsmarkers=includeobsmarkers,
723 725 )
724 726
725 727 chunks = _emit2(repo, entries, totalfilesize)
726 728 first = next(chunks)
727 729 assert first is None
728 730 _test_sync_point_walk_1(repo)
729 731 _test_sync_point_walk_2(repo)
730 732
731 733 return len(entries), totalfilesize, chunks
732 734
733 735
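Illustration only: a self-contained sketch of the per-file framing described in the generatev2() docstring (destination byte, uvarint name length, uvarint data length, name, data). The varint helpers are written out so the example stands alone; they are assumed to match the little-endian base-128 scheme of util.uvarintencode/uvarintdecodestream.

import io


def uvarint_encode(value):
    """Encode an unsigned integer as a little-endian base-128 varint."""
    out = bytearray()
    while True:
        byte = value & 0x7F
        value >>= 7
        if value:
            out.append(byte | 0x80)
        else:
            out.append(byte)
            return bytes(out)


def uvarint_decode(fp):
    """Decode one varint from a binary stream."""
    result = shift = 0
    while True:
        byte = fp.read(1)[0]
        result |= (byte & 0x7F) << shift
        if not byte & 0x80:
            return result
        shift += 7


def emit_v2_entry(dst, name, data):
    """Frame one file entry the way the v2 stream does."""
    return dst + uvarint_encode(len(name)) + uvarint_encode(len(data)) + name + data


def read_v2_entry(fp):
    """Parse one framed entry back into (destination, name, data)."""
    dst = fp.read(1)
    namelen = uvarint_decode(fp)
    datalen = uvarint_decode(fp)
    return dst, fp.read(namelen), fp.read(datalen)


frame = emit_v2_entry(b's', b'data/foo.i', b'hello')
assert read_v2_entry(io.BytesIO(frame)) == (b's', b'data/foo.i', b'hello')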
734 736 @contextlib.contextmanager
735 737 def nested(*ctxs):
736 738 this = ctxs[0]
737 739 rest = ctxs[1:]
738 740 with this:
739 741 if rest:
740 742 with nested(*rest):
741 743 yield
742 744 else:
743 745 yield
744 746
745 747
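A small usage sketch for the nested() helper above (hypothetical, assuming it is in scope): the context managers are entered left to right and released in reverse order.

import contextlib

events = []


@contextlib.contextmanager
def tag(name):
    events.append(b'enter ' + name)
    try:
        yield
    finally:
        events.append(b'exit ' + name)


with nested(tag(b'a'), tag(b'b')):
    pass

assert events == [b'enter a', b'enter b', b'exit b', b'exit a']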
746 748 def consumev2(repo, fp, filecount, filesize):
747 749 """Apply the contents from a version 2 streaming clone.
748 750
749 751 Data is read from an object that only needs to provide a ``read(size)``
750 752 method.
751 753 """
752 754 with repo.lock():
753 755 repo.ui.status(
754 756 _(b'%d files to transfer, %s of data\n')
755 757 % (filecount, util.bytecount(filesize))
756 758 )
757 759
758 760 start = util.timer()
759 761 progress = repo.ui.makeprogress(
760 762 _(b'clone'), total=filesize, unit=_(b'bytes')
761 763 )
762 764 progress.update(0)
763 765
764 766 vfsmap = _makemap(repo)
765 767 # we keep repo.vfs out of the map on purpose, there are too many dangers
766 768 # there (eg: .hg/hgrc),
767 769 #
768 770 # this assert is duplicated (from _makemap) as an author might think this
769 771 # is fine, while it really is not.
770 772 if repo.vfs in vfsmap.values():
771 773 raise error.ProgrammingError(
772 774 b'repo.vfs must not be added to vfsmap for security reasons'
773 775 )
774 776
775 777 with repo.transaction(b'clone'):
776 778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
777 779 with nested(*ctxs):
778 780 for i in range(filecount):
779 781 src = util.readexactly(fp, 1)
780 782 vfs = vfsmap[src]
781 783 namelen = util.uvarintdecodestream(fp)
782 784 datalen = util.uvarintdecodestream(fp)
783 785
784 786 name = util.readexactly(fp, namelen)
785 787
786 788 if repo.ui.debugflag:
787 789 repo.ui.debug(
788 790 b'adding [%s] %s (%s)\n'
789 791 % (src, name, util.bytecount(datalen))
790 792 )
791 793
792 794 with vfs(name, b'w') as ofp:
793 795 for chunk in util.filechunkiter(fp, limit=datalen):
794 796 progress.increment(step=len(chunk))
795 797 ofp.write(chunk)
796 798
797 799 # force @filecache properties to be reloaded from
798 800 # streamclone-ed file at next access
799 801 repo.invalidate(clearfilecache=True)
800 802
801 803 elapsed = util.timer() - start
802 804 if elapsed <= 0:
803 805 elapsed = 0.001
804 806 repo.ui.status(
805 807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
806 808 % (
807 809 util.bytecount(progress.pos),
808 810 elapsed,
809 811 util.bytecount(progress.pos / elapsed),
810 812 )
811 813 )
812 814 progress.complete()
813 815
814 816
815 817 def applybundlev2(repo, fp, filecount, filesize, requirements):
816 818 from . import localrepo
817 819
818 820 missingreqs = [r for r in requirements if r not in repo.supported]
819 821 if missingreqs:
820 822 raise error.Abort(
821 823 _(b'unable to apply stream clone: unsupported format: %s')
822 824 % b', '.join(sorted(missingreqs))
823 825 )
824 826
825 827 consumev2(repo, fp, filecount, filesize)
826 828
827 829 repo.requirements = new_stream_clone_requirements(
828 830 repo.requirements,
829 831 requirements,
830 832 )
831 833 repo.svfs.options = localrepo.resolvestorevfsoptions(
832 834 repo.ui, repo.requirements, repo.features
833 835 )
834 836 scmutil.writereporequirements(repo)
835 837 nodemap.post_stream_cleanup(repo)
836 838
837 839
838 840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
839 841 hardlink = [True]
840 842
841 843 def copy_used():
842 844 hardlink[0] = False
843 845 progress.topic = _(b'copying')
844 846
845 847 for k, path, size in entries:
846 848 src_vfs = src_vfs_map[k]
847 849 dst_vfs = dst_vfs_map[k]
848 850 src_path = src_vfs.join(path)
849 851 dst_path = dst_vfs.join(path)
850 852 # We cannot use dirname and makedirs of dst_vfs here because the store
851 853 # encoding confuses them. See issue 6581 for details.
852 854 dirname = os.path.dirname(dst_path)
853 855 if not os.path.exists(dirname):
854 856 util.makedirs(dirname)
855 857 dst_vfs.register_file(path)
856 858 # XXX we could use the #nb_bytes argument.
857 859 util.copyfile(
858 860 src_path,
859 861 dst_path,
860 862 hardlink=hardlink[0],
861 863 no_hardlink_cb=copy_used,
862 864 check_fs_hardlink=False,
863 865 )
864 866 progress.increment()
865 867 return hardlink[0]
866 868
867 869
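The copy strategy of _copy_files() can be summarized with a standalone sketch (not Mercurial API): try hardlinks first, and permanently switch to plain copies as soon as one hardlink attempt fails.

import os
import shutil


def link_or_copy(pairs):
    """Copy (src, dst) pairs; return True if every file could be hardlinked."""
    hardlinked = True
    for src, dst in pairs:
        dirname = os.path.dirname(dst)
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        if hardlinked:
            try:
                os.link(src, dst)
                continue
            except OSError:
                # e.g. cross-device link or unsupported filesystem
                hardlinked = False
        shutil.copy2(src, dst)
    return hardlinked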
868 870 def local_copy(src_repo, dest_repo):
869 871 """copy all content from one local repository to another
870 872
871 873 This is useful for local clones"""
872 874 src_store_requirements = {
873 875 r
874 876 for r in src_repo.requirements
875 877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
876 878 }
877 879 dest_store_requirements = {
878 880 r
879 881 for r in dest_repo.requirements
880 882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
881 883 }
882 884 assert src_store_requirements == dest_store_requirements
883 885
884 886 with dest_repo.lock():
885 887 with src_repo.lock():
886 888
887 889 # bookmarks are not integrated into the streaming as they might use the
888 890 # `repo.vfs`, and there is too much sensitive data accessible
889 891 # through `repo.vfs` to expose it to streaming clone.
890 892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
891 893 srcbookmarks = src_book_vfs.join(b'bookmarks')
892 894 bm_count = 0
893 895 if os.path.exists(srcbookmarks):
894 896 bm_count = 1
895 897
896 898 entries, totalfilesize = _v2_walk(
897 899 src_repo,
898 900 includes=None,
899 901 excludes=None,
900 902 includeobsmarkers=True,
901 903 )
902 904 src_vfs_map = _makemap(src_repo)
903 905 dest_vfs_map = _makemap(dest_repo)
904 906 progress = src_repo.ui.makeprogress(
905 907 topic=_(b'linking'),
906 908 total=len(entries) + bm_count,
907 909 unit=_(b'files'),
908 910 )
909 911 # copy files
910 912 #
911 913 # We could copy the full file while the source repository is locked
912 914 # and the other one without the lock. However, in the linking case,
913 915 # this would also require checks that nobody is appending any data
914 916 # to the files while we do the clone, so this is not done yet. We
915 917 # could do this blindly when copying files.
916 918 files = ((k, path, size) for k, path, ftype, size in entries)
917 919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
918 920
919 921 # copy bookmarks over
920 922 if bm_count:
921 923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
922 924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
923 925 util.copyfile(srcbookmarks, dstbookmarks)
924 926 progress.complete()
925 927 if hardlink:
926 928 msg = b'linked %d files\n'
927 929 else:
928 930 msg = b'copied %d files\n'
929 931 src_repo.ui.debug(msg % (len(entries) + bm_count))
930 932
931 933 with dest_repo.transaction(b"localclone") as tr:
932 934 dest_repo.store.write(tr)
933 935
934 936 # clean up transaction files as they do not make sense
935 937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,689 +1,690 b''
1 1 # upgrade.py - functions for in place upgrade of Mercurial repository
2 2 #
3 3 # Copyright (c) 2016-present, Gregory Szorc
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import stat
10 10
11 11 from ..i18n import _
12 12 from ..pycompat import getattr
13 13 from .. import (
14 14 changelog,
15 15 error,
16 16 filelog,
17 17 manifest,
18 18 metadata,
19 19 pycompat,
20 20 requirements,
21 21 scmutil,
22 22 store,
23 23 util,
24 24 vfs as vfsmod,
25 25 )
26 26 from ..revlogutils import (
27 27 constants as revlogconst,
28 28 flagutil,
29 29 nodemap,
30 30 sidedata as sidedatamod,
31 31 )
32 32 from . import actions as upgrade_actions
33 33
34 34
35 35 def get_sidedata_helpers(srcrepo, dstrepo):
36 36 use_w = srcrepo.ui.configbool(b'experimental', b'worker.repository-upgrade')
37 37 sequential = pycompat.iswindows or not use_w
38 38 if not sequential:
39 39 srcrepo.register_sidedata_computer(
40 40 revlogconst.KIND_CHANGELOG,
41 41 sidedatamod.SD_FILES,
42 42 (sidedatamod.SD_FILES,),
43 43 metadata._get_worker_sidedata_adder(srcrepo, dstrepo),
44 44 flagutil.REVIDX_HASCOPIESINFO,
45 45 replace=True,
46 46 )
47 47 return sidedatamod.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)
48 48
49 49
50 50 def _revlogfrompath(repo, rl_type, path):
51 51 """Obtain a revlog from a repo path.
52 52
53 53 An instance of the appropriate class is returned.
54 54 """
55 55 if rl_type & store.FILEFLAGS_CHANGELOG:
56 56 return changelog.changelog(repo.svfs)
57 57 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
58 58 mandir = b''
59 59 if b'/' in path:
60 60 mandir = path.rsplit(b'/', 1)[0]
61 61 return manifest.manifestrevlog(
62 62 repo.nodeconstants, repo.svfs, tree=mandir
63 63 )
64 64 else:
65 65 # drop the extension and the `data/` prefix
66 66 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
67 67 if len(path_part) < 2:
68 68 msg = _(b'cannot recognize revlog from filename: %s')
69 69 msg %= path
70 70 raise error.Abort(msg)
71 71 path = path_part[1]
72 72 return filelog.filelog(repo.svfs, path)
73 73
74 74
75 75 def _copyrevlog(tr, destrepo, oldrl, rl_type, unencodedname):
76 76 """copy all relevant files for `oldrl` into `destrepo` store
77 77
78 78 Files are copied "as is" without any transformation. The copy is performed
79 79 without extra checks. Callers are responsible for making sure the copied
80 80 content is compatible with format of the destination repository.
81 81 """
82 82 oldrl = getattr(oldrl, '_revlog', oldrl)
83 83 newrl = _revlogfrompath(destrepo, rl_type, unencodedname)
84 84 newrl = getattr(newrl, '_revlog', newrl)
85 85
86 86 oldvfs = oldrl.opener
87 87 newvfs = newrl.opener
88 88 oldindex = oldvfs.join(oldrl._indexfile)
89 89 newindex = newvfs.join(newrl._indexfile)
90 90 olddata = oldvfs.join(oldrl._datafile)
91 91 newdata = newvfs.join(newrl._datafile)
92 92
93 93 with newvfs(newrl._indexfile, b'w'):
94 94 pass # create all the directories
95 95
96 96 util.copyfile(oldindex, newindex)
97 97 copydata = oldrl.opener.exists(oldrl._datafile)
98 98 if copydata:
99 99 util.copyfile(olddata, newdata)
100 100
101 101 if rl_type & store.FILEFLAGS_FILELOG:
102 102 destrepo.svfs.fncache.add(unencodedname)
103 103 if copydata:
104 104 destrepo.svfs.fncache.add(unencodedname[:-2] + b'.d')
105 105
106 106
107 107 UPGRADE_CHANGELOG = b"changelog"
108 108 UPGRADE_MANIFEST = b"manifest"
109 109 UPGRADE_FILELOGS = b"all-filelogs"
110 110
111 111 UPGRADE_ALL_REVLOGS = frozenset(
112 112 [UPGRADE_CHANGELOG, UPGRADE_MANIFEST, UPGRADE_FILELOGS]
113 113 )
114 114
115 115
116 116 def matchrevlog(revlogfilter, rl_type):
117 117 """check if a revlog is selected for cloning.
118 118
119 119 In other words, are there any updates which need to be done on the revlog,
120 120 or can it be blindly copied?
121 121
122 122 The store entry is checked against the passed filter"""
123 123 if rl_type & store.FILEFLAGS_CHANGELOG:
124 124 return UPGRADE_CHANGELOG in revlogfilter
125 125 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
126 126 return UPGRADE_MANIFEST in revlogfilter
127 127 assert rl_type & store.FILEFLAGS_FILELOG
128 128 return UPGRADE_FILELOGS in revlogfilter
129 129
130 130
131 131 def _perform_clone(
132 132 ui,
133 133 dstrepo,
134 134 tr,
135 135 old_revlog,
136 136 rl_type,
137 137 unencoded,
138 138 upgrade_op,
139 139 sidedata_helpers,
140 140 oncopiedrevision,
141 141 ):
142 142 """returns the new revlog object created"""
143 143 newrl = None
144 144 if matchrevlog(upgrade_op.revlogs_to_process, rl_type):
145 145 ui.note(
146 146 _(b'cloning %d revisions from %s\n') % (len(old_revlog), unencoded)
147 147 )
148 148 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
149 149 old_revlog.clone(
150 150 tr,
151 151 newrl,
152 152 addrevisioncb=oncopiedrevision,
153 153 deltareuse=upgrade_op.delta_reuse_mode,
154 154 forcedeltabothparents=upgrade_op.force_re_delta_both_parents,
155 155 sidedata_helpers=sidedata_helpers,
156 156 )
157 157 else:
158 158 msg = _(b'blindly copying %s containing %i revisions\n')
159 159 ui.note(msg % (unencoded, len(old_revlog)))
160 160 _copyrevlog(tr, dstrepo, old_revlog, rl_type, unencoded)
161 161
162 162 newrl = _revlogfrompath(dstrepo, rl_type, unencoded)
163 163 return newrl
164 164
165 165
166 166 def _clonerevlogs(
167 167 ui,
168 168 srcrepo,
169 169 dstrepo,
170 170 tr,
171 171 upgrade_op,
172 172 ):
173 173 """Copy revlogs between 2 repos."""
174 174 revcount = 0
175 175 srcsize = 0
176 176 srcrawsize = 0
177 177 dstsize = 0
178 178 fcount = 0
179 179 frevcount = 0
180 180 fsrcsize = 0
181 181 frawsize = 0
182 182 fdstsize = 0
183 183 mcount = 0
184 184 mrevcount = 0
185 185 msrcsize = 0
186 186 mrawsize = 0
187 187 mdstsize = 0
188 188 crevcount = 0
189 189 csrcsize = 0
190 190 crawsize = 0
191 191 cdstsize = 0
192 192
193 193 alldatafiles = list(srcrepo.store.walk())
194 194 # mapping of data files which need to be cloned
195 195 # key is unencoded filename
196 196 # value is revlog_object_from_srcrepo
197 197 manifests = {}
198 198 changelogs = {}
199 199 filelogs = {}
200 200
201 201 # Perform a pass to collect metadata. This validates we can open all
202 202 # source files and allows a unified progress bar to be displayed.
203 for rl_type, unencoded, size in alldatafiles:
204 if not rl_type & store.FILEFLAGS_REVLOG_MAIN:
203 for entry in alldatafiles:
204 if not (entry.is_revlog and entry.is_revlog_main):
205 205 continue
206 unencoded = entry.unencoded_path
206 207
207 208 # the store.walk function will wrongly pick up transaction backups and
208 209 # get confused. As a quick fix for the 5.9 release, we ignore those.
209 210 # (this is not a module constant because it seems better to keep the
210 211 # hack together)
211 212 skip_undo = (
212 213 b'undo.backup.00changelog.i',
213 214 b'undo.backup.00manifest.i',
214 215 )
215 216 if unencoded in skip_undo:
216 217 continue
217 218
218 rl = _revlogfrompath(srcrepo, rl_type, unencoded)
219 rl = _revlogfrompath(srcrepo, entry.revlog_type, unencoded)
219 220
220 221 info = rl.storageinfo(
221 222 exclusivefiles=True,
222 223 revisionscount=True,
223 224 trackedsize=True,
224 225 storedsize=True,
225 226 )
226 227
227 228 revcount += info[b'revisionscount'] or 0
228 229 datasize = info[b'storedsize'] or 0
229 230 rawsize = info[b'trackedsize'] or 0
230 231
231 232 srcsize += datasize
232 233 srcrawsize += rawsize
233 234
234 235 # This is for the separate progress bars.
235 if rl_type & store.FILEFLAGS_CHANGELOG:
236 changelogs[unencoded] = rl_type
236 if entry.revlog_type & store.FILEFLAGS_CHANGELOG:
237 changelogs[unencoded] = entry.revlog_type
237 238 crevcount += len(rl)
238 239 csrcsize += datasize
239 240 crawsize += rawsize
240 elif rl_type & store.FILEFLAGS_MANIFESTLOG:
241 manifests[unencoded] = rl_type
241 elif entry.revlog_type & store.FILEFLAGS_MANIFESTLOG:
242 manifests[unencoded] = entry.revlog_type
242 243 mcount += 1
243 244 mrevcount += len(rl)
244 245 msrcsize += datasize
245 246 mrawsize += rawsize
246 elif rl_type & store.FILEFLAGS_FILELOG:
247 filelogs[unencoded] = rl_type
247 elif entry.revlog_type & store.FILEFLAGS_FILELOG:
248 filelogs[unencoded] = entry.revlog_type
248 249 fcount += 1
249 250 frevcount += len(rl)
250 251 fsrcsize += datasize
251 252 frawsize += rawsize
252 253 else:
253 254 error.ProgrammingError(b'unknown revlog type')
254 255
255 256 if not revcount:
256 257 return
257 258
258 259 ui.status(
259 260 _(
260 261 b'migrating %d total revisions (%d in filelogs, %d in manifests, '
261 262 b'%d in changelog)\n'
262 263 )
263 264 % (revcount, frevcount, mrevcount, crevcount)
264 265 )
265 266 ui.status(
266 267 _(b'migrating %s in store; %s tracked data\n')
267 268 % ((util.bytecount(srcsize), util.bytecount(srcrawsize)))
268 269 )
269 270
270 271 # Used to keep track of progress.
271 272 progress = None
272 273
273 274 def oncopiedrevision(rl, rev, node):
274 275 progress.increment()
275 276
276 277 sidedata_helpers = get_sidedata_helpers(srcrepo, dstrepo)
277 278
278 279 # Migrating filelogs
279 280 ui.status(
280 281 _(
281 282 b'migrating %d filelogs containing %d revisions '
282 283 b'(%s in store; %s tracked data)\n'
283 284 )
284 285 % (
285 286 fcount,
286 287 frevcount,
287 288 util.bytecount(fsrcsize),
288 289 util.bytecount(frawsize),
289 290 )
290 291 )
291 292 progress = srcrepo.ui.makeprogress(_(b'file revisions'), total=frevcount)
292 293 for unencoded, rl_type in sorted(filelogs.items()):
293 294 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
294 295
295 296 newrl = _perform_clone(
296 297 ui,
297 298 dstrepo,
298 299 tr,
299 300 oldrl,
300 301 rl_type,
301 302 unencoded,
302 303 upgrade_op,
303 304 sidedata_helpers,
304 305 oncopiedrevision,
305 306 )
306 307 info = newrl.storageinfo(storedsize=True)
307 308 fdstsize += info[b'storedsize'] or 0
308 309 ui.status(
309 310 _(
310 311 b'finished migrating %d filelog revisions across %d '
311 312 b'filelogs; change in size: %s\n'
312 313 )
313 314 % (frevcount, fcount, util.bytecount(fdstsize - fsrcsize))
314 315 )
315 316
316 317 # Migrating manifests
317 318 ui.status(
318 319 _(
319 320 b'migrating %d manifests containing %d revisions '
320 321 b'(%s in store; %s tracked data)\n'
321 322 )
322 323 % (
323 324 mcount,
324 325 mrevcount,
325 326 util.bytecount(msrcsize),
326 327 util.bytecount(mrawsize),
327 328 )
328 329 )
329 330 if progress:
330 331 progress.complete()
331 332 progress = srcrepo.ui.makeprogress(
332 333 _(b'manifest revisions'), total=mrevcount
333 334 )
334 335 for unencoded, rl_type in sorted(manifests.items()):
335 336 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
336 337 newrl = _perform_clone(
337 338 ui,
338 339 dstrepo,
339 340 tr,
340 341 oldrl,
341 342 rl_type,
342 343 unencoded,
343 344 upgrade_op,
344 345 sidedata_helpers,
345 346 oncopiedrevision,
346 347 )
347 348 info = newrl.storageinfo(storedsize=True)
348 349 mdstsize += info[b'storedsize'] or 0
349 350 ui.status(
350 351 _(
351 352 b'finished migrating %d manifest revisions across %d '
352 353 b'manifests; change in size: %s\n'
353 354 )
354 355 % (mrevcount, mcount, util.bytecount(mdstsize - msrcsize))
355 356 )
356 357
357 358 # Migrating changelog
358 359 ui.status(
359 360 _(
360 361 b'migrating changelog containing %d revisions '
361 362 b'(%s in store; %s tracked data)\n'
362 363 )
363 364 % (
364 365 crevcount,
365 366 util.bytecount(csrcsize),
366 367 util.bytecount(crawsize),
367 368 )
368 369 )
369 370 if progress:
370 371 progress.complete()
371 372 progress = srcrepo.ui.makeprogress(
372 373 _(b'changelog revisions'), total=crevcount
373 374 )
374 375 for unencoded, rl_type in sorted(changelogs.items()):
375 376 oldrl = _revlogfrompath(srcrepo, rl_type, unencoded)
376 377 newrl = _perform_clone(
377 378 ui,
378 379 dstrepo,
379 380 tr,
380 381 oldrl,
381 382 rl_type,
382 383 unencoded,
383 384 upgrade_op,
384 385 sidedata_helpers,
385 386 oncopiedrevision,
386 387 )
387 388 info = newrl.storageinfo(storedsize=True)
388 389 cdstsize += info[b'storedsize'] or 0
389 390 progress.complete()
390 391 ui.status(
391 392 _(
392 393 b'finished migrating %d changelog revisions; change in size: '
393 394 b'%s\n'
394 395 )
395 396 % (crevcount, util.bytecount(cdstsize - csrcsize))
396 397 )
397 398
398 399 dstsize = fdstsize + mdstsize + cdstsize
399 400 ui.status(
400 401 _(
401 402 b'finished migrating %d total revisions; total change in store '
402 403 b'size: %s\n'
403 404 )
404 405 % (revcount, util.bytecount(dstsize - srcsize))
405 406 )
406 407
407 408
408 409 def _files_to_copy_post_revlog_clone(srcrepo):
409 410 """yields files which should be copied to destination after revlogs
410 411 are cloned"""
411 412 for path, kind, st in sorted(srcrepo.store.vfs.readdir(b'', stat=True)):
412 413 # don't copy revlogs as they are already cloned
413 414 if store.revlog_type(path) is not None:
414 415 continue
415 416 # Skip transaction related files.
416 417 if path.startswith(b'undo'):
417 418 continue
418 419 # Only copy regular files.
419 420 if kind != stat.S_IFREG:
420 421 continue
421 422 # Skip other skipped files.
422 423 if path in (b'lock', b'fncache'):
423 424 continue
424 425 # TODO: should we skip cache too?
425 426
426 427 yield path
427 428
428 429
429 430 def _replacestores(currentrepo, upgradedrepo, backupvfs, upgrade_op):
430 431 """Replace the stores after current repository is upgraded
431 432
432 433 Creates a backup of the current repository store at the backup path
433 434 Replaces store files in the current repo with files from the upgraded one
434 435
435 436 Arguments:
436 437 currentrepo: repo object of current repository
437 438 upgradedrepo: repo object of the upgraded data
438 439 backupvfs: vfs object for the backup path
439 440 upgrade_op: upgrade operation object
440 441 to be used to decide what all is upgraded
441 442 """
442 443 # TODO: don't blindly rename everything in store
443 444 # There can be upgrades where store is not touched at all
444 445 if upgrade_op.backup_store:
445 446 util.rename(currentrepo.spath, backupvfs.join(b'store'))
446 447 else:
447 448 currentrepo.vfs.rmtree(b'store', forcibly=True)
448 449 util.rename(upgradedrepo.spath, currentrepo.spath)
449 450
450 451
451 452 def finishdatamigration(ui, srcrepo, dstrepo, requirements):
452 453 """Hook point for extensions to perform additional actions during upgrade.
453 454
454 455 This function is called after revlogs and store files have been copied but
455 456 before the new store is swapped into the original location.
456 457 """
457 458
458 459
459 460 def upgrade(ui, srcrepo, dstrepo, upgrade_op):
460 461 """Do the low-level work of upgrading a repository.
461 462
462 463 The upgrade is effectively performed as a copy between a source
463 464 repository and a temporary destination repository.
464 465
465 466 The source repository is unmodified for as long as possible so the
466 467 upgrade can abort at any time without causing loss of service for
467 468 readers and without corrupting the source repository.
468 469 """
469 470 assert srcrepo.currentwlock()
470 471 assert dstrepo.currentwlock()
471 472 backuppath = None
472 473 backupvfs = None
473 474
474 475 ui.status(
475 476 _(
476 477 b'(it is safe to interrupt this process any time before '
477 478 b'data migration completes)\n'
478 479 )
479 480 )
480 481
481 482 if upgrade_actions.dirstatev2 in upgrade_op.upgrade_actions:
482 483 ui.status(_(b'upgrading to dirstate-v2 from v1\n'))
483 484 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v1', b'v2')
484 485 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatev2)
485 486
486 487 if upgrade_actions.dirstatev2 in upgrade_op.removed_actions:
487 488 ui.status(_(b'downgrading from dirstate-v2 to v1\n'))
488 489 upgrade_dirstate(ui, srcrepo, upgrade_op, b'v2', b'v1')
489 490 upgrade_op.removed_actions.remove(upgrade_actions.dirstatev2)
490 491
491 492 if upgrade_actions.dirstatetrackedkey in upgrade_op.upgrade_actions:
492 493 ui.status(_(b'create dirstate-tracked-hint file\n'))
493 494 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=True)
494 495 upgrade_op.upgrade_actions.remove(upgrade_actions.dirstatetrackedkey)
495 496 elif upgrade_actions.dirstatetrackedkey in upgrade_op.removed_actions:
496 497 ui.status(_(b'remove dirstate-tracked-hint file\n'))
497 498 upgrade_tracked_hint(ui, srcrepo, upgrade_op, add=False)
498 499 upgrade_op.removed_actions.remove(upgrade_actions.dirstatetrackedkey)
499 500
500 501 if not (upgrade_op.upgrade_actions or upgrade_op.removed_actions):
501 502 return
502 503
503 504 if upgrade_op.requirements_only:
504 505 ui.status(_(b'upgrading repository requirements\n'))
505 506 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
506 507 # if there is only one action and that is persistent nodemap upgrade
507 508 # directly write the nodemap file and update requirements instead of going
508 509 # through the whole cloning process
509 510 elif (
510 511 len(upgrade_op.upgrade_actions) == 1
511 512 and b'persistent-nodemap' in upgrade_op.upgrade_actions_names
512 513 and not upgrade_op.removed_actions
513 514 ):
514 515 ui.status(
515 516 _(b'upgrading repository to use persistent nodemap feature\n')
516 517 )
517 518 with srcrepo.transaction(b'upgrade') as tr:
518 519 unfi = srcrepo.unfiltered()
519 520 cl = unfi.changelog
520 521 nodemap.persist_nodemap(tr, cl, force=True)
521 522 # we want to operate directly on the underlying revlog to force the
522 523 # creation of a nodemap file. This is fine since this is upgrade code
523 524 # and it heavily relies on repository being revlog based
524 525 # hence accessing private attributes can be justified
525 526 nodemap.persist_nodemap(
526 527 tr, unfi.manifestlog._rootstore._revlog, force=True
527 528 )
528 529 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
529 530 elif (
530 531 len(upgrade_op.removed_actions) == 1
531 532 and [
532 533 x
533 534 for x in upgrade_op.removed_actions
534 535 if x.name == b'persistent-nodemap'
535 536 ]
536 537 and not upgrade_op.upgrade_actions
537 538 ):
538 539 ui.status(
539 540 _(b'downgrading repository to not use persistent nodemap feature\n')
540 541 )
541 542 with srcrepo.transaction(b'upgrade') as tr:
542 543 unfi = srcrepo.unfiltered()
543 544 cl = unfi.changelog
544 545 nodemap.delete_nodemap(tr, srcrepo, cl)
545 546 # check comment 20 lines above for accessing private attributes
546 547 nodemap.delete_nodemap(
547 548 tr, srcrepo, unfi.manifestlog._rootstore._revlog
548 549 )
549 550 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
550 551 else:
551 552 with dstrepo.transaction(b'upgrade') as tr:
552 553 _clonerevlogs(
553 554 ui,
554 555 srcrepo,
555 556 dstrepo,
556 557 tr,
557 558 upgrade_op,
558 559 )
559 560
560 561 # Now copy other files in the store directory.
561 562 for p in _files_to_copy_post_revlog_clone(srcrepo):
562 563 srcrepo.ui.status(_(b'copying %s\n') % p)
563 564 src = srcrepo.store.rawvfs.join(p)
564 565 dst = dstrepo.store.rawvfs.join(p)
565 566 util.copyfile(src, dst, copystat=True)
566 567
567 568 finishdatamigration(ui, srcrepo, dstrepo, requirements)
568 569
569 570 ui.status(_(b'data fully upgraded in a temporary repository\n'))
570 571
571 572 if upgrade_op.backup_store:
572 573 backuppath = pycompat.mkdtemp(
573 574 prefix=b'upgradebackup.', dir=srcrepo.path
574 575 )
575 576 backupvfs = vfsmod.vfs(backuppath)
576 577
577 578 # Make a backup of requires file first, as it is the first to be modified.
578 579 util.copyfile(
579 580 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
580 581 )
581 582
582 583 # We install an arbitrary requirement that clients must not support
583 584 # as a mechanism to lock out new clients during the data swap. This is
584 585 # better than allowing a client to continue while the repository is in
585 586 # an inconsistent state.
586 587 ui.status(
587 588 _(
588 589 b'marking source repository as being upgraded; clients will be '
589 590 b'unable to read from repository\n'
590 591 )
591 592 )
592 593 scmutil.writereporequirements(
593 594 srcrepo, srcrepo.requirements | {b'upgradeinprogress'}
594 595 )
595 596
596 597 ui.status(_(b'starting in-place swap of repository data\n'))
597 598 if upgrade_op.backup_store:
598 599 ui.status(
599 600 _(b'replaced files will be backed up at %s\n') % backuppath
600 601 )
601 602
602 603 # Now swap in the new store directory. Doing it as a rename should make
603 604 # the operation nearly instantaneous and atomic (at least in well-behaved
604 605 # environments).
605 606 ui.status(_(b'replacing store...\n'))
606 607 tstart = util.timer()
607 608 _replacestores(srcrepo, dstrepo, backupvfs, upgrade_op)
608 609 elapsed = util.timer() - tstart
609 610 ui.status(
610 611 _(
611 612 b'store replacement complete; repository was inconsistent for '
612 613 b'%0.1fs\n'
613 614 )
614 615 % elapsed
615 616 )
616 617
617 618 # We first write the requirements file. Any new requirements will lock
618 619 # out legacy clients.
619 620 ui.status(
620 621 _(
621 622 b'finalizing requirements file and making repository readable '
622 623 b'again\n'
623 624 )
624 625 )
625 626 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
626 627
627 628 if upgrade_op.backup_store:
628 629 # The lock file from the old store won't be removed because nothing has a
629 630 # reference to its new location. So clean it up manually. Alternatively, we
630 631 # could update srcrepo.svfs and other variables to point to the new
631 632 # location. This is simpler.
632 633 assert backupvfs is not None # help pytype
633 634 backupvfs.unlink(b'store/lock')
634 635
635 636 return backuppath
636 637
637 638
638 639 def upgrade_dirstate(ui, srcrepo, upgrade_op, old, new):
639 640 if upgrade_op.backup_store:
640 641 backuppath = pycompat.mkdtemp(
641 642 prefix=b'upgradebackup.', dir=srcrepo.path
642 643 )
643 644 ui.status(_(b'replaced files will be backed up at %s\n') % backuppath)
644 645 backupvfs = vfsmod.vfs(backuppath)
645 646 util.copyfile(
646 647 srcrepo.vfs.join(b'requires'), backupvfs.join(b'requires')
647 648 )
648 649 try:
649 650 util.copyfile(
650 651 srcrepo.vfs.join(b'dirstate'), backupvfs.join(b'dirstate')
651 652 )
652 653 except FileNotFoundError:
653 654 # The dirstate does not exist on an empty repo or a repo with no
654 655 # revision checked out
655 656 pass
656 657
657 658 assert srcrepo.dirstate._use_dirstate_v2 == (old == b'v2')
658 659 use_v2 = new == b'v2'
659 660 if use_v2:
660 661 # Write the requirements *before* upgrading
661 662 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
662 663
663 664 srcrepo.dirstate._map.preload()
664 665 srcrepo.dirstate._use_dirstate_v2 = use_v2
665 666 srcrepo.dirstate._map._use_dirstate_v2 = use_v2
666 667 srcrepo.dirstate._dirty = True
667 668 try:
668 669 srcrepo.vfs.unlink(b'dirstate')
669 670 except FileNotFoundError:
670 671 # The dirstate does not exist on an empty repo or a repo with no
671 672 # revision checked out
672 673 pass
673 674
674 675 srcrepo.dirstate.write(None)
675 676 if not use_v2:
676 677 # Remove the v2 requirement *after* downgrading
677 678 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
678 679
679 680
680 681 def upgrade_tracked_hint(ui, srcrepo, upgrade_op, add):
681 682 if add:
682 683 srcrepo.dirstate._use_tracked_hint = True
683 684 srcrepo.dirstate._dirty = True
684 685 srcrepo.dirstate._dirty_tracked_set = True
685 686 srcrepo.dirstate.write(None)
686 687 if not add:
687 688 srcrepo.dirstate.delete_tracked_hint()
688 689
689 690 scmutil.writereporequirements(srcrepo, upgrade_op.new_requirements)
@@ -1,621 +1,625 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10
11 11 from .i18n import _
12 12 from .node import short
13 13 from .utils import stringutil
14 14
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
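A quick illustration (usage sketch) of _normpath() above: repeated slashes left behind by old converted repositories collapse to single separators.

assert _normpath(b'foo//bar///baz') == b'foo/bar/baz'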
41 41 HINT_FNCACHE = _(
42 42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 43 )
44 44
45 45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 46 b"parent-directory manifest refers to unknown revision %s"
47 47 )
48 48
49 49 WARN_UNKNOWN_COPY_SOURCE = _(
50 50 b"warning: copy source of '%s' not in parents of %s"
51 51 )
52 52
53 53 WARN_NULLID_COPY_SOURCE = _(
54 54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 55 )
56 56
57 57
58 58 class verifier:
59 59 def __init__(self, repo, level=None):
60 60 self.repo = repo.unfiltered()
61 61 self.ui = repo.ui
62 62 self.match = repo.narrowmatch()
63 63 if level is None:
64 64 level = VERIFY_DEFAULT
65 65 self._level = level
66 66 self.badrevs = set()
67 67 self.errors = 0
68 68 self.warnings = 0
69 69 self.havecl = len(repo.changelog) > 0
70 70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 73 self.refersmf = False
74 74 self.fncachewarned = False
75 75 # developer config: verify.skipflags
76 76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 77 self.warnorphanstorefiles = True
78 78
79 79 def _warn(self, msg):
80 80 """record a "warning" level issue"""
81 81 self.ui.warn(msg + b"\n")
82 82 self.warnings += 1
83 83
84 84 def _err(self, linkrev, msg, filename=None):
85 85 """record a "error" level issue"""
86 86 if linkrev is not None:
87 87 self.badrevs.add(linkrev)
88 88 linkrev = b"%d" % linkrev
89 89 else:
90 90 linkrev = b'?'
91 91 msg = b"%s: %s" % (linkrev, msg)
92 92 if filename:
93 93 msg = b"%s@%s" % (filename, msg)
94 94 self.ui.warn(b" " + msg + b"\n")
95 95 self.errors += 1
96 96
97 97 def _exc(self, linkrev, msg, inst, filename=None):
98 98 """record exception raised during the verify process"""
99 99 fmsg = stringutil.forcebytestr(inst)
100 100 if not fmsg:
101 101 fmsg = pycompat.byterepr(inst)
102 102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103 103
104 104 def _checkrevlog(self, obj, name, linkrev):
105 105 """verify high level property of a revlog
106 106
107 107 - revlog is present,
108 108 - revlog is non-empty,
109 109 - sizes (index and data) are correct,
110 110 - revlog's format version is correct.
111 111 """
112 112 if not len(obj) and (self.havecl or self.havemf):
113 113 self._err(linkrev, _(b"empty or missing %s") % name)
114 114 return
115 115
116 116 d = obj.checksize()
117 117 if d[0]:
118 118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 119 if d[1]:
120 120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121 121
122 122 if obj._format_version != revlog.REVLOGV0:
123 123 if not self.revlogv1:
124 124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 125 elif self.revlogv1:
126 126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127 127
128 128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 129 """verify a single revlog entry
130 130
131 131 arguments are:
132 132 - obj: the source revlog
133 133 - i: the revision number
134 134 - node: the revision node id
135 135 - seen: nodes previously seen for this revlog
136 136 - linkrevs: [changelog-revisions] introducing "node"
137 137 - f: string label ("changelog", "manifest", or filename)
138 138
139 139 Performs the following checks:
140 140 - linkrev points to an existing changelog revision,
141 141 - linkrev points to a changelog revision that introduces this revision,
142 142 - linkrev points to the lowest of these changesets,
143 143 - both parents exist in the revlog,
144 144 - the revision is not duplicated.
145 145
146 146 Return the linkrev of the revision (or None for changelog's revisions).
147 147 """
148 148 lr = obj.linkrev(obj.rev(node))
149 149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 150 if lr < 0 or lr >= len(self.repo.changelog):
151 151 msg = _(b"rev %d points to nonexistent changeset %d")
152 152 else:
153 153 msg = _(b"rev %d points to unexpected changeset %d")
154 154 self._err(None, msg % (i, lr), f)
155 155 if linkrevs:
156 156 if f and len(linkrevs) > 1:
157 157 try:
158 158 # attempt to filter down to real linkrevs
159 159 linkrevs = []
160 160 for lr in linkrevs:
161 161 if self.lrugetctx(lr)[f].filenode() == node:
162 162 linkrevs.append(lr)
163 163 except Exception:
164 164 pass
165 165 msg = _(b" (expected %s)")
166 166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 167 self._warn(msg)
168 168 lr = None # can't be trusted
169 169
170 170 try:
171 171 p1, p2 = obj.parents(node)
172 172 if p1 not in seen and p1 != self.repo.nullid:
173 173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 174 self._err(lr, msg, f)
175 175 if p2 not in seen and p2 != self.repo.nullid:
176 176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 177 self._err(lr, msg, f)
178 178 except Exception as inst:
179 179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180 180
181 181 if node in seen:
182 182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 183 seen[node] = i
184 184 return lr
185 185
186 186 def verify(self):
187 187 """verify the content of the Mercurial repository
188 188
189 189 This method runs all verifications, displaying issues as they are found.
190 190
191 191 return 1 if any errors have been encountered, 0 otherwise.
192 192 # initial validation and generic report
193 193 repo = self.repo
194 194 ui = repo.ui
195 195 if not repo.url().startswith(b'file:'):
196 196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197 197
198 198 if os.path.exists(repo.sjoin(b"journal")):
199 199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200 200
201 201 if ui.verbose or not self.revlogv1:
202 202 ui.status(
203 203 _(b"repository uses revlog format %d\n")
204 204 % (self.revlogv1 and 1 or 0)
205 205 )
206 206
207 207 # data verification
208 208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 209 filenodes = self._verifymanifest(mflinkrevs)
210 210 del mflinkrevs
211 211 self._crosscheckfiles(filelinkrevs, filenodes)
212 212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213 213
214 214 if self.errors:
215 215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
216 216 dirstate_errors = 0
217 217 else:
218 218 dirstate_errors = self._verify_dirstate()
219 219
220 220 # final report
221 221 ui.status(
222 222 _(b"checked %d changesets with %d changes to %d files\n")
223 223 % (len(repo.changelog), filerevisions, totalfiles)
224 224 )
225 225 if self.warnings:
226 226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
227 227 if self.fncachewarned:
228 228 ui.warn(HINT_FNCACHE)
229 229 if self.errors:
230 230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
231 231 if self.badrevs:
232 232 msg = _(b"(first damaged changeset appears to be %d)\n")
233 233 msg %= min(self.badrevs)
234 234 ui.warn(msg)
235 235 if dirstate_errors:
236 236 ui.warn(
237 237 _(b"dirstate inconsistent with current parent's manifest\n")
238 238 )
239 239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
240 240 return 1
241 241 return 0
242 242
243 243 def _verifychangelog(self):
244 244 """verify the changelog of a repository
245 245
246 246 The following checks are performed:
247 247 - all of `_checkrevlog` checks,
248 248 - all of `_checkentry` checks (for each revisions),
249 249 - each revision can be read.
250 250
251 251 The function returns some of the data observed in the changesets as a
252 252 (mflinkrevs, filelinkrevs) tuple:
253 253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
254 254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
255 255
256 256 If a matcher was specified, filelinkrevs will only contain matched
257 257 files.
258 258 """
259 259 ui = self.ui
260 260 repo = self.repo
261 261 match = self.match
262 262 cl = repo.changelog
263 263
264 264 ui.status(_(b"checking changesets\n"))
265 265 mflinkrevs = {}
266 266 filelinkrevs = {}
267 267 seen = {}
268 268 self._checkrevlog(cl, b"changelog", 0)
269 269 progress = ui.makeprogress(
270 270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
271 271 )
272 272 for i in repo:
273 273 progress.update(i)
274 274 n = cl.node(i)
275 275 self._checkentry(cl, i, n, seen, [i], b"changelog")
276 276
277 277 try:
278 278 changes = cl.read(n)
279 279 if changes[0] != self.repo.nullid:
280 280 mflinkrevs.setdefault(changes[0], []).append(i)
281 281 self.refersmf = True
282 282 for f in changes[3]:
283 283 if match(f):
284 284 filelinkrevs.setdefault(_normpath(f), []).append(i)
285 285 except Exception as inst:
286 286 self.refersmf = True
287 287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
288 288 progress.complete()
289 289 return mflinkrevs, filelinkrevs
290 290
291 291 def _verifymanifest(
292 292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
293 293 ):
294 294 """verify the manifestlog content
295 295
296 296 Inputs:
297 297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
298 298 - dir: a subdirectory to check (for tree manifest repo)
299 299 - storefiles: set of currently "orphan" files.
300 300 - subdirprogress: a progress object
301 301
302 302 This function checks:
303 303 * all of `_checkrevlog` checks (for all manifest related revlogs)
304 304 * all of `_checkentry` checks (for all manifest related revisions)
305 305 * nodes for subdirectory exists in the sub-directory manifest
306 306 * each manifest entry has a file path
307 307 * each manifest node referred to in mflinkrevs exists in the manifest log
308 308
309 309 If tree manifest is in use and a matcher is specified, only the
310 310 sub-directories matching it will be verified.
311 311
312 312 return a two level mapping:
313 313 {"path" -> { filenode -> changelog-revision}}
314 314
315 315 This mapping primarily contains entries for every file in the
316 316 repository. In addition, when tree-manifest is used, it also contains
317 317 sub-directory entries.
318 318
319 319 If a matcher is provided, only matching paths will be included.
320 320 """
321 321 repo = self.repo
322 322 ui = self.ui
323 323 match = self.match
324 324 mfl = self.repo.manifestlog
325 325 mf = mfl.getstorage(dir)
326 326
327 327 if not dir:
328 328 self.ui.status(_(b"checking manifests\n"))
329 329
330 330 filenodes = {}
331 331 subdirnodes = {}
332 332 seen = {}
333 333 label = b"manifest"
334 334 if dir:
335 335 label = dir
336 336 revlogfiles = mf.files()
337 337 storefiles.difference_update(revlogfiles)
338 338 if subdirprogress: # should be true since we're in a subdirectory
339 339 subdirprogress.increment()
340 340 if self.refersmf:
341 341 # Do not check manifest if there are only changelog entries with
342 342 # null manifests.
343 343 self._checkrevlog(mf._revlog, label, 0)
344 344 progress = ui.makeprogress(
345 345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
346 346 )
347 347 for i in mf:
348 348 if not dir:
349 349 progress.update(i)
350 350 n = mf.node(i)
351 351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
352 352 if n in mflinkrevs:
353 353 del mflinkrevs[n]
354 354 elif dir:
355 355 msg = _(b"%s not in parent-directory manifest") % short(n)
356 356 self._err(lr, msg, label)
357 357 else:
358 358 self._err(lr, _(b"%s not in changesets") % short(n), label)
359 359
360 360 try:
361 361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
362 362 for f, fn, fl in mfdelta.iterentries():
363 363 if not f:
364 364 self._err(lr, _(b"entry without name in manifest"))
365 365 elif f == b"/dev/null": # ignore this in very old repos
366 366 continue
367 367 fullpath = dir + _normpath(f)
368 368 if fl == b't':
369 369 if not match.visitdir(fullpath):
370 370 continue
371 371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
372 372 sdn.setdefault(fn, []).append(lr)
373 373 else:
374 374 if not match(fullpath):
375 375 continue
376 376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
377 377 except Exception as inst:
378 378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
379 379 if self._level >= VERIFY_FULL:
380 380 try:
381 381 # Various issues can affect a manifest, so we read each full
382 382 # text from storage. This triggers the checks from the core
383 383 # code (e.g. hash verification, filenames are ordered, etc.)
384 384 mfdelta = mfl.get(dir, n).read()
385 385 except Exception as inst:
386 386 msg = _(b"reading full manifest %s") % short(n)
387 387 self._exc(lr, msg, inst, label)
388 388
389 389 if not dir:
390 390 progress.complete()
391 391
392 392 if self.havemf:
393 393 # since we delete entries in `mflinkrevs` during iteration, any
394 394 # remaining entries are "missing". We need to issue errors for them.
395 395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
396 396 for c, m in sorted(changesetpairs):
397 397 if dir:
398 398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
399 399 else:
400 400 msg = _(b"changeset refers to unknown revision %s")
401 401 msg %= short(m)
402 402 self._err(c, msg, label)
403 403
404 404 if not dir and subdirnodes:
405 405 self.ui.status(_(b"checking directory manifests\n"))
406 406 storefiles = set()
407 407 subdirs = set()
408 408 revlogv1 = self.revlogv1
409 409 undecodable = []
410 for t, f, size in repo.store.datafiles(undecodable=undecodable):
410 for entry in repo.store.datafiles(undecodable=undecodable):
411 f = entry.unencoded_path
412 size = entry.file_size
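# The hunk above switches this loop from unpacking (t, f, size) tuples to
# reading the same information from the entry objects now yielded by
# store.datafiles(): the unencoded path and on-disk size come from
# entry.unencoded_path and entry.file_size, and the rest of the loop body
# is unchanged.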
411 413 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
412 414 storefiles.add(_normpath(f))
413 415 subdirs.add(os.path.dirname(f))
414 416 for f in undecodable:
415 417 self._err(None, _(b"cannot decode filename '%s'") % f)
416 418 subdirprogress = ui.makeprogress(
417 419 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
418 420 )
419 421
420 422 for subdir, linkrevs in subdirnodes.items():
421 423 subdirfilenodes = self._verifymanifest(
422 424 linkrevs, subdir, storefiles, subdirprogress
423 425 )
424 426 for f, onefilenodes in subdirfilenodes.items():
425 427 filenodes.setdefault(f, {}).update(onefilenodes)
426 428
427 429 if not dir and subdirnodes:
428 430 assert subdirprogress is not None # help pytype
429 431 subdirprogress.complete()
430 432 if self.warnorphanstorefiles:
431 433 for f in sorted(storefiles):
432 434 self._warn(_(b"warning: orphan data file '%s'") % f)
433 435
434 436 return filenodes
435 437
436 438 def _crosscheckfiles(self, filelinkrevs, filenodes):
437 439 repo = self.repo
438 440 ui = self.ui
439 441 ui.status(_(b"crosschecking files in changesets and manifests\n"))
440 442
441 443 total = len(filelinkrevs) + len(filenodes)
442 444 progress = ui.makeprogress(
443 445 _(b'crosschecking'), unit=_(b'files'), total=total
444 446 )
445 447 if self.havemf:
446 448 for f in sorted(filelinkrevs):
447 449 progress.increment()
448 450 if f not in filenodes:
449 451 lr = filelinkrevs[f][0]
450 452 self._err(lr, _(b"in changeset but not in manifest"), f)
451 453
452 454 if self.havecl:
453 455 for f in sorted(filenodes):
454 456 progress.increment()
455 457 if f not in filelinkrevs:
456 458 try:
457 459 fl = repo.file(f)
458 460 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
459 461 except Exception:
460 462 lr = None
461 463 self._err(lr, _(b"in manifest but not in changeset"), f)
462 464
463 465 progress.complete()
464 466
465 467 def _verifyfiles(self, filenodes, filelinkrevs):
466 468 repo = self.repo
467 469 ui = self.ui
468 470 lrugetctx = self.lrugetctx
469 471 revlogv1 = self.revlogv1
470 472 havemf = self.havemf
471 473 ui.status(_(b"checking files\n"))
472 474
473 475 storefiles = set()
474 476 undecodable = []
475 for t, f, size in repo.store.datafiles(undecodable=undecodable):
477 for entry in repo.store.datafiles(undecodable=undecodable):
478 size = entry.file_size
479 f = entry.unencoded_path
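# Same migration as in _verifymanifest above: positional tuple unpacking is
# replaced by the entry's unencoded_path and file_size attributes.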
476 480 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
477 481 storefiles.add(_normpath(f))
478 482 for f in undecodable:
479 483 self._err(None, _(b"cannot decode filename '%s'") % f)
480 484
481 485 state = {
482 486 # TODO this assumes revlog storage for changelog.
483 487 b'expectedversion': self.repo.changelog._format_version,
484 488 b'skipflags': self.skipflags,
485 489 # experimental config: censor.policy
486 490 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
487 491 }
488 492
489 493 files = sorted(set(filenodes) | set(filelinkrevs))
490 494 revisions = 0
491 495 progress = ui.makeprogress(
492 496 _(b'checking'), unit=_(b'files'), total=len(files)
493 497 )
494 498 for i, f in enumerate(files):
495 499 progress.update(i, item=f)
496 500 try:
497 501 linkrevs = filelinkrevs[f]
498 502 except KeyError:
499 503 # in manifest but not in changelog
500 504 linkrevs = []
501 505
502 506 if linkrevs:
503 507 lr = linkrevs[0]
504 508 else:
505 509 lr = None
506 510
507 511 try:
508 512 fl = repo.file(f)
509 513 except error.StorageError as e:
510 514 self._err(lr, _(b"broken revlog! (%s)") % e, f)
511 515 continue
512 516
513 517 for ff in fl.files():
514 518 try:
515 519 storefiles.remove(ff)
516 520 except KeyError:
517 521 if self.warnorphanstorefiles:
518 522 msg = _(b" warning: revlog '%s' not in fncache!")
519 523 self._warn(msg % ff)
520 524 self.fncachewarned = True
521 525
522 526 if not len(fl) and (self.havecl or self.havemf):
523 527 self._err(lr, _(b"empty or missing %s") % f)
524 528 else:
525 529 # Guard against implementations not setting this.
526 530 state[b'skipread'] = set()
527 531 state[b'safe_renamed'] = set()
528 532
529 533 for problem in fl.verifyintegrity(state):
530 534 if problem.node is not None:
531 535 linkrev = fl.linkrev(fl.rev(problem.node))
532 536 else:
533 537 linkrev = None
534 538
535 539 if problem.warning:
536 540 self._warn(problem.warning)
537 541 elif problem.error:
538 542 linkrev_msg = linkrev if linkrev is not None else lr
539 543 self._err(linkrev_msg, problem.error, f)
540 544 else:
541 545 raise error.ProgrammingError(
542 546 b'problem instance does not set warning or error '
543 547 b'attribute: %s' % problem.msg
544 548 )
545 549
546 550 seen = {}
547 551 for i in fl:
548 552 revisions += 1
549 553 n = fl.node(i)
550 554 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
551 555 if f in filenodes:
552 556 if havemf and n not in filenodes[f]:
553 557 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
554 558 else:
555 559 del filenodes[f][n]
556 560
557 561 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
558 562 continue
559 563
560 564 # check renames
561 565 try:
562 566 # This requires resolving fulltext (at least on revlogs,
563 567 # though not with LFS revisions). We may want
564 568 # ``verifyintegrity()`` to pass a set of nodes with
565 569 # rename metadata as an optimization.
566 570 rp = fl.renamed(n)
567 571 if rp:
568 572 if lr is not None and ui.verbose:
569 573 ctx = lrugetctx(lr)
570 574 if not any(rp[0] in pctx for pctx in ctx.parents()):
571 575 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
572 576 fl2 = repo.file(rp[0])
573 577 if not len(fl2):
574 578 m = _(b"empty or missing copy source revlog %s:%s")
575 579 self._err(lr, m % (rp[0], short(rp[1])), f)
576 580 elif rp[1] == self.repo.nullid:
577 581 msg = WARN_NULLID_COPY_SOURCE
578 582 msg %= (f, lr, rp[0], short(rp[1]))
579 583 ui.note(msg)
580 584 else:
581 585 fl2.rev(rp[1])
582 586 except Exception as inst:
583 587 self._exc(
584 588 lr, _(b"checking rename of %s") % short(n), inst, f
585 589 )
586 590
587 591 # cross-check
588 592 if f in filenodes:
589 593 fns = [(v, k) for k, v in filenodes[f].items()]
590 594 for lr, node in sorted(fns):
591 595 msg = _(b"manifest refers to unknown revision %s")
592 596 self._err(lr, msg % short(node), f)
593 597 progress.complete()
594 598
595 599 if self.warnorphanstorefiles:
596 600 for f in sorted(storefiles):
597 601 self._warn(_(b"warning: orphan data file '%s'") % f)
598 602
599 603 return len(files), revisions
600 604
601 605 def _verify_dirstate(self):
602 606 """Check that the dirstate is consistent with the parent's manifest"""
603 607 repo = self.repo
604 608 ui = self.ui
605 609 ui.status(_(b"checking dirstate\n"))
606 610
607 611 parent1, parent2 = repo.dirstate.parents()
608 612 m1 = repo[parent1].manifest()
609 613 m2 = repo[parent2].manifest()
610 614 dirstate_errors = 0
611 615
612 616 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
613 617 narrow_matcher = repo.narrowmatch() if is_narrow else None
614 618
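# narrow_matcher restricts the comparison to the tracked subset of a narrow
# clone (it stays None otherwise); dirstate.verify() then yields one
# printable error message per inconsistency found.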
615 619 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
616 620 ui.error(err)
617 621 dirstate_errors += 1
618 622
619 623 if dirstate_errors:
620 624 self.errors += dirstate_errors
621 625 return dirstate_errors
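Below is a minimal sketch, not part of the patch itself, of how a consumer might walk the store's data files after this change. It assumes `repo` is an already-open local repository handle; the only details taken from the hunks above are the datafiles(undecodable=...) call and the unencoded_path / file_size attributes of the yielded entry objects.

def iter_store_data_files(repo):
    """Yield (unencoded_path, file_size) for each data file in the store."""
    undecodable = []
    for entry in repo.store.datafiles(undecodable=undecodable):
        yield entry.unencoded_path, entry.file_size
    # Filenames the store could not decode are appended to `undecodable`;
    # the verifier above reports each of them as an error, while a plain
    # consumer may simply ignore or log them.

A typical use would be `for path, size in iter_store_data_files(repo): ...`, for instance to total up store size or to cross-check paths, mirroring what _verifymanifest and _verifyfiles do with the 'meta/' and 'data/' prefixes.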