store: rename `datafiles` to `data_entries`...
marmoute
r51397:862e3a13 default
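All three hunks below make the same mechanical substitution: callers iterate `repo.store.data_entries()` where they previously called `repo.store.datafiles()`, while the per-entry checks (`is_revlog`, `target_id`, `files()`) stay unchanged. A minimal sketch of the new call pattern, assuming an already-opened local repository object `repo`; the helper name `has_largefile_standins` is hypothetical and not part of Mercurial's API:

shortnameslash = b'.hglf/'  # largefiles standin prefix, as defined in lfutil below

def has_largefile_standins(repo):
    """Return True if any store revlog tracks a .hglf/ standin path."""
    # repo.store.data_entries() (formerly repo.store.datafiles()) yields store
    # entry objects; revlog entries expose is_revlog and a target_id path.
    for entry in repo.store.data_entries():
        if entry.is_revlog and shortnameslash in entry.target_id:
            return True
    return False

This mirrors the check that islfilesrepo() performs in the first hunk.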
hgext/largefiles/lfutil.py
@@ -1,823 +1,823
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''largefiles utility code: must not import other modules in this package.'''
10 10
11 11 import contextlib
12 12 import copy
13 13 import os
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import hex
18 18 from mercurial.pycompat import open
19 19
20 20 from mercurial import (
21 21 dirstate,
22 22 encoding,
23 23 error,
24 24 httpconnection,
25 25 match as matchmod,
26 26 pycompat,
27 27 requirements,
28 28 scmutil,
29 29 sparse,
30 30 util,
31 31 vfs as vfsmod,
32 32 )
33 33 from mercurial.utils import hashutil
34 34 from mercurial.dirstateutils import timestamp
35 35
36 36 shortname = b'.hglf'
37 37 shortnameslash = shortname + b'/'
38 38 longname = b'largefiles'
39 39
40 40 # -- Private worker functions ------------------------------------------
41 41
42 42
43 43 @contextlib.contextmanager
44 44 def lfstatus(repo, value=True):
45 45 oldvalue = getattr(repo, 'lfstatus', False)
46 46 repo.lfstatus = value
47 47 try:
48 48 yield
49 49 finally:
50 50 repo.lfstatus = oldvalue
51 51
52 52
53 53 def getminsize(ui, assumelfiles, opt, default=10):
54 54 lfsize = opt
55 55 if not lfsize and assumelfiles:
56 56 lfsize = ui.config(longname, b'minsize', default=default)
57 57 if lfsize:
58 58 try:
59 59 lfsize = float(lfsize)
60 60 except ValueError:
61 61 raise error.Abort(
62 62 _(b'largefiles: size must be number (not %s)\n') % lfsize
63 63 )
64 64 if lfsize is None:
65 65 raise error.Abort(_(b'minimum size for largefiles must be specified'))
66 66 return lfsize
67 67
68 68
69 69 def link(src, dest):
70 70 """Try to create hardlink - if that fails, efficiently make a copy."""
71 71 util.makedirs(os.path.dirname(dest))
72 72 try:
73 73 util.oslink(src, dest)
74 74 except OSError:
75 75 # if hardlinks fail, fallback on atomic copy
76 76 with open(src, b'rb') as srcf, util.atomictempfile(dest) as dstf:
77 77 for chunk in util.filechunkiter(srcf):
78 78 dstf.write(chunk)
79 79 os.chmod(dest, os.stat(src).st_mode)
80 80
81 81
82 82 def usercachepath(ui, hash):
83 83 """Return the correct location in the "global" largefiles cache for a file
84 84 with the given hash.
85 85 This cache is used for sharing of largefiles across repositories - both
86 86 to preserve download bandwidth and storage space."""
87 87 return os.path.join(_usercachedir(ui), hash)
88 88
89 89
90 90 def _usercachedir(ui, name=longname):
91 91 '''Return the location of the "global" largefiles cache.'''
92 92 path = ui.configpath(name, b'usercache')
93 93 if path:
94 94 return path
95 95
96 96 hint = None
97 97
98 98 if pycompat.iswindows:
99 99 appdata = encoding.environ.get(
100 100 b'LOCALAPPDATA', encoding.environ.get(b'APPDATA')
101 101 )
102 102 if appdata:
103 103 return os.path.join(appdata, name)
104 104
105 105 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
106 106 b"LOCALAPPDATA",
107 107 b"APPDATA",
108 108 name,
109 109 )
110 110 elif pycompat.isdarwin:
111 111 home = encoding.environ.get(b'HOME')
112 112 if home:
113 113 return os.path.join(home, b'Library', b'Caches', name)
114 114
115 115 hint = _(b"define %s in the environment, or set %s.usercache") % (
116 116 b"HOME",
117 117 name,
118 118 )
119 119 elif pycompat.isposix:
120 120 path = encoding.environ.get(b'XDG_CACHE_HOME')
121 121 if path:
122 122 return os.path.join(path, name)
123 123 home = encoding.environ.get(b'HOME')
124 124 if home:
125 125 return os.path.join(home, b'.cache', name)
126 126
127 127 hint = _(b"define %s or %s in the environment, or set %s.usercache") % (
128 128 b"XDG_CACHE_HOME",
129 129 b"HOME",
130 130 name,
131 131 )
132 132 else:
133 133 raise error.Abort(
134 134 _(b'unknown operating system: %s\n') % pycompat.osname
135 135 )
136 136
137 137 raise error.Abort(_(b'unknown %s usercache location') % name, hint=hint)
138 138
139 139
140 140 def inusercache(ui, hash):
141 141 path = usercachepath(ui, hash)
142 142 return os.path.exists(path)
143 143
144 144
145 145 def findfile(repo, hash):
146 146 """Return store path of the largefile with the specified hash.
147 147 As a side effect, the file might be linked from user cache.
148 148 Return None if the file can't be found locally."""
149 149 path, exists = findstorepath(repo, hash)
150 150 if exists:
151 151 repo.ui.note(_(b'found %s in store\n') % hash)
152 152 return path
153 153 elif inusercache(repo.ui, hash):
154 154 repo.ui.note(_(b'found %s in system cache\n') % hash)
155 155 path = storepath(repo, hash)
156 156 link(usercachepath(repo.ui, hash), path)
157 157 return path
158 158 return None
159 159
160 160
161 161 class largefilesdirstate(dirstate.dirstate):
162 162 _large_file_dirstate = True
163 163 _tr_key_suffix = b'-large-files'
164 164
165 165 def __getitem__(self, key):
166 166 return super(largefilesdirstate, self).__getitem__(unixpath(key))
167 167
168 168 def set_tracked(self, f):
169 169 return super(largefilesdirstate, self).set_tracked(unixpath(f))
170 170
171 171 def set_untracked(self, f):
172 172 return super(largefilesdirstate, self).set_untracked(unixpath(f))
173 173
174 174 def normal(self, f, parentfiledata=None):
175 175 # not sure if we should pass the `parentfiledata` down or throw it
176 176 # away. So throwing it away to stay on the safe side.
177 177 return super(largefilesdirstate, self).normal(unixpath(f))
178 178
179 179 def remove(self, f):
180 180 return super(largefilesdirstate, self).remove(unixpath(f))
181 181
182 182 def add(self, f):
183 183 return super(largefilesdirstate, self).add(unixpath(f))
184 184
185 185 def drop(self, f):
186 186 return super(largefilesdirstate, self).drop(unixpath(f))
187 187
188 188 def forget(self, f):
189 189 return super(largefilesdirstate, self).forget(unixpath(f))
190 190
191 191 def normallookup(self, f):
192 192 return super(largefilesdirstate, self).normallookup(unixpath(f))
193 193
194 194 def _ignore(self, f):
195 195 return False
196 196
197 197 def write(self, tr):
198 198 # (1) disable PENDING mode always
199 199 # (lfdirstate isn't yet managed as a part of the transaction)
200 200 # (2) avoid develwarn 'use dirstate.write with ....'
201 201 if tr:
202 202 tr.addbackup(b'largefiles/dirstate', location=b'plain')
203 203 super(largefilesdirstate, self).write(None)
204 204
205 205
206 206 def openlfdirstate(ui, repo, create=True):
207 207 """
208 208 Return a dirstate object that tracks largefiles: i.e. its root is
209 209 the repo root, but it is saved in .hg/largefiles/dirstate.
210 210
211 211 If a dirstate object already exists and is being used for a 'changing_*'
212 212 context, it will be returned.
213 213 """
214 214 sub_dirstate = getattr(repo.dirstate, '_sub_dirstate', None)
215 215 if sub_dirstate is not None:
216 216 return sub_dirstate
217 217 vfs = repo.vfs
218 218 lfstoredir = longname
219 219 opener = vfsmod.vfs(vfs.join(lfstoredir))
220 220 use_dirstate_v2 = requirements.DIRSTATE_V2_REQUIREMENT in repo.requirements
221 221 lfdirstate = largefilesdirstate(
222 222 opener,
223 223 ui,
224 224 repo.root,
225 225 repo.dirstate._validate,
226 226 lambda: sparse.matcher(repo),
227 227 repo.nodeconstants,
228 228 use_dirstate_v2,
229 229 )
230 230
231 231 # If the largefiles dirstate does not exist, populate and create
232 232 # it. This ensures that we create it on the first meaningful
233 233 # largefiles operation in a new clone.
234 234 if create and not vfs.exists(vfs.join(lfstoredir, b'dirstate')):
235 235 try:
236 236 with repo.wlock(wait=False), lfdirstate.changing_files(repo):
237 237 matcher = getstandinmatcher(repo)
238 238 standins = repo.dirstate.walk(
239 239 matcher, subrepos=[], unknown=False, ignored=False
240 240 )
241 241
242 242 if len(standins) > 0:
243 243 vfs.makedirs(lfstoredir)
244 244
245 245 for standin in standins:
246 246 lfile = splitstandin(standin)
247 247 lfdirstate.hacky_extension_update_file(
248 248 lfile,
249 249 p1_tracked=True,
250 250 wc_tracked=True,
251 251 possibly_dirty=True,
252 252 )
253 253 except error.LockError:
254 254 # Assume that whatever was holding the lock was important.
255 255 # If we were doing something important, we would already have
256 256 # either the lock or a largefile dirstate.
257 257 pass
258 258 return lfdirstate
259 259
260 260
261 261 def lfdirstatestatus(lfdirstate, repo):
262 262 pctx = repo[b'.']
263 263 match = matchmod.always()
264 264 unsure, s, mtime_boundary = lfdirstate.status(
265 265 match, subrepos=[], ignored=False, clean=False, unknown=False
266 266 )
267 267 modified, clean = s.modified, s.clean
268 268 wctx = repo[None]
269 269 for lfile in unsure:
270 270 try:
271 271 fctx = pctx[standin(lfile)]
272 272 except LookupError:
273 273 fctx = None
274 274 if not fctx or readasstandin(fctx) != hashfile(repo.wjoin(lfile)):
275 275 modified.append(lfile)
276 276 else:
277 277 clean.append(lfile)
278 278 st = wctx[lfile].lstat()
279 279 mode = st.st_mode
280 280 size = st.st_size
281 281 mtime = timestamp.reliable_mtime_of(st, mtime_boundary)
282 282 if mtime is not None:
283 283 cache_data = (mode, size, mtime)
284 284 lfdirstate.set_clean(lfile, cache_data)
285 285 return s
286 286
287 287
288 288 def listlfiles(repo, rev=None, matcher=None):
289 289 """return a list of largefiles in the working copy or the
290 290 specified changeset"""
291 291
292 292 if matcher is None:
293 293 matcher = getstandinmatcher(repo)
294 294
295 295 # ignore unknown files in working directory
296 296 return [
297 297 splitstandin(f)
298 298 for f in repo[rev].walk(matcher)
299 299 if rev is not None or repo.dirstate.get_entry(f).any_tracked
300 300 ]
301 301
302 302
303 303 def instore(repo, hash, forcelocal=False):
304 304 '''Return true if a largefile with the given hash exists in the store'''
305 305 return os.path.exists(storepath(repo, hash, forcelocal))
306 306
307 307
308 308 def storepath(repo, hash, forcelocal=False):
309 309 """Return the correct location in the repository largefiles store for a
310 310 file with the given hash."""
311 311 if not forcelocal and repo.shared():
312 312 return repo.vfs.reljoin(repo.sharedpath, longname, hash)
313 313 return repo.vfs.join(longname, hash)
314 314
315 315
316 316 def findstorepath(repo, hash):
317 317 """Search through the local store path(s) to find the file for the given
318 318 hash. If the file is not found, its path in the primary store is returned.
319 319 The return value is a tuple of (path, exists(path)).
320 320 """
321 321 # For shared repos, the primary store is in the share source. But for
322 322 # backward compatibility, force a lookup in the local store if it wasn't
323 323 # found in the share source.
324 324 path = storepath(repo, hash, False)
325 325
326 326 if instore(repo, hash):
327 327 return (path, True)
328 328 elif repo.shared() and instore(repo, hash, True):
329 329 return storepath(repo, hash, True), True
330 330
331 331 return (path, False)
332 332
333 333
334 334 def copyfromcache(repo, hash, filename):
335 335 """Copy the specified largefile from the repo or system cache to
336 336 filename in the repository. Return true on success or false if the
337 337 file was not found in either cache (which should not happen:
338 338 this is meant to be called only after ensuring that the needed
339 339 largefile exists in the cache)."""
340 340 wvfs = repo.wvfs
341 341 path = findfile(repo, hash)
342 342 if path is None:
343 343 return False
344 344 wvfs.makedirs(wvfs.dirname(wvfs.join(filename)))
345 345 # The write may fail before the file is fully written, but we
346 346 # don't use atomic writes in the working copy.
347 347 with open(path, b'rb') as srcfd, wvfs(filename, b'wb') as destfd:
348 348 gothash = copyandhash(util.filechunkiter(srcfd), destfd)
349 349 if gothash != hash:
350 350 repo.ui.warn(
351 351 _(b'%s: data corruption in %s with hash %s\n')
352 352 % (filename, path, gothash)
353 353 )
354 354 wvfs.unlink(filename)
355 355 return False
356 356 return True
357 357
358 358
359 359 def copytostore(repo, ctx, file, fstandin):
360 360 wvfs = repo.wvfs
361 361 hash = readasstandin(ctx[fstandin])
362 362 if instore(repo, hash):
363 363 return
364 364 if wvfs.exists(file):
365 365 copytostoreabsolute(repo, wvfs.join(file), hash)
366 366 else:
367 367 repo.ui.warn(
368 368 _(b"%s: largefile %s not available from local store\n")
369 369 % (file, hash)
370 370 )
371 371
372 372
373 373 def copyalltostore(repo, node):
374 374 '''Copy all largefiles in a given revision to the store'''
375 375
376 376 ctx = repo[node]
377 377 for filename in ctx.files():
378 378 realfile = splitstandin(filename)
379 379 if realfile is not None and filename in ctx.manifest():
380 380 copytostore(repo, ctx, realfile, filename)
381 381
382 382
383 383 def copytostoreabsolute(repo, file, hash):
384 384 if inusercache(repo.ui, hash):
385 385 link(usercachepath(repo.ui, hash), storepath(repo, hash))
386 386 else:
387 387 util.makedirs(os.path.dirname(storepath(repo, hash)))
388 388 with open(file, b'rb') as srcf:
389 389 with util.atomictempfile(
390 390 storepath(repo, hash), createmode=repo.store.createmode
391 391 ) as dstf:
392 392 for chunk in util.filechunkiter(srcf):
393 393 dstf.write(chunk)
394 394 linktousercache(repo, hash)
395 395
396 396
397 397 def linktousercache(repo, hash):
398 398 """Link / copy the largefile with the specified hash from the store
399 399 to the cache."""
400 400 path = usercachepath(repo.ui, hash)
401 401 link(storepath(repo, hash), path)
402 402
403 403
404 404 def getstandinmatcher(repo, rmatcher=None):
405 405 '''Return a match object that applies rmatcher to the standin directory'''
406 406 wvfs = repo.wvfs
407 407 standindir = shortname
408 408
409 409 # no warnings about missing files or directories
410 410 badfn = lambda f, msg: None
411 411
412 412 if rmatcher and not rmatcher.always():
413 413 pats = [wvfs.join(standindir, pat) for pat in rmatcher.files()]
414 414 if not pats:
415 415 pats = [wvfs.join(standindir)]
416 416 match = scmutil.match(repo[None], pats, badfn=badfn)
417 417 else:
418 418 # no patterns: relative to repo root
419 419 match = scmutil.match(repo[None], [wvfs.join(standindir)], badfn=badfn)
420 420 return match
421 421
422 422
423 423 def composestandinmatcher(repo, rmatcher):
424 424 """Return a matcher that accepts standins corresponding to the
425 425 files accepted by rmatcher. Pass the list of files in the matcher
426 426 as the paths specified by the user."""
427 427 smatcher = getstandinmatcher(repo, rmatcher)
428 428 isstandin = smatcher.matchfn
429 429
430 430 def composedmatchfn(f):
431 431 return isstandin(f) and rmatcher.matchfn(splitstandin(f))
432 432
433 433 smatcher.matchfn = composedmatchfn
434 434
435 435 return smatcher
436 436
437 437
438 438 def standin(filename):
439 439 """Return the repo-relative path to the standin for the specified big
440 440 file."""
441 441 # Notes:
442 442 # 1) Some callers want an absolute path, but for instance addlargefiles
443 443 # needs it repo-relative so it can be passed to repo[None].add(). So
444 444 # leave it up to the caller to use repo.wjoin() to get an absolute path.
445 445 # 2) Join with '/' because that's what dirstate always uses, even on
446 446 # Windows. Change existing separator to '/' first in case we are
447 447 # passed filenames from an external source (like the command line).
448 448 return shortnameslash + util.pconvert(filename)
449 449
450 450
451 451 def isstandin(filename):
452 452 """Return true if filename is a big file standin. filename must be
453 453 in Mercurial's internal form (slash-separated)."""
454 454 return filename.startswith(shortnameslash)
455 455
456 456
457 457 def splitstandin(filename):
458 458 # Split on / because that's what dirstate always uses, even on Windows.
459 459 # Change local separator to / first just in case we are passed filenames
460 460 # from an external source (like the command line).
461 461 bits = util.pconvert(filename).split(b'/', 1)
462 462 if len(bits) == 2 and bits[0] == shortname:
463 463 return bits[1]
464 464 else:
465 465 return None
466 466
467 467
468 468 def updatestandin(repo, lfile, standin):
469 469 """Re-calculate hash value of lfile and write it into standin
470 470
471 471 This assumes that "lfutil.standin(lfile) == standin", for efficiency.
472 472 """
473 473 file = repo.wjoin(lfile)
474 474 if repo.wvfs.exists(lfile):
475 475 hash = hashfile(file)
476 476 executable = getexecutable(file)
477 477 writestandin(repo, standin, hash, executable)
478 478 else:
479 479 raise error.Abort(_(b'%s: file not found!') % lfile)
480 480
481 481
482 482 def readasstandin(fctx):
483 483 """read hex hash from given filectx of standin file
484 484
485 485 This encapsulates how "standin" data is stored into storage layer."""
486 486 return fctx.data().strip()
487 487
488 488
489 489 def writestandin(repo, standin, hash, executable):
490 490 '''write hash to <repo.root>/<standin>'''
491 491 repo.wwrite(standin, hash + b'\n', executable and b'x' or b'')
492 492
493 493
494 494 def copyandhash(instream, outfile):
495 495 """Read bytes from instream (iterable) and write them to outfile,
496 496 computing the SHA-1 hash of the data along the way. Return the hash."""
497 497 hasher = hashutil.sha1(b'')
498 498 for data in instream:
499 499 hasher.update(data)
500 500 outfile.write(data)
501 501 return hex(hasher.digest())
502 502
503 503
504 504 def hashfile(file):
505 505 if not os.path.exists(file):
506 506 return b''
507 507 with open(file, b'rb') as fd:
508 508 return hexsha1(fd)
509 509
510 510
511 511 def getexecutable(filename):
512 512 mode = os.stat(filename).st_mode
513 513 return (
514 514 (mode & stat.S_IXUSR)
515 515 and (mode & stat.S_IXGRP)
516 516 and (mode & stat.S_IXOTH)
517 517 )
518 518
519 519
520 520 def urljoin(first, second, *arg):
521 521 def join(left, right):
522 522 if not left.endswith(b'/'):
523 523 left += b'/'
524 524 if right.startswith(b'/'):
525 525 right = right[1:]
526 526 return left + right
527 527
528 528 url = join(first, second)
529 529 for a in arg:
530 530 url = join(url, a)
531 531 return url
532 532
533 533
534 534 def hexsha1(fileobj):
535 535 """hexsha1 returns the hex-encoded sha1 sum of the data in the file-like
536 536 object data"""
537 537 h = hashutil.sha1()
538 538 for chunk in util.filechunkiter(fileobj):
539 539 h.update(chunk)
540 540 return hex(h.digest())
541 541
542 542
543 543 def httpsendfile(ui, filename):
544 544 return httpconnection.httpsendfile(ui, filename, b'rb')
545 545
546 546
547 547 def unixpath(path):
548 548 '''Return a version of path normalized for use with the lfdirstate.'''
549 549 return util.pconvert(os.path.normpath(path))
550 550
551 551
552 552 def islfilesrepo(repo):
553 553 '''Return true if the repo is a largefile repo.'''
554 554 if b'largefiles' in repo.requirements:
555 for entry in repo.store.datafiles():
555 for entry in repo.store.data_entries():
556 556 if entry.is_revlog and shortnameslash in entry.target_id:
557 557 return True
558 558
559 559 return any(openlfdirstate(repo.ui, repo, False))
560 560
561 561
562 562 class storeprotonotcapable(Exception):
563 563 def __init__(self, storetypes):
564 564 self.storetypes = storetypes
565 565
566 566
567 567 def getstandinsstate(repo):
568 568 standins = []
569 569 matcher = getstandinmatcher(repo)
570 570 wctx = repo[None]
571 571 for standin in repo.dirstate.walk(
572 572 matcher, subrepos=[], unknown=False, ignored=False
573 573 ):
574 574 lfile = splitstandin(standin)
575 575 try:
576 576 hash = readasstandin(wctx[standin])
577 577 except IOError:
578 578 hash = None
579 579 standins.append((lfile, hash))
580 580 return standins
581 581
582 582
583 583 def synclfdirstate(repo, lfdirstate, lfile, normallookup):
584 584 lfstandin = standin(lfile)
585 585 if lfstandin not in repo.dirstate:
586 586 lfdirstate.hacky_extension_update_file(
587 587 lfile,
588 588 p1_tracked=False,
589 589 wc_tracked=False,
590 590 )
591 591 else:
592 592 entry = repo.dirstate.get_entry(lfstandin)
593 593 lfdirstate.hacky_extension_update_file(
594 594 lfile,
595 595 wc_tracked=entry.tracked,
596 596 p1_tracked=entry.p1_tracked,
597 597 p2_info=entry.p2_info,
598 598 possibly_dirty=True,
599 599 )
600 600
601 601
602 602 def markcommitted(orig, ctx, node):
603 603 repo = ctx.repo()
604 604
605 605 with repo.dirstate.changing_parents(repo):
606 606 orig(node)
607 607
608 608 # ATTENTION: "ctx.files()" may differ from "repo[node].files()"
609 609 # because files coming from the 2nd parent are omitted in the latter.
610 610 #
611 611 # The former should be used to get targets of "synclfdirstate",
612 612 # because such files:
613 613 # - are marked as "a" by "patch.patch()" (e.g. via transplant), and
614 614 # - have to be marked as "n" after commit, but
615 615 # - aren't listed in "repo[node].files()"
616 616
617 617 lfdirstate = openlfdirstate(repo.ui, repo)
618 618 for f in ctx.files():
619 619 lfile = splitstandin(f)
620 620 if lfile is not None:
621 621 synclfdirstate(repo, lfdirstate, lfile, False)
622 622
623 623 # As part of committing, copy all of the largefiles into the cache.
624 624 #
625 625 # Using "node" instead of "ctx" implies additional "repo[node]"
626 626 # lookup while copyalltostore(), but can omit redundant check for
627 627 # files coming from the 2nd parent, which should exist in store
628 628 # at merging.
629 629 copyalltostore(repo, node)
630 630
631 631
632 632 def getlfilestoupdate(oldstandins, newstandins):
633 633 changedstandins = set(oldstandins).symmetric_difference(set(newstandins))
634 634 filelist = []
635 635 for f in changedstandins:
636 636 if f[0] not in filelist:
637 637 filelist.append(f[0])
638 638 return filelist
639 639
640 640
641 641 def getlfilestoupload(repo, missing, addfunc):
642 642 makeprogress = repo.ui.makeprogress
643 643 with makeprogress(
644 644 _(b'finding outgoing largefiles'),
645 645 unit=_(b'revisions'),
646 646 total=len(missing),
647 647 ) as progress:
648 648 for i, n in enumerate(missing):
649 649 progress.update(i)
650 650 parents = [p for p in repo[n].parents() if p != repo.nullid]
651 651
652 652 with lfstatus(repo, value=False):
653 653 ctx = repo[n]
654 654
655 655 files = set(ctx.files())
656 656 if len(parents) == 2:
657 657 mc = ctx.manifest()
658 658 mp1 = ctx.p1().manifest()
659 659 mp2 = ctx.p2().manifest()
660 660 for f in mp1:
661 661 if f not in mc:
662 662 files.add(f)
663 663 for f in mp2:
664 664 if f not in mc:
665 665 files.add(f)
666 666 for f in mc:
667 667 if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
668 668 files.add(f)
669 669 for fn in files:
670 670 if isstandin(fn) and fn in ctx:
671 671 addfunc(fn, readasstandin(ctx[fn]))
672 672
673 673
674 674 def updatestandinsbymatch(repo, match):
675 675 """Update standins in the working directory according to specified match
676 676
677 677 This returns (possibly modified) ``match`` object to be used for
678 678 subsequent commit process.
679 679 """
680 680
681 681 ui = repo.ui
682 682
683 683 # Case 1: user calls commit with no specific files or
684 684 # include/exclude patterns: refresh and commit all files that
685 685 # are "dirty".
686 686 if match is None or match.always():
687 687 # Spend a bit of time here to get a list of files we know
688 688 # are modified so we can compare only against those.
689 689 # It can cost a lot of time (several seconds)
690 690 # otherwise to update all standins if the largefiles are
691 691 # large.
692 692 dirtymatch = matchmod.always()
693 693 with repo.dirstate.running_status(repo):
694 694 lfdirstate = openlfdirstate(ui, repo)
695 695 unsure, s, mtime_boundary = lfdirstate.status(
696 696 dirtymatch,
697 697 subrepos=[],
698 698 ignored=False,
699 699 clean=False,
700 700 unknown=False,
701 701 )
702 702 modifiedfiles = unsure + s.modified + s.added + s.removed
703 703 lfiles = listlfiles(repo)
704 704 # this only loops through largefiles that exist (not
705 705 # removed/renamed)
706 706 for lfile in lfiles:
707 707 if lfile in modifiedfiles:
708 708 fstandin = standin(lfile)
709 709 if repo.wvfs.exists(fstandin):
710 710 # this handles the case where a rebase is being
711 711 # performed and the working copy is not updated
712 712 # yet.
713 713 if repo.wvfs.exists(lfile):
714 714 updatestandin(repo, lfile, fstandin)
715 715
716 716 return match
717 717
718 718 lfiles = listlfiles(repo)
719 719 match._files = repo._subdirlfs(match.files(), lfiles)
720 720
721 721 # Case 2: user calls commit with specified patterns: refresh
722 722 # any matching big files.
723 723 smatcher = composestandinmatcher(repo, match)
724 724 standins = repo.dirstate.walk(
725 725 smatcher, subrepos=[], unknown=False, ignored=False
726 726 )
727 727
728 728 # No matching big files: get out of the way and pass control to
729 729 # the usual commit() method.
730 730 if not standins:
731 731 return match
732 732
733 733 # Refresh all matching big files. It's possible that the
734 734 # commit will end up failing, in which case the big files will
735 735 # stay refreshed. No harm done: the user modified them and
736 736 # asked to commit them, so sooner or later we're going to
737 737 # refresh the standins. Might as well leave them refreshed.
738 738 lfdirstate = openlfdirstate(ui, repo)
739 739 for fstandin in standins:
740 740 lfile = splitstandin(fstandin)
741 741 if lfdirstate.get_entry(lfile).tracked:
742 742 updatestandin(repo, lfile, fstandin)
743 743
744 744 # Cook up a new matcher that only matches regular files or
745 745 # standins corresponding to the big files requested by the
746 746 # user. Have to modify _files to prevent commit() from
747 747 # complaining "not tracked" for big files.
748 748 match = copy.copy(match)
749 749 origmatchfn = match.matchfn
750 750
751 751 # Check both the list of largefiles and the list of
752 752 # standins because if a largefile was removed, it
753 753 # won't be in the list of largefiles at this point
754 754 match._files += sorted(standins)
755 755
756 756 actualfiles = []
757 757 for f in match._files:
758 758 fstandin = standin(f)
759 759
760 760 # For largefiles, only one of the normal and standin should be
761 761 # committed (except if one of them is a remove). In the case of a
762 762 # standin removal, drop the normal file if it is unknown to dirstate.
763 763 # Thus, skip plain largefile names but keep the standin.
764 764 if f in lfiles or fstandin in standins:
765 765 if not repo.dirstate.get_entry(fstandin).removed:
766 766 if not repo.dirstate.get_entry(f).removed:
767 767 continue
768 768 elif not repo.dirstate.get_entry(f).any_tracked:
769 769 continue
770 770
771 771 actualfiles.append(f)
772 772 match._files = actualfiles
773 773
774 774 def matchfn(f):
775 775 if origmatchfn(f):
776 776 return f not in lfiles
777 777 else:
778 778 return f in standins
779 779
780 780 match.matchfn = matchfn
781 781
782 782 return match
783 783
784 784
785 785 class automatedcommithook:
786 786 """Stateful hook to update standins at the 1st commit of resuming
787 787
788 788 For efficiency, updating standins in the working directory should
789 789 be avoided while automated committing (like rebase, transplant and
790 790 so on), because they should be updated before committing.
791 791
792 792 But the 1st commit of resuming automated committing (e.g. ``rebase
793 793 --continue``) should update them, because largefiles may be
794 794 modified manually.
795 795 """
796 796
797 797 def __init__(self, resuming):
798 798 self.resuming = resuming
799 799
800 800 def __call__(self, repo, match):
801 801 if self.resuming:
802 802 self.resuming = False # avoids updating at subsequent commits
803 803 return updatestandinsbymatch(repo, match)
804 804 else:
805 805 return match
806 806
807 807
808 808 def getstatuswriter(ui, repo, forcibly=None):
809 809 """Return the function to write largefiles specific status out
810 810
811 811 If ``forcibly`` is ``None``, this returns the last element of
812 812 ``repo._lfstatuswriters`` as "default" writer function.
813 813
814 814 Otherwise, this returns the function to always write out (or
815 815 ignore if ``not forcibly``) status.
816 816 """
817 817 if forcibly is None and util.safehasattr(repo, b'_largefilesenabled'):
818 818 return repo._lfstatuswriters[-1]
819 819 else:
820 820 if forcibly:
821 821 return ui.status # forcibly WRITE OUT
822 822 else:
823 823 return lambda *msg, **opts: None # forcibly IGNORE
hgext/largefiles/reposetup.py
@@ -1,474 +1,474
1 1 # Copyright 2009-2010 Gregory P. Ward
2 2 # Copyright 2009-2010 Intelerad Medical Systems Incorporated
3 3 # Copyright 2010-2011 Fog Creek Software
4 4 # Copyright 2010-2011 Unity Technologies
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 '''setup for largefiles repositories: reposetup'''
10 10
11 11 import copy
12 12
13 13 from mercurial.i18n import _
14 14
15 15 from mercurial import (
16 16 error,
17 17 extensions,
18 18 localrepo,
19 19 match as matchmod,
20 20 scmutil,
21 21 util,
22 22 )
23 23
24 24 from mercurial.dirstateutils import timestamp
25 25
26 26 from . import (
27 27 lfcommands,
28 28 lfutil,
29 29 )
30 30
31 31
32 32 def reposetup(ui, repo):
33 33 # wire repositories should be given new wireproto functions
34 34 # by "proto.wirereposetup()" via "hg.wirepeersetupfuncs"
35 35 if not repo.local():
36 36 return
37 37
38 38 class lfilesrepo(repo.__class__):
39 39 # the mark to examine whether "repo" object enables largefiles or not
40 40 _largefilesenabled = True
41 41
42 42 lfstatus = False
43 43
44 44 # When lfstatus is set, return a context that gives the names
45 45 # of largefiles instead of their corresponding standins and
46 46 # identifies the largefiles as always binary, regardless of
47 47 # their actual contents.
48 48 def __getitem__(self, changeid):
49 49 ctx = super(lfilesrepo, self).__getitem__(changeid)
50 50 if self.lfstatus:
51 51
52 52 def files(orig):
53 53 filenames = orig()
54 54 return [lfutil.splitstandin(f) or f for f in filenames]
55 55
56 56 extensions.wrapfunction(ctx, 'files', files)
57 57
58 58 def manifest(orig):
59 59 man1 = orig()
60 60
61 61 class lfilesmanifest(man1.__class__):
62 62 def __contains__(self, filename):
63 63 orig = super(lfilesmanifest, self).__contains__
64 64 return orig(filename) or orig(
65 65 lfutil.standin(filename)
66 66 )
67 67
68 68 man1.__class__ = lfilesmanifest
69 69 return man1
70 70
71 71 extensions.wrapfunction(ctx, 'manifest', manifest)
72 72
73 73 def filectx(orig, path, fileid=None, filelog=None):
74 74 try:
75 75 if filelog is not None:
76 76 result = orig(path, fileid, filelog)
77 77 else:
78 78 result = orig(path, fileid)
79 79 except error.LookupError:
80 80 # Adding a null character will cause Mercurial to
81 81 # identify this as a binary file.
82 82 if filelog is not None:
83 83 result = orig(lfutil.standin(path), fileid, filelog)
84 84 else:
85 85 result = orig(lfutil.standin(path), fileid)
86 86 olddata = result.data
87 87 result.data = lambda: olddata() + b'\0'
88 88 return result
89 89
90 90 extensions.wrapfunction(ctx, 'filectx', filectx)
91 91
92 92 return ctx
93 93
94 94 # Figure out the status of big files and insert them into the
95 95 # appropriate list in the result. Also removes standin files
96 96 # from the listing. Revert to the original status if
97 97 # self.lfstatus is False.
98 98 # XXX large file status is buggy when used on repo proxy.
99 99 # XXX this needs to be investigated.
100 100 @localrepo.unfilteredmethod
101 101 def status(
102 102 self,
103 103 node1=b'.',
104 104 node2=None,
105 105 match=None,
106 106 ignored=False,
107 107 clean=False,
108 108 unknown=False,
109 109 listsubrepos=False,
110 110 ):
111 111 listignored, listclean, listunknown = ignored, clean, unknown
112 112 orig = super(lfilesrepo, self).status
113 113 if not self.lfstatus:
114 114 return orig(
115 115 node1,
116 116 node2,
117 117 match,
118 118 listignored,
119 119 listclean,
120 120 listunknown,
121 121 listsubrepos,
122 122 )
123 123
124 124 # some calls in this function rely on the old version of status
125 125 self.lfstatus = False
126 126 ctx1 = self[node1]
127 127 ctx2 = self[node2]
128 128 working = ctx2.rev() is None
129 129 parentworking = working and ctx1 == self[b'.']
130 130
131 131 if match is None:
132 132 match = matchmod.always()
133 133
134 134 try:
135 135 # updating the dirstate is optional
136 136 # so we don't wait on the lock
137 137 wlock = self.wlock(False)
138 138 gotlock = True
139 139 except error.LockError:
140 140 wlock = util.nullcontextmanager()
141 141 gotlock = False
142 142 with wlock, self.dirstate.running_status(self):
143 143
144 144 # First check if paths or patterns were specified on the
145 145 # command line. If there were, and they don't match any
146 146 # largefiles, we should just bail here and let super
147 147 # handle it -- thus gaining a big performance boost.
148 148 lfdirstate = lfutil.openlfdirstate(ui, self)
149 149 if not match.always():
150 150 for f in lfdirstate:
151 151 if match(f):
152 152 break
153 153 else:
154 154 return orig(
155 155 node1,
156 156 node2,
157 157 match,
158 158 listignored,
159 159 listclean,
160 160 listunknown,
161 161 listsubrepos,
162 162 )
163 163
164 164 # Create a copy of match that matches standins instead
165 165 # of largefiles.
166 166 def tostandins(files):
167 167 if not working:
168 168 return files
169 169 newfiles = []
170 170 dirstate = self.dirstate
171 171 for f in files:
172 172 sf = lfutil.standin(f)
173 173 if sf in dirstate:
174 174 newfiles.append(sf)
175 175 elif dirstate.hasdir(sf):
176 176 # Directory entries could be regular or
177 177 # standin, check both
178 178 newfiles.extend((f, sf))
179 179 else:
180 180 newfiles.append(f)
181 181 return newfiles
182 182
183 183 m = copy.copy(match)
184 184 m._files = tostandins(m._files)
185 185
186 186 result = orig(
187 187 node1, node2, m, ignored, clean, unknown, listsubrepos
188 188 )
189 189 if working:
190 190
191 191 def sfindirstate(f):
192 192 sf = lfutil.standin(f)
193 193 dirstate = self.dirstate
194 194 return sf in dirstate or dirstate.hasdir(sf)
195 195
196 196 match._files = [f for f in match._files if sfindirstate(f)]
197 197 # Don't waste time getting the ignored and unknown
198 198 # files from lfdirstate
199 199 unsure, s, mtime_boundary = lfdirstate.status(
200 200 match,
201 201 subrepos=[],
202 202 ignored=False,
203 203 clean=listclean,
204 204 unknown=False,
205 205 )
206 206 (modified, added, removed, deleted, clean) = (
207 207 s.modified,
208 208 s.added,
209 209 s.removed,
210 210 s.deleted,
211 211 s.clean,
212 212 )
213 213 if parentworking:
214 214 wctx = repo[None]
215 215 for lfile in unsure:
216 216 standin = lfutil.standin(lfile)
217 217 if standin not in ctx1:
218 218 # from second parent
219 219 modified.append(lfile)
220 220 elif lfutil.readasstandin(
221 221 ctx1[standin]
222 222 ) != lfutil.hashfile(self.wjoin(lfile)):
223 223 modified.append(lfile)
224 224 else:
225 225 if listclean:
226 226 clean.append(lfile)
227 227 s = wctx[lfile].lstat()
228 228 mode = s.st_mode
229 229 size = s.st_size
230 230 mtime = timestamp.reliable_mtime_of(
231 231 s, mtime_boundary
232 232 )
233 233 if mtime is not None:
234 234 cache_data = (mode, size, mtime)
235 235 lfdirstate.set_clean(lfile, cache_data)
236 236 else:
237 237 tocheck = unsure + modified + added + clean
238 238 modified, added, clean = [], [], []
239 239 checkexec = self.dirstate._checkexec
240 240
241 241 for lfile in tocheck:
242 242 standin = lfutil.standin(lfile)
243 243 if standin in ctx1:
244 244 abslfile = self.wjoin(lfile)
245 245 if (
246 246 lfutil.readasstandin(ctx1[standin])
247 247 != lfutil.hashfile(abslfile)
248 248 ) or (
249 249 checkexec
250 250 and (b'x' in ctx1.flags(standin))
251 251 != bool(lfutil.getexecutable(abslfile))
252 252 ):
253 253 modified.append(lfile)
254 254 elif listclean:
255 255 clean.append(lfile)
256 256 else:
257 257 added.append(lfile)
258 258
259 259 # at this point, 'removed' contains largefiles
260 260 # marked as 'R' in the working context.
261 261 # then, largefiles not managed also in the target
262 262 # context should be excluded from 'removed'.
263 263 removed = [
264 264 lfile
265 265 for lfile in removed
266 266 if lfutil.standin(lfile) in ctx1
267 267 ]
268 268
269 269 # Standins no longer found in lfdirstate have been deleted
270 270 for standin in ctx1.walk(lfutil.getstandinmatcher(self)):
271 271 lfile = lfutil.splitstandin(standin)
272 272 if not match(lfile):
273 273 continue
274 274 if lfile not in lfdirstate:
275 275 deleted.append(lfile)
276 276 # Sync "largefile has been removed" back to the
277 277 # standin. Removing a file as a side effect of
278 278 # running status is gross, but the alternatives (if
279 279 # any) are worse.
280 280 self.wvfs.unlinkpath(standin, ignoremissing=True)
281 281
282 282 # Filter result lists
283 283 result = list(result)
284 284
285 285 # Largefiles are not really removed when they're
286 286 # still in the normal dirstate. Likewise, normal
287 287 # files are not really removed if they are still in
288 288 # lfdirstate. This happens in merges where files
289 289 # change type.
290 290 removed = [f for f in removed if f not in self.dirstate]
291 291 result[2] = [f for f in result[2] if f not in lfdirstate]
292 292
293 293 lfiles = set(lfdirstate)
294 294 # Unknown files
295 295 result[4] = set(result[4]).difference(lfiles)
296 296 # Ignored files
297 297 result[5] = set(result[5]).difference(lfiles)
298 298 # combine normal files and largefiles
299 299 normals = [
300 300 [fn for fn in filelist if not lfutil.isstandin(fn)]
301 301 for filelist in result
302 302 ]
303 303 lfstatus = (
304 304 modified,
305 305 added,
306 306 removed,
307 307 deleted,
308 308 [],
309 309 [],
310 310 clean,
311 311 )
312 312 result = [
313 313 sorted(list1 + list2)
314 314 for (list1, list2) in zip(normals, lfstatus)
315 315 ]
316 316 else: # not against working directory
317 317 result = [
318 318 [lfutil.splitstandin(f) or f for f in items]
319 319 for items in result
320 320 ]
321 321
322 322 if gotlock:
323 323 lfdirstate.write(self.currenttransaction())
324 324 else:
325 325 lfdirstate.invalidate()
326 326
327 327 self.lfstatus = True
328 328 return scmutil.status(*result)
329 329
330 330 def commitctx(self, ctx, *args, **kwargs):
331 331 node = super(lfilesrepo, self).commitctx(ctx, *args, **kwargs)
332 332
333 333 class lfilesctx(ctx.__class__):
334 334 def markcommitted(self, node):
335 335 orig = super(lfilesctx, self).markcommitted
336 336 return lfutil.markcommitted(orig, self, node)
337 337
338 338 ctx.__class__ = lfilesctx
339 339 return node
340 340
341 341 # Before commit, largefile standins have not had their
342 342 # contents updated to reflect the hash of their largefile.
343 343 # Do that here.
344 344 def commit(
345 345 self,
346 346 text=b"",
347 347 user=None,
348 348 date=None,
349 349 match=None,
350 350 force=False,
351 351 editor=False,
352 352 extra=None,
353 353 ):
354 354 if extra is None:
355 355 extra = {}
356 356 orig = super(lfilesrepo, self).commit
357 357
358 358 with self.wlock():
359 359 lfcommithook = self._lfcommithooks[-1]
360 360 match = lfcommithook(self, match)
361 361 result = orig(
362 362 text=text,
363 363 user=user,
364 364 date=date,
365 365 match=match,
366 366 force=force,
367 367 editor=editor,
368 368 extra=extra,
369 369 )
370 370 return result
371 371
372 372 # TODO: _subdirlfs should be moved into "lfutil.py", because
373 373 # it is referred only from "lfutil.updatestandinsbymatch"
374 374 def _subdirlfs(self, files, lfiles):
375 375 """
376 376 Adjust matched file list
377 377 If we pass a directory to commit whose only committable files
378 378 are largefiles, the core commit code aborts before finding
379 379 the largefiles.
380 380 So we do the following:
381 381 For directories that only have largefiles as matches,
382 382 we explicitly add the largefiles to the match list and remove
383 383 the directory.
384 384 In other cases, we leave the match list unmodified.
385 385 """
386 386 actualfiles = []
387 387 dirs = []
388 388 regulars = []
389 389
390 390 for f in files:
391 391 if lfutil.isstandin(f + b'/'):
392 392 raise error.Abort(
393 393 _(b'file "%s" is a largefile standin') % f,
394 394 hint=b'commit the largefile itself instead',
395 395 )
396 396 # Scan directories
397 397 if self.wvfs.isdir(f):
398 398 dirs.append(f)
399 399 else:
400 400 regulars.append(f)
401 401
402 402 for f in dirs:
403 403 matcheddir = False
404 404 d = self.dirstate.normalize(f) + b'/'
405 405 # Check for matched normal files
406 406 for mf in regulars:
407 407 if self.dirstate.normalize(mf).startswith(d):
408 408 actualfiles.append(f)
409 409 matcheddir = True
410 410 break
411 411 if not matcheddir:
412 412 # If no normal match, manually append
413 413 # any matching largefiles
414 414 for lf in lfiles:
415 415 if self.dirstate.normalize(lf).startswith(d):
416 416 actualfiles.append(lf)
417 417 if not matcheddir:
418 418 # There may still be normal files in the dir, so
419 419 # add a directory to the list, which
420 420 # forces status/dirstate to walk all files and
421 421 # call the match function on the matcher, even
422 422 # on case sensitive filesystems.
423 423 actualfiles.append(b'.')
424 424 matcheddir = True
425 425 # Nothing in dir, so readd it
426 426 # and let commit reject it
427 427 if not matcheddir:
428 428 actualfiles.append(f)
429 429
430 430 # Always add normal files
431 431 actualfiles += regulars
432 432 return actualfiles
433 433
434 434 repo.__class__ = lfilesrepo
435 435
436 436 # stack of hooks being executed before committing.
437 437 # only last element ("_lfcommithooks[-1]") is used for each committing.
438 438 repo._lfcommithooks = [lfutil.updatestandinsbymatch]
439 439
440 440 # Stack of status writer functions taking "*msg, **opts" arguments
441 441 # like "ui.status()". Only last element ("_lfstatuswriters[-1]")
442 442 # is used to write status out.
443 443 repo._lfstatuswriters = [ui.status]
444 444
445 445 def prepushoutgoinghook(pushop):
446 446 """Push largefiles for pushop before pushing revisions."""
447 447 lfrevs = pushop.lfrevs
448 448 if lfrevs is None:
449 449 lfrevs = pushop.outgoing.missing
450 450 if lfrevs:
451 451 toupload = set()
452 452 addfunc = lambda fn, lfhash: toupload.add(lfhash)
453 453 lfutil.getlfilestoupload(pushop.repo, lfrevs, addfunc)
454 454 lfcommands.uploadlfiles(ui, pushop.repo, pushop.remote, toupload)
455 455
456 456 repo.prepushoutgoinghooks.add(b"largefiles", prepushoutgoinghook)
457 457
458 458 def checkrequireslfiles(ui, repo, **kwargs):
459 459 with repo.lock():
460 460 if b'largefiles' in repo.requirements:
461 461 return
462 462 marker = lfutil.shortnameslash
463 for entry in repo.store.datafiles():
463 for entry in repo.store.data_entries():
464 464 # XXX note that this match is not rooted and can wrongly match
465 465 # directory ending with ".hglf"
466 466 if entry.is_revlog and marker in entry.target_id:
467 467 repo.requirements.add(b'largefiles')
468 468 scmutil.writereporequirements(repo)
469 469 break
470 470
471 471 ui.setconfig(
472 472 b'hooks', b'changegroup.lfiles', checkrequireslfiles, b'largefiles'
473 473 )
474 474 ui.setconfig(b'hooks', b'commit.lfiles', checkrequireslfiles, b'largefiles')
hgext/narrow/narrowcommands.py
@@ -1,698 +1,698
1 1 # narrowcommands.py - command modifications for narrowhg extension
2 2 #
3 3 # Copyright 2017 Google, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import itertools
9 9 import os
10 10
11 11 from mercurial.i18n import _
12 12 from mercurial.node import (
13 13 hex,
14 14 short,
15 15 )
16 16 from mercurial import (
17 17 bundle2,
18 18 cmdutil,
19 19 commands,
20 20 discovery,
21 21 encoding,
22 22 error,
23 23 exchange,
24 24 extensions,
25 25 hg,
26 26 narrowspec,
27 27 pathutil,
28 28 pycompat,
29 29 registrar,
30 30 repair,
31 31 repoview,
32 32 requirements,
33 33 sparse,
34 34 util,
35 35 wireprototypes,
36 36 )
37 37 from mercurial.utils import (
38 38 urlutil,
39 39 )
40 40
41 41 table = {}
42 42 command = registrar.command(table)
43 43
44 44
45 45 def setup():
46 46 """Wraps user-facing mercurial commands with narrow-aware versions."""
47 47
48 48 entry = extensions.wrapcommand(commands.table, b'clone', clonenarrowcmd)
49 49 entry[1].append(
50 50 (b'', b'narrow', None, _(b"create a narrow clone of select files"))
51 51 )
52 52 entry[1].append(
53 53 (
54 54 b'',
55 55 b'depth',
56 56 b'',
57 57 _(b"limit the history fetched by distance from heads"),
58 58 )
59 59 )
60 60 entry[1].append((b'', b'narrowspec', b'', _(b"read narrowspecs from file")))
61 61 # TODO(durin42): unify sparse/narrow --include/--exclude logic a bit
62 62 if b'sparse' not in extensions.enabled():
63 63 entry[1].append(
64 64 (b'', b'include', [], _(b"specifically fetch this file/directory"))
65 65 )
66 66 entry[1].append(
67 67 (
68 68 b'',
69 69 b'exclude',
70 70 [],
71 71 _(b"do not fetch this file/directory, even if included"),
72 72 )
73 73 )
74 74
75 75 entry = extensions.wrapcommand(commands.table, b'pull', pullnarrowcmd)
76 76 entry[1].append(
77 77 (
78 78 b'',
79 79 b'depth',
80 80 b'',
81 81 _(b"limit the history fetched by distance from heads"),
82 82 )
83 83 )
84 84
85 85 extensions.wrapcommand(commands.table, b'archive', archivenarrowcmd)
86 86
87 87
88 88 def clonenarrowcmd(orig, ui, repo, *args, **opts):
89 89 """Wraps clone command, so 'hg clone' first wraps localrepo.clone()."""
90 90 opts = pycompat.byteskwargs(opts)
91 91 wrappedextraprepare = util.nullcontextmanager()
92 92 narrowspecfile = opts[b'narrowspec']
93 93
94 94 if narrowspecfile:
95 95 filepath = os.path.join(encoding.getcwd(), narrowspecfile)
96 96 ui.status(_(b"reading narrowspec from '%s'\n") % filepath)
97 97 try:
98 98 fdata = util.readfile(filepath)
99 99 except IOError as inst:
100 100 raise error.Abort(
101 101 _(b"cannot read narrowspecs from '%s': %s")
102 102 % (filepath, encoding.strtolocal(inst.strerror))
103 103 )
104 104
105 105 includes, excludes, profiles = sparse.parseconfig(ui, fdata, b'narrow')
106 106 if profiles:
107 107 raise error.ConfigError(
108 108 _(
109 109 b"cannot specify other files using '%include' in"
110 110 b" narrowspec"
111 111 )
112 112 )
113 113
114 114 narrowspec.validatepatterns(includes)
115 115 narrowspec.validatepatterns(excludes)
116 116
117 117 # narrowspec is passed so we should assume that user wants narrow clone
118 118 opts[b'narrow'] = True
119 119 opts[b'include'].extend(includes)
120 120 opts[b'exclude'].extend(excludes)
121 121
122 122 if opts[b'narrow']:
123 123
124 124 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
125 125 orig(pullop, kwargs)
126 126
127 127 if opts.get(b'depth'):
128 128 kwargs[b'depth'] = opts[b'depth']
129 129
130 130 wrappedextraprepare = extensions.wrappedfunction(
131 131 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
132 132 )
133 133
134 134 with wrappedextraprepare:
135 135 return orig(ui, repo, *args, **pycompat.strkwargs(opts))
136 136
137 137
138 138 def pullnarrowcmd(orig, ui, repo, *args, **opts):
139 139 """Wraps pull command to allow modifying narrow spec."""
140 140 wrappedextraprepare = util.nullcontextmanager()
141 141 if requirements.NARROW_REQUIREMENT in repo.requirements:
142 142
143 143 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
144 144 orig(pullop, kwargs)
145 145 if opts.get('depth'):
146 146 kwargs[b'depth'] = opts['depth']
147 147
148 148 wrappedextraprepare = extensions.wrappedfunction(
149 149 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
150 150 )
151 151
152 152 with wrappedextraprepare:
153 153 return orig(ui, repo, *args, **opts)
154 154
155 155
156 156 def archivenarrowcmd(orig, ui, repo, *args, **opts):
157 157 """Wraps archive command to narrow the default includes."""
158 158 if requirements.NARROW_REQUIREMENT in repo.requirements:
159 159 repo_includes, repo_excludes = repo.narrowpats
160 160 includes = set(opts.get('include', []))
161 161 excludes = set(opts.get('exclude', []))
162 162 includes, excludes, unused_invalid = narrowspec.restrictpatterns(
163 163 includes, excludes, repo_includes, repo_excludes
164 164 )
165 165 if includes:
166 166 opts['include'] = includes
167 167 if excludes:
168 168 opts['exclude'] = excludes
169 169 return orig(ui, repo, *args, **opts)
170 170
171 171
172 172 def pullbundle2extraprepare(orig, pullop, kwargs):
173 173 repo = pullop.repo
174 174 if requirements.NARROW_REQUIREMENT not in repo.requirements:
175 175 return orig(pullop, kwargs)
176 176
177 177 if wireprototypes.NARROWCAP not in pullop.remote.capabilities():
178 178 raise error.Abort(_(b"server does not support narrow clones"))
179 179 orig(pullop, kwargs)
180 180 kwargs[b'narrow'] = True
181 181 include, exclude = repo.narrowpats
182 182 kwargs[b'oldincludepats'] = include
183 183 kwargs[b'oldexcludepats'] = exclude
184 184 if include:
185 185 kwargs[b'includepats'] = include
186 186 if exclude:
187 187 kwargs[b'excludepats'] = exclude
188 188 # calculate known nodes only in ellipses cases because in non-ellipses cases
189 189 # we have all the nodes
190 190 if wireprototypes.ELLIPSESCAP1 in pullop.remote.capabilities():
191 191 kwargs[b'known'] = [
192 192 hex(ctx.node())
193 193 for ctx in repo.set(b'::%ln', pullop.common)
194 194 if ctx.node() != repo.nullid
195 195 ]
196 196 if not kwargs[b'known']:
197 197 # Mercurial serializes an empty list as '' and deserializes it as
198 198 # [''], so delete it instead to avoid handling the empty string on
199 199 # the server.
200 200 del kwargs[b'known']
201 201
202 202
203 203 extensions.wrapfunction(
204 204 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare
205 205 )
206 206
207 207
208 208 def _narrow(
209 209 ui,
210 210 repo,
211 211 remote,
212 212 commoninc,
213 213 oldincludes,
214 214 oldexcludes,
215 215 newincludes,
216 216 newexcludes,
217 217 force,
218 218 backup,
219 219 ):
220 220 oldmatch = narrowspec.match(repo.root, oldincludes, oldexcludes)
221 221 newmatch = narrowspec.match(repo.root, newincludes, newexcludes)
222 222
223 223 # This is essentially doing "hg outgoing" to find all local-only
224 224 # commits. We will then check that the local-only commits don't
225 225 # have any changes to files that will be untracked.
226 226 unfi = repo.unfiltered()
227 227 outgoing = discovery.findcommonoutgoing(unfi, remote, commoninc=commoninc)
228 228 ui.status(_(b'looking for local changes to affected paths\n'))
229 229 progress = ui.makeprogress(
230 230 topic=_(b'changesets'),
231 231 unit=_(b'changesets'),
232 232 total=len(outgoing.missing) + len(outgoing.excluded),
233 233 )
234 234 localnodes = []
235 235 with progress:
236 236 for n in itertools.chain(outgoing.missing, outgoing.excluded):
237 237 progress.increment()
238 238 if any(oldmatch(f) and not newmatch(f) for f in unfi[n].files()):
239 239 localnodes.append(n)
240 240 revstostrip = unfi.revs(b'descendants(%ln)', localnodes)
241 241 hiddenrevs = repoview.filterrevs(repo, b'visible')
242 242 visibletostrip = list(
243 243 repo.changelog.node(r) for r in (revstostrip - hiddenrevs)
244 244 )
245 245 if visibletostrip:
246 246 ui.status(
247 247 _(
248 248 b'The following changeset(s) or their ancestors have '
249 249 b'local changes not on the remote:\n'
250 250 )
251 251 )
252 252 maxnodes = 10
253 253 if ui.verbose or len(visibletostrip) <= maxnodes:
254 254 for n in visibletostrip:
255 255 ui.status(b'%s\n' % short(n))
256 256 else:
257 257 for n in visibletostrip[:maxnodes]:
258 258 ui.status(b'%s\n' % short(n))
259 259 ui.status(
260 260 _(b'...and %d more, use --verbose to list all\n')
261 261 % (len(visibletostrip) - maxnodes)
262 262 )
263 263 if not force:
264 264 raise error.StateError(
265 265 _(b'local changes found'),
266 266 hint=_(b'use --force-delete-local-changes to ignore'),
267 267 )
268 268
269 269 with ui.uninterruptible():
270 270 if revstostrip:
271 271 tostrip = [unfi.changelog.node(r) for r in revstostrip]
272 272 if repo[b'.'].node() in tostrip:
273 273 # stripping working copy, so move to a different commit first
274 274 urev = max(
275 275 repo.revs(
276 276 b'(::%n) - %ln + null',
277 277 repo[b'.'].node(),
278 278 visibletostrip,
279 279 )
280 280 )
281 281 hg.clean(repo, urev)
282 282 overrides = {(b'devel', b'strip-obsmarkers'): False}
283 283 if backup:
284 284 ui.status(_(b'moving unwanted changesets to backup\n'))
285 285 else:
286 286 ui.status(_(b'deleting unwanted changesets\n'))
287 287 with ui.configoverride(overrides, b'narrow'):
288 288 repair.strip(ui, unfi, tostrip, topic=b'narrow', backup=backup)
289 289
290 290 todelete = []
291 for entry in repo.store.datafiles():
291 for entry in repo.store.data_entries():
292 292 if not entry.is_revlog:
293 293 continue
294 294 if entry.is_filelog:
295 295 if not newmatch(entry.target_id):
296 296 for file_ in entry.files():
297 297 todelete.append(file_.unencoded_path)
298 298 elif entry.is_manifestlog:
299 299 dir = entry.target_id
300 300 dirs = sorted(pathutil.dirs({dir})) + [dir]
301 301 include = True
302 302 for d in dirs:
303 303 visit = newmatch.visitdir(d)
304 304 if not visit:
305 305 include = False
306 306 break
307 307 if visit == b'all':
308 308 break
309 309 if not include:
310 310 for file_ in entry.files():
311 311 todelete.append(file_.unencoded_path)
312 312
313 313 repo.destroying()
314 314
315 315 with repo.transaction(b'narrowing'):
316 316 # Update narrowspec before removing revlogs, so repo won't be
317 317 # corrupt in case of crash
318 318 repo.setnarrowpats(newincludes, newexcludes)
319 319
320 320 for f in todelete:
321 321 ui.status(_(b'deleting %s\n') % f)
322 322 util.unlinkpath(repo.svfs.join(f))
323 323 repo.store.markremoved(f)
324 324
325 325 ui.status(_(b'deleting unwanted files from working copy\n'))
326 326 with repo.dirstate.changing_parents(repo):
327 327 narrowspec.updateworkingcopy(repo, assumeclean=True)
328 328 narrowspec.copytoworkingcopy(repo)
329 329
330 330 repo.destroyed()
331 331
332 332
333 333 def _widen(
334 334 ui,
335 335 repo,
336 336 remote,
337 337 commoninc,
338 338 oldincludes,
339 339 oldexcludes,
340 340 newincludes,
341 341 newexcludes,
342 342 ):
343 343 # for now we assume that if a server has ellipses enabled, we will be
344 344 # exchanging ellipses nodes. In future we should add ellipses as a client
345 345 # side requirement (maybe) to distinguish a client is shallow or not and
346 346 # then send that information to server whether we want ellipses or not.
347 347 # Theoretically a non-ellipses repo should be able to use narrow
348 348 # functionality from an ellipses enabled server
349 349 remotecap = remote.capabilities()
350 350 ellipsesremote = any(
351 351 cap in remotecap for cap in wireprototypes.SUPPORTED_ELLIPSESCAP
352 352 )
353 353
354 354 # check whether we are talking to a server which supports old version of
355 355 # ellipses capabilities
356 356 isoldellipses = (
357 357 ellipsesremote
358 358 and wireprototypes.ELLIPSESCAP1 in remotecap
359 359 and wireprototypes.ELLIPSESCAP not in remotecap
360 360 )
361 361
362 362 def pullbundle2extraprepare_widen(orig, pullop, kwargs):
363 363 orig(pullop, kwargs)
364 364 # The old{in,ex}cludepats have already been set by orig()
365 365 kwargs[b'includepats'] = newincludes
366 366 kwargs[b'excludepats'] = newexcludes
367 367
368 368 wrappedextraprepare = extensions.wrappedfunction(
369 369 exchange, b'_pullbundle2extraprepare', pullbundle2extraprepare_widen
370 370 )
371 371
372 372 # define a function that narrowbundle2 can call after creating the
373 373 # backup bundle, but before applying the bundle from the server
374 374 def setnewnarrowpats():
375 375 repo.setnarrowpats(newincludes, newexcludes)
376 376
377 377 repo.setnewnarrowpats = setnewnarrowpats
378 378 # silence the devel-warning of applying an empty changegroup
379 379 overrides = {(b'devel', b'all-warnings'): False}
380 380
381 381 common = commoninc[0]
382 382 with ui.uninterruptible():
383 383 if ellipsesremote:
384 384 ds = repo.dirstate
385 385 p1, p2 = ds.p1(), ds.p2()
386 386 with ds.changing_parents(repo):
387 387 ds.setparents(repo.nullid, repo.nullid)
388 388 if isoldellipses:
389 389 with wrappedextraprepare:
390 390 exchange.pull(repo, remote, heads=common)
391 391 else:
392 392 known = []
393 393 if ellipsesremote:
394 394 known = [
395 395 ctx.node()
396 396 for ctx in repo.set(b'::%ln', common)
397 397 if ctx.node() != repo.nullid
398 398 ]
399 399 with remote.commandexecutor() as e:
400 400 bundle = e.callcommand(
401 401 b'narrow_widen',
402 402 {
403 403 b'oldincludes': oldincludes,
404 404 b'oldexcludes': oldexcludes,
405 405 b'newincludes': newincludes,
406 406 b'newexcludes': newexcludes,
407 407 b'cgversion': b'03',
408 408 b'commonheads': common,
409 409 b'known': known,
410 410 b'ellipses': ellipsesremote,
411 411 },
412 412 ).result()
413 413
414 414 trmanager = exchange.transactionmanager(
415 415 repo, b'widen', remote.url()
416 416 )
417 417 with trmanager, repo.ui.configoverride(overrides, b'widen'):
418 418 op = bundle2.bundleoperation(
419 419 repo, trmanager.transaction, source=b'widen'
420 420 )
421 421 # TODO: we should catch error.Abort here
422 422 bundle2.processbundle(repo, bundle, op=op, remote=remote)
423 423
424 424 if ellipsesremote:
425 425 with ds.changing_parents(repo):
426 426 ds.setparents(p1, p2)
427 427
428 428 with repo.transaction(b'widening'), repo.dirstate.changing_parents(
429 429 repo
430 430 ):
431 431 repo.setnewnarrowpats()
432 432 narrowspec.updateworkingcopy(repo)
433 433 narrowspec.copytoworkingcopy(repo)
434 434
435 435
436 436 # TODO(rdamazio): Make new matcher format and update description
437 437 @command(
438 438 b'tracked',
439 439 [
440 440 (b'', b'addinclude', [], _(b'new paths to include')),
441 441 (b'', b'removeinclude', [], _(b'old paths to no longer include')),
442 442 (
443 443 b'',
444 444 b'auto-remove-includes',
445 445 False,
446 446 _(b'automatically choose unused includes to remove'),
447 447 ),
448 448 (b'', b'addexclude', [], _(b'new paths to exclude')),
449 449 (b'', b'import-rules', b'', _(b'import narrowspecs from a file')),
450 450 (b'', b'removeexclude', [], _(b'old paths to no longer exclude')),
451 451 (
452 452 b'',
453 453 b'clear',
454 454 False,
455 455 _(b'whether to replace the existing narrowspec'),
456 456 ),
457 457 (
458 458 b'',
459 459 b'force-delete-local-changes',
460 460 False,
461 461 _(b'forces deletion of local changes when narrowing'),
462 462 ),
463 463 (
464 464 b'',
465 465 b'backup',
466 466 True,
467 467 _(b'back up local changes when narrowing'),
468 468 ),
469 469 (
470 470 b'',
471 471 b'update-working-copy',
472 472 False,
473 473 _(b'update working copy when the store has changed'),
474 474 ),
475 475 ]
476 476 + commands.remoteopts,
477 477 _(b'[OPTIONS]... [REMOTE]'),
478 478 inferrepo=True,
479 479 helpcategory=command.CATEGORY_MAINTENANCE,
480 480 )
481 481 def trackedcmd(ui, repo, remotepath=None, *pats, **opts):
482 482 """show or change the current narrowspec
483 483
484 484 With no argument, shows the current narrowspec entries, one per line. Each
485 485 line will be prefixed with 'I' or 'X' for included or excluded patterns,
486 486 respectively.
487 487
488 488 The narrowspec is comprised of expressions to match remote files and/or
489 489 directories that should be pulled into your client.
490 490 The narrowspec has *include* and *exclude* expressions, with excludes always
491 491 trumping includes: that is, if a file matches an exclude expression, it will
492 492 be excluded even if it also matches an include expression.
493 493 Excluding files that were never included has no effect.
494 494
495 495 Each included or excluded entry is in the format described by
496 496 'hg help patterns'.
497 497
498 498 The options allow you to add or remove included and excluded expressions.
499 499
500 500 If --clear is specified, then all previous includes and excludes are DROPPED
501 501 and replaced by the new ones specified to --addinclude and --addexclude.
502 502 If --clear is specified without any further options, the narrowspec will be
503 503 empty and will not match any files.
504 504
505 505 If --auto-remove-includes is specified, then those includes that don't match
506 506 any files modified by currently visible local commits (those not shared by
507 507 the remote) will be added to the set of explicitly specified includes to
508 508 remove.
509 509
510 510 --import-rules accepts a path to a file containing rules, allowing you to
511 511 add --addinclude, --addexclude rules in bulk. Like the other include and
512 512 exclude switches, the changes are applied immediately.
513 513 """
514 514 opts = pycompat.byteskwargs(opts)
515 515 if requirements.NARROW_REQUIREMENT not in repo.requirements:
516 516 raise error.InputError(
517 517 _(
518 518 b'the tracked command is only supported on '
519 519 b'repositories cloned with --narrow'
520 520 )
521 521 )
522 522
523 523 # Before supporting it, decide whether "hg tracked --clear" should mean
524 524 # tracking no paths or all paths.
525 525 if opts[b'clear']:
526 526 raise error.InputError(_(b'the --clear option is not yet supported'))
527 527
528 528 # import rules from a file
529 529 newrules = opts.get(b'import_rules')
530 530 if newrules:
531 531 try:
532 532 filepath = os.path.join(encoding.getcwd(), newrules)
533 533 fdata = util.readfile(filepath)
534 534 except IOError as inst:
535 535 raise error.StorageError(
536 536 _(b"cannot read narrowspecs from '%s': %s")
537 537 % (filepath, encoding.strtolocal(inst.strerror))
538 538 )
539 539 includepats, excludepats, profiles = sparse.parseconfig(
540 540 ui, fdata, b'narrow'
541 541 )
542 542 if profiles:
543 543 raise error.InputError(
544 544 _(
545 545 b"including other spec files using '%include' "
546 546 b"is not supported in narrowspec"
547 547 )
548 548 )
549 549 opts[b'addinclude'].extend(includepats)
550 550 opts[b'addexclude'].extend(excludepats)
551 551
552 552 addedincludes = narrowspec.parsepatterns(opts[b'addinclude'])
553 553 removedincludes = narrowspec.parsepatterns(opts[b'removeinclude'])
554 554 addedexcludes = narrowspec.parsepatterns(opts[b'addexclude'])
555 555 removedexcludes = narrowspec.parsepatterns(opts[b'removeexclude'])
556 556 autoremoveincludes = opts[b'auto_remove_includes']
557 557
558 558 update_working_copy = opts[b'update_working_copy']
559 559 only_show = not (
560 560 addedincludes
561 561 or removedincludes
562 562 or addedexcludes
563 563 or removedexcludes
564 564 or newrules
565 565 or autoremoveincludes
566 566 or update_working_copy
567 567 )
568 568
569 569 # Only print the current narrowspec.
570 570 if only_show:
571 571 oldincludes, oldexcludes = repo.narrowpats
572 572 ui.pager(b'tracked')
573 573 fm = ui.formatter(b'narrow', opts)
574 574 for i in sorted(oldincludes):
575 575 fm.startitem()
576 576 fm.write(b'status', b'%s ', b'I', label=b'narrow.included')
577 577 fm.write(b'pat', b'%s\n', i, label=b'narrow.included')
578 578 for i in sorted(oldexcludes):
579 579 fm.startitem()
580 580 fm.write(b'status', b'%s ', b'X', label=b'narrow.excluded')
581 581 fm.write(b'pat', b'%s\n', i, label=b'narrow.excluded')
582 582 fm.end()
583 583 return 0
584 584
585 585 with repo.wlock(), repo.lock():
586 586 oldincludes, oldexcludes = repo.narrowpats
587 587
588 588 # filter the user passed additions and deletions into actual additions and
589 589 # deletions of excludes and includes
590 590 addedincludes -= oldincludes
591 591 removedincludes &= oldincludes
592 592 addedexcludes -= oldexcludes
593 593 removedexcludes &= oldexcludes
594 594
595 595 widening = addedincludes or removedexcludes
596 596 narrowing = removedincludes or addedexcludes
597 597
598 598 if update_working_copy:
599 599 with repo.transaction(b'narrow-wc'), repo.dirstate.changing_parents(
600 600 repo
601 601 ):
602 602 narrowspec.updateworkingcopy(repo)
603 603 narrowspec.copytoworkingcopy(repo)
604 604 return 0
605 605
606 606 if not (widening or narrowing or autoremoveincludes):
607 607 ui.status(_(b"nothing to widen or narrow\n"))
608 608 return 0
609 609
610 610 cmdutil.bailifchanged(repo)
611 611
612 612 # Find the revisions we have in common with the remote. These will
613 613 # be used for finding local-only changes for narrowing. They will
614 614 # also define the set of revisions to update for widening.
615 615 path = urlutil.get_unique_pull_path_obj(b'tracked', ui, remotepath)
616 616 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(path.loc))
617 617 remote = hg.peer(repo, opts, path)
618 618
619 619 try:
620 620 # check narrow support before doing anything if widening needs to be
621 621 # performed. In future we should also abort if client is ellipses and
622 622 # server does not support ellipses
623 623 if (
624 624 widening
625 625 and wireprototypes.NARROWCAP not in remote.capabilities()
626 626 ):
627 627 raise error.Abort(_(b"server does not support narrow clones"))
628 628
629 629 commoninc = discovery.findcommonincoming(repo, remote)
630 630
631 631 if autoremoveincludes:
632 632 outgoing = discovery.findcommonoutgoing(
633 633 repo, remote, commoninc=commoninc
634 634 )
635 635 ui.status(_(b'looking for unused includes to remove\n'))
636 636 localfiles = set()
637 637 for n in itertools.chain(outgoing.missing, outgoing.excluded):
638 638 localfiles.update(repo[n].files())
639 639 suggestedremovals = []
640 640 for include in sorted(oldincludes):
641 641 match = narrowspec.match(repo.root, [include], oldexcludes)
642 642 if not any(match(f) for f in localfiles):
643 643 suggestedremovals.append(include)
644 644 if suggestedremovals:
645 645 for s in suggestedremovals:
646 646 ui.status(b'%s\n' % s)
647 647 if (
648 648 ui.promptchoice(
649 649 _(
650 650 b'remove these unused includes (yn)?'
651 651 b'$$ &Yes $$ &No'
652 652 )
653 653 )
654 654 == 0
655 655 ):
656 656 removedincludes.update(suggestedremovals)
657 657 narrowing = True
658 658 else:
659 659 ui.status(_(b'found no unused includes\n'))
660 660
661 661 if narrowing:
662 662 newincludes = oldincludes - removedincludes
663 663 newexcludes = oldexcludes | addedexcludes
664 664 _narrow(
665 665 ui,
666 666 repo,
667 667 remote,
668 668 commoninc,
669 669 oldincludes,
670 670 oldexcludes,
671 671 newincludes,
672 672 newexcludes,
673 673 opts[b'force_delete_local_changes'],
674 674 opts[b'backup'],
675 675 )
676 676 # _narrow() updated the narrowspec and _widen() below needs to
677 677 # use the updated values as its base (otherwise removed includes
678 678 # and added excludes will be lost in the resulting narrowspec)
679 679 oldincludes = newincludes
680 680 oldexcludes = newexcludes
681 681
682 682 if widening:
683 683 newincludes = oldincludes | addedincludes
684 684 newexcludes = oldexcludes - removedexcludes
685 685 _widen(
686 686 ui,
687 687 repo,
688 688 remote,
689 689 commoninc,
690 690 oldincludes,
691 691 oldexcludes,
692 692 newincludes,
693 693 newexcludes,
694 694 )
695 695 finally:
696 696 remote.close()
697 697
698 698 return 0
@@ -1,396 +1,396
1 1 import threading
2 2
3 3 from mercurial.node import (
4 4 hex,
5 5 sha1nodeconstants,
6 6 )
7 7 from mercurial.pycompat import getattr
8 8 from mercurial import (
9 9 mdiff,
10 10 revlog,
11 11 )
12 12 from . import (
13 13 basestore,
14 14 constants,
15 15 shallowutil,
16 16 )
17 17
18 18
19 19 class ChainIndicies:
20 20 """A static class for easy reference to the delta chain indicies."""
21 21
22 22 # The filename of this revision delta
23 23 NAME = 0
24 24 # The mercurial file node for this revision delta
25 25 NODE = 1
26 26 # The filename of the delta base's revision. This is useful when the delta
27 27 # is between different files (as in the case of a move or copy, where we can
28 28 # delta against the original file content).
29 29 BASENAME = 2
30 30 # The mercurial file node for the delta base revision. This is the nullid if
31 31 # this delta is a full text.
32 32 BASENODE = 3
33 33 # The actual delta or full text data.
34 34 DATA = 4
35 35
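# Illustrative sketch of unpacking one element of the list returned by
# getdeltachain() with the indices above; `entry` here is a hypothetical
# chain element, shown only to make the tuple layout concrete:
#
#   name = entry[ChainIndicies.NAME]
#   basenode = entry[ChainIndicies.BASENODE]
#   if basenode == sha1nodeconstants.nullid:
#       fulltext = entry[ChainIndicies.DATA]  # a full text, not a delta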
36 36
37 37 class unioncontentstore(basestore.baseunionstore):
38 38 def __init__(self, *args, **kwargs):
39 39 super(unioncontentstore, self).__init__(*args, **kwargs)
40 40
41 41 self.stores = args
42 42 self.writestore = kwargs.get('writestore')
43 43
44 44 # If allowincomplete==True then the union store can return partial
45 45 # delta chains, otherwise it will throw a KeyError if a full
46 46 # deltachain can't be found.
47 47 self.allowincomplete = kwargs.get('allowincomplete', False)
48 48
49 49 def get(self, name, node):
50 50 """Fetches the full text revision contents of the given name+node pair.
51 51 If the full text doesn't exist, throws a KeyError.
52 52
53 53 Under the hood, this uses getdeltachain() across all the stores to build
54 54 up a full chain to produce the full text.
55 55 """
56 56 chain = self.getdeltachain(name, node)
57 57
58 58 if chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
59 59 # If we didn't receive a full chain, throw
60 60 raise KeyError((name, hex(node)))
61 61
62 62 # The last entry in the chain is a full text, so we start our delta
63 63 # applies with that.
64 64 fulltext = chain.pop()[ChainIndicies.DATA]
65 65
66 66 text = fulltext
67 67 while chain:
68 68 delta = chain.pop()[ChainIndicies.DATA]
69 69 text = mdiff.patches(text, [delta])
70 70
71 71 return text
72 72
73 73 @basestore.baseunionstore.retriable
74 74 def getdelta(self, name, node):
75 75 """Return the single delta entry for the given name/node pair."""
76 76 for store in self.stores:
77 77 try:
78 78 return store.getdelta(name, node)
79 79 except KeyError:
80 80 pass
81 81
82 82 raise KeyError((name, hex(node)))
83 83
84 84 def getdeltachain(self, name, node):
85 85 """Returns the deltachain for the given name/node pair.
86 86
87 87 Returns an ordered list of:
88 88
89 89 [(name, node, deltabasename, deltabasenode, deltacontent),...]
90 90
91 91 where the chain is terminated by a full text entry with a nullid
92 92 deltabasenode.
93 93 """
94 94 chain = self._getpartialchain(name, node)
95 95 while chain[-1][ChainIndicies.BASENODE] != sha1nodeconstants.nullid:
96 96 x, x, deltabasename, deltabasenode, x = chain[-1]
97 97 try:
98 98 morechain = self._getpartialchain(deltabasename, deltabasenode)
99 99 chain.extend(morechain)
100 100 except KeyError:
101 101 # If we allow incomplete chains, don't throw.
102 102 if not self.allowincomplete:
103 103 raise
104 104 break
105 105
106 106 return chain
107 107
108 108 @basestore.baseunionstore.retriable
109 109 def getmeta(self, name, node):
110 110 """Returns the metadata dict for given node."""
111 111 for store in self.stores:
112 112 try:
113 113 return store.getmeta(name, node)
114 114 except KeyError:
115 115 pass
116 116 raise KeyError((name, hex(node)))
117 117
118 118 def getmetrics(self):
119 119 metrics = [s.getmetrics() for s in self.stores]
120 120 return shallowutil.sumdicts(*metrics)
121 121
122 122 @basestore.baseunionstore.retriable
123 123 def _getpartialchain(self, name, node):
124 124 """Returns a partial delta chain for the given name/node pair.
125 125
126 126 A partial chain is a chain that may not be terminated in a full-text.
127 127 """
128 128 for store in self.stores:
129 129 try:
130 130 return store.getdeltachain(name, node)
131 131 except KeyError:
132 132 pass
133 133
134 134 raise KeyError((name, hex(node)))
135 135
136 136 def add(self, name, node, data):
137 137 raise RuntimeError(
138 138 b"cannot add content only to remotefilelog contentstore"
139 139 )
140 140
141 141 def getmissing(self, keys):
142 142 missing = keys
143 143 for store in self.stores:
144 144 if missing:
145 145 missing = store.getmissing(missing)
146 146 return missing
147 147
148 148 def addremotefilelognode(self, name, node, data):
149 149 if self.writestore:
150 150 self.writestore.addremotefilelognode(name, node, data)
151 151 else:
152 152 raise RuntimeError(b"no writable store configured")
153 153
154 154 def markledger(self, ledger, options=None):
155 155 for store in self.stores:
156 156 store.markledger(ledger, options)
157 157
158 158
159 159 class remotefilelogcontentstore(basestore.basestore):
160 160 def __init__(self, *args, **kwargs):
161 161 super(remotefilelogcontentstore, self).__init__(*args, **kwargs)
162 162 self._threaddata = threading.local()
163 163
164 164 def get(self, name, node):
165 165 # return raw revision text
166 166 data = self._getdata(name, node)
167 167
168 168 offset, size, flags = shallowutil.parsesizeflags(data)
169 169 content = data[offset : offset + size]
170 170
171 171 ancestormap = shallowutil.ancestormap(data)
172 172 p1, p2, linknode, copyfrom = ancestormap[node]
173 173 copyrev = None
174 174 if copyfrom:
175 175 copyrev = hex(p1)
176 176
177 177 self._updatemetacache(node, size, flags)
178 178
179 179 # lfs tracks renames in its own metadata, remove hg copy metadata,
180 180 # because copy metadata will be re-added by lfs flag processor.
181 181 if flags & revlog.REVIDX_EXTSTORED:
182 182 copyrev = copyfrom = None
183 183 revision = shallowutil.createrevlogtext(content, copyfrom, copyrev)
184 184 return revision
185 185
186 186 def getdelta(self, name, node):
187 187 # Since remotefilelog content stores only contain full texts, just
188 188 # return that.
189 189 revision = self.get(name, node)
190 190 return (
191 191 revision,
192 192 name,
193 193 sha1nodeconstants.nullid,
194 194 self.getmeta(name, node),
195 195 )
196 196
197 197 def getdeltachain(self, name, node):
198 198 # Since remotefilelog content stores just contain full texts, we return
199 199 # a fake delta chain that just consists of a single full text revision.
200 200 # The nullid in the deltabasenode slot indicates that the revision is a
201 201 # fulltext.
202 202 revision = self.get(name, node)
203 203 return [(name, node, None, sha1nodeconstants.nullid, revision)]
204 204
205 205 def getmeta(self, name, node):
206 206 self._sanitizemetacache()
207 207 if node != self._threaddata.metacache[0]:
208 208 data = self._getdata(name, node)
209 209 offset, size, flags = shallowutil.parsesizeflags(data)
210 210 self._updatemetacache(node, size, flags)
211 211 return self._threaddata.metacache[1]
212 212
213 213 def add(self, name, node, data):
214 214 raise RuntimeError(
215 215 b"cannot add content only to remotefilelog contentstore"
216 216 )
217 217
218 218 def _sanitizemetacache(self):
219 219 metacache = getattr(self._threaddata, 'metacache', None)
220 220 if metacache is None:
221 221 self._threaddata.metacache = (None, None) # (node, meta)
222 222
223 223 def _updatemetacache(self, node, size, flags):
224 224 self._sanitizemetacache()
225 225 if node == self._threaddata.metacache[0]:
226 226 return
227 227 meta = {constants.METAKEYFLAG: flags, constants.METAKEYSIZE: size}
228 228 self._threaddata.metacache = (node, meta)
229 229
230 230
231 231 class remotecontentstore:
232 232 def __init__(self, ui, fileservice, shared):
233 233 self._fileservice = fileservice
234 234 # type(shared) is usually remotefilelogcontentstore
235 235 self._shared = shared
236 236
237 237 def get(self, name, node):
238 238 self._fileservice.prefetch(
239 239 [(name, hex(node))], force=True, fetchdata=True
240 240 )
241 241 return self._shared.get(name, node)
242 242
243 243 def getdelta(self, name, node):
244 244 revision = self.get(name, node)
245 245 return (
246 246 revision,
247 247 name,
248 248 sha1nodeconstants.nullid,
249 249 self._shared.getmeta(name, node),
250 250 )
251 251
252 252 def getdeltachain(self, name, node):
253 253 # Since our remote content stores just contain full texts, we return a
254 254 # fake delta chain that just consists of a single full text revision.
255 255 # The nullid in the deltabasenode slot indicates that the revision is a
256 256 # fulltext.
257 257 revision = self.get(name, node)
258 258 return [(name, node, None, sha1nodeconstants.nullid, revision)]
259 259
260 260 def getmeta(self, name, node):
261 261 self._fileservice.prefetch(
262 262 [(name, hex(node))], force=True, fetchdata=True
263 263 )
264 264 return self._shared.getmeta(name, node)
265 265
266 266 def add(self, name, node, data):
267 267 raise RuntimeError(b"cannot add to a remote store")
268 268
269 269 def getmissing(self, keys):
270 270 return keys
271 271
272 272 def markledger(self, ledger, options=None):
273 273 pass
274 274
275 275
276 276 class manifestrevlogstore:
277 277 def __init__(self, repo):
278 278 self._store = repo.store
279 279 self._svfs = repo.svfs
280 280 self._revlogs = dict()
281 281 self._cl = revlog.revlog(self._svfs, radix=b'00changelog.i')
282 282 self._repackstartlinkrev = 0
283 283
284 284 def get(self, name, node):
285 285 return self._revlog(name).rawdata(node)
286 286
287 287 def getdelta(self, name, node):
288 288 revision = self.get(name, node)
289 289 return revision, name, self._cl.nullid, self.getmeta(name, node)
290 290
291 291 def getdeltachain(self, name, node):
292 292 revision = self.get(name, node)
293 293 return [(name, node, None, self._cl.nullid, revision)]
294 294
295 295 def getmeta(self, name, node):
296 296 rl = self._revlog(name)
297 297 rev = rl.rev(node)
298 298 return {
299 299 constants.METAKEYFLAG: rl.flags(rev),
300 300 constants.METAKEYSIZE: rl.rawsize(rev),
301 301 }
302 302
303 303 def getancestors(self, name, node, known=None):
304 304 if known is None:
305 305 known = set()
306 306 if node in known:
307 307 return []
308 308
309 309 rl = self._revlog(name)
310 310 ancestors = {}
311 311 missing = {node}
312 312 for ancrev in rl.ancestors([rl.rev(node)], inclusive=True):
313 313 ancnode = rl.node(ancrev)
314 314 missing.discard(ancnode)
315 315
316 316 p1, p2 = rl.parents(ancnode)
317 317 if p1 != self._cl.nullid and p1 not in known:
318 318 missing.add(p1)
319 319 if p2 != self._cl.nullid and p2 not in known:
320 320 missing.add(p2)
321 321
322 322 linknode = self._cl.node(rl.linkrev(ancrev))
323 323 ancestors[rl.node(ancrev)] = (p1, p2, linknode, b'')
324 324 if not missing:
325 325 break
326 326 return ancestors
327 327
328 328 def getnodeinfo(self, name, node):
329 329 cl = self._cl
330 330 rl = self._revlog(name)
331 331 parents = rl.parents(node)
332 332 linkrev = rl.linkrev(rl.rev(node))
333 333 return (parents[0], parents[1], cl.node(linkrev), None)
334 334
335 335 def add(self, *args):
336 336 raise RuntimeError(b"cannot add to a revlog store")
337 337
338 338 def _revlog(self, name):
339 339 rl = self._revlogs.get(name)
340 340 if rl is None:
341 341 revlogname = b'00manifesttree'
342 342 if name != b'':
343 343 revlogname = b'meta/%s/00manifest' % name
344 344 rl = revlog.revlog(self._svfs, radix=revlogname)
345 345 self._revlogs[name] = rl
346 346 return rl
347 347
348 348 def getmissing(self, keys):
349 349 missing = []
350 350 for name, node in keys:
351 351 mfrevlog = self._revlog(name)
352 352 if node not in mfrevlog.nodemap:
353 353 missing.append((name, node))
354 354
355 355 return missing
356 356
357 357 def setrepacklinkrevrange(self, startrev, endrev):
358 358 self._repackstartlinkrev = startrev
359 359 self._repackendlinkrev = endrev
360 360
361 361 def markledger(self, ledger, options=None):
362 362 if options and options.get(constants.OPTION_PACKSONLY):
363 363 return
364 364 treename = b''
365 365 rl = revlog.revlog(self._svfs, radix=b'00manifesttree')
366 366 startlinkrev = self._repackstartlinkrev
367 367 endlinkrev = self._repackendlinkrev
368 368 for rev in range(len(rl) - 1, -1, -1):
369 369 linkrev = rl.linkrev(rev)
370 370 if linkrev < startlinkrev:
371 371 break
372 372 if linkrev > endlinkrev:
373 373 continue
374 374 node = rl.node(rev)
375 375 ledger.markdataentry(self, treename, node)
376 376 ledger.markhistoryentry(self, treename, node)
377 377
378 for t, path, size in self._store.datafiles():
378 for t, path, size in self._store.data_entries():
379 379 if path[:5] != b'meta/' or path[-2:] != b'.i':
380 380 continue
381 381
382 382 treename = path[5 : -len(b'/00manifest')]
383 383
384 384 rl = revlog.revlog(self._svfs, indexfile=path[:-2])
385 385 for rev in range(len(rl) - 1, -1, -1):
386 386 linkrev = rl.linkrev(rev)
387 387 if linkrev < startlinkrev:
388 388 break
389 389 if linkrev > endlinkrev:
390 390 continue
391 391 node = rl.node(rev)
392 392 ledger.markdataentry(self, treename, node)
393 393 ledger.markhistoryentry(self, treename, node)
394 394
395 395 def cleanup(self, ledger):
396 396 pass
@@ -1,442 +1,442
1 1 # remotefilelogserver.py - server logic for a remotefilelog server
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import os
9 9 import stat
10 10 import time
11 11 import zlib
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.node import bin, hex
15 15 from mercurial.pycompat import open
16 16 from mercurial import (
17 17 changegroup,
18 18 changelog,
19 19 context,
20 20 error,
21 21 extensions,
22 22 match,
23 23 scmutil,
24 24 store,
25 25 streamclone,
26 26 util,
27 27 wireprotoserver,
28 28 wireprototypes,
29 29 wireprotov1server,
30 30 )
31 31 from . import (
32 32 constants,
33 33 shallowutil,
34 34 )
35 35
36 36 _sshv1server = wireprotoserver.sshv1protocolhandler
37 37
38 38
39 39 def setupserver(ui, repo):
40 40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
41 41 onetimesetup(ui)
42 42
43 43 # don't send files to shallow clients during pulls
44 44 def generatefiles(
45 45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
46 46 ):
47 47 caps = self._bundlecaps or []
48 48 if constants.BUNDLE2_CAPABLITY in caps:
49 49 # only send files that don't match the specified patterns
50 50 includepattern = None
51 51 excludepattern = None
52 52 for cap in self._bundlecaps or []:
53 53 if cap.startswith(b"includepattern="):
54 54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
55 55 elif cap.startswith(b"excludepattern="):
56 56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
57 57
58 58 m = match.always()
59 59 if includepattern or excludepattern:
60 60 m = match.match(
61 61 repo.root, b'', None, includepattern, excludepattern
62 62 )
63 63
64 64 changedfiles = list([f for f in changedfiles if not m(f)])
65 65 return orig(
66 66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
67 67 )
68 68
69 69 extensions.wrapfunction(
70 70 changegroup.cgpacker, b'generatefiles', generatefiles
71 71 )
72 72
73 73
74 74 onetime = False
75 75
76 76
77 77 def onetimesetup(ui):
78 78 """Configures the wireprotocol for both clients and servers."""
79 79 global onetime
80 80 if onetime:
81 81 return
82 82 onetime = True
83 83
84 84 # support file content requests
85 85 wireprotov1server.wireprotocommand(
86 86 b'x_rfl_getflogheads', b'path', permission=b'pull'
87 87 )(getflogheads)
88 88 wireprotov1server.wireprotocommand(
89 89 b'x_rfl_getfiles', b'', permission=b'pull'
90 90 )(getfiles)
91 91 wireprotov1server.wireprotocommand(
92 92 b'x_rfl_getfile', b'file node', permission=b'pull'
93 93 )(getfile)
94 94
95 95 class streamstate:
96 96 match = None
97 97 shallowremote = False
98 98 noflatmf = False
99 99
100 100 state = streamstate()
101 101
102 102 def stream_out_shallow(repo, proto, other):
103 103 includepattern = None
104 104 excludepattern = None
105 105 raw = other.get(b'includepattern')
106 106 if raw:
107 107 includepattern = raw.split(b'\0')
108 108 raw = other.get(b'excludepattern')
109 109 if raw:
110 110 excludepattern = raw.split(b'\0')
111 111
112 112 oldshallow = state.shallowremote
113 113 oldmatch = state.match
114 114 oldnoflatmf = state.noflatmf
115 115 try:
116 116 state.shallowremote = True
117 117 state.match = match.always()
118 118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
119 119 if includepattern or excludepattern:
120 120 state.match = match.match(
121 121 repo.root, b'', None, includepattern, excludepattern
122 122 )
123 123 streamres = wireprotov1server.stream(repo, proto)
124 124
125 125 # Force the first value to execute, so the file list is computed
126 126 # within the try/finally scope
127 127 first = next(streamres.gen)
128 128 second = next(streamres.gen)
129 129
130 130 def gen():
131 131 yield first
132 132 yield second
133 133 for value in streamres.gen:
134 134 yield value
135 135
136 136 return wireprototypes.streamres(gen())
137 137 finally:
138 138 state.shallowremote = oldshallow
139 139 state.match = oldmatch
140 140 state.noflatmf = oldnoflatmf
141 141
142 142 wireprotov1server.commands[b'stream_out_shallow'] = (
143 143 stream_out_shallow,
144 144 b'*',
145 145 )
146 146
147 147 # don't clone filelogs to shallow clients
148 148 def _walkstreamfiles(orig, repo, matcher=None):
149 149 if state.shallowremote:
150 150 # if we are shallow ourselves, stream our local commits
151 151 if shallowutil.isenabled(repo):
152 152 striplen = len(repo.store.path) + 1
153 153 readdir = repo.store.rawvfs.readdir
154 154 visit = [os.path.join(repo.store.path, b'data')]
155 155 while visit:
156 156 p = visit.pop()
157 157 for f, kind, st in readdir(p, stat=True):
158 158 fp = p + b'/' + f
159 159 if kind == stat.S_IFREG:
160 160 if not fp.endswith(b'.i') and not fp.endswith(
161 161 b'.d'
162 162 ):
163 163 n = util.pconvert(fp[striplen:])
164 164 d = store.decodedir(n)
165 165 yield store.SimpleStoreEntry(
166 166 entry_path=d,
167 167 is_volatile=False,
168 168 file_size=st.st_size,
169 169 )
170 170
171 171 if kind == stat.S_IFDIR:
172 172 visit.append(fp)
173 173
174 174 if scmutil.istreemanifest(repo):
175 for entry in repo.store.datafiles():
175 for entry in repo.store.data_entries():
176 176 if not entry.is_revlog:
177 177 continue
178 178 if entry.is_manifestlog:
179 179 yield entry
180 180
181 181 # Return .d and .i files that do not match the shallow pattern
182 182 match = state.match
183 183 if match and not match.always():
184 for entry in repo.store.datafiles():
184 for entry in repo.store.data_entries():
185 185 if not entry.is_revlog:
186 186 continue
187 187 if not state.match(entry.target_id):
188 188 yield entry
189 189
190 190 for x in repo.store.topfiles():
191 191 if state.noflatmf and x[1][:11] == b'00manifest.':
192 192 continue
193 193 yield x
194 194
195 195 elif shallowutil.isenabled(repo):
196 196 # don't allow cloning from a shallow repo to a full repo
197 197 # since it would require fetching every version of every
198 198 # file in order to create the revlogs.
199 199 raise error.Abort(
200 200 _(b"Cannot clone from a shallow repo to a full repo.")
201 201 )
202 202 else:
203 203 for x in orig(repo, matcher):
204 204 yield x
205 205
206 206 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
207 207
208 208 # expose remotefilelog capabilities
209 209 def _capabilities(orig, repo, proto):
210 210 caps = orig(repo, proto)
211 211 if shallowutil.isenabled(repo) or ui.configbool(
212 212 b'remotefilelog', b'server'
213 213 ):
214 214 if isinstance(proto, _sshv1server):
215 215 # legacy getfiles method which only works over ssh
216 216 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
217 217 caps.append(b'x_rfl_getflogheads')
218 218 caps.append(b'x_rfl_getfile')
219 219 return caps
220 220
221 221 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
222 222
223 223 def _adjustlinkrev(orig, self, *args, **kwargs):
224 224 # When generating file blobs, taking the real path is too slow on large
225 225 # repos, so force it to just return the linkrev directly.
226 226 repo = self._repo
227 227 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
228 228 return self._filelog.linkrev(self._filelog.rev(self._filenode))
229 229 return orig(self, *args, **kwargs)
230 230
231 231 extensions.wrapfunction(
232 232 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
233 233 )
234 234
235 235 def _iscmd(orig, cmd):
236 236 if cmd == b'x_rfl_getfiles':
237 237 return False
238 238 return orig(cmd)
239 239
240 240 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
241 241
242 242
243 243 def _loadfileblob(repo, cachepath, path, node):
244 244 filecachepath = os.path.join(cachepath, path, hex(node))
245 245 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
246 246 filectx = repo.filectx(path, fileid=node)
247 247 if filectx.node() == repo.nullid:
248 248 repo.changelog = changelog.changelog(repo.svfs)
249 249 filectx = repo.filectx(path, fileid=node)
250 250
251 251 text = createfileblob(filectx)
252 252 # TODO configurable compression engines
253 253 text = zlib.compress(text)
254 254
255 255 # everything should be user & group read/writable
256 256 oldumask = os.umask(0o002)
257 257 try:
258 258 dirname = os.path.dirname(filecachepath)
259 259 if not os.path.exists(dirname):
260 260 try:
261 261 os.makedirs(dirname)
262 262 except FileExistsError:
263 263 pass
264 264
265 265 f = None
266 266 try:
267 267 f = util.atomictempfile(filecachepath, b"wb")
268 268 f.write(text)
269 269 except (IOError, OSError):
270 270 # Don't abort if the user only has permission to read,
271 271 # and not write.
272 272 pass
273 273 finally:
274 274 if f:
275 275 f.close()
276 276 finally:
277 277 os.umask(oldumask)
278 278 else:
279 279 with open(filecachepath, b"rb") as f:
280 280 text = f.read()
281 281 return text
282 282
283 283
284 284 def getflogheads(repo, proto, path):
285 285 """A server api for requesting a filelog's heads"""
286 286 flog = repo.file(path)
287 287 heads = flog.heads()
288 288 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
289 289
290 290
291 291 def getfile(repo, proto, file, node):
292 292 """A server api for requesting a particular version of a file. Can be used
293 293 in batches to request many files at once. The return protocol is:
294 294 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
295 295 non-zero for an error.
296 296
297 297 data is a compressed blob with revlog flag and ancestors information. See
298 298 createfileblob for its content.
299 299 """
300 300 if shallowutil.isenabled(repo):
301 301 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
302 302 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
303 303 if not cachepath:
304 304 cachepath = os.path.join(repo.path, b"remotefilelogcache")
305 305 node = bin(node.strip())
306 306 if node == repo.nullid:
307 307 return b'0\0'
308 308 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
309 309
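# Illustrative sketch of how a client could unpack the
# <errorcode>\0<data/errormsg> reply documented in getfile() above; the
# helper name is hypothetical and not part of the wire protocol code.
def _parse_getfile_reply_example(reply):
    code, _, payload = reply.partition(b'\0')
    if code != b'0':
        # on failure the payload carries the error message
        raise error.Abort(payload)
    if not payload:
        # a request for the null node is answered with an empty payload
        return b''
    # on success the payload is the zlib-compressed blob from _loadfileblob()
    return zlib.decompress(payload)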
310 310
311 311 def getfiles(repo, proto):
312 312 """A server api for requesting particular versions of particular files."""
313 313 if shallowutil.isenabled(repo):
314 314 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
315 315 if not isinstance(proto, _sshv1server):
316 316 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
317 317
318 318 def streamer():
319 319 fin = proto._fin
320 320
321 321 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
322 322 if not cachepath:
323 323 cachepath = os.path.join(repo.path, b"remotefilelogcache")
324 324
325 325 while True:
326 326 request = fin.readline()[:-1]
327 327 if not request:
328 328 break
329 329
330 330 node = bin(request[:40])
331 331 if node == repo.nullid:
332 332 yield b'0\n'
333 333 continue
334 334
335 335 path = request[40:]
336 336
337 337 text = _loadfileblob(repo, cachepath, path, node)
338 338
339 339 yield b'%d\n%s' % (len(text), text)
340 340
341 341 # it would be better to only flush after processing a whole batch
342 342 # but currently we don't know if there are more requests coming
343 343 proto._fout.flush()
344 344
345 345 return wireprototypes.streamres(streamer())
346 346
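# Summary of the wire format implemented by the streamer above: each request
# line is 40 hex characters of the file node immediately followed by the
# path, and an empty line ends the stream. Each reply is b'%d\n' with the
# blob length, followed by the zlib-compressed blob; a request for the null
# node is answered with b'0\n'.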
347 347
348 348 def createfileblob(filectx):
349 349 """
350 350 format:
351 351 v0:
352 352 str(len(rawtext)) + '\0' + rawtext + ancestortext
353 353 v1:
354 354 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
355 355 metalist := metalist + '\n' + meta | meta
356 356 meta := sizemeta | flagmeta
357 357 sizemeta := METAKEYSIZE + str(len(rawtext))
358 358 flagmeta := METAKEYFLAG + str(flag)
359 359
360 360 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
361 361 length of 1.
362 362 """
363 363 flog = filectx.filelog()
364 364 frev = filectx.filerev()
365 365 revlogflags = flog._revlog.flags(frev)
366 366 if revlogflags == 0:
367 367 # normal files
368 368 text = filectx.data()
369 369 else:
370 370 # lfs, read raw revision data
371 371 text = flog.rawdata(frev)
372 372
373 373 repo = filectx._repo
374 374
375 375 ancestors = [filectx]
376 376
377 377 try:
378 378 repo.forcelinkrev = True
379 379 ancestors.extend([f for f in filectx.ancestors()])
380 380
381 381 ancestortext = b""
382 382 for ancestorctx in ancestors:
383 383 parents = ancestorctx.parents()
384 384 p1 = repo.nullid
385 385 p2 = repo.nullid
386 386 if len(parents) > 0:
387 387 p1 = parents[0].filenode()
388 388 if len(parents) > 1:
389 389 p2 = parents[1].filenode()
390 390
391 391 copyname = b""
392 392 rename = ancestorctx.renamed()
393 393 if rename:
394 394 copyname = rename[0]
395 395 linknode = ancestorctx.node()
396 396 ancestortext += b"%s%s%s%s%s\0" % (
397 397 ancestorctx.filenode(),
398 398 p1,
399 399 p2,
400 400 linknode,
401 401 copyname,
402 402 )
403 403 finally:
404 404 repo.forcelinkrev = False
405 405
406 406 header = shallowutil.buildfileblobheader(len(text), revlogflags)
407 407
408 408 return b"%s\0%s%s" % (header, text, ancestortext)
409 409
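# Illustrative sketch of splitting a blob in the format documented by
# createfileblob() above. The real parsing lives in
# shallowutil.parsesizeflags() and shallowutil.ancestormap(); the helper
# name below is hypothetical.
def _split_fileblob_example(raw):
    # the header and the payload are separated by the first NUL byte
    header, _, rest = raw.partition(b'\0')
    size = None
    if header.startswith(b'v1\n'):
        # v1: newline-separated single-letter metadata entries follow 'v1'
        for meta in header[len(b'v1\n') :].split(b'\n'):
            if meta[:1] == constants.METAKEYSIZE:
                size = int(meta[1:])
    else:
        # v0: the header is just the decimal length of the raw text
        size = int(header)
    return rest[:size], rest[size:]  # (rawtext, ancestortext)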
410 410
411 411 def gcserver(ui, repo):
412 412 if not repo.ui.configbool(b"remotefilelog", b"server"):
413 413 return
414 414
415 415 neededfiles = set()
416 416 heads = repo.revs(b"heads(tip~25000:) - null")
417 417
418 418 cachepath = repo.vfs.join(b"remotefilelogcache")
419 419 for head in heads:
420 420 mf = repo[head].manifest()
421 421 for filename, filenode in mf.items():
422 422 filecachepath = os.path.join(cachepath, filename, hex(filenode))
423 423 neededfiles.add(filecachepath)
424 424
425 425 # delete unneeded older files
426 426 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
427 427 expiration = time.time() - (days * 24 * 60 * 60)
428 428
429 429 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
430 430 progress.update(0)
431 431 for root, dirs, files in os.walk(cachepath):
432 432 for file in files:
433 433 filepath = os.path.join(root, file)
434 434 progress.increment()
435 435 if filepath in neededfiles:
436 436 continue
437 437
438 438 stat = os.stat(filepath)
439 439 if stat.st_mtime < expiration:
440 440 os.remove(filepath)
441 441
442 442 progress.complete()
@@ -1,574 +1,574
1 1 # repair.py - functions for repository repair for mercurial
2 2 #
3 3 # Copyright 2005, 2006 Chris Mason <mason@suse.com>
4 4 # Copyright 2007 Olivia Mackall
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 hex,
13 13 short,
14 14 )
15 15 from . import (
16 16 bundle2,
17 17 changegroup,
18 18 discovery,
19 19 error,
20 20 exchange,
21 21 obsolete,
22 22 obsutil,
23 23 pathutil,
24 24 phases,
25 25 requirements,
26 26 scmutil,
27 27 store,
28 28 transaction,
29 29 util,
30 30 )
31 31 from .utils import (
32 32 hashutil,
33 33 urlutil,
34 34 )
35 35
36 36
37 37 def backupbundle(
38 38 repo,
39 39 bases,
40 40 heads,
41 41 node,
42 42 suffix,
43 43 compress=True,
44 44 obsolescence=True,
45 45 tmp_backup=False,
46 46 ):
47 47 """create a bundle with the specified revisions as a backup"""
48 48
49 49 backupdir = b"strip-backup"
50 50 vfs = repo.vfs
51 51 if not vfs.isdir(backupdir):
52 52 vfs.mkdir(backupdir)
53 53
54 54 # Include a hash of all the nodes in the filename for uniqueness
55 55 allcommits = repo.set(b'%ln::%ln', bases, heads)
56 56 allhashes = sorted(c.hex() for c in allcommits)
57 57 totalhash = hashutil.sha1(b''.join(allhashes)).digest()
58 58 name = b"%s/%s-%s-%s.hg" % (
59 59 backupdir,
60 60 short(node),
61 61 hex(totalhash[:4]),
62 62 suffix,
63 63 )
64 64
65 65 cgversion = changegroup.localversion(repo)
66 66 comp = None
67 67 if cgversion != b'01':
68 68 bundletype = b"HG20"
69 69 if compress:
70 70 comp = b'BZ'
71 71 elif compress:
72 72 bundletype = b"HG10BZ"
73 73 else:
74 74 bundletype = b"HG10UN"
75 75
76 76 outgoing = discovery.outgoing(repo, missingroots=bases, ancestorsof=heads)
77 77 contentopts = {
78 78 b'cg.version': cgversion,
79 79 b'obsolescence': obsolescence,
80 80 b'phases': True,
81 81 }
82 82 return bundle2.writenewbundle(
83 83 repo.ui,
84 84 repo,
85 85 b'strip',
86 86 name,
87 87 bundletype,
88 88 outgoing,
89 89 contentopts,
90 90 vfs,
91 91 compression=comp,
92 92 allow_internal=tmp_backup,
93 93 )
94 94
95 95
96 96 def _collectfiles(repo, striprev):
97 97 """find out the filelogs affected by the strip"""
98 98 files = set()
99 99
100 100 for x in range(striprev, len(repo)):
101 101 files.update(repo[x].files())
102 102
103 103 return sorted(files)
104 104
105 105
106 106 def _collectrevlog(revlog, striprev):
107 107 _, brokenset = revlog.getstrippoint(striprev)
108 108 return [revlog.linkrev(r) for r in brokenset]
109 109
110 110
111 111 def _collectbrokencsets(repo, files, striprev):
112 112 """return the changesets which will be broken by the truncation"""
113 113 s = set()
114 114
115 115 for revlog in manifestrevlogs(repo):
116 116 s.update(_collectrevlog(revlog, striprev))
117 117 for fname in files:
118 118 s.update(_collectrevlog(repo.file(fname), striprev))
119 119
120 120 return s
121 121
122 122
123 123 def strip(ui, repo, nodelist, backup=True, topic=b'backup'):
124 124 # This function requires the caller to lock the repo, but it operates
125 125 # within a transaction of its own, and thus requires there to be no current
126 126 # transaction when it is called.
127 127 if repo.currenttransaction() is not None:
128 128 raise error.ProgrammingError(b'cannot strip from inside a transaction')
129 129
130 130 # Simple way to maintain backwards compatibility for this
131 131 # argument.
132 132 if backup in [b'none', b'strip']:
133 133 backup = False
134 134
135 135 repo = repo.unfiltered()
136 136 repo.destroying()
137 137 vfs = repo.vfs
138 138 # load bookmark before changelog to avoid side effect from outdated
139 139 # changelog (see repo._refreshchangelog)
140 140 repo._bookmarks
141 141 cl = repo.changelog
142 142
143 143 # TODO handle undo of merge sets
144 144 if isinstance(nodelist, bytes):
145 145 nodelist = [nodelist]
146 146 striplist = [cl.rev(node) for node in nodelist]
147 147 striprev = min(striplist)
148 148
149 149 files = _collectfiles(repo, striprev)
150 150 saverevs = _collectbrokencsets(repo, files, striprev)
151 151
152 152 # Some revisions with rev > striprev may not be descendants of striprev.
153 153 # We have to find these revisions and put them in a bundle, so that
154 154 # we can restore them after the truncations.
155 155 # To create the bundle we use repo.changegroupsubset which requires
156 156 # the list of heads and bases of the set of interesting revisions.
157 157 # (head = revision in the set that has no descendant in the set;
158 158 # base = revision in the set that has no ancestor in the set)
159 159 tostrip = set(striplist)
160 160 saveheads = set(saverevs)
161 161 for r in cl.revs(start=striprev + 1):
162 162 if any(p in tostrip for p in cl.parentrevs(r)):
163 163 tostrip.add(r)
164 164
165 165 if r not in tostrip:
166 166 saverevs.add(r)
167 167 saveheads.difference_update(cl.parentrevs(r))
168 168 saveheads.add(r)
169 169 saveheads = [cl.node(r) for r in saveheads]
170 170
171 171 # compute base nodes
172 172 if saverevs:
173 173 descendants = set(cl.descendants(saverevs))
174 174 saverevs.difference_update(descendants)
175 175 savebases = [cl.node(r) for r in saverevs]
176 176 stripbases = [cl.node(r) for r in tostrip]
177 177
178 178 stripobsidx = obsmarkers = ()
179 179 if repo.ui.configbool(b'devel', b'strip-obsmarkers'):
180 180 obsmarkers = obsutil.exclusivemarkers(repo, stripbases)
181 181 if obsmarkers:
182 182 stripobsidx = [
183 183 i for i, m in enumerate(repo.obsstore) if m in obsmarkers
184 184 ]
185 185
186 186 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
187 187
188 188 backupfile = None
189 189 node = nodelist[-1]
190 190 if backup:
191 191 backupfile = _createstripbackup(repo, stripbases, node, topic)
192 192 # create a changegroup for all the branches we need to keep
193 193 tmpbundlefile = None
194 194 if saveheads:
195 195 # do not compress temporary bundle if we remove it from disk later
196 196 #
197 197 # We do not include obsolescence, as it might re-introduce prune markers
198 198 # we are trying to strip. This is harmless since the stripped markers
199 199 # are already backed up and we did not touch the markers for the
200 200 # saved changesets.
201 201 tmpbundlefile = backupbundle(
202 202 repo,
203 203 savebases,
204 204 saveheads,
205 205 node,
206 206 b'temp',
207 207 compress=False,
208 208 obsolescence=False,
209 209 tmp_backup=True,
210 210 )
211 211
212 212 with ui.uninterruptible():
213 213 try:
214 214 with repo.transaction(b"strip") as tr:
215 215 # TODO this code violates the interface abstraction of the
216 216 # transaction and makes assumptions that file storage is
217 217 # using append-only files. We'll need some kind of storage
218 218 # API to handle stripping for us.
219 219 oldfiles = set(tr._offsetmap.keys())
220 220 oldfiles.update(tr._newfiles)
221 221
222 222 tr.startgroup()
223 223 cl.strip(striprev, tr)
224 224 stripmanifest(repo, striprev, tr, files)
225 225
226 226 for fn in files:
227 227 repo.file(fn).strip(striprev, tr)
228 228 tr.endgroup()
229 229
230 230 entries = tr.readjournal()
231 231
232 232 for file, troffset in entries:
233 233 if file in oldfiles:
234 234 continue
235 235 with repo.svfs(file, b'a', checkambig=True) as fp:
236 236 fp.truncate(troffset)
237 237 if troffset == 0:
238 238 repo.store.markremoved(file)
239 239
240 240 deleteobsmarkers(repo.obsstore, stripobsidx)
241 241 del repo.obsstore
242 242 repo.invalidatevolatilesets()
243 243 repo._phasecache.filterunknown(repo)
244 244
245 245 if tmpbundlefile:
246 246 ui.note(_(b"adding branch\n"))
247 247 f = vfs.open(tmpbundlefile, b"rb")
248 248 gen = exchange.readbundle(ui, f, tmpbundlefile, vfs)
249 249 # silence internal shuffling chatter
250 250 maybe_silent = (
251 251 repo.ui.silent()
252 252 if not repo.ui.verbose
253 253 else util.nullcontextmanager()
254 254 )
255 255 with maybe_silent:
256 256 tmpbundleurl = b'bundle:' + vfs.join(tmpbundlefile)
257 257 txnname = b'strip'
258 258 if not isinstance(gen, bundle2.unbundle20):
259 259 txnname = b"strip\n%s" % urlutil.hidepassword(
260 260 tmpbundleurl
261 261 )
262 262 with repo.transaction(txnname) as tr:
263 263 bundle2.applybundle(
264 264 repo, gen, tr, source=b'strip', url=tmpbundleurl
265 265 )
266 266 f.close()
267 267
268 268 with repo.transaction(b'repair') as tr:
269 269 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
270 270 repo._bookmarks.applychanges(repo, tr, bmchanges)
271 271
272 272 transaction.cleanup_undo_files(repo.ui.warn, repo.vfs_map)
273 273
274 274 except: # re-raises
275 275 if backupfile:
276 276 ui.warn(
277 277 _(b"strip failed, backup bundle stored in '%s'\n")
278 278 % vfs.join(backupfile)
279 279 )
280 280 if tmpbundlefile:
281 281 ui.warn(
282 282 _(b"strip failed, unrecovered changes stored in '%s'\n")
283 283 % vfs.join(tmpbundlefile)
284 284 )
285 285 ui.warn(
286 286 _(
287 287 b"(fix the problem, then recover the changesets with "
288 288 b"\"hg unbundle '%s'\")\n"
289 289 )
290 290 % vfs.join(tmpbundlefile)
291 291 )
292 292 raise
293 293 else:
294 294 if tmpbundlefile:
295 295 # Remove temporary bundle only if there were no exceptions
296 296 vfs.unlink(tmpbundlefile)
297 297
298 298 repo.destroyed()
299 299 # return the backup file path (or None if 'backup' was False) so
300 300 # extensions can use it
301 301 return backupfile
302 302
303 303
304 304 def softstrip(ui, repo, nodelist, backup=True, topic=b'backup'):
305 305 """perform a "soft" strip using the archived phase"""
306 306 tostrip = [c.node() for c in repo.set(b'sort(%ln::)', nodelist)]
307 307 if not tostrip:
308 308 return None
309 309
310 310 backupfile = None
311 311 if backup:
312 312 node = tostrip[0]
313 313 backupfile = _createstripbackup(repo, tostrip, node, topic)
314 314
315 315 newbmtarget, updatebm = _bookmarkmovements(repo, tostrip)
316 316 with repo.transaction(b'strip') as tr:
317 317 phases.retractboundary(repo, tr, phases.archived, tostrip)
318 318 bmchanges = [(m, repo[newbmtarget].node()) for m in updatebm]
319 319 repo._bookmarks.applychanges(repo, tr, bmchanges)
320 320 return backupfile
321 321
322 322
323 323 def _bookmarkmovements(repo, tostrip):
324 324 # compute necessary bookmark movement
325 325 bm = repo._bookmarks
326 326 updatebm = []
327 327 for m in bm:
328 328 rev = repo[bm[m]].rev()
329 329 if rev in tostrip:
330 330 updatebm.append(m)
331 331 newbmtarget = None
332 332 # If we need to move bookmarks, compute bookmark
333 333 # targets. Otherwise we can skip doing this logic.
334 334 if updatebm:
335 335 # For a set s, max(parents(s) - s) is the same as max(heads(::s - s)),
336 336 # but is much faster
337 337 newbmtarget = repo.revs(b'max(parents(%ld) - (%ld))', tostrip, tostrip)
338 338 if newbmtarget:
339 339 newbmtarget = repo[newbmtarget.first()].node()
340 340 else:
341 341 newbmtarget = b'.'
342 342 return newbmtarget, updatebm
343 343
344 344
345 345 def _createstripbackup(repo, stripbases, node, topic):
346 346 # backup the changeset we are about to strip
347 347 vfs = repo.vfs
348 348 unfi = repo.unfiltered()
349 349 to_node = unfi.changelog.node
350 350 # Internal changesets are internal implementation details that should not
351 351 # leave the repository and should not be exposed to users. In addition,
352 352 # features using them are required to be resistant to strip. See test cases
353 353 # for more details.
354 354 all_backup = unfi.revs(
355 355 b"(%ln)::(%ld) and not _internal()",
356 356 stripbases,
357 357 unfi.changelog.headrevs(),
358 358 )
359 359 if not all_backup:
360 360 return None
361 361
362 362 def to_nodes(revs):
363 363 return [to_node(r) for r in revs]
364 364
365 365 bases = to_nodes(unfi.revs("roots(%ld)", all_backup))
366 366 heads = to_nodes(unfi.revs("heads(%ld)", all_backup))
367 367 backupfile = backupbundle(repo, bases, heads, node, topic)
368 368 repo.ui.status(_(b"saved backup bundle to %s\n") % vfs.join(backupfile))
369 369 repo.ui.log(
370 370 b"backupbundle", b"saved backup bundle to %s\n", vfs.join(backupfile)
371 371 )
372 372 return backupfile
373 373
374 374
375 375 def safestriproots(ui, repo, nodes):
376 376 """return list of roots of nodes where descendants are covered by nodes"""
377 377 torev = repo.unfiltered().changelog.rev
378 378 revs = {torev(n) for n in nodes}
379 379 # tostrip = wanted - unsafe = wanted - ancestors(orphaned)
380 380 # orphaned = affected - wanted
381 381 # affected = descendants(roots(wanted))
382 382 # wanted = revs
383 383 revset = b'%ld - ( ::( (roots(%ld):: and not _phase(%s)) -%ld) )'
384 384 tostrip = set(repo.revs(revset, revs, revs, phases.internal, revs))
385 385 notstrip = revs - tostrip
386 386 if notstrip:
387 387 nodestr = b', '.join(sorted(short(repo[n].node()) for n in notstrip))
388 388 ui.warn(
389 389 _(b'warning: orphaned descendants detected, not stripping %s\n')
390 390 % nodestr
391 391 )
392 392 return [c.node() for c in repo.set(b'roots(%ld)', tostrip)]
393 393
394 394
395 395 class stripcallback:
396 396 """used as a transaction postclose callback"""
397 397
398 398 def __init__(self, ui, repo, backup, topic):
399 399 self.ui = ui
400 400 self.repo = repo
401 401 self.backup = backup
402 402 self.topic = topic or b'backup'
403 403 self.nodelist = []
404 404
405 405 def addnodes(self, nodes):
406 406 self.nodelist.extend(nodes)
407 407
408 408 def __call__(self, tr):
409 409 roots = safestriproots(self.ui, self.repo, self.nodelist)
410 410 if roots:
411 411 strip(self.ui, self.repo, roots, self.backup, self.topic)
412 412
413 413
414 414 def delayedstrip(ui, repo, nodelist, topic=None, backup=True):
415 415 """like strip, but works inside transaction and won't strip irreverent revs
416 416
417 417 nodelist must explicitly contain all descendants. Otherwise a warning will
418 418 be printed that some nodes are not stripped.
419 419
420 420 Will do a backup if `backup` is True. The last non-None "topic" will be
421 421 used as the backup topic name. The default backup topic name is "backup".
422 422 """
423 423 tr = repo.currenttransaction()
424 424 if not tr:
425 425 nodes = safestriproots(ui, repo, nodelist)
426 426 return strip(ui, repo, nodes, backup=backup, topic=topic)
427 427 # transaction postclose callbacks are called in alphabetical order.
428 428 # use '\xff' as prefix so we are likely to be called last.
429 429 callback = tr.getpostclose(b'\xffstrip')
430 430 if callback is None:
431 431 callback = stripcallback(ui, repo, backup=backup, topic=topic)
432 432 tr.addpostclose(b'\xffstrip', callback)
433 433 if topic:
434 434 callback.topic = topic
435 435 callback.addnodes(nodelist)
436 436
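# Illustrative usage: called from inside an open transaction, delayedstrip()
# only registers a postclose callback, and the actual strip happens once the
# transaction closes. The node list below is hypothetical:
#
#   with repo.transaction(b'rewrite'):
#       ...  # create replacement changesets
#       delayedstrip(ui, repo, replaced_nodes, topic=b'rewrite')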
437 437
438 438 def stripmanifest(repo, striprev, tr, files):
439 439 for revlog in manifestrevlogs(repo):
440 440 revlog.strip(striprev, tr)
441 441
442 442
443 443 def manifestrevlogs(repo):
444 444 yield repo.manifestlog.getstorage(b'')
445 445 if scmutil.istreemanifest(repo):
446 446 # This logic is safe if treemanifest isn't enabled, but also
447 447 # pointless, so we skip it if treemanifest isn't enabled.
448 for entry in repo.store.datafiles():
448 for entry in repo.store.data_entries():
449 449 if not entry.is_revlog:
450 450 continue
451 451 if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
452 452 yield repo.manifestlog.getstorage(entry.target_id)
453 453
454 454
455 455 def rebuildfncache(ui, repo, only_data=False):
456 456 """Rebuilds the fncache file from repo history.
457 457
458 458 Missing entries will be added. Extra entries will be removed.
459 459 """
460 460 repo = repo.unfiltered()
461 461
462 462 if requirements.FNCACHE_REQUIREMENT not in repo.requirements:
463 463 ui.warn(
464 464 _(
465 465 b'(not rebuilding fncache because repository does not '
466 466 b'support fncache)\n'
467 467 )
468 468 )
469 469 return
470 470
471 471 with repo.lock():
472 472 fnc = repo.store.fncache
473 473 fnc.ensureloaded(warn=ui.warn)
474 474
475 475 oldentries = set(fnc.entries)
476 476 newentries = set()
477 477 seenfiles = set()
478 478
479 479 if only_data:
480 480 # Trust the listing of .i from the fncache, but not the .d. This is
481 481 # much faster, because we only need to stat every possible .d file,
482 482 # instead of reading the full changelog
483 483 for f in fnc:
484 484 if f[:5] == b'data/' and f[-2:] == b'.i':
485 485 seenfiles.add(f[5:-2])
486 486 newentries.add(f)
487 487 dataf = f[:-2] + b'.d'
488 488 if repo.store._exists(dataf):
489 489 newentries.add(dataf)
490 490 else:
491 491 progress = ui.makeprogress(
492 492 _(b'rebuilding'), unit=_(b'changesets'), total=len(repo)
493 493 )
494 494 for rev in repo:
495 495 progress.update(rev)
496 496
497 497 ctx = repo[rev]
498 498 for f in ctx.files():
499 499 # This is to minimize I/O.
500 500 if f in seenfiles:
501 501 continue
502 502 seenfiles.add(f)
503 503
504 504 i = b'data/%s.i' % f
505 505 d = b'data/%s.d' % f
506 506
507 507 if repo.store._exists(i):
508 508 newentries.add(i)
509 509 if repo.store._exists(d):
510 510 newentries.add(d)
511 511
512 512 progress.complete()
513 513
514 514 if requirements.TREEMANIFEST_REQUIREMENT in repo.requirements:
515 515 # This logic is safe if treemanifest isn't enabled, but also
516 516 # pointless, so we skip it if treemanifest isn't enabled.
517 517 for dir in pathutil.dirs(seenfiles):
518 518 i = b'meta/%s/00manifest.i' % dir
519 519 d = b'meta/%s/00manifest.d' % dir
520 520
521 521 if repo.store._exists(i):
522 522 newentries.add(i)
523 523 if repo.store._exists(d):
524 524 newentries.add(d)
525 525
526 526 addcount = len(newentries - oldentries)
527 527 removecount = len(oldentries - newentries)
528 528 for p in sorted(oldentries - newentries):
529 529 ui.write(_(b'removing %s\n') % p)
530 530 for p in sorted(newentries - oldentries):
531 531 ui.write(_(b'adding %s\n') % p)
532 532
533 533 if addcount or removecount:
534 534 ui.write(
535 535 _(b'%d items added, %d removed from fncache\n')
536 536 % (addcount, removecount)
537 537 )
538 538 fnc.entries = newentries
539 539 fnc._dirty = True
540 540
541 541 with repo.transaction(b'fncache') as tr:
542 542 fnc.write(tr)
543 543 else:
544 544 ui.write(_(b'fncache already up to date\n'))
545 545
546 546
547 547 def deleteobsmarkers(obsstore, indices):
548 548 """Delete some obsmarkers from obsstore and return how many were deleted
549 549
550 550 'indices' is a list of ints which are the indices
551 551 of the markers to be deleted.
552 552
553 553 Every invocation of this function completely rewrites the obsstore file,
554 554 skipping the markers we want removed. A new temporary file is
555 555 created, the remaining markers are written there, and on .close() this file
556 556 is atomically renamed to obsstore, thus guaranteeing consistency."""
557 557 if not indices:
558 558 # we don't want to rewrite the obsstore with the same content
559 559 return
560 560
561 561 left = []
562 562 current = obsstore._all
563 563 n = 0
564 564 for i, m in enumerate(current):
565 565 if i in indices:
566 566 n += 1
567 567 continue
568 568 left.append(m)
569 569
570 570 newobsstorefile = obsstore.svfs(b'obsstore', b'w', atomictemp=True)
571 571 for bytes in obsolete.encodemarkers(left, True, obsstore._version):
572 572 newobsstorefile.write(bytes)
573 573 newobsstorefile.close()
574 574 return n
@@ -1,881 +1,881
1 1 # censor code related to censoring revision
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
5 5 # Copyright 2015 Google, Inc <martinvonz@google.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 import binascii
11 11 import contextlib
12 12 import os
13 13 import struct
14 14
15 15 from ..node import (
16 16 nullrev,
17 17 )
18 18 from .constants import (
19 19 COMP_MODE_PLAIN,
20 20 ENTRY_DATA_COMPRESSED_LENGTH,
21 21 ENTRY_DATA_COMPRESSION_MODE,
22 22 ENTRY_DATA_OFFSET,
23 23 ENTRY_DATA_UNCOMPRESSED_LENGTH,
24 24 ENTRY_DELTA_BASE,
25 25 ENTRY_LINK_REV,
26 26 ENTRY_NODE_ID,
27 27 ENTRY_PARENT_1,
28 28 ENTRY_PARENT_2,
29 29 ENTRY_SIDEDATA_COMPRESSED_LENGTH,
30 30 ENTRY_SIDEDATA_COMPRESSION_MODE,
31 31 ENTRY_SIDEDATA_OFFSET,
32 32 REVIDX_ISCENSORED,
33 33 REVLOGV0,
34 34 REVLOGV1,
35 35 )
36 36 from ..i18n import _
37 37
38 38 from .. import (
39 39 error,
40 40 mdiff,
41 41 pycompat,
42 42 revlogutils,
43 43 util,
44 44 )
45 45 from ..utils import (
46 46 storageutil,
47 47 )
48 48 from . import (
49 49 constants,
50 50 deltas,
51 51 )
52 52
53 53
54 54 def v1_censor(rl, tr, censornode, tombstone=b''):
55 55 """censors a revision in a "version 1" revlog"""
56 56 assert rl._format_version == constants.REVLOGV1, rl._format_version
57 57
58 58 # avoid cycle
59 59 from .. import revlog
60 60
61 61 censorrev = rl.rev(censornode)
62 62 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
63 63
64 64 # Rewriting the revlog in place is hard. Our strategy for censoring is
65 65 # to create a new revlog, copy all revisions to it, then replace the
66 66 # revlogs on transaction close.
67 67 #
68 68 # This is a bit dangerous. We could easily have a mismatch of state.
69 69 newrl = revlog.revlog(
70 70 rl.opener,
71 71 target=rl.target,
72 72 radix=rl.radix,
73 73 postfix=b'tmpcensored',
74 74 censorable=True,
75 75 )
76 76 newrl._format_version = rl._format_version
77 77 newrl._format_flags = rl._format_flags
78 78 newrl._generaldelta = rl._generaldelta
79 79 newrl._parse_index = rl._parse_index
80 80
81 81 for rev in rl.revs():
82 82 node = rl.node(rev)
83 83 p1, p2 = rl.parents(node)
84 84
85 85 if rev == censorrev:
86 86 newrl.addrawrevision(
87 87 tombstone,
88 88 tr,
89 89 rl.linkrev(censorrev),
90 90 p1,
91 91 p2,
92 92 censornode,
93 93 constants.REVIDX_ISCENSORED,
94 94 )
95 95
96 96 if newrl.deltaparent(rev) != nullrev:
97 97 m = _(b'censored revision stored as delta; cannot censor')
98 98 h = _(
99 99 b'censoring of revlogs is not fully implemented;'
100 100 b' please report this bug'
101 101 )
102 102 raise error.Abort(m, hint=h)
103 103 continue
104 104
105 105 if rl.iscensored(rev):
106 106 if rl.deltaparent(rev) != nullrev:
107 107 m = _(
108 108 b'cannot censor due to censored '
109 109 b'revision having delta stored'
110 110 )
111 111 raise error.Abort(m)
112 112 rawtext = rl._chunk(rev)
113 113 else:
114 114 rawtext = rl.rawdata(rev)
115 115
116 116 newrl.addrawrevision(
117 117 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
118 118 )
119 119
120 120 tr.addbackup(rl._indexfile, location=b'store')
121 121 if not rl._inline:
122 122 tr.addbackup(rl._datafile, location=b'store')
123 123
124 124 rl.opener.rename(newrl._indexfile, rl._indexfile)
125 125 if not rl._inline:
126 126 rl.opener.rename(newrl._datafile, rl._datafile)
127 127
128 128 rl.clearcaches()
129 129 rl._loadindex()
130 130
131 131
132 132 def v2_censor(revlog, tr, censornode, tombstone=b''):
133 133 """censors a revision in a "version 2" revlog"""
134 134 assert revlog._format_version != REVLOGV0, revlog._format_version
135 135 assert revlog._format_version != REVLOGV1, revlog._format_version
136 136
137 137 censor_revs = {revlog.rev(censornode)}
138 138 _rewrite_v2(revlog, tr, censor_revs, tombstone)
139 139
140 140
141 141 def _rewrite_v2(revlog, tr, censor_revs, tombstone=b''):
142 142 """rewrite a revlog to censor some of its content
143 143
144 144 General principle
145 145
146 146 We create new revlog files (index/data/sidedata) to copy the content of
147 147 the existing data without the censored data.
148 148
149 149 We need to recompute a new delta for any revision that used the censored
150 150 revision as its delta base. As the cumulative size of the new deltas may be
151 151 large, we store them in a temporary file until they are stored in their
152 152 final destination.
153 153
154 154 All data before the censored data can be blindly copied. The rest needs
155 155 to be copied as we go and the associated index entry needs adjustment.
156 156 """
157 157 assert revlog._format_version != REVLOGV0, revlog._format_version
158 158 assert revlog._format_version != REVLOGV1, revlog._format_version
159 159
160 160 old_index = revlog.index
161 161 docket = revlog._docket
162 162
163 163 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
164 164
165 165 first_excl_rev = min(censor_revs)
166 166
167 167 first_excl_entry = revlog.index[first_excl_rev]
168 168 index_cutoff = revlog.index.entry_size * first_excl_rev
169 169 data_cutoff = first_excl_entry[ENTRY_DATA_OFFSET] >> 16
170 170 sidedata_cutoff = revlog.sidedata_cut_off(first_excl_rev)
171 171
172 172 with pycompat.unnamedtempfile(mode=b"w+b") as tmp_storage:
173 173 # rev → (new_base, data_start, data_end, compression_mode)
174 174 rewritten_entries = _precompute_rewritten_delta(
175 175 revlog,
176 176 old_index,
177 177 censor_revs,
178 178 tmp_storage,
179 179 )
180 180
181 181 all_files = _setup_new_files(
182 182 revlog,
183 183 index_cutoff,
184 184 data_cutoff,
185 185 sidedata_cutoff,
186 186 )
187 187
188 188 # we don't need to open the old index file since its content already
189 189 # exists in a usable form in `old_index`.
190 190 with all_files() as open_files:
191 191 (
192 192 old_data_file,
193 193 old_sidedata_file,
194 194 new_index_file,
195 195 new_data_file,
196 196 new_sidedata_file,
197 197 ) = open_files
198 198
199 199 # writing the censored revision
200 200
201 201 # Writing all subsequent revisions
202 202 for rev in range(first_excl_rev, len(old_index)):
203 203 if rev in censor_revs:
204 204 _rewrite_censor(
205 205 revlog,
206 206 old_index,
207 207 open_files,
208 208 rev,
209 209 tombstone,
210 210 )
211 211 else:
212 212 _rewrite_simple(
213 213 revlog,
214 214 old_index,
215 215 open_files,
216 216 rev,
217 217 rewritten_entries,
218 218 tmp_storage,
219 219 )
220 220 docket.write(transaction=None, stripping=True)
221 221
222 222
223 223 def _precompute_rewritten_delta(
224 224 revlog,
225 225 old_index,
226 226 excluded_revs,
227 227 tmp_storage,
228 228 ):
229 229 """Compute new deltas for revisions whose delta is based on a revision that
230 230 will not survive as is.
231 231
232 232 Return a mapping: {rev → (new_base, data_start, data_end, compression_mode)}
233 233 """
234 234 dc = deltas.deltacomputer(revlog)
235 235 rewritten_entries = {}
236 236 first_excl_rev = min(excluded_revs)
237 237 with revlog._segmentfile._open_read() as dfh:
238 238 for rev in range(first_excl_rev, len(old_index)):
239 239 if rev in excluded_revs:
240 240 # this revision will be rewritten as a tombstone, so we don't
241 241 # need to consider recomputing a delta for it.
242 242 continue
243 243 entry = old_index[rev]
244 244 if entry[ENTRY_DELTA_BASE] not in excluded_revs:
245 245 continue
246 246 # This is a revision that uses the censored revision as the base
247 247 # for its delta. We need to compute a new delta for it.
248 248 if entry[ENTRY_DATA_UNCOMPRESSED_LENGTH] == 0:
249 249 # this revision is empty, we can delta against nullrev
250 250 rewritten_entries[rev] = (nullrev, 0, 0, COMP_MODE_PLAIN)
251 251 else:
252 252
253 253 text = revlog.rawdata(rev, _df=dfh)
254 254 info = revlogutils.revisioninfo(
255 255 node=entry[ENTRY_NODE_ID],
256 256 p1=revlog.node(entry[ENTRY_PARENT_1]),
257 257 p2=revlog.node(entry[ENTRY_PARENT_2]),
258 258 btext=[text],
259 259 textlen=len(text),
260 260 cachedelta=None,
261 261 flags=entry[ENTRY_DATA_OFFSET] & 0xFFFF,
262 262 )
263 263 d = dc.finddeltainfo(
264 264 info, dfh, excluded_bases=excluded_revs, target_rev=rev
265 265 )
266 266 default_comp = revlog._docket.default_compression_header
267 267 comp_mode, d = deltas.delta_compression(default_comp, d)
268 268 # using `tell` is a bit lazy, but we are not here for speed
269 269 start = tmp_storage.tell()
270 270 tmp_storage.write(d.data[1])
271 271 end = tmp_storage.tell()
272 272 rewritten_entries[rev] = (d.base, start, end, comp_mode)
273 273 return rewritten_entries
274 274
275 275
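# For illustration, a hypothetical entry in the returned mapping:
# {5: (2, 0, 123, COMP_MODE_PLAIN)} would mean that revision 5 now deltas
# against revision 2, and that its recomputed delta occupies bytes [0, 123)
# of `tmp_storage`.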
276 276 def _setup_new_files(
277 277 revlog,
278 278 index_cutoff,
279 279 data_cutoff,
280 280 sidedata_cutoff,
281 281 ):
282 282 """
283 283
284 284 return a context manager to open all the relevant files:
285 285 - old_data_file,
286 286 - old_sidedata_file,
287 287 - new_index_file,
288 288 - new_data_file,
289 289 - new_sidedata_file,
290 290
291 291 The old_index_file is not here because it is accessed through the
292 292 `old_index` object of the caller function.
293 293 """
294 294 docket = revlog._docket
295 295 old_index_filepath = revlog.opener.join(docket.index_filepath())
296 296 old_data_filepath = revlog.opener.join(docket.data_filepath())
297 297 old_sidedata_filepath = revlog.opener.join(docket.sidedata_filepath())
298 298
299 299 new_index_filepath = revlog.opener.join(docket.new_index_file())
300 300 new_data_filepath = revlog.opener.join(docket.new_data_file())
301 301 new_sidedata_filepath = revlog.opener.join(docket.new_sidedata_file())
302 302
303 303 util.copyfile(old_index_filepath, new_index_filepath, nb_bytes=index_cutoff)
304 304 util.copyfile(old_data_filepath, new_data_filepath, nb_bytes=data_cutoff)
305 305 util.copyfile(
306 306 old_sidedata_filepath,
307 307 new_sidedata_filepath,
308 308 nb_bytes=sidedata_cutoff,
309 309 )
310 310 revlog.opener.register_file(docket.index_filepath())
311 311 revlog.opener.register_file(docket.data_filepath())
312 312 revlog.opener.register_file(docket.sidedata_filepath())
313 313
314 314 docket.index_end = index_cutoff
315 315 docket.data_end = data_cutoff
316 316 docket.sidedata_end = sidedata_cutoff
317 317
318 318 # reload the revlog internal information
319 319 revlog.clearcaches()
320 320 revlog._loadindex(docket=docket)
321 321
322 322 @contextlib.contextmanager
323 323 def all_files_opener():
324 324 # hide the opening in a helper function to please check-code, black
325 325 # and various python versions at the same time
326 326 with open(old_data_filepath, 'rb') as old_data_file:
327 327 with open(old_sidedata_filepath, 'rb') as old_sidedata_file:
328 328 with open(new_index_filepath, 'r+b') as new_index_file:
329 329 with open(new_data_filepath, 'r+b') as new_data_file:
330 330 with open(
331 331 new_sidedata_filepath, 'r+b'
332 332 ) as new_sidedata_file:
333 333 new_index_file.seek(0, os.SEEK_END)
334 334 assert new_index_file.tell() == index_cutoff
335 335 new_data_file.seek(0, os.SEEK_END)
336 336 assert new_data_file.tell() == data_cutoff
337 337 new_sidedata_file.seek(0, os.SEEK_END)
338 338 assert new_sidedata_file.tell() == sidedata_cutoff
339 339 yield (
340 340 old_data_file,
341 341 old_sidedata_file,
342 342 new_index_file,
343 343 new_data_file,
344 344 new_sidedata_file,
345 345 )
346 346
347 347 return all_files_opener
348 348
349 349
350 350 def _rewrite_simple(
351 351 revlog,
352 352 old_index,
353 353 all_files,
354 354 rev,
355 355 rewritten_entries,
356 356 tmp_storage,
357 357 ):
358 358 """append a normal revision to the index after the rewritten one(s)"""
359 359 (
360 360 old_data_file,
361 361 old_sidedata_file,
362 362 new_index_file,
363 363 new_data_file,
364 364 new_sidedata_file,
365 365 ) = all_files
366 366 entry = old_index[rev]
367 367 flags = entry[ENTRY_DATA_OFFSET] & 0xFFFF
368 368 old_data_offset = entry[ENTRY_DATA_OFFSET] >> 16
369 369
370 370 if rev not in rewritten_entries:
371 371 old_data_file.seek(old_data_offset)
372 372 new_data_size = entry[ENTRY_DATA_COMPRESSED_LENGTH]
373 373 new_data = old_data_file.read(new_data_size)
374 374 data_delta_base = entry[ENTRY_DELTA_BASE]
375 375 d_comp_mode = entry[ENTRY_DATA_COMPRESSION_MODE]
376 376 else:
377 377 (
378 378 data_delta_base,
379 379 start,
380 380 end,
381 381 d_comp_mode,
382 382 ) = rewritten_entries[rev]
383 383 new_data_size = end - start
384 384 tmp_storage.seek(start)
385 385 new_data = tmp_storage.read(new_data_size)
386 386
387 387 # It might be faster to group consecutive read/write operations;
388 388 # however, this is censor, an operation that is not focused
389 389 # on stellar performance. So I have not written this
390 390 # optimisation yet.
391 391 new_data_offset = new_data_file.tell()
392 392 new_data_file.write(new_data)
393 393
394 394 sidedata_size = entry[ENTRY_SIDEDATA_COMPRESSED_LENGTH]
395 395 new_sidedata_offset = new_sidedata_file.tell()
396 396 if 0 < sidedata_size:
397 397 old_sidedata_offset = entry[ENTRY_SIDEDATA_OFFSET]
398 398 old_sidedata_file.seek(old_sidedata_offset)
399 399 new_sidedata = old_sidedata_file.read(sidedata_size)
400 400 new_sidedata_file.write(new_sidedata)
401 401
402 402 data_uncompressed_length = entry[ENTRY_DATA_UNCOMPRESSED_LENGTH]
403 403 sd_com_mode = entry[ENTRY_SIDEDATA_COMPRESSION_MODE]
404 404 assert data_delta_base <= rev, (data_delta_base, rev)
405 405
406 406 new_entry = revlogutils.entry(
407 407 flags=flags,
408 408 data_offset=new_data_offset,
409 409 data_compressed_length=new_data_size,
410 410 data_uncompressed_length=data_uncompressed_length,
411 411 data_delta_base=data_delta_base,
412 412 link_rev=entry[ENTRY_LINK_REV],
413 413 parent_rev_1=entry[ENTRY_PARENT_1],
414 414 parent_rev_2=entry[ENTRY_PARENT_2],
415 415 node_id=entry[ENTRY_NODE_ID],
416 416 sidedata_offset=new_sidedata_offset,
417 417 sidedata_compressed_length=sidedata_size,
418 418 data_compression_mode=d_comp_mode,
419 419 sidedata_compression_mode=sd_com_mode,
420 420 )
421 421 revlog.index.append(new_entry)
422 422 entry_bin = revlog.index.entry_binary(rev)
423 423 new_index_file.write(entry_bin)
424 424
425 425 revlog._docket.index_end = new_index_file.tell()
426 426 revlog._docket.data_end = new_data_file.tell()
427 427 revlog._docket.sidedata_end = new_sidedata_file.tell()
428 428
429 429
430 430 def _rewrite_censor(
431 431 revlog,
432 432 old_index,
433 433 all_files,
434 434 rev,
435 435 tombstone,
436 436 ):
437 437 """rewrite and append a censored revision"""
438 438 (
439 439 old_data_file,
440 440 old_sidedata_file,
441 441 new_index_file,
442 442 new_data_file,
443 443 new_sidedata_file,
444 444 ) = all_files
445 445 entry = old_index[rev]
446 446
447 447 # XXX consider trying the default compression too
448 448 new_data_size = len(tombstone)
449 449 new_data_offset = new_data_file.tell()
450 450 new_data_file.write(tombstone)
451 451
452 452 # we are not adding any sidedata as they might leak info about the censored version
453 453
454 454 link_rev = entry[ENTRY_LINK_REV]
455 455
456 456 p1 = entry[ENTRY_PARENT_1]
457 457 p2 = entry[ENTRY_PARENT_2]
458 458
459 459 new_entry = revlogutils.entry(
460 460 flags=constants.REVIDX_ISCENSORED,
461 461 data_offset=new_data_offset,
462 462 data_compressed_length=new_data_size,
463 463 data_uncompressed_length=new_data_size,
464 464 data_delta_base=rev,
465 465 link_rev=link_rev,
466 466 parent_rev_1=p1,
467 467 parent_rev_2=p2,
468 468 node_id=entry[ENTRY_NODE_ID],
469 469 sidedata_offset=0,
470 470 sidedata_compressed_length=0,
471 471 data_compression_mode=COMP_MODE_PLAIN,
472 472 sidedata_compression_mode=COMP_MODE_PLAIN,
473 473 )
474 474 revlog.index.append(new_entry)
475 475 entry_bin = revlog.index.entry_binary(rev)
476 476 new_index_file.write(entry_bin)
477 477 revlog._docket.index_end = new_index_file.tell()
478 478 revlog._docket.data_end = new_data_file.tell()
479 479
480 480
481 481 def _get_filename_from_filelog_index(path):
482 482 # Drop the extension and the `data/` prefix
483 483 path_part = path.rsplit(b'.', 1)[0].split(b'/', 1)
484 484 if len(path_part) < 2:
485 485 msg = _(b"cannot recognize filelog from filename: '%s'")
486 486 msg %= path
487 487 raise error.Abort(msg)
488 488
489 489 return path_part[1]
490 490
491 491
492 492 def _filelog_from_filename(repo, path):
493 493 """Returns the filelog for the given `path`. Stolen from `engine.py`"""
494 494
495 495 from .. import filelog # avoid cycle
496 496
497 497 fl = filelog.filelog(repo.svfs, path)
498 498 return fl
499 499
500 500
501 501 def _write_swapped_parents(repo, rl, rev, offset, fp):
502 502 """Swaps p1 and p2 and overwrites the revlog entry for `rev` in `fp`"""
503 503 from ..pure import parsers # avoid cycle
504 504
505 505 if repo._currentlock(repo._lockref) is None:
506 506 # Let's be paranoid about it
507 507 msg = "repo needs to be locked to rewrite parents"
508 508 raise error.ProgrammingError(msg)
509 509
510 510 index_format = parsers.IndexObject.index_format
511 511 entry = rl.index[rev]
512 512 new_entry = list(entry)
513 513 new_entry[5], new_entry[6] = entry[6], entry[5]
514 514 packed = index_format.pack(*new_entry[:8])
515 515 fp.seek(offset)
516 516 fp.write(packed)
517 517
518 518
519 519 def _reorder_filelog_parents(repo, fl, to_fix):
520 520 """
521 521 Swaps p1 and p2 for all `to_fix` revisions of filelog `fl` and writes the
522 522 new version to disk, overwriting the old one with a rename.
523 523 """
524 524 from ..pure import parsers # avoid cycle
525 525
526 526 ui = repo.ui
527 527 assert len(to_fix) > 0
528 528 rl = fl._revlog
529 529 if rl._format_version != constants.REVLOGV1:
530 530 msg = "expected version 1 revlog, got version '%d'" % rl._format_version
531 531 raise error.ProgrammingError(msg)
532 532
533 533 index_file = rl._indexfile
534 534 new_file_path = index_file + b'.tmp-parents-fix'
535 535 repaired_msg = _(b"repaired revision %d of 'filelog %s'\n")
536 536
537 537 with ui.uninterruptible():
538 538 try:
539 539 util.copyfile(
540 540 rl.opener.join(index_file),
541 541 rl.opener.join(new_file_path),
542 542 checkambig=rl._checkambig,
543 543 )
544 544
545 545 with rl.opener(new_file_path, mode=b"r+") as fp:
546 546 if rl._inline:
547 547 index = parsers.InlinedIndexObject(fp.read())
548 548 for rev in fl.revs():
549 549 if rev in to_fix:
550 550 offset = index._calculate_index(rev)
551 551 _write_swapped_parents(repo, rl, rev, offset, fp)
552 552 ui.write(repaired_msg % (rev, index_file))
553 553 else:
554 554 index_format = parsers.IndexObject.index_format
555 555 for rev in to_fix:
556 556 offset = rev * index_format.size
557 557 _write_swapped_parents(repo, rl, rev, offset, fp)
558 558 ui.write(repaired_msg % (rev, index_file))
559 559
560 560 rl.opener.rename(new_file_path, index_file)
561 561 rl.clearcaches()
562 562 rl._loadindex()
563 563 finally:
564 564 util.tryunlink(new_file_path)
565 565
566 566
567 567 def _is_revision_affected(fl, filerev, metadata_cache=None):
568 568 full_text = lambda: fl._revlog.rawdata(filerev)
569 569 parent_revs = lambda: fl._revlog.parentrevs(filerev)
570 570 return _is_revision_affected_inner(
571 571 full_text, parent_revs, filerev, metadata_cache
572 572 )
573 573
574 574
575 575 def _is_revision_affected_inner(
576 576 full_text,
577 577 parents_revs,
578 578 filerev,
579 579 metadata_cache=None,
580 580 ):
581 581 """Mercurial currently (5.9rc0) gives `p1 == nullrev and p2 != nullrev` a
582 582 special meaning, compared to the reverse, in the context of filelog-based
583 583 copytracing. issue6528 exists because new code assumed that parent ordering
584 584 didn't matter, so this detects if the revision contains metadata (since
585 585 it's only used for filelog-based copytracing) and its parents are in the
586 586 "wrong" order."""
587 587 try:
588 588 raw_text = full_text()
589 589 except error.CensoredNodeError:
590 590 # We don't care about censored nodes as they never carry metadata
591 591 return False
592 592
593 593 # raw text can be a `memoryview`, which doesn't implement `startswith`
594 594 has_meta = bytes(raw_text[:2]) == b'\x01\n'
595 595 if metadata_cache is not None:
596 596 metadata_cache[filerev] = has_meta
597 597 if has_meta:
598 598 (p1, p2) = parents_revs()
599 599 if p1 != nullrev and p2 == nullrev:
600 600 return True
601 601 return False
602 602
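# For illustration, a sketch of the check above on hypothetical raw text
# (filelog metadata is framed by b'\x01\n' markers; the copy source below is
# a made-up example):
#
#     raw = b'\x01\ncopy: old/name\ncopyrev: <40 hex chars>\n\x01\nfile content'
#     bytes(raw[:2]) == b'\x01\n'   # True: this revision carries metadata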
603 603
604 604 def _is_revision_affected_fast(repo, fl, filerev, metadata_cache):
605 605 rl = fl._revlog
606 606 is_censored = lambda: rl.iscensored(filerev)
607 607 delta_base = lambda: rl.deltaparent(filerev)
608 608 delta = lambda: rl._chunk(filerev)
609 609 full_text = lambda: rl.rawdata(filerev)
610 610 parent_revs = lambda: rl.parentrevs(filerev)
611 611 return _is_revision_affected_fast_inner(
612 612 is_censored,
613 613 delta_base,
614 614 delta,
615 615 full_text,
616 616 parent_revs,
617 617 filerev,
618 618 metadata_cache,
619 619 )
620 620
621 621
622 622 def _is_revision_affected_fast_inner(
623 623 is_censored,
624 624 delta_base,
625 625 delta,
626 626 full_text,
627 627 parent_revs,
628 628 filerev,
629 629 metadata_cache,
630 630 ):
631 631 """Optimization fast-path for `_is_revision_affected`.
632 632
633 633 `metadata_cache` is a dict of `{rev: has_metadata}` which allows any
634 634 revision to check if its base has metadata, saving the computation of the full
635 635 text by instead looking at the current delta.
636 636
637 637 This optimization only works if the revisions are looked at in order."""
638 638
639 639 if is_censored():
640 640 # Censored revisions don't contain metadata, so they cannot be affected
641 641 metadata_cache[filerev] = False
642 642 return False
643 643
644 644 p1, p2 = parent_revs()
645 645 if p1 == nullrev or p2 != nullrev:
646 646 return False
647 647
648 648 delta_parent = delta_base()
649 649 parent_has_metadata = metadata_cache.get(delta_parent)
650 650 if parent_has_metadata is None:
651 651 return _is_revision_affected_inner(
652 652 full_text,
653 653 parent_revs,
654 654 filerev,
655 655 metadata_cache,
656 656 )
657 657
658 658 chunk = delta()
659 659 if not len(chunk):
660 660 # No diff for this revision
661 661 return parent_has_metadata
662 662
663 663 header_length = 12
664 664 if len(chunk) < header_length:
665 665 raise error.Abort(_(b"patch cannot be decoded"))
666 666
667 667 start, _end, _length = struct.unpack(b">lll", chunk[:header_length])
668 668
669 669 if start < 2: # len(b'\x01\n') == 2
670 670 # This delta does *something* to the metadata marker (if any).
671 671 # Check it the slow way
672 672 is_affected = _is_revision_affected_inner(
673 673 full_text,
674 674 parent_revs,
675 675 filerev,
676 676 metadata_cache,
677 677 )
678 678 return is_affected
679 679
680 680 # The diff did not remove or add the metadata header, it's then in the same
681 681 # situation as its parent
682 682 metadata_cache[filerev] = parent_has_metadata
683 683 return parent_has_metadata
684 684
685 685
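# For illustration, a sketch of the 12-byte hunk header parsed above; the three
# big-endian int32 fields are (start, end, replacement length) of the first
# delta hunk:
#
#     import struct
#     struct.unpack(b">lll", b"\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00")
#     # -> (0, 2, 0): the hunk starts before offset 2, so it may touch the
#     # b'\x01\n' metadata marker and the slow path is taken.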
686 686 def _from_report(ui, repo, context, from_report, dry_run):
687 687 """
688 688 Fix the revisions given in the `from_report` file, but still check whether the
689 689 revisions are indeed affected, to prevent an unfortunate cyclic situation
690 690 where we'd swap well-ordered parents again.
691 691
692 692 See the doc for `debug_fix_issue6528` for the format documentation.
693 693 """
694 694 ui.write(_(b"loading report file '%s'\n") % from_report)
695 695
696 696 with context(), open(from_report, mode='rb') as f:
697 697 for line in f.read().split(b'\n'):
698 698 if not line:
699 699 continue
700 700 filenodes, filename = line.split(b' ', 1)
701 701 fl = _filelog_from_filename(repo, filename)
702 702 to_fix = set(
703 703 fl.rev(binascii.unhexlify(n)) for n in filenodes.split(b',')
704 704 )
705 705 excluded = set()
706 706
707 707 for filerev in to_fix:
708 708 if _is_revision_affected(fl, filerev):
709 709 msg = b"found affected revision %d for filelog '%s'\n"
710 710 ui.warn(msg % (filerev, filename))
711 711 else:
712 712 msg = _(b"revision %s of file '%s' is not affected\n")
713 713 msg %= (binascii.hexlify(fl.node(filerev)), filename)
714 714 ui.warn(msg)
715 715 excluded.add(filerev)
716 716
717 717 to_fix = to_fix - excluded
718 718 if not to_fix:
719 719 msg = _(b"no affected revisions were found for '%s'\n")
720 720 ui.write(msg % filename)
721 721 continue
722 722 if not dry_run:
723 723 _reorder_filelog_parents(repo, fl, sorted(to_fix))
724 724
725 725
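# For illustration, the shape of a report line parsed above: comma-separated
# hex filenodes, one space, then the tracked path (the nodes here are
# placeholders, not real hashes):
#
#     <hexnode1>,<hexnode2> path/to/file.txt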
726 726 def filter_delta_issue6528(revlog, deltas_iter):
727 727 """filter incoming deltas to repair issue 6528 on the fly"""
728 728 metadata_cache = {}
729 729
730 730 deltacomputer = deltas.deltacomputer(revlog)
731 731
732 732 for rev, d in enumerate(deltas_iter, len(revlog)):
733 733 (
734 734 node,
735 735 p1_node,
736 736 p2_node,
737 737 linknode,
738 738 deltabase,
739 739 delta,
740 740 flags,
741 741 sidedata,
742 742 ) = d
743 743
744 744 if not revlog.index.has_node(deltabase):
745 745 raise error.LookupError(
746 746 deltabase, revlog.radix, _(b'unknown parent')
747 747 )
748 748 base_rev = revlog.rev(deltabase)
749 749 if not revlog.index.has_node(p1_node):
750 750 raise error.LookupError(p1_node, revlog.radix, _(b'unknown parent'))
751 751 p1_rev = revlog.rev(p1_node)
752 752 if not revlog.index.has_node(p2_node):
753 753 raise error.LookupError(p2_node, revlog.radix, _(b'unknown parent'))
754 754 p2_rev = revlog.rev(p2_node)
755 755
756 756 is_censored = lambda: bool(flags & REVIDX_ISCENSORED)
757 757 delta_base = lambda: revlog.rev(delta_base)
758 758 delta_base = lambda: base_rev
759 759 parent_revs = lambda: (p1_rev, p2_rev)
760 760
761 761 def full_text():
762 762 # note: being able to reuse the full text computation in the
763 763 # underlying addrevision would be useful, however this is a bit too
764 764 # intrusive for the "quick" issue6528 fix we are writing before the
765 765 # 5.8 release
766 766 textlen = mdiff.patchedsize(revlog.size(base_rev), delta)
767 767
768 768 revinfo = revlogutils.revisioninfo(
769 769 node,
770 770 p1_node,
771 771 p2_node,
772 772 [None],
773 773 textlen,
774 774 (base_rev, delta),
775 775 flags,
776 776 )
777 777 # cached by the global "writing" context
778 778 assert revlog._writinghandles is not None
779 779 if revlog._inline:
780 780 fh = revlog._writinghandles[0]
781 781 else:
782 782 fh = revlog._writinghandles[1]
783 783 return deltacomputer.buildtext(revinfo, fh)
784 784
785 785 is_affected = _is_revision_affected_fast_inner(
786 786 is_censored,
787 787 delta_base,
788 788 lambda: delta,
789 789 full_text,
790 790 parent_revs,
791 791 rev,
792 792 metadata_cache,
793 793 )
794 794 if is_affected:
795 795 d = (
796 796 node,
797 797 p2_node,
798 798 p1_node,
799 799 linknode,
800 800 deltabase,
801 801 delta,
802 802 flags,
803 803 sidedata,
804 804 )
805 805 yield d
806 806
807 807
808 808 def repair_issue6528(
809 809 ui, repo, dry_run=False, to_report=None, from_report=None, paranoid=False
810 810 ):
811 811 @contextlib.contextmanager
812 812 def context():
813 813 if dry_run or to_report: # No need for locking
814 814 yield
815 815 else:
816 816 with repo.wlock(), repo.lock():
817 817 yield
818 818
819 819 if from_report:
820 820 return _from_report(ui, repo, context, from_report, dry_run)
821 821
822 822 report_entries = []
823 823
824 824 with context():
825 825 files = list(
826 826 entry
827 for entry in repo.store.datafiles()
827 for entry in repo.store.data_entries()
828 828 if entry.is_revlog and entry.is_filelog
829 829 )
830 830
831 831 progress = ui.makeprogress(
832 832 _(b"looking for affected revisions"),
833 833 unit=_(b"filelogs"),
834 834 total=len(files),
835 835 )
836 836 found_nothing = True
837 837
838 838 for entry in files:
839 839 progress.increment()
840 840 filename = entry.target_id
841 841 fl = _filelog_from_filename(repo, entry.target_id)
842 842
843 843 # Set of filerevs (or hex filenodes if `to_report`) that need fixing
844 844 to_fix = set()
845 845 metadata_cache = {}
846 846 for filerev in fl.revs():
847 847 affected = _is_revision_affected_fast(
848 848 repo, fl, filerev, metadata_cache
849 849 )
850 850 if paranoid:
851 851 slow = _is_revision_affected(fl, filerev)
852 852 if slow != affected:
853 853 msg = _(b"paranoid check failed for '%s' at node %s")
854 854 node = binascii.hexlify(fl.node(filerev))
855 855 raise error.Abort(msg % (filename, node))
856 856 if affected:
857 857 msg = b"found affected revision %d for file '%s'\n"
858 858 ui.warn(msg % (filerev, filename))
859 859 found_nothing = False
860 860 if not dry_run:
861 861 if to_report:
862 862 to_fix.add(binascii.hexlify(fl.node(filerev)))
863 863 else:
864 864 to_fix.add(filerev)
865 865
866 866 if to_fix:
867 867 to_fix = sorted(to_fix)
868 868 if to_report:
869 869 report_entries.append((filename, to_fix))
870 870 else:
871 871 _reorder_filelog_parents(repo, fl, to_fix)
872 872
873 873 if found_nothing:
874 874 ui.write(_(b"no affected revisions were found\n"))
875 875
876 876 if to_report and report_entries:
877 877 with open(to_report, mode="wb") as f:
878 878 for path, to_fix in report_entries:
879 879 f.write(b"%s %s\n" % (b",".join(to_fix), path))
880 880
881 881 progress.complete()
@@ -1,1079 +1,1081
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # This is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _match_tracked_entry(entry, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 if entry.is_filelog:
45 45 return matcher(entry.target_id)
46 46 elif entry.is_manifestlog:
47 47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 49
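# For illustration, how the dispatch above plays out on hypothetical entries:
# a filelog entry with target_id b'foo/bar.txt' is checked with
# matcher(b'foo/bar.txt'), while a manifestlog entry with target_id b'foo/'
# is checked with matcher.visitdir(b'foo').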
50 50
51 51 # This avoids a collision between a file named foo and a dir named
52 52 # foo.i or foo.d
53 53 def _encodedir(path):
54 54 """
55 55 >>> _encodedir(b'data/foo.i')
56 56 'data/foo.i'
57 57 >>> _encodedir(b'data/foo.i/bla.i')
58 58 'data/foo.i.hg/bla.i'
59 59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 60 'data/foo.i.hg.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 63 """
64 64 return (
65 65 path.replace(b".hg/", b".hg.hg/")
66 66 .replace(b".i/", b".i.hg/")
67 67 .replace(b".d/", b".d.hg/")
68 68 )
69 69
70 70
71 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 72
73 73
74 74 def decodedir(path):
75 75 """
76 76 >>> decodedir(b'data/foo.i')
77 77 'data/foo.i'
78 78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 79 'data/foo.i/bla.i'
80 80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 81 'data/foo.i.hg/bla.i'
82 82 """
83 83 if b".hg/" not in path:
84 84 return path
85 85 return (
86 86 path.replace(b".d.hg/", b".d/")
87 87 .replace(b".i.hg/", b".i/")
88 88 .replace(b".hg.hg/", b".hg/")
89 89 )
90 90
91 91
92 92 def _reserved():
93 93 """characters that are problematic for filesystems
94 94
95 95 * ascii escapes (0..31)
96 96 * ascii hi (126..255)
97 97 * windows specials
98 98
99 99 these characters will be escaped by the encode functions
100 100 """
101 101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 102 for x in range(32):
103 103 yield x
104 104 for x in range(126, 256):
105 105 yield x
106 106 for x in winreserved:
107 107 yield x
108 108
109 109
110 110 def _buildencodefun():
111 111 """
112 112 >>> enc, dec = _buildencodefun()
113 113
114 114 >>> enc(b'nothing/special.txt')
115 115 'nothing/special.txt'
116 116 >>> dec(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118
119 119 >>> enc(b'HELLO')
120 120 '_h_e_l_l_o'
121 121 >>> dec(b'_h_e_l_l_o')
122 122 'HELLO'
123 123
124 124 >>> enc(b'hello:world?')
125 125 'hello~3aworld~3f'
126 126 >>> dec(b'hello~3aworld~3f')
127 127 'hello:world?'
128 128
129 129 >>> enc(b'the\\x07quick\\xADshot')
130 130 'the~07quick~adshot'
131 131 >>> dec(b'the~07quick~adshot')
132 132 'the\\x07quick\\xadshot'
133 133 """
134 134 e = b'_'
135 135 xchr = pycompat.bytechr
136 136 asciistr = list(map(xchr, range(127)))
137 137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 138
139 139 cmap = {x: x for x in asciistr}
140 140 for x in _reserved():
141 141 cmap[xchr(x)] = b"~%02x" % x
142 142 for x in capitals + [ord(e)]:
143 143 cmap[xchr(x)] = e + xchr(x).lower()
144 144
145 145 dmap = {}
146 146 for k, v in cmap.items():
147 147 dmap[v] = k
148 148
149 149 def decode(s):
150 150 i = 0
151 151 while i < len(s):
152 152 for l in range(1, 4):
153 153 try:
154 154 yield dmap[s[i : i + l]]
155 155 i += l
156 156 break
157 157 except KeyError:
158 158 pass
159 159 else:
160 160 raise KeyError
161 161
162 162 return (
163 163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 164 lambda s: b''.join(list(decode(s))),
165 165 )
166 166
167 167
168 168 _encodefname, _decodefname = _buildencodefun()
169 169
170 170
171 171 def encodefilename(s):
172 172 """
173 173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 175 """
176 176 return _encodefname(encodedir(s))
177 177
178 178
179 179 def decodefilename(s):
180 180 """
181 181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 183 """
184 184 return decodedir(_decodefname(s))
185 185
186 186
187 187 def _buildlowerencodefun():
188 188 """
189 189 >>> f = _buildlowerencodefun()
190 190 >>> f(b'nothing/special.txt')
191 191 'nothing/special.txt'
192 192 >>> f(b'HELLO')
193 193 'hello'
194 194 >>> f(b'hello:world?')
195 195 'hello~3aworld~3f'
196 196 >>> f(b'the\\x07quick\\xADshot')
197 197 'the~07quick~adshot'
198 198 """
199 199 xchr = pycompat.bytechr
200 200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 201 for x in _reserved():
202 202 cmap[xchr(x)] = b"~%02x" % x
203 203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 204 cmap[xchr(x)] = xchr(x).lower()
205 205
206 206 def lowerencode(s):
207 207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 208
209 209 return lowerencode
210 210
211 211
212 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 213
214 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 217
218 218
219 219 def _auxencode(path, dotencode):
220 220 """
221 221 Encodes filenames containing names reserved by Windows or which end in
222 222 period or space. Does not touch other single reserved characters c.
223 223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 224 Additionally encodes space or period at the beginning, if dotencode is
225 225 True. Parameter path is assumed to be all lowercase.
226 226 A segment only needs encoding if a reserved name appears as a
227 227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 228 doesn't need encoding.
229 229
230 230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 231 >>> _auxencode(s.split(b'/'), True)
232 232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 234 >>> _auxencode(s.split(b'/'), False)
235 235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 236 >>> _auxencode([b'foo. '], True)
237 237 ['foo.~20']
238 238 >>> _auxencode([b' .foo'], True)
239 239 ['~20.foo']
240 240 """
241 241 for i, n in enumerate(path):
242 242 if not n:
243 243 continue
244 244 if dotencode and n[0] in b'. ':
245 245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 246 path[i] = n
247 247 else:
248 248 l = n.find(b'.')
249 249 if l == -1:
250 250 l = len(n)
251 251 if (l == 3 and n[:3] in _winres3) or (
252 252 l == 4
253 253 and n[3:4] <= b'9'
254 254 and n[3:4] >= b'1'
255 255 and n[:3] in _winres4
256 256 ):
257 257 # encode third letter ('aux' -> 'au~78')
258 258 ec = b"~%02x" % ord(n[2:3])
259 259 n = n[0:2] + ec + n[3:]
260 260 path[i] = n
261 261 if n[-1] in b'. ':
262 262 # encode last period or space ('foo...' -> 'foo..~2e')
263 263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 264 return path
265 265
266 266
267 267 _maxstorepathlen = 120
268 268 _dirprefixlen = 8
269 269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 270
271 271
272 272 def _hashencode(path, dotencode):
273 273 digest = hex(hashutil.sha1(path).digest())
274 274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 275 parts = _auxencode(le, dotencode)
276 276 basename = parts[-1]
277 277 _root, ext = os.path.splitext(basename)
278 278 sdirs = []
279 279 sdirslen = 0
280 280 for p in parts[:-1]:
281 281 d = p[:_dirprefixlen]
282 282 if d[-1] in b'. ':
283 283 # Windows can't access dirs ending in period or space
284 284 d = d[:-1] + b'_'
285 285 if sdirslen == 0:
286 286 t = len(d)
287 287 else:
288 288 t = sdirslen + 1 + len(d)
289 289 if t > _maxshortdirslen:
290 290 break
291 291 sdirs.append(d)
292 292 sdirslen = t
293 293 dirs = b'/'.join(sdirs)
294 294 if len(dirs) > 0:
295 295 dirs += b'/'
296 296 res = b'dh/' + dirs + digest + ext
297 297 spaceleft = _maxstorepathlen - len(res)
298 298 if spaceleft > 0:
299 299 filler = basename[:spaceleft]
300 300 res = b'dh/' + dirs + filler + digest + ext
301 301 return res
302 302
303 303
304 304 def _hybridencode(path, dotencode):
305 305 """encodes path with a length limit
306 306
307 307 Encodes all paths that begin with 'data/', according to the following.
308 308
309 309 Default encoding (reversible):
310 310
311 311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 312 characters are encoded as '~xx', where xx is the two digit hex code
313 313 of the character (see encodefilename).
314 314 Relevant path components consisting of Windows reserved filenames are
315 315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 316
317 317 Hashed encoding (not reversible):
318 318
319 319 If the default-encoded path is longer than _maxstorepathlen, a
320 320 non-reversible hybrid hashing of the path is done instead.
321 321 This encoding uses up to _dirprefixlen characters of all directory
322 322 levels of the lowerencoded path, but not more levels than can fit into
323 323 _maxshortdirslen.
324 324 Then follows the filler followed by the sha digest of the full path.
325 325 The filler is the beginning of the basename of the lowerencoded path
326 326 (the basename is everything after the last path separator). The filler
327 327 is as long as possible, filling in characters from the basename until
328 328 the encoded path has _maxstorepathlen characters (or all chars of the
329 329 basename have been taken).
330 330 The extension (e.g. '.i' or '.d') is preserved.
331 331
332 332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 333 encoding was used.
334 334 """
335 335 path = encodedir(path)
336 336 ef = _encodefname(path).split(b'/')
337 337 res = b'/'.join(_auxencode(ef, dotencode))
338 338 if len(res) > _maxstorepathlen:
339 339 res = _hashencode(path, dotencode)
340 340 return res
341 341
342 342
343 343 def _pathencode(path):
344 344 de = encodedir(path)
345 345 if len(path) > _maxstorepathlen:
346 346 return _hashencode(de, True)
347 347 ef = _encodefname(de).split(b'/')
348 348 res = b'/'.join(_auxencode(ef, True))
349 349 if len(res) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 return res
352 352
353 353
354 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 355
356 356
357 357 def _plainhybridencode(f):
358 358 return _hybridencode(f, False)
359 359
360 360
361 361 def _calcmode(vfs):
362 362 try:
363 363 # files in .hg/ will be created using this mode
364 364 mode = vfs.stat().st_mode
365 365 # avoid some useless chmods
366 366 if (0o777 & ~util.umask) == (0o777 & mode):
367 367 mode = None
368 368 except OSError:
369 369 mode = None
370 370 return mode
371 371
372 372
373 373 _data = [
374 374 b'bookmarks',
375 375 b'narrowspec',
376 376 b'data',
377 377 b'meta',
378 378 b'00manifest.d',
379 379 b'00manifest.i',
380 380 b'00changelog.d',
381 381 b'00changelog.i',
382 382 b'phaseroots',
383 383 b'obsstore',
384 384 b'requires',
385 385 ]
386 386
387 387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 388 REVLOG_FILES_OTHER_EXT = (
389 389 b'.idx',
390 390 b'.d',
391 391 b'.dat',
392 392 b'.n',
393 393 b'.nd',
394 394 b'.sda',
395 395 )
396 396 # file extension that also use a `-SOMELONGIDHASH.ext` form
397 397 REVLOG_FILES_LONG_EXT = (
398 398 b'.nd',
399 399 b'.idx',
400 400 b'.dat',
401 401 b'.sda',
402 402 )
403 403 # files that are "volatile" and might change between listing and streaming
404 404 #
405 405 # note: the ".nd" files are nodemap data and won't "change" but they might be
406 406 # deleted.
407 407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 408
409 409 # some exception to the above matching
410 410 #
411 411 # XXX This is currently not in use because of issue6542
412 412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 413
414 414
415 415 def is_revlog(f, kind, st):
416 416 if kind != stat.S_IFREG:
417 417 return None
418 418 return revlog_type(f)
419 419
420 420
421 421 def revlog_type(f):
422 422 # XXX we need to filter `undo.` files created by the transaction here, however
423 423 # being naive about it also filters revlogs for `undo.*` files, leading to
424 424 # issue6542. So we no longer use EXCLUDED.
425 425 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 426 return FILEFLAGS_REVLOG_MAIN
427 427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 428 t = FILETYPE_FILELOG_OTHER
429 429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 430 t |= FILEFLAGS_VOLATILE
431 431 return t
432 432 return None
433 433
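# For illustration, how the classification above resolves for a few
# hypothetical paths (the FILEFLAGS_*/FILETYPE_* constants are defined below):
#
#     revlog_type(b'data/foo.i')    # FILEFLAGS_REVLOG_MAIN
#     revlog_type(b'data/foo.d')    # FILETYPE_FILELOG_OTHER
#     revlog_type(b'data/foo.nd')   # FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE
#     revlog_type(b'data/foo.txt')  # None (not a revlog file)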
434 434
435 435 # the file is part of changelog data
436 436 FILEFLAGS_CHANGELOG = 1 << 13
437 437 # the file is part of manifest data
438 438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 439 # the file is part of filelog data
440 440 FILEFLAGS_FILELOG = 1 << 11
441 441 # files that are not directly part of a revlog
442 442 FILEFLAGS_OTHER = 1 << 10
443 443
444 444 # the main entry point for a revlog
445 445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 446 # a secondary file for a revlog
447 447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 448
449 449 # files that are "volatile" and might change between listing and streaming
450 450 FILEFLAGS_VOLATILE = 1 << 20
451 451
452 452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 458 FILETYPE_OTHER = FILEFLAGS_OTHER
459 459
460 460
461 461 @attr.s(slots=True, init=False)
462 462 class BaseStoreEntry:
463 463 """An entry in the store
464 464
465 465 This is returned by `store.walk` and represent some data in the store."""
466 466
467 467
468 468 @attr.s(slots=True, init=False)
469 469 class SimpleStoreEntry(BaseStoreEntry):
470 470 """A generic entry in the store"""
471 471
472 472 is_revlog = False
473 473
474 474 _entry_path = attr.ib()
475 475 _is_volatile = attr.ib(default=False)
476 476 _file_size = attr.ib(default=None)
477 477
478 478 def __init__(
479 479 self,
480 480 entry_path,
481 481 is_volatile=False,
482 482 file_size=None,
483 483 ):
484 484 super().__init__()
485 485 self._entry_path = entry_path
486 486 self._is_volatile = is_volatile
487 487 self._file_size = file_size
488 488
489 489 def files(self):
490 490 return [
491 491 StoreFile(
492 492 unencoded_path=self._entry_path,
493 493 file_size=self._file_size,
494 494 is_volatile=self._is_volatile,
495 495 )
496 496 ]
497 497
498 498
499 499 @attr.s(slots=True, init=False)
500 500 class RevlogStoreEntry(BaseStoreEntry):
501 501 """A revlog entry in the store"""
502 502
503 503 is_revlog = True
504 504
505 505 revlog_type = attr.ib(default=None)
506 506 target_id = attr.ib(default=None)
507 507 _path_prefix = attr.ib(default=None)
508 508 _details = attr.ib(default=None)
509 509
510 510 def __init__(
511 511 self,
512 512 revlog_type,
513 513 path_prefix,
514 514 target_id,
515 515 details,
516 516 ):
517 517 super().__init__()
518 518 self.revlog_type = revlog_type
519 519 self.target_id = target_id
520 520 self._path_prefix = path_prefix
521 521 assert b'.i' in details, (path_prefix, details)
522 522 self._details = details
523 523
524 524 @property
525 525 def is_changelog(self):
526 526 return self.revlog_type & FILEFLAGS_CHANGELOG
527 527
528 528 @property
529 529 def is_manifestlog(self):
530 530 return self.revlog_type & FILEFLAGS_MANIFESTLOG
531 531
532 532 @property
533 533 def is_filelog(self):
534 534 return self.revlog_type & FILEFLAGS_FILELOG
535 535
536 536 def main_file_path(self):
537 537 """unencoded path of the main revlog file"""
538 538 return self._path_prefix + b'.i'
539 539
540 540 def files(self):
541 541 files = []
542 542 for ext in sorted(self._details, key=_ext_key):
543 543 path = self._path_prefix + ext
544 544 data = self._details[ext]
545 545 files.append(StoreFile(unencoded_path=path, **data))
546 546 return files
547 547
548 548
549 549 @attr.s(slots=True)
550 550 class StoreFile:
551 551 """a file matching an entry"""
552 552
553 553 unencoded_path = attr.ib()
554 554 _file_size = attr.ib(default=None)
555 555 is_volatile = attr.ib(default=False)
556 556
557 557 def file_size(self, vfs):
558 558 if self._file_size is not None:
559 559 return self._file_size
560 560 try:
561 561 return vfs.stat(self.unencoded_path).st_size
562 562 except FileNotFoundError:
563 563 return 0
564 564
565 565
566 566 def _gather_revlog(files_data):
567 567 """group files per revlog prefix
568 568
569 569 This returns a two-level nested dict. The top-level key is the revlog prefix
570 570 without extension; the second level maps every file "suffix" seen for this
571 571 revlog to arbitrary file data.
572 572 """
573 573 revlogs = collections.defaultdict(dict)
574 574 for u, value in files_data:
575 575 name, ext = _split_revlog_ext(u)
576 576 revlogs[name][ext] = value
577 577 return sorted(revlogs.items())
578 578
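# For illustration, a minimal sketch of the grouping performed by
# `_gather_revlog` above, with hypothetical input:
#
#     _gather_revlog([(b'data/foo.i', 1), (b'data/foo.d', 2)])
#     # -> [(b'data/foo', {b'.i': 1, b'.d': 2})]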
579 579
580 580 def _split_revlog_ext(filename):
581 581 """split the revlog file prefix from the variable extension"""
582 582 if filename.endswith(REVLOG_FILES_LONG_EXT):
583 583 char = b'-'
584 584 else:
585 585 char = b'.'
586 586 idx = filename.rfind(char)
587 587 return filename[:idx], filename[idx:]
588 588
589 589
590 590 def _ext_key(ext):
591 591 """a key to order revlog suffixes
592 592
593 593 important to issue .i after other entries."""
594 594 # the only important part of this order is to keep the `.i` last.
595 595 if ext.endswith(b'.n'):
596 596 return (0, ext)
597 597 elif ext.endswith(b'.nd'):
598 598 return (10, ext)
599 599 elif ext.endswith(b'.d'):
600 600 return (20, ext)
601 601 elif ext.endswith(b'.i'):
602 602 return (50, ext)
603 603 else:
604 604 return (40, ext)
605 605
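# For illustration, how the two helpers above behave on hypothetical names:
#
#     _split_revlog_ext(b'data/foo.d')          # -> (b'data/foo', b'.d')
#     _split_revlog_ext(b'data/foo-someid.nd')  # -> (b'data/foo', b'-someid.nd')
#     sorted([b'.i', b'.d', b'.nd', b'.n'], key=_ext_key)
#     # -> [b'.n', b'.nd', b'.d', b'.i']  (the `.i` file always sorts last)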
606 606
607 607 class basicstore:
608 608 '''base class for local repository stores'''
609 609
610 610 def __init__(self, path, vfstype):
611 611 vfs = vfstype(path)
612 612 self.path = vfs.base
613 613 self.createmode = _calcmode(vfs)
614 614 vfs.createmode = self.createmode
615 615 self.rawvfs = vfs
616 616 self.vfs = vfsmod.filtervfs(vfs, encodedir)
617 617 self.opener = self.vfs
618 618
619 619 def join(self, f):
620 620 return self.path + b'/' + encodedir(f)
621 621
622 622 def _walk(self, relpath, recurse, undecodable=None):
623 623 '''yields (revlog_type, unencoded, size)'''
624 624 path = self.path
625 625 if relpath:
626 626 path += b'/' + relpath
627 627 striplen = len(self.path) + 1
628 628 l = []
629 629 if self.rawvfs.isdir(path):
630 630 visit = [path]
631 631 readdir = self.rawvfs.readdir
632 632 while visit:
633 633 p = visit.pop()
634 634 for f, kind, st in readdir(p, stat=True):
635 635 fp = p + b'/' + f
636 636 rl_type = is_revlog(f, kind, st)
637 637 if rl_type is not None:
638 638 n = util.pconvert(fp[striplen:])
639 639 l.append((decodedir(n), (rl_type, st.st_size)))
640 640 elif kind == stat.S_IFDIR and recurse:
641 641 visit.append(fp)
642 642
643 643 l.sort()
644 644 return l
645 645
646 646 def changelog(self, trypending, concurrencychecker=None):
647 647 return changelog.changelog(
648 648 self.vfs,
649 649 trypending=trypending,
650 650 concurrencychecker=concurrencychecker,
651 651 )
652 652
653 653 def manifestlog(self, repo, storenarrowmatch):
654 654 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
655 655 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
656 656
657 def datafiles(
657 def data_entries(
658 658 self, matcher=None, undecodable=None
659 659 ) -> Generator[BaseStoreEntry, None, None]:
660 660 """Like walk, but excluding the changelog and root manifest.
661 661
662 662 When [undecodable] is None, revlog names that can't be
663 663 decoded cause an exception. When it is provided, it should
664 664 be a list and the filenames that can't be decoded are added
665 665 to it instead. This is very rarely needed."""
666 666 dirs = [
667 667 (b'data', FILEFLAGS_FILELOG),
668 668 (b'meta', FILEFLAGS_MANIFESTLOG),
669 669 ]
670 670 for base_dir, rl_type in dirs:
671 671 files = self._walk(base_dir, True, undecodable=undecodable)
672 672 files = (f for f in files if f[1][0] is not None)
673 673 for revlog, details in _gather_revlog(files):
674 674 file_details = {}
675 675 revlog_target_id = revlog.split(b'/', 1)[1]
676 676 for ext, (t, s) in sorted(details.items()):
677 677 file_details[ext] = {
678 678 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
679 679 'file_size': s,
680 680 }
681 681 yield RevlogStoreEntry(
682 682 path_prefix=revlog,
683 683 revlog_type=rl_type,
684 684 target_id=revlog_target_id,
685 685 details=file_details,
686 686 )
687 687
688 688 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
689 689 files = reversed(self._walk(b'', False))
690 690
691 691 changelogs = collections.defaultdict(dict)
692 692 manifestlogs = collections.defaultdict(dict)
693 693
694 694 for u, (t, s) in files:
695 695 if u.startswith(b'00changelog'):
696 696 name, ext = _split_revlog_ext(u)
697 697 changelogs[name][ext] = (t, s)
698 698 elif u.startswith(b'00manifest'):
699 699 name, ext = _split_revlog_ext(u)
700 700 manifestlogs[name][ext] = (t, s)
701 701 else:
702 702 yield SimpleStoreEntry(
703 703 entry_path=u,
704 704 is_volatile=bool(t & FILEFLAGS_VOLATILE),
705 705 file_size=s,
706 706 )
707 707 # yield manifest before changelog
708 708 top_rl = [
709 709 (manifestlogs, FILEFLAGS_MANIFESTLOG),
710 710 (changelogs, FILEFLAGS_CHANGELOG),
711 711 ]
712 712 assert len(manifestlogs) <= 1
713 713 assert len(changelogs) <= 1
714 714 for data, revlog_type in top_rl:
715 715 for revlog, details in sorted(data.items()):
716 716 file_details = {}
717 717 for ext, (t, s) in details.items():
718 718 file_details[ext] = {
719 719 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
720 720 'file_size': s,
721 721 }
722 722 yield RevlogStoreEntry(
723 723 path_prefix=revlog,
724 724 revlog_type=revlog_type,
725 725 target_id=b'',
726 726 details=file_details,
727 727 )
728 728
729 729 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
730 730 """return files related to data storage (ie: revlogs)
731 731
732 732 yields store entries (see `BaseStoreEntry`)
733 733
734 734 if a matcher is passed, only storage files for tracked paths that
735 735 match the matcher are yielded
736 736 """
737 737 # yield data files first
738 for x in self.datafiles(matcher):
738 for x in self.data_entries(matcher):
739 739 yield x
740 740 for x in self.topfiles():
741 741 yield x
742 742
743 743 def copylist(self):
744 744 return _data
745 745
746 746 def write(self, tr):
747 747 pass
748 748
749 749 def invalidatecaches(self):
750 750 pass
751 751
752 752 def markremoved(self, fn):
753 753 pass
754 754
755 755 def __contains__(self, path):
756 756 '''Checks if the store contains path'''
757 757 path = b"/".join((b"data", path))
758 758 # file?
759 759 if self.vfs.exists(path + b".i"):
760 760 return True
761 761 # dir?
762 762 if not path.endswith(b"/"):
763 763 path = path + b"/"
764 764 return self.vfs.exists(path)
765 765
766 766
767 767 class encodedstore(basicstore):
768 768 def __init__(self, path, vfstype):
769 769 vfs = vfstype(path + b'/store')
770 770 self.path = vfs.base
771 771 self.createmode = _calcmode(vfs)
772 772 vfs.createmode = self.createmode
773 773 self.rawvfs = vfs
774 774 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
775 775 self.opener = self.vfs
776 776
777 777 def _walk(self, relpath, recurse, undecodable=None):
778 778 old = super()._walk(relpath, recurse)
779 779 new = []
780 780 for f1, value in old:
781 781 try:
782 782 f2 = decodefilename(f1)
783 783 except KeyError:
784 784 if undecodable is None:
785 785 msg = _(b'undecodable revlog name %s') % f1
786 786 raise error.StorageError(msg)
787 787 else:
788 788 undecodable.append(f1)
789 789 continue
790 790 new.append((f2, value))
791 791 return new
792 792
793 def datafiles(
793 def data_entries(
794 794 self, matcher=None, undecodable=None
795 795 ) -> Generator[BaseStoreEntry, None, None]:
796 entries = super(encodedstore, self).datafiles(undecodable=undecodable)
796 entries = super(encodedstore, self).data_entries(
797 undecodable=undecodable
798 )
797 799 for entry in entries:
798 800 if _match_tracked_entry(entry, matcher):
799 801 yield entry
800 802
801 803 def join(self, f):
802 804 return self.path + b'/' + encodefilename(f)
803 805
804 806 def copylist(self):
805 807 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
806 808
807 809
808 810 class fncache:
809 811 # the filename used to be partially encoded
810 812 # hence the encodedir/decodedir dance
811 813 def __init__(self, vfs):
812 814 self.vfs = vfs
813 815 self._ignores = set()
814 816 self.entries = None
815 817 self._dirty = False
816 818 # set of new additions to fncache
817 819 self.addls = set()
818 820
819 821 def ensureloaded(self, warn=None):
820 822 """read the fncache file if not already read.
821 823
822 824 If the file on disk is corrupted, raise. If warn is provided,
823 825 warn and keep going instead."""
824 826 if self.entries is None:
825 827 self._load(warn)
826 828
827 829 def _load(self, warn=None):
828 830 '''fill the entries from the fncache file'''
829 831 self._dirty = False
830 832 try:
831 833 fp = self.vfs(b'fncache', mode=b'rb')
832 834 except IOError:
833 835 # skip nonexistent file
834 836 self.entries = set()
835 837 return
836 838
837 839 self.entries = set()
838 840 chunk = b''
839 841 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
840 842 chunk += c
841 843 try:
842 844 p = chunk.rindex(b'\n')
843 845 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
844 846 chunk = chunk[p + 1 :]
845 847 except ValueError:
846 848 # substring '\n' not found, maybe the entry is bigger than the
847 849 # chunksize, so let's keep iterating
848 850 pass
849 851
850 852 if chunk:
851 853 msg = _(b"fncache does not ends with a newline")
852 854 if warn:
853 855 warn(msg + b'\n')
854 856 else:
855 857 raise error.Abort(
856 858 msg,
857 859 hint=_(
858 860 b"use 'hg debugrebuildfncache' to "
859 861 b"rebuild the fncache"
860 862 ),
861 863 )
862 864 self._checkentries(fp, warn)
863 865 fp.close()
864 866
865 867 def _checkentries(self, fp, warn):
866 868 """make sure there is no empty string in entries"""
867 869 if b'' in self.entries:
868 870 fp.seek(0)
869 871 for n, line in enumerate(fp):
870 872 if not line.rstrip(b'\n'):
871 873 t = _(b'invalid entry in fncache, line %d') % (n + 1)
872 874 if warn:
873 875 warn(t + b'\n')
874 876 else:
875 877 raise error.Abort(t)
876 878
877 879 def write(self, tr):
878 880 if self._dirty:
879 881 assert self.entries is not None
880 882 self.entries = self.entries | self.addls
881 883 self.addls = set()
882 884 tr.addbackup(b'fncache')
883 885 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
884 886 if self.entries:
885 887 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
886 888 fp.close()
887 889 self._dirty = False
888 890 if self.addls:
889 891 # if we have just new entries, let's append them to the fncache
890 892 tr.addbackup(b'fncache')
891 893 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
892 894 if self.addls:
893 895 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
894 896 fp.close()
895 897 self.entries = None
896 898 self.addls = set()
897 899
898 900 def addignore(self, fn):
899 901 self._ignores.add(fn)
900 902
901 903 def add(self, fn):
902 904 if fn in self._ignores:
903 905 return
904 906 if self.entries is None:
905 907 self._load()
906 908 if fn not in self.entries:
907 909 self.addls.add(fn)
908 910
909 911 def remove(self, fn):
910 912 if self.entries is None:
911 913 self._load()
912 914 if fn in self.addls:
913 915 self.addls.remove(fn)
914 916 return
915 917 try:
916 918 self.entries.remove(fn)
917 919 self._dirty = True
918 920 except KeyError:
919 921 pass
920 922
921 923 def __contains__(self, fn):
922 924 if fn in self.addls:
923 925 return True
924 926 if self.entries is None:
925 927 self._load()
926 928 return fn in self.entries
927 929
928 930 def __iter__(self):
929 931 if self.entries is None:
930 932 self._load()
931 933 return iter(self.entries | self.addls)
932 934
933 935
934 936 class _fncachevfs(vfsmod.proxyvfs):
935 937 def __init__(self, vfs, fnc, encode):
936 938 vfsmod.proxyvfs.__init__(self, vfs)
937 939 self.fncache = fnc
938 940 self.encode = encode
939 941
940 942 def __call__(self, path, mode=b'r', *args, **kw):
941 943 encoded = self.encode(path)
942 944 if (
943 945 mode not in (b'r', b'rb')
944 946 and (path.startswith(b'data/') or path.startswith(b'meta/'))
945 947 and revlog_type(path) is not None
946 948 ):
947 949 # do not trigger a fncache load when adding a file that already is
948 950 # known to exist.
949 951 notload = self.fncache.entries is None and self.vfs.exists(encoded)
950 952 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
951 953 # when appending to an existing file, if the file has size zero,
952 954 # it should be considered as missing. Such zero-size files are
953 955 # the result of truncation when a transaction is aborted.
954 956 notload = False
955 957 if not notload:
956 958 self.fncache.add(path)
957 959 return self.vfs(encoded, mode, *args, **kw)
958 960
959 961 def join(self, path):
960 962 if path:
961 963 return self.vfs.join(self.encode(path))
962 964 else:
963 965 return self.vfs.join(path)
964 966
965 967 def register_file(self, path):
966 968 """generic hook point to lets fncache steer its stew"""
967 969 if path.startswith(b'data/') or path.startswith(b'meta/'):
968 970 self.fncache.add(path)
969 971
970 972
971 973 class fncachestore(basicstore):
972 974 def __init__(self, path, vfstype, dotencode):
973 975 if dotencode:
974 976 encode = _pathencode
975 977 else:
976 978 encode = _plainhybridencode
977 979 self.encode = encode
978 980 vfs = vfstype(path + b'/store')
979 981 self.path = vfs.base
980 982 self.pathsep = self.path + b'/'
981 983 self.createmode = _calcmode(vfs)
982 984 vfs.createmode = self.createmode
983 985 self.rawvfs = vfs
984 986 fnc = fncache(vfs)
985 987 self.fncache = fnc
986 988 self.vfs = _fncachevfs(vfs, fnc, encode)
987 989 self.opener = self.vfs
988 990
989 991 def join(self, f):
990 992 return self.pathsep + self.encode(f)
991 993
992 994 def getsize(self, path):
993 995 return self.rawvfs.stat(path).st_size
994 996
995 def datafiles(
997 def data_entries(
996 998 self, matcher=None, undecodable=None
997 999 ) -> Generator[BaseStoreEntry, None, None]:
998 1000 files = ((f, revlog_type(f)) for f in self.fncache)
999 1001 # Note: all files in fncache should be revlog related; however the
1000 1002 # fncache might contain such files added by previous versions of
1001 1003 # Mercurial.
1002 1004 files = (f for f in files if f[1] is not None)
1003 1005 by_revlog = _gather_revlog(files)
1004 1006 for revlog, details in by_revlog:
1005 1007 file_details = {}
1006 1008 if revlog.startswith(b'data/'):
1007 1009 rl_type = FILEFLAGS_FILELOG
1008 1010 revlog_target_id = revlog.split(b'/', 1)[1]
1009 1011 elif revlog.startswith(b'meta/'):
1010 1012 rl_type = FILEFLAGS_MANIFESTLOG
1011 1013 # drop the initial directory and the `00manifest` file part
1012 1014 tmp = revlog.split(b'/', 1)[1]
1013 1015 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1014 1016 else:
1015 1017 # unreachable
1016 1018 assert False, revlog
1017 1019 for ext, t in details.items():
1018 1020 file_details[ext] = {
1019 1021 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1020 1022 }
1021 1023 entry = RevlogStoreEntry(
1022 1024 path_prefix=revlog,
1023 1025 revlog_type=rl_type,
1024 1026 target_id=revlog_target_id,
1025 1027 details=file_details,
1026 1028 )
1027 1029 if _match_tracked_entry(entry, matcher):
1028 1030 yield entry
1029 1031
1030 1032 def copylist(self):
1031 1033 d = (
1032 1034 b'bookmarks',
1033 1035 b'narrowspec',
1034 1036 b'data',
1035 1037 b'meta',
1036 1038 b'dh',
1037 1039 b'fncache',
1038 1040 b'phaseroots',
1039 1041 b'obsstore',
1040 1042 b'00manifest.d',
1041 1043 b'00manifest.i',
1042 1044 b'00changelog.d',
1043 1045 b'00changelog.i',
1044 1046 b'requires',
1045 1047 )
1046 1048 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1047 1049
1048 1050 def write(self, tr):
1049 1051 self.fncache.write(tr)
1050 1052
1051 1053 def invalidatecaches(self):
1052 1054 self.fncache.entries = None
1053 1055 self.fncache.addls = set()
1054 1056
1055 1057 def markremoved(self, fn):
1056 1058 self.fncache.remove(fn)
1057 1059
1058 1060 def _exists(self, f):
1059 1061 ef = self.encode(f)
1060 1062 try:
1061 1063 self.getsize(ef)
1062 1064 return True
1063 1065 except FileNotFoundError:
1064 1066 return False
1065 1067
1066 1068 def __contains__(self, path):
1067 1069 '''Checks if the store contains path'''
1068 1070 path = b"/".join((b"data", path))
1069 1071 # check for files (exact match)
1070 1072 e = path + b'.i'
1071 1073 if e in self.fncache and self._exists(e):
1072 1074 return True
1073 1075 # now check for directories (prefix match)
1074 1076 if not path.endswith(b'/'):
1075 1077 path += b'/'
1076 1078 for e in self.fncache:
1077 1079 if e.startswith(path) and self._exists(e):
1078 1080 return True
1079 1081 return False
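
For context, the renamed entry point can be driven from Python roughly as follows; this is a minimal sketch assuming Mercurial's internal `hg.repository()`/`ui.load()` helpers and a local repository in the current directory (internal API, subject to change):

    # a minimal sketch: enumerate store data entries through the renamed API
    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')   # local repository, current dir
    for entry in repo.store.data_entries():       # formerly repo.store.datafiles()
        for f in entry.files():
            # unencoded_path / file_size() are the accessors used by verify.py
            print(f.unencoded_path, f.file_size(repo.store.vfs))
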
@@ -1,627 +1,627
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import os
10 10
11 11 from .i18n import _
12 12 from .node import short
13 13 from .utils import stringutil
14 14
15 15 from . import (
16 16 error,
17 17 pycompat,
18 18 requirements,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
41 41 HINT_FNCACHE = _(
42 42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 43 )
44 44
45 45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 46 b"parent-directory manifest refers to unknown revision %s"
47 47 )
48 48
49 49 WARN_UNKNOWN_COPY_SOURCE = _(
50 50 b"warning: copy source of '%s' not in parents of %s"
51 51 )
52 52
53 53 WARN_NULLID_COPY_SOURCE = _(
54 54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 55 )
56 56
57 57
58 58 class verifier:
59 59 def __init__(self, repo, level=None):
60 60 self.repo = repo.unfiltered()
61 61 self.ui = repo.ui
62 62 self.match = repo.narrowmatch()
63 63 if level is None:
64 64 level = VERIFY_DEFAULT
65 65 self._level = level
66 66 self.badrevs = set()
67 67 self.errors = 0
68 68 self.warnings = 0
69 69 self.havecl = len(repo.changelog) > 0
70 70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 73 self.refersmf = False
74 74 self.fncachewarned = False
75 75 # developer config: verify.skipflags
76 76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 77 self.warnorphanstorefiles = True
78 78
79 79 def _warn(self, msg):
80 80 """record a "warning" level issue"""
81 81 self.ui.warn(msg + b"\n")
82 82 self.warnings += 1
83 83
84 84 def _err(self, linkrev, msg, filename=None):
85 85 """record a "error" level issue"""
86 86 if linkrev is not None:
87 87 self.badrevs.add(linkrev)
88 88 linkrev = b"%d" % linkrev
89 89 else:
90 90 linkrev = b'?'
91 91 msg = b"%s: %s" % (linkrev, msg)
92 92 if filename:
93 93 msg = b"%s@%s" % (filename, msg)
94 94 self.ui.warn(b" " + msg + b"\n")
95 95 self.errors += 1
96 96
97 97 def _exc(self, linkrev, msg, inst, filename=None):
98 98 """record exception raised during the verify process"""
99 99 fmsg = stringutil.forcebytestr(inst)
100 100 if not fmsg:
101 101 fmsg = pycompat.byterepr(inst)
102 102 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
103 103
104 104 def _checkrevlog(self, obj, name, linkrev):
105 105 """verify high level property of a revlog
106 106
107 107 - revlog is present,
108 108 - revlog is non-empty,
109 109 - sizes (index and data) are correct,
110 110 - revlog's format version is correct.
111 111 """
112 112 if not len(obj) and (self.havecl or self.havemf):
113 113 self._err(linkrev, _(b"empty or missing %s") % name)
114 114 return
115 115
116 116 d = obj.checksize()
117 117 if d[0]:
118 118 self._err(None, _(b"data length off by %d bytes") % d[0], name)
119 119 if d[1]:
120 120 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
121 121
122 122 if obj._format_version != revlog.REVLOGV0:
123 123 if not self.revlogv1:
124 124 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
125 125 elif self.revlogv1:
126 126 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
127 127
128 128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
129 129 """verify a single revlog entry
130 130
131 131 arguments are:
132 132 - obj: the source revlog
133 133 - i: the revision number
134 134 - node: the revision node id
135 135 - seen: nodes previously seen for this revlog
136 136 - linkrevs: [changelog-revisions] introducing "node"
137 137 - f: string label ("changelog", "manifest", or filename)
138 138
139 139 Performs the following checks:
140 140 - linkrev points to an existing changelog revision,
141 141 - linkrev points to a changelog revision that introduces this revision,
142 142 - linkrev points to the lowest of these changesets,
143 143 - both parents exist in the revlog,
144 144 - the revision is not duplicated.
145 145
146 146 Return the linkrev of the revision (or None for changelog's revisions).
147 147 """
148 148 lr = obj.linkrev(obj.rev(node))
149 149 if lr < 0 or (self.havecl and lr not in linkrevs):
150 150 if lr < 0 or lr >= len(self.repo.changelog):
151 151 msg = _(b"rev %d points to nonexistent changeset %d")
152 152 else:
153 153 msg = _(b"rev %d points to unexpected changeset %d")
154 154 self._err(None, msg % (i, lr), f)
155 155 if linkrevs:
156 156 if f and len(linkrevs) > 1:
157 157 try:
158 158 # attempt to filter down to real linkrevs
159 159 linkrevs = []
160 160 for lr in linkrevs:
161 161 if self.lrugetctx(lr)[f].filenode() == node:
162 162 linkrevs.append(lr)
163 163 except Exception:
164 164 pass
165 165 msg = _(b" (expected %s)")
166 166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
167 167 self._warn(msg)
168 168 lr = None # can't be trusted
169 169
170 170 try:
171 171 p1, p2 = obj.parents(node)
172 172 if p1 not in seen and p1 != self.repo.nullid:
173 173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
174 174 self._err(lr, msg, f)
175 175 if p2 not in seen and p2 != self.repo.nullid:
176 176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
177 177 self._err(lr, msg, f)
178 178 except Exception as inst:
179 179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
180 180
181 181 if node in seen:
182 182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
183 183 seen[node] = i
184 184 return lr
185 185
186 186 def verify(self):
187 187 """verify the content of the Mercurial repository
188 188
189 189 This method runs all verifications, displaying issues as they are found.
190 190
191 191 Return 1 if any errors have been encountered, 0 otherwise."""
192 192 # initial validation and generic report
193 193 repo = self.repo
194 194 ui = repo.ui
195 195 if not repo.url().startswith(b'file:'):
196 196 raise error.Abort(_(b"cannot verify bundle or remote repos"))
197 197
198 198 if os.path.exists(repo.sjoin(b"journal")):
199 199 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
200 200
201 201 if ui.verbose or not self.revlogv1:
202 202 ui.status(
203 203 _(b"repository uses revlog format %d\n")
204 204 % (self.revlogv1 and 1 or 0)
205 205 )
206 206
207 207 # data verification
208 208 mflinkrevs, filelinkrevs = self._verifychangelog()
209 209 filenodes = self._verifymanifest(mflinkrevs)
210 210 del mflinkrevs
211 211 self._crosscheckfiles(filelinkrevs, filenodes)
212 212 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
213 213
214 214 if self.errors:
215 215 ui.warn(_(b"not checking dirstate because of previous errors\n"))
216 216 dirstate_errors = 0
217 217 else:
218 218 dirstate_errors = self._verify_dirstate()
219 219
220 220 # final report
221 221 ui.status(
222 222 _(b"checked %d changesets with %d changes to %d files\n")
223 223 % (len(repo.changelog), filerevisions, totalfiles)
224 224 )
225 225 if self.warnings:
226 226 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
227 227 if self.fncachewarned:
228 228 ui.warn(HINT_FNCACHE)
229 229 if self.errors:
230 230 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
231 231 if self.badrevs:
232 232 msg = _(b"(first damaged changeset appears to be %d)\n")
233 233 msg %= min(self.badrevs)
234 234 ui.warn(msg)
235 235 if dirstate_errors:
236 236 ui.warn(
237 237 _(b"dirstate inconsistent with current parent's manifest\n")
238 238 )
239 239 ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
240 240 return 1
241 241 return 0
242 242
243 243 def _verifychangelog(self):
244 244 """verify the changelog of a repository
245 245
246 246 The following checks are performed:
247 247 - all of `_checkrevlog` checks,
248 248 - all of `_checkentry` checks (for each revisions),
249 249 - each revision can be read.
250 250
251 251 The function returns some of the data observed in the changesets as a
252 252 (mflinkrevs, filelinkrevs) tuple:
253 253 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
254 254 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
255 255
256 256 If a matcher was specified, filelinkrevs will only contain matched
257 257 files.
258 258 """
259 259 ui = self.ui
260 260 repo = self.repo
261 261 match = self.match
262 262 cl = repo.changelog
263 263
264 264 ui.status(_(b"checking changesets\n"))
265 265 mflinkrevs = {}
266 266 filelinkrevs = {}
267 267 seen = {}
268 268 self._checkrevlog(cl, b"changelog", 0)
269 269 progress = ui.makeprogress(
270 270 _(b'checking'), unit=_(b'changesets'), total=len(repo)
271 271 )
272 272 for i in repo:
273 273 progress.update(i)
274 274 n = cl.node(i)
275 275 self._checkentry(cl, i, n, seen, [i], b"changelog")
276 276
277 277 try:
278 278 changes = cl.read(n)
279 279 if changes[0] != self.repo.nullid:
280 280 mflinkrevs.setdefault(changes[0], []).append(i)
281 281 self.refersmf = True
282 282 for f in changes[3]:
283 283 if match(f):
284 284 filelinkrevs.setdefault(_normpath(f), []).append(i)
285 285 except Exception as inst:
286 286 self.refersmf = True
287 287 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
288 288 progress.complete()
289 289 return mflinkrevs, filelinkrevs
290 290
291 291 def _verifymanifest(
292 292 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
293 293 ):
294 294 """verify the manifestlog content
295 295
296 296 Inputs:
297 297 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
298 298 - dir: a subdirectory to check (for tree manifest repo)
299 299 - storefiles: set of currently "orphan" files.
300 300 - subdirprogress: a progress object
301 301
302 302 This function checks:
303 303 * all of `_checkrevlog` checks (for all manifest related revlogs)
304 304 * all of `_checkentry` checks (for all manifest related revisions)
305 305 * nodes for subdirectories exist in the sub-directory manifest
306 306 * each manifest entry has a file path
307 307 * each manifest node referred to in mflinkrevs exists in the manifest log
308 308
309 309 If tree manifest is in use and a matcher is specified, only the
310 310 sub-directories matching it will be verified.
311 311
312 312 return a two level mapping:
313 313 {"path" -> { filenode -> changelog-revision}}
314 314
315 315 This mapping primarily contains entries for every file in the
316 316 repository. In addition, when tree-manifest is used, it also contains
317 317 sub-directory entries.
318 318
319 319 If a matcher is provided, only matching paths will be included.
320 320 """
321 321 repo = self.repo
322 322 ui = self.ui
323 323 match = self.match
324 324 mfl = self.repo.manifestlog
325 325 mf = mfl.getstorage(dir)
326 326
327 327 if not dir:
328 328 self.ui.status(_(b"checking manifests\n"))
329 329
330 330 filenodes = {}
331 331 subdirnodes = {}
332 332 seen = {}
333 333 label = b"manifest"
334 334 if dir:
335 335 label = dir
336 336 revlogfiles = mf.files()
337 337 storefiles.difference_update(revlogfiles)
338 338 if subdirprogress: # should be true since we're in a subdirectory
339 339 subdirprogress.increment()
340 340 if self.refersmf:
341 341 # Do not check manifest if there are only changelog entries with
342 342 # null manifests.
343 343 self._checkrevlog(mf._revlog, label, 0)
344 344 progress = ui.makeprogress(
345 345 _(b'checking'), unit=_(b'manifests'), total=len(mf)
346 346 )
347 347 for i in mf:
348 348 if not dir:
349 349 progress.update(i)
350 350 n = mf.node(i)
351 351 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
352 352 if n in mflinkrevs:
353 353 del mflinkrevs[n]
354 354 elif dir:
355 355 msg = _(b"%s not in parent-directory manifest") % short(n)
356 356 self._err(lr, msg, label)
357 357 else:
358 358 self._err(lr, _(b"%s not in changesets") % short(n), label)
359 359
360 360 try:
361 361 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
362 362 for f, fn, fl in mfdelta.iterentries():
363 363 if not f:
364 364 self._err(lr, _(b"entry without name in manifest"))
365 365 elif f == b"/dev/null": # ignore this in very old repos
366 366 continue
367 367 fullpath = dir + _normpath(f)
368 368 if fl == b't':
369 369 if not match.visitdir(fullpath):
370 370 continue
371 371 sdn = subdirnodes.setdefault(fullpath + b'/', {})
372 372 sdn.setdefault(fn, []).append(lr)
373 373 else:
374 374 if not match(fullpath):
375 375 continue
376 376 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
377 377 except Exception as inst:
378 378 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
379 379 if self._level >= VERIFY_FULL:
380 380 try:
381 381 # Various issues can affect a manifest. So we read each full
382 382 # text from storage. This triggers the checks from the core
383 383 # code (eg: hash verification, filenames are ordered, etc.)
384 384 mfdelta = mfl.get(dir, n).read()
385 385 except Exception as inst:
386 386 msg = _(b"reading full manifest %s") % short(n)
387 387 self._exc(lr, msg, inst, label)
388 388
389 389 if not dir:
390 390 progress.complete()
391 391
392 392 if self.havemf:
393 393 # since we delete entries in `mflinkrevs` during iteration, any
394 394 # remaining entries are "missing". We need to issue errors for them.
395 395 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
396 396 for c, m in sorted(changesetpairs):
397 397 if dir:
398 398 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
399 399 else:
400 400 msg = _(b"changeset refers to unknown revision %s")
401 401 msg %= short(m)
402 402 self._err(c, msg, label)
403 403
404 404 if not dir and subdirnodes:
405 405 self.ui.status(_(b"checking directory manifests\n"))
406 406 storefiles = set()
407 407 subdirs = set()
408 408 revlogv1 = self.revlogv1
409 409 undecodable = []
410 for entry in repo.store.datafiles(undecodable=undecodable):
410 for entry in repo.store.data_entries(undecodable=undecodable):
411 411 for file_ in entry.files():
412 412 f = file_.unencoded_path
413 413 size = file_.file_size(repo.store.vfs)
414 414 if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
415 415 storefiles.add(_normpath(f))
416 416 subdirs.add(os.path.dirname(f))
417 417 for f in undecodable:
418 418 self._err(None, _(b"cannot decode filename '%s'") % f)
419 419 subdirprogress = ui.makeprogress(
420 420 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
421 421 )
422 422
423 423 for subdir, linkrevs in subdirnodes.items():
424 424 subdirfilenodes = self._verifymanifest(
425 425 linkrevs, subdir, storefiles, subdirprogress
426 426 )
427 427 for f, onefilenodes in subdirfilenodes.items():
428 428 filenodes.setdefault(f, {}).update(onefilenodes)
429 429
430 430 if not dir and subdirnodes:
431 431 assert subdirprogress is not None # help pytype
432 432 subdirprogress.complete()
433 433 if self.warnorphanstorefiles:
434 434 for f in sorted(storefiles):
435 435 self._warn(_(b"warning: orphan data file '%s'") % f)
436 436
437 437 return filenodes
438 438
439 439 def _crosscheckfiles(self, filelinkrevs, filenodes):
440 440 repo = self.repo
441 441 ui = self.ui
442 442 ui.status(_(b"crosschecking files in changesets and manifests\n"))
443 443
444 444 total = len(filelinkrevs) + len(filenodes)
445 445 progress = ui.makeprogress(
446 446 _(b'crosschecking'), unit=_(b'files'), total=total
447 447 )
448 448 if self.havemf:
449 449 for f in sorted(filelinkrevs):
450 450 progress.increment()
451 451 if f not in filenodes:
452 452 lr = filelinkrevs[f][0]
453 453 self._err(lr, _(b"in changeset but not in manifest"), f)
454 454
455 455 if self.havecl:
456 456 for f in sorted(filenodes):
457 457 progress.increment()
458 458 if f not in filelinkrevs:
459 459 try:
460 460 fl = repo.file(f)
461 461 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
462 462 except Exception:
463 463 lr = None
464 464 self._err(lr, _(b"in manifest but not in changeset"), f)
465 465
466 466 progress.complete()
467 467
468 468 def _verifyfiles(self, filenodes, filelinkrevs):
469 469 repo = self.repo
470 470 ui = self.ui
471 471 lrugetctx = self.lrugetctx
472 472 revlogv1 = self.revlogv1
473 473 havemf = self.havemf
474 474 ui.status(_(b"checking files\n"))
475 475
476 476 storefiles = set()
477 477 undecodable = []
478 for entry in repo.store.datafiles(undecodable=undecodable):
478 for entry in repo.store.data_entries(undecodable=undecodable):
479 479 for file_ in entry.files():
480 480 size = file_.file_size(repo.store.vfs)
481 481 f = file_.unencoded_path
482 482 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
483 483 storefiles.add(_normpath(f))
484 484 for f in undecodable:
485 485 self._err(None, _(b"cannot decode filename '%s'") % f)
486 486
487 487 state = {
488 488 # TODO this assumes revlog storage for changelog.
489 489 b'expectedversion': self.repo.changelog._format_version,
490 490 b'skipflags': self.skipflags,
491 491 # experimental config: censor.policy
492 492 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
493 493 }
494 494
495 495 files = sorted(set(filenodes) | set(filelinkrevs))
496 496 revisions = 0
497 497 progress = ui.makeprogress(
498 498 _(b'checking'), unit=_(b'files'), total=len(files)
499 499 )
500 500 for i, f in enumerate(files):
501 501 progress.update(i, item=f)
502 502 try:
503 503 linkrevs = filelinkrevs[f]
504 504 except KeyError:
505 505 # in manifest but not in changelog
506 506 linkrevs = []
507 507
508 508 if linkrevs:
509 509 lr = linkrevs[0]
510 510 else:
511 511 lr = None
512 512
513 513 try:
514 514 fl = repo.file(f)
515 515 except error.StorageError as e:
516 516 self._err(lr, _(b"broken revlog! (%s)") % e, f)
517 517 continue
518 518
519 519 for ff in fl.files():
520 520 try:
521 521 storefiles.remove(ff)
522 522 except KeyError:
523 523 if self.warnorphanstorefiles:
524 524 msg = _(b" warning: revlog '%s' not in fncache!")
525 525 self._warn(msg % ff)
526 526 self.fncachewarned = True
527 527
528 528 if not len(fl) and (self.havecl or self.havemf):
529 529 self._err(lr, _(b"empty or missing %s") % f)
530 530 else:
531 531 # Guard against implementations not setting this.
532 532 state[b'skipread'] = set()
533 533 state[b'safe_renamed'] = set()
534 534
535 535 for problem in fl.verifyintegrity(state):
536 536 if problem.node is not None:
537 537 linkrev = fl.linkrev(fl.rev(problem.node))
538 538 else:
539 539 linkrev = None
540 540
541 541 if problem.warning:
542 542 self._warn(problem.warning)
543 543 elif problem.error:
544 544 linkrev_msg = linkrev if linkrev is not None else lr
545 545 self._err(linkrev_msg, problem.error, f)
546 546 else:
547 547 raise error.ProgrammingError(
548 548 b'problem instance does not set warning or error '
549 549 b'attribute: %s' % problem.msg
550 550 )
551 551
552 552 seen = {}
553 553 for i in fl:
554 554 revisions += 1
555 555 n = fl.node(i)
556 556 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
557 557 if f in filenodes:
558 558 if havemf and n not in filenodes[f]:
559 559 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
560 560 else:
561 561 del filenodes[f][n]
562 562
563 563 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
564 564 continue
565 565
566 566 # check renames
567 567 try:
568 568 # This requires resolving fulltext (at least on revlogs,
569 569 # though not with LFS revisions). We may want
570 570 # ``verifyintegrity()`` to pass a set of nodes with
571 571 # rename metadata as an optimization.
572 572 rp = fl.renamed(n)
573 573 if rp:
574 574 if lr is not None and ui.verbose:
575 575 ctx = lrugetctx(lr)
576 576 if not any(rp[0] in pctx for pctx in ctx.parents()):
577 577 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
578 578 fl2 = repo.file(rp[0])
579 579 if not len(fl2):
580 580 m = _(b"empty or missing copy source revlog %s:%s")
581 581 self._err(lr, m % (rp[0], short(rp[1])), f)
582 582 elif rp[1] == self.repo.nullid:
583 583 msg = WARN_NULLID_COPY_SOURCE
584 584 msg %= (f, lr, rp[0], short(rp[1]))
585 585 ui.note(msg)
586 586 else:
587 587 fl2.rev(rp[1])
588 588 except Exception as inst:
589 589 self._exc(
590 590 lr, _(b"checking rename of %s") % short(n), inst, f
591 591 )
592 592
593 593 # cross-check
594 594 if f in filenodes:
595 595 fns = [(v, k) for k, v in filenodes[f].items()]
596 596 for lr, node in sorted(fns):
597 597 msg = _(b"manifest refers to unknown revision %s")
598 598 self._err(lr, msg % short(node), f)
599 599 progress.complete()
600 600
601 601 if self.warnorphanstorefiles:
602 602 for f in sorted(storefiles):
603 603 self._warn(_(b"warning: orphan data file '%s'") % f)
604 604
605 605 return len(files), revisions
606 606
607 607 def _verify_dirstate(self):
608 608 """Check that the dirstate is consistent with the parent's manifest"""
609 609 repo = self.repo
610 610 ui = self.ui
611 611 ui.status(_(b"checking dirstate\n"))
612 612
613 613 parent1, parent2 = repo.dirstate.parents()
614 614 m1 = repo[parent1].manifest()
615 615 m2 = repo[parent2].manifest()
616 616 dirstate_errors = 0
617 617
618 618 is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
619 619 narrow_matcher = repo.narrowmatch() if is_narrow else None
620 620
621 621 for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
622 622 ui.error(err)
623 623 dirstate_errors += 1
624 624
625 625 if dirstate_errors:
626 626 self.errors += dirstate_errors
627 627 return dirstate_errors
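
The `undecodable` side channel consumed by the two hunks above can be exercised the same way; a minimal sketch under the same assumptions (internal API, local repository in the current directory):

    # a minimal sketch: collect store paths and undecodable names, as verify does
    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    undecodable = []
    storefiles = set()
    for entry in repo.store.data_entries(undecodable=undecodable):
        for f in entry.files():
            storefiles.add(f.unencoded_path)
    for name in undecodable:
        print(b"cannot decode filename %s" % name)
    print(len(storefiles), "store files")
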
@@ -1,744 +1,744
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13
14 14 import stat
15 15
16 16 from mercurial.i18n import _
17 17 from mercurial.node import (
18 18 bin,
19 19 hex,
20 20 nullrev,
21 21 )
22 22 from mercurial.thirdparty import attr
23 23 from mercurial import (
24 24 ancestor,
25 25 bundlerepo,
26 26 error,
27 27 extensions,
28 28 localrepo,
29 29 mdiff,
30 30 pycompat,
31 31 revlog,
32 32 store,
33 33 verify,
34 34 )
35 35 from mercurial.interfaces import (
36 36 repository,
37 37 util as interfaceutil,
38 38 )
39 39 from mercurial.utils import (
40 40 cborutil,
41 41 storageutil,
42 42 )
43 43 from mercurial.revlogutils import flagutil
44 44
45 45 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
46 46 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
47 47 # be specifying the version(s) of Mercurial they are tested with, or
48 48 # leave the attribute unspecified.
49 49 testedwith = b'ships-with-hg-core'
50 50
51 51 REQUIREMENT = b'testonly-simplestore'
52 52
53 53
54 54 def validatenode(node):
55 55 if isinstance(node, int):
56 56 raise ValueError('expected node; got int')
57 57
58 58 if len(node) != 20:
59 59 raise ValueError('expected 20 byte node')
60 60
61 61
62 62 def validaterev(rev):
63 63 if not isinstance(rev, int):
64 64 raise ValueError('expected int')
65 65
66 66
67 67 class simplestoreerror(error.StorageError):
68 68 pass
69 69
70 70
71 71 @interfaceutil.implementer(repository.irevisiondelta)
72 72 @attr.s(slots=True)
73 73 class simplestorerevisiondelta:
74 74 node = attr.ib()
75 75 p1node = attr.ib()
76 76 p2node = attr.ib()
77 77 basenode = attr.ib()
78 78 flags = attr.ib()
79 79 baserevisionsize = attr.ib()
80 80 revision = attr.ib()
81 81 delta = attr.ib()
82 82 linknode = attr.ib(default=None)
83 83
84 84
85 85 @interfaceutil.implementer(repository.iverifyproblem)
86 86 @attr.s(frozen=True)
87 87 class simplefilestoreproblem:
88 88 warning = attr.ib(default=None)
89 89 error = attr.ib(default=None)
90 90 node = attr.ib(default=None)
91 91
92 92
93 93 @interfaceutil.implementer(repository.ifilestorage)
94 94 class filestorage:
95 95 """Implements storage for a tracked path.
96 96
97 97 Data is stored in the VFS in a directory corresponding to the tracked
98 98 path.
99 99
100 100 Index data is stored in an ``index`` file using CBOR.
101 101
102 102 Fulltext data is stored in files named after the node.
103 103 """
104 104
105 105 _flagserrorclass = simplestoreerror
106 106
107 107 def __init__(self, repo, svfs, path):
108 108 self.nullid = repo.nullid
109 109 self._repo = repo
110 110 self._svfs = svfs
111 111 self._path = path
112 112
113 113 self._storepath = b'/'.join([b'data', path])
114 114 self._indexpath = b'/'.join([self._storepath, b'index'])
115 115
116 116 indexdata = self._svfs.tryread(self._indexpath)
117 117 if indexdata:
118 118 indexdata = cborutil.decodeall(indexdata)
119 119
120 120 self._indexdata = indexdata or []
121 121 self._indexbynode = {}
122 122 self._indexbyrev = {}
123 123 self._index = []
124 124 self._refreshindex()
125 125
126 126 self._flagprocessors = dict(flagutil.flagprocessors)
127 127
128 128 def _refreshindex(self):
129 129 self._indexbynode.clear()
130 130 self._indexbyrev.clear()
131 131 self._index = []
132 132
133 133 for i, entry in enumerate(self._indexdata):
134 134 self._indexbynode[entry[b'node']] = entry
135 135 self._indexbyrev[i] = entry
136 136
137 137 self._indexbynode[self._repo.nullid] = {
138 138 b'node': self._repo.nullid,
139 139 b'p1': self._repo.nullid,
140 140 b'p2': self._repo.nullid,
141 141 b'linkrev': nullrev,
142 142 b'flags': 0,
143 143 }
144 144
145 145 self._indexbyrev[nullrev] = {
146 146 b'node': self._repo.nullid,
147 147 b'p1': self._repo.nullid,
148 148 b'p2': self._repo.nullid,
149 149 b'linkrev': nullrev,
150 150 b'flags': 0,
151 151 }
152 152
153 153 for i, entry in enumerate(self._indexdata):
154 154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155 155
156 156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 157 self._index.append(
158 158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 159 )
160 160
161 161 self._index.append((0, 0, 0, -1, -1, -1, -1, self._repo.nullid))
162 162
163 163 def __len__(self):
164 164 return len(self._indexdata)
165 165
166 166 def __iter__(self):
167 167 return iter(range(len(self)))
168 168
169 169 def revs(self, start=0, stop=None):
170 170 step = 1
171 171 if stop is not None:
172 172 if start > stop:
173 173 step = -1
174 174
175 175 stop += step
176 176 else:
177 177 stop = len(self)
178 178
179 179 return range(start, stop, step)
180 180
181 181 def parents(self, node):
182 182 validatenode(node)
183 183
184 184 if node not in self._indexbynode:
185 185 raise KeyError('unknown node')
186 186
187 187 entry = self._indexbynode[node]
188 188
189 189 return entry[b'p1'], entry[b'p2']
190 190
191 191 def parentrevs(self, rev):
192 192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 193 return self.rev(p1), self.rev(p2)
194 194
195 195 def rev(self, node):
196 196 validatenode(node)
197 197
198 198 try:
199 199 self._indexbynode[node]
200 200 except KeyError:
201 201 raise error.LookupError(node, self._indexpath, _('no node'))
202 202
203 203 for rev, entry in self._indexbyrev.items():
204 204 if entry[b'node'] == node:
205 205 return rev
206 206
207 207 raise error.ProgrammingError(b'this should not occur')
208 208
209 209 def node(self, rev):
210 210 validaterev(rev)
211 211
212 212 return self._indexbyrev[rev][b'node']
213 213
214 214 def hasnode(self, node):
215 215 validatenode(node)
216 216 return node in self._indexbynode
217 217
218 218 def censorrevision(self, tr, censornode, tombstone=b''):
219 219 raise NotImplementedError('TODO')
220 220
221 221 def lookup(self, node):
222 222 if isinstance(node, int):
223 223 return self.node(node)
224 224
225 225 if len(node) == 20:
226 226 self.rev(node)
227 227 return node
228 228
229 229 try:
230 230 rev = int(node)
231 231 if '%d' % rev != node:
232 232 raise ValueError
233 233
234 234 if rev < 0:
235 235 rev = len(self) + rev
236 236 if rev < 0 or rev >= len(self):
237 237 raise ValueError
238 238
239 239 return self.node(rev)
240 240 except (ValueError, OverflowError):
241 241 pass
242 242
243 243 if len(node) == 40:
244 244 try:
245 245 rawnode = bin(node)
246 246 self.rev(rawnode)
247 247 return rawnode
248 248 except TypeError:
249 249 pass
250 250
251 251 raise error.LookupError(node, self._path, _('invalid lookup input'))
252 252
253 253 def linkrev(self, rev):
254 254 validaterev(rev)
255 255
256 256 return self._indexbyrev[rev][b'linkrev']
257 257
258 258 def _flags(self, rev):
259 259 validaterev(rev)
260 260
261 261 return self._indexbyrev[rev][b'flags']
262 262
263 263 def _candelta(self, baserev, rev):
264 264 validaterev(baserev)
265 265 validaterev(rev)
266 266
267 267 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
268 268 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
269 269 ):
270 270 return False
271 271
272 272 return True
273 273
274 274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
275 275 if p1 is None and p2 is None:
276 276 p1, p2 = self.parents(node)
277 277 if node != storageutil.hashrevisionsha1(text, p1, p2):
278 278 raise simplestoreerror(
279 279 _("integrity check failed on %s") % self._path
280 280 )
281 281
282 282 def revision(self, nodeorrev, raw=False):
283 283 if isinstance(nodeorrev, int):
284 284 node = self.node(nodeorrev)
285 285 else:
286 286 node = nodeorrev
287 287 validatenode(node)
288 288
289 289 if node == self._repo.nullid:
290 290 return b''
291 291
292 292 rev = self.rev(node)
293 293 flags = self._flags(rev)
294 294
295 295 path = b'/'.join([self._storepath, hex(node)])
296 296 rawtext = self._svfs.read(path)
297 297
298 298 if raw:
299 299 validatehash = flagutil.processflagsraw(self, rawtext, flags)
300 300 text = rawtext
301 301 else:
302 302 r = flagutil.processflagsread(self, rawtext, flags)
303 303 text, validatehash = r
304 304 if validatehash:
305 305 self.checkhash(text, node, rev=rev)
306 306
307 307 return text
308 308
309 309 def rawdata(self, nodeorrev):
310 310 return self.revision(raw=True)
311 311
312 312 def read(self, node):
313 313 validatenode(node)
314 314
315 315 revision = self.revision(node)
316 316
317 317 if not revision.startswith(b'\1\n'):
318 318 return revision
319 319
320 320 start = revision.index(b'\1\n', 2)
321 321 return revision[start + 2 :]
322 322
323 323 def renamed(self, node):
324 324 validatenode(node)
325 325
326 326 if self.parents(node)[0] != self._repo.nullid:
327 327 return False
328 328
329 329 fulltext = self.revision(node)
330 330 m = storageutil.parsemeta(fulltext)[0]
331 331
332 332 if m and 'copy' in m:
333 333 return m['copy'], bin(m['copyrev'])
334 334
335 335 return False
336 336
337 337 def cmp(self, node, text):
338 338 validatenode(node)
339 339
340 340 t = text
341 341
342 342 if text.startswith(b'\1\n'):
343 343 t = b'\1\n\1\n' + text
344 344
345 345 p1, p2 = self.parents(node)
346 346
347 347 if storageutil.hashrevisionsha1(t, p1, p2) == node:
348 348 return False
349 349
350 350 if self.iscensored(self.rev(node)):
351 351 return text != b''
352 352
353 353 if self.renamed(node):
354 354 t2 = self.read(node)
355 355 return t2 != text
356 356
357 357 return True
358 358
359 359 def size(self, rev):
360 360 validaterev(rev)
361 361
362 362 node = self._indexbyrev[rev][b'node']
363 363
364 364 if self.renamed(node):
365 365 return len(self.read(node))
366 366
367 367 if self.iscensored(rev):
368 368 return 0
369 369
370 370 return len(self.revision(node))
371 371
372 372 def iscensored(self, rev):
373 373 validaterev(rev)
374 374
375 375 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
376 376
377 377 def commonancestorsheads(self, a, b):
378 378 validatenode(a)
379 379 validatenode(b)
380 380
381 381 a = self.rev(a)
382 382 b = self.rev(b)
383 383
384 384 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
385 385 return pycompat.maplist(self.node, ancestors)
386 386
387 387 def descendants(self, revs):
388 388 # This is a copy of revlog.descendants()
389 389 first = min(revs)
390 390 if first == nullrev:
391 391 for i in self:
392 392 yield i
393 393 return
394 394
395 395 seen = set(revs)
396 396 for i in self.revs(start=first + 1):
397 397 for x in self.parentrevs(i):
398 398 if x != nullrev and x in seen:
399 399 seen.add(i)
400 400 yield i
401 401 break
402 402
403 403 # Required by verify.
404 404 def files(self):
405 405 entries = self._svfs.listdir(self._storepath)
406 406
407 407 # Strip out undo.backup.* files created as part of transaction
408 408 # recording.
409 409 entries = [f for f in entries if not f.startswith('undo.backup.')]
410 410
411 411 return [b'/'.join((self._storepath, f)) for f in entries]
412 412
413 413 def storageinfo(
414 414 self,
415 415 exclusivefiles=False,
416 416 sharedfiles=False,
417 417 revisionscount=False,
418 418 trackedsize=False,
419 419 storedsize=False,
420 420 ):
421 421 # TODO do a real implementation of this
422 422 return {
423 423 'exclusivefiles': [],
424 424 'sharedfiles': [],
425 425 'revisionscount': len(self),
426 426 'trackedsize': 0,
427 427 'storedsize': None,
428 428 }
429 429
430 430 def verifyintegrity(self, state):
431 431 state['skipread'] = set()
432 432 for rev in self:
433 433 node = self.node(rev)
434 434 try:
435 435 self.revision(node)
436 436 except Exception as e:
437 437 yield simplefilestoreproblem(
438 438 error='unpacking %s: %s' % (node, e), node=node
439 439 )
440 440 state['skipread'].add(node)
441 441
442 442 def emitrevisions(
443 443 self,
444 444 nodes,
445 445 nodesorder=None,
446 446 revisiondata=False,
447 447 assumehaveparentrevisions=False,
448 448 deltamode=repository.CG_DELTAMODE_STD,
449 449 sidedata_helpers=None,
450 450 ):
451 451 # TODO this will probably break on some ordering options.
452 452 nodes = [n for n in nodes if n != self._repo.nullid]
453 453 if not nodes:
454 454 return
455 455 for delta in storageutil.emitrevisions(
456 456 self,
457 457 nodes,
458 458 nodesorder,
459 459 simplestorerevisiondelta,
460 460 revisiondata=revisiondata,
461 461 assumehaveparentrevisions=assumehaveparentrevisions,
462 462 deltamode=deltamode,
463 463 sidedata_helpers=sidedata_helpers,
464 464 ):
465 465 yield delta
466 466
467 467 def add(self, text, meta, transaction, linkrev, p1, p2):
468 468 if meta or text.startswith(b'\1\n'):
469 469 text = storageutil.packmeta(meta, text)
470 470
471 471 return self.addrevision(text, transaction, linkrev, p1, p2)
472 472
473 473 def addrevision(
474 474 self,
475 475 text,
476 476 transaction,
477 477 linkrev,
478 478 p1,
479 479 p2,
480 480 node=None,
481 481 flags=revlog.REVIDX_DEFAULT_FLAGS,
482 482 cachedelta=None,
483 483 ):
484 484 validatenode(p1)
485 485 validatenode(p2)
486 486
487 487 if flags:
488 488 node = node or storageutil.hashrevisionsha1(text, p1, p2)
489 489
490 490 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
491 491
492 492 node = node or storageutil.hashrevisionsha1(text, p1, p2)
493 493
494 494 if node in self._indexbynode:
495 495 return node
496 496
497 497 if validatehash:
498 498 self.checkhash(rawtext, node, p1=p1, p2=p2)
499 499
500 500 return self._addrawrevision(
501 501 node, rawtext, transaction, linkrev, p1, p2, flags
502 502 )
503 503
504 504 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
505 505 transaction.addbackup(self._indexpath)
506 506
507 507 path = b'/'.join([self._storepath, hex(node)])
508 508
509 509 self._svfs.write(path, rawtext)
510 510
511 511 self._indexdata.append(
512 512 {
513 513 b'node': node,
514 514 b'p1': p1,
515 515 b'p2': p2,
516 516 b'linkrev': link,
517 517 b'flags': flags,
518 518 }
519 519 )
520 520
521 521 self._reflectindexupdate()
522 522
523 523 return node
524 524
525 525 def _reflectindexupdate(self):
526 526 self._refreshindex()
527 527 self._svfs.write(
528 528 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
529 529 )
530 530
531 531 def addgroup(
532 532 self,
533 533 deltas,
534 534 linkmapper,
535 535 transaction,
536 536 addrevisioncb=None,
537 537 duplicaterevisioncb=None,
538 538 maybemissingparents=False,
539 539 ):
540 540 if maybemissingparents:
541 541 raise error.Abort(
542 542 _('simple store does not support missing parents ' 'write mode')
543 543 )
544 544
545 545 empty = True
546 546
547 547 transaction.addbackup(self._indexpath)
548 548
549 549 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
550 550 linkrev = linkmapper(linknode)
551 551 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
552 552
553 553 if node in self._indexbynode:
554 554 if duplicaterevisioncb:
555 555 duplicaterevisioncb(self, self.rev(node))
556 556 empty = False
557 557 continue
558 558
559 559 # Need to resolve the fulltext from the delta base.
560 560 if deltabase == self._repo.nullid:
561 561 text = mdiff.patch(b'', delta)
562 562 else:
563 563 text = mdiff.patch(self.revision(deltabase), delta)
564 564
565 565 rev = self._addrawrevision(
566 566 node, text, transaction, linkrev, p1, p2, flags
567 567 )
568 568
569 569 if addrevisioncb:
570 570 addrevisioncb(self, rev)
571 571 empty = False
572 572 return not empty
573 573
574 574 def _headrevs(self):
575 575 # Assume all revisions are heads by default.
576 576 revishead = {rev: True for rev in self._indexbyrev}
577 577
578 578 for rev, entry in self._indexbyrev.items():
579 579 # Unset head flag for all seen parents.
580 580 revishead[self.rev(entry[b'p1'])] = False
581 581 revishead[self.rev(entry[b'p2'])] = False
582 582
583 583 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
584 584
585 585 def heads(self, start=None, stop=None):
586 586 # This is copied from revlog.py.
587 587 if start is None and stop is None:
588 588 if not len(self):
589 589 return [self._repo.nullid]
590 590 return [self.node(r) for r in self._headrevs()]
591 591
592 592 if start is None:
593 593 start = self._repo.nullid
594 594 if stop is None:
595 595 stop = []
596 596 stoprevs = {self.rev(n) for n in stop}
597 597 startrev = self.rev(start)
598 598 reachable = {startrev}
599 599 heads = {startrev}
600 600
601 601 parentrevs = self.parentrevs
602 602 for r in self.revs(start=startrev + 1):
603 603 for p in parentrevs(r):
604 604 if p in reachable:
605 605 if r not in stoprevs:
606 606 reachable.add(r)
607 607 heads.add(r)
608 608 if p in heads and p not in stoprevs:
609 609 heads.remove(p)
610 610
611 611 return [self.node(r) for r in heads]
612 612
613 613 def children(self, node):
614 614 validatenode(node)
615 615
616 616 # This is a copy of revlog.children().
617 617 c = []
618 618 p = self.rev(node)
619 619 for r in self.revs(start=p + 1):
620 620 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
621 621 if prevs:
622 622 for pr in prevs:
623 623 if pr == p:
624 624 c.append(self.node(r))
625 625 elif p == nullrev:
626 626 c.append(self.node(r))
627 627 return c
628 628
629 629 def getstrippoint(self, minlink):
630 630 return storageutil.resolvestripinfo(
631 631 minlink,
632 632 len(self) - 1,
633 633 self._headrevs(),
634 634 self.linkrev,
635 635 self.parentrevs,
636 636 )
637 637
638 638 def strip(self, minlink, transaction):
639 639 if not len(self):
640 640 return
641 641
642 642 rev, _ignored = self.getstrippoint(minlink)
643 643 if rev == len(self):
644 644 return
645 645
646 646 # Purge index data starting at the requested revision.
647 647 self._indexdata[rev:] = []
648 648 self._reflectindexupdate()
649 649
650 650
651 651 def issimplestorefile(f, kind, st):
652 652 if kind != stat.S_IFREG:
653 653 return False
654 654
655 655 if store.isrevlog(f, kind, st):
656 656 return False
657 657
658 658 # Ignore transaction undo files.
659 659 if f.startswith('undo.'):
660 660 return False
661 661
662 662 # Otherwise assume it belongs to the simple store.
663 663 return True
664 664
665 665
666 666 class simplestore(store.encodedstore):
667 def datafiles(self, undecodable=None):
668 for x in super(simplestore, self).datafiles():
667 def data_entries(self, undecodable=None):
668 for x in super(simplestore, self).data_entries():
669 669 yield x
670 670
671 671 # Supplement with non-revlog files.
672 672 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
673 673
674 674 for f1, size in extrafiles:
675 675 try:
676 676 f2 = store.decodefilename(f1)
677 677 except KeyError:
678 678 if undecodable is None:
679 679 raise error.StorageError(b'undecodable revlog name %s' % f1)
680 680 else:
681 681 undecodable.append(f1)
682 682 continue
683 683
684 684 yield f2, size
685 685
686 686
687 687 def reposetup(ui, repo):
688 688 if not repo.local():
689 689 return
690 690
691 691 if isinstance(repo, bundlerepo.bundlerepository):
692 692 raise error.Abort(_('cannot use simple store with bundlerepo'))
693 693
694 694 class simplestorerepo(repo.__class__):
695 695 def file(self, f):
696 696 return filestorage(repo, self.svfs, f)
697 697
698 698 repo.__class__ = simplestorerepo
699 699
700 700
701 701 def featuresetup(ui, supported):
702 702 supported.add(REQUIREMENT)
703 703
704 704
705 705 def newreporequirements(orig, ui, createopts):
706 706 """Modifies default requirements for new repos to use the simple store."""
707 707 requirements = orig(ui, createopts)
708 708
709 709 # These requirements are only used to affect creation of the store
710 710 # object. We have our own store. So we can remove them.
711 711 # TODO do this once we feel like taking the test hit.
712 712 # if 'fncache' in requirements:
713 713 # requirements.remove('fncache')
714 714 # if 'dotencode' in requirements:
715 715 # requirements.remove('dotencode')
716 716
717 717 requirements.add(REQUIREMENT)
718 718
719 719 return requirements
720 720
721 721
722 722 def makestore(orig, requirements, path, vfstype):
723 723 if REQUIREMENT not in requirements:
724 724 return orig(requirements, path, vfstype)
725 725
726 726 return simplestore(path, vfstype)
727 727
728 728
729 729 def verifierinit(orig, self, *args, **kwargs):
730 730 orig(self, *args, **kwargs)
731 731
732 732 # We don't care that files in the store don't align with what is
733 733 # advertised. So suppress these warnings.
734 734 self.warnorphanstorefiles = False
735 735
736 736
737 737 def extsetup(ui):
738 738 localrepo.featuresetupfuncs.add(featuresetup)
739 739
740 740 extensions.wrapfunction(
741 741 localrepo, 'newreporequirements', newreporequirements
742 742 )
743 743 extensions.wrapfunction(localrepo, 'makestore', makestore)
744 744 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)
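
For out-of-tree extensions that override the old hook, the rename is mechanical, as the `simplestore` hunk above shows; a hypothetical sketch (the `mystore` class and its pass-through body are illustrative only):

    # a hypothetical sketch: adapting an extension that overrode the old hook
    from mercurial import store

    class mystore(store.fncachestore):
        # before this change the override was spelled `datafiles`
        def data_entries(self, matcher=None, undecodable=None):
            entries = super().data_entries(
                matcher=matcher, undecodable=undecodable
            )
            for entry in entries:
                yield entry  # an extension could filter or augment entries here
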