verify: allow the storage to signal when renames can be tested on `skipread`...
Matt Harbison
r44530:b9e174d4 default
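The hunks below touch two files: hgext/lfs/wrapper.py grows a branch in the wrapped ``_verify_revision`` that records externally-stored nodes in ``state[b'safe_renamed']``, and mercurial/interfaces/repository.py documents that key in the ``verifyintegrity()`` contract. Renames stay testable even when the LFS blob is absent because an LFS revision stores only a git-lfs pointer in the filelog, and the hg copy metadata travels inside that pointer as ``x-hg-*`` keys. A minimal sketch of reading that metadata from a raw pointer (illustrative only; the real parser is ``pointer.deserialize()``, and the ``parsepointer`` helper below skips all validation):

    def parsepointer(rawtext):
        # Each pointer line has the form b'key value'; collect into a dict.
        meta = {}
        for line in rawtext.splitlines():
            key, _sep, value = line.partition(b' ')
            meta[key] = value
        return meta

    raw = (b'version https://git-lfs.github.com/spec/v1\n'
           b'oid sha256:deadbeef\n'
           b'size 42\n'
           b'x-hg-copy old/name.txt\n'
           b'x-hg-copyrev 0123456789abcdef0123456789abcdef01234567\n')

    meta = parsepointer(raw)
    # filelogrenamed() below answers the rename question from these two keys
    # alone, without ever reading the blob contents:
    if b'x-hg-copy' in meta and b'x-hg-copyrev' in meta:
        copied = (meta[b'x-hg-copy'], meta[b'x-hg-copyrev'])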
hgext/lfs/wrapper.py
@@ -1,542 +1,546 @@
1 1 # wrapper.py - methods wrapping core mercurial logic
2 2 #
3 3 # Copyright 2017 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11
12 12 from mercurial.i18n import _
13 13 from mercurial.node import bin, hex, nullid, short
14 14 from mercurial.pycompat import (
15 15 getattr,
16 16 setattr,
17 17 )
18 18
19 19 from mercurial import (
20 20 bundle2,
21 21 changegroup,
22 22 cmdutil,
23 23 context,
24 24 error,
25 25 exchange,
26 26 exthelper,
27 27 localrepo,
28 28 pycompat,
29 29 revlog,
30 30 scmutil,
31 31 upgrade,
32 32 util,
33 33 vfs as vfsmod,
34 34 wireprotov1server,
35 35 )
36 36
37 37 from mercurial.interfaces import repository
38 38
39 39 from mercurial.utils import (
40 40 storageutil,
41 41 stringutil,
42 42 )
43 43
44 44 from ..largefiles import lfutil
45 45
46 46 from . import (
47 47 blobstore,
48 48 pointer,
49 49 )
50 50
51 51 eh = exthelper.exthelper()
52 52
53 53
54 54 @eh.wrapfunction(localrepo, b'makefilestorage')
55 55 def localrepomakefilestorage(orig, requirements, features, **kwargs):
56 56 if b'lfs' in requirements:
57 57 features.add(repository.REPO_FEATURE_LFS)
58 58
59 59 return orig(requirements=requirements, features=features, **kwargs)
60 60
61 61
62 62 @eh.wrapfunction(changegroup, b'allsupportedversions')
63 63 def allsupportedversions(orig, ui):
64 64 versions = orig(ui)
65 65 versions.add(b'03')
66 66 return versions
67 67
68 68
69 69 @eh.wrapfunction(wireprotov1server, b'_capabilities')
70 70 def _capabilities(orig, repo, proto):
71 71 '''Wrap server command to announce lfs server capability'''
72 72 caps = orig(repo, proto)
73 73 if util.safehasattr(repo.svfs, b'lfslocalblobstore'):
74 74 # Advertise a slightly different capability when lfs is *required*, so
75 75 # that the client knows it MUST load the extension. If lfs is not
76 76 # required on the server, there's no reason to autoload the extension
77 77 # on the client.
78 78 if b'lfs' in repo.requirements:
79 79 caps.append(b'lfs-serve')
80 80
81 81 caps.append(b'lfs')
82 82 return caps
83 83
84 84
85 85 def bypasscheckhash(self, text):
86 86 return False
87 87
88 88
89 89 def readfromstore(self, text):
90 90 """Read filelog content from local blobstore transform for flagprocessor.
91 91
92 92 Default transform for flagprocessor, returning contents from blobstore.
93 93 Returns a 3-tuple (text, validatehash, sidedata) where validatehash is
94 94 True, as the contents of the blobstore should be checked using checkhash.
95 95 """
96 96 p = pointer.deserialize(text)
97 97 oid = p.oid()
98 98 store = self.opener.lfslocalblobstore
99 99 if not store.has(oid):
100 100 p.filename = self.filename
101 101 self.opener.lfsremoteblobstore.readbatch([p], store)
102 102
103 103 # The caller will validate the content
104 104 text = store.read(oid, verify=False)
105 105
106 106 # pack hg filelog metadata
107 107 hgmeta = {}
108 108 for k in p.keys():
109 109 if k.startswith(b'x-hg-'):
110 110 name = k[len(b'x-hg-') :]
111 111 hgmeta[name] = p[k]
112 112 if hgmeta or text.startswith(b'\1\n'):
113 113 text = storageutil.packmeta(hgmeta, text)
114 114
115 115 return (text, True, {})
116 116
117 117
118 118 def writetostore(self, text, sidedata):
119 119 # hg filelog metadata (includes rename, etc)
120 120 hgmeta, offset = storageutil.parsemeta(text)
121 121 if offset and offset > 0:
122 122 # lfs blob does not contain hg filelog metadata
123 123 text = text[offset:]
124 124
125 125 # git-lfs only supports sha256
126 126 oid = hex(hashlib.sha256(text).digest())
127 127 self.opener.lfslocalblobstore.write(oid, text)
128 128
129 129 # replace contents with metadata
130 130 longoid = b'sha256:%s' % oid
131 131 metadata = pointer.gitlfspointer(oid=longoid, size=b'%d' % len(text))
132 132
133 133 # by default, we expect the content to be binary. however, LFS could also
134 134 # be used for non-binary content. add a special entry for non-binary data.
135 135 # this will be used by filectx.isbinary().
136 136 if not stringutil.binary(text):
137 137 # not hg filelog metadata (affecting commit hash), no "x-hg-" prefix
138 138 metadata[b'x-is-binary'] = b'0'
139 139
140 140 # translate hg filelog metadata to lfs metadata with "x-hg-" prefix
141 141 if hgmeta is not None:
142 142 for k, v in pycompat.iteritems(hgmeta):
143 143 metadata[b'x-hg-%s' % k] = v
144 144
145 145 rawtext = metadata.serialize()
146 146 return (rawtext, False)
147 147
148 148
149 149 def _islfs(rlog, node=None, rev=None):
150 150 if rev is None:
151 151 if node is None:
152 152 # both None - likely working copy content where node is not ready
153 153 return False
154 154 rev = rlog.rev(node)
155 155 else:
156 156 node = rlog.node(rev)
157 157 if node == nullid:
158 158 return False
159 159 flags = rlog.flags(rev)
160 160 return bool(flags & revlog.REVIDX_EXTSTORED)
161 161
162 162
163 163 # Wrapping may also be applied by remotefilelog
164 164 def filelogaddrevision(
165 165 orig,
166 166 self,
167 167 text,
168 168 transaction,
169 169 link,
170 170 p1,
171 171 p2,
172 172 cachedelta=None,
173 173 node=None,
174 174 flags=revlog.REVIDX_DEFAULT_FLAGS,
175 175 **kwds
176 176 ):
177 177 # The matcher isn't available if reposetup() wasn't called.
178 178 lfstrack = self._revlog.opener.options.get(b'lfstrack')
179 179
180 180 if lfstrack:
181 181 textlen = len(text)
182 182 # exclude hg rename meta from file size
183 183 meta, offset = storageutil.parsemeta(text)
184 184 if offset:
185 185 textlen -= offset
186 186
187 187 if lfstrack(self._revlog.filename, textlen):
188 188 flags |= revlog.REVIDX_EXTSTORED
189 189
190 190 return orig(
191 191 self,
192 192 text,
193 193 transaction,
194 194 link,
195 195 p1,
196 196 p2,
197 197 cachedelta=cachedelta,
198 198 node=node,
199 199 flags=flags,
200 200 **kwds
201 201 )
202 202
203 203
204 204 # Wrapping may also be applied by remotefilelog
205 205 def filelogrenamed(orig, self, node):
206 206 if _islfs(self._revlog, node):
207 207 rawtext = self._revlog.rawdata(node)
208 208 if not rawtext:
209 209 return False
210 210 metadata = pointer.deserialize(rawtext)
211 211 if b'x-hg-copy' in metadata and b'x-hg-copyrev' in metadata:
212 212 return metadata[b'x-hg-copy'], bin(metadata[b'x-hg-copyrev'])
213 213 else:
214 214 return False
215 215 return orig(self, node)
216 216
217 217
218 218 # Wrapping may also be applied by remotefilelog
219 219 def filelogsize(orig, self, rev):
220 220 if _islfs(self._revlog, rev=rev):
221 221 # fast path: use lfs metadata to answer size
222 222 rawtext = self._revlog.rawdata(rev)
223 223 metadata = pointer.deserialize(rawtext)
224 224 return int(metadata[b'size'])
225 225 return orig(self, rev)
226 226
227 227
228 228 @eh.wrapfunction(revlog, b'_verify_revision')
229 229 def _verify_revision(orig, rl, skipflags, state, node):
230 230 if _islfs(rl, node=node):
231 231 rawtext = rl.rawdata(node)
232 232 metadata = pointer.deserialize(rawtext)
233 233
234 234 # Don't skip blobs that are stored locally, as local verification is
235 235 # relatively cheap and there's no other way to verify the raw data in
236 236 # the revlog.
237 237 if rl.opener.lfslocalblobstore.has(metadata.oid()):
238 238 skipflags &= ~revlog.REVIDX_EXTSTORED
239 elif skipflags & revlog.REVIDX_EXTSTORED:
240 # The wrapped method will set `skipread`, but there's enough local
241 # info to check renames.
242 state[b'safe_renamed'].add(node)
239 243
240 244 orig(rl, skipflags, state, node)
241 245
242 246
243 247 @eh.wrapfunction(context.basefilectx, b'cmp')
244 248 def filectxcmp(orig, self, fctx):
245 249 """returns True if text is different than fctx"""
246 250 # some fctx (e.g. hg-git) are not based on basefilectx and do not have islfs
247 251 if self.islfs() and getattr(fctx, 'islfs', lambda: False)():
248 252 # fast path: check LFS oid
249 253 p1 = pointer.deserialize(self.rawdata())
250 254 p2 = pointer.deserialize(fctx.rawdata())
251 255 return p1.oid() != p2.oid()
252 256 return orig(self, fctx)
253 257
254 258
255 259 @eh.wrapfunction(context.basefilectx, b'isbinary')
256 260 def filectxisbinary(orig, self):
257 261 if self.islfs():
258 262 # fast path: use lfs metadata to answer isbinary
259 263 metadata = pointer.deserialize(self.rawdata())
260 264 # if lfs metadata says nothing, assume it's binary by default
261 265 return bool(int(metadata.get(b'x-is-binary', 1)))
262 266 return orig(self)
263 267
264 268
265 269 def filectxislfs(self):
266 270 return _islfs(self.filelog()._revlog, self.filenode())
267 271
268 272
269 273 @eh.wrapfunction(cmdutil, b'_updatecatformatter')
270 274 def _updatecatformatter(orig, fm, ctx, matcher, path, decode):
271 275 orig(fm, ctx, matcher, path, decode)
272 276 fm.data(rawdata=ctx[path].rawdata())
273 277
274 278
275 279 @eh.wrapfunction(scmutil, b'wrapconvertsink')
276 280 def convertsink(orig, sink):
277 281 sink = orig(sink)
278 282 if sink.repotype == b'hg':
279 283
280 284 class lfssink(sink.__class__):
281 285 def putcommit(
282 286 self,
283 287 files,
284 288 copies,
285 289 parents,
286 290 commit,
287 291 source,
288 292 revmap,
289 293 full,
290 294 cleanp2,
291 295 ):
292 296 pc = super(lfssink, self).putcommit
293 297 node = pc(
294 298 files,
295 299 copies,
296 300 parents,
297 301 commit,
298 302 source,
299 303 revmap,
300 304 full,
301 305 cleanp2,
302 306 )
303 307
304 308 if b'lfs' not in self.repo.requirements:
305 309 ctx = self.repo[node]
306 310
307 311 # The file list may contain removed files, so check for
308 312 # membership before assuming it is in the context.
309 313 if any(f in ctx and ctx[f].islfs() for f, n in files):
310 314 self.repo.requirements.add(b'lfs')
311 315 self.repo._writerequirements()
312 316
313 317 return node
314 318
315 319 sink.__class__ = lfssink
316 320
317 321 return sink
318 322
319 323
320 324 # bundlerepo uses "vfsmod.readonlyvfs(othervfs)", we need to make sure lfs
321 325 # options and blob stores are passed from othervfs to the new readonlyvfs.
322 326 @eh.wrapfunction(vfsmod.readonlyvfs, b'__init__')
323 327 def vfsinit(orig, self, othervfs):
324 328 orig(self, othervfs)
325 329 # copy lfs related options
326 330 for k, v in othervfs.options.items():
327 331 if k.startswith(b'lfs'):
328 332 self.options[k] = v
329 333 # also copy lfs blobstores. note: this can run before reposetup, so lfs
330 334 # blobstore attributes are not always ready at this time.
331 335 for name in [b'lfslocalblobstore', b'lfsremoteblobstore']:
332 336 if util.safehasattr(othervfs, name):
333 337 setattr(self, name, getattr(othervfs, name))
334 338
335 339
336 340 def _prefetchfiles(repo, revs, match):
337 341 """Ensure that required LFS blobs are present, fetching them as a group if
338 342 needed."""
339 343 if not util.safehasattr(repo.svfs, b'lfslocalblobstore'):
340 344 return
341 345
342 346 pointers = []
343 347 oids = set()
344 348 localstore = repo.svfs.lfslocalblobstore
345 349
346 350 for rev in revs:
347 351 ctx = repo[rev]
348 352 for f in ctx.walk(match):
349 353 p = pointerfromctx(ctx, f)
350 354 if p and p.oid() not in oids and not localstore.has(p.oid()):
351 355 p.filename = f
352 356 pointers.append(p)
353 357 oids.add(p.oid())
354 358
355 359 if pointers:
356 360 # Recalculating the repo store here allows 'paths.default' that is set
357 361 # on the repo by a clone command to be used for the update.
358 362 blobstore.remote(repo).readbatch(pointers, localstore)
359 363
360 364
361 365 def _canskipupload(repo):
362 366 # Skip if this hasn't been passed to reposetup()
363 367 if not util.safehasattr(repo.svfs, b'lfsremoteblobstore'):
364 368 return True
365 369
366 370 # if remotestore is a null store, upload is a no-op and can be skipped
367 371 return isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
368 372
369 373
370 374 def candownload(repo):
371 375 # Skip if this hasn't been passed to reposetup()
372 376 if not util.safehasattr(repo.svfs, b'lfsremoteblobstore'):
373 377 return False
374 378
375 379 # if remotestore is a null store, downloads will lead to nothing
376 380 return not isinstance(repo.svfs.lfsremoteblobstore, blobstore._nullremote)
377 381
378 382
379 383 def uploadblobsfromrevs(repo, revs):
380 384 '''upload lfs blobs introduced by revs
381 385
382 386 Note: also used by other extensions, e.g. infinitepush. Avoid renaming.
383 387 '''
384 388 if _canskipupload(repo):
385 389 return
386 390 pointers = extractpointers(repo, revs)
387 391 uploadblobs(repo, pointers)
388 392
389 393
390 394 def prepush(pushop):
391 395 """Prepush hook.
392 396
393 397 Read through the revisions to push, looking for filelog entries that can be
394 398 deserialized into metadata so that we can block the push on their upload to
395 399 the remote blobstore.
396 400 """
397 401 return uploadblobsfromrevs(pushop.repo, pushop.outgoing.missing)
398 402
399 403
400 404 @eh.wrapfunction(exchange, b'push')
401 405 def push(orig, repo, remote, *args, **kwargs):
402 406 """bail on push if the extension isn't enabled on remote when needed, and
403 407 update the remote store based on the destination path."""
404 408 if b'lfs' in repo.requirements:
405 409 # If the remote peer is for a local repo, the requirement tests in the
406 410 # base class method enforce lfs support. Otherwise, some revisions in
407 411 # this repo use lfs, and the remote repo needs the extension loaded.
408 412 if not remote.local() and not remote.capable(b'lfs'):
409 413 # This is a copy of the message in exchange.push() when requirements
410 414 # are missing between local repos.
411 415 m = _(b"required features are not supported in the destination: %s")
412 416 raise error.Abort(
413 417 m % b'lfs', hint=_(b'enable the lfs extension on the server')
414 418 )
415 419
416 420 # Repositories where this extension is disabled won't have the field.
417 421 # But if there's a requirement, then the extension must be loaded AND
418 422 # there may be blobs to push.
419 423 remotestore = repo.svfs.lfsremoteblobstore
420 424 try:
421 425 repo.svfs.lfsremoteblobstore = blobstore.remote(repo, remote.url())
422 426 return orig(repo, remote, *args, **kwargs)
423 427 finally:
424 428 repo.svfs.lfsremoteblobstore = remotestore
425 429 else:
426 430 return orig(repo, remote, *args, **kwargs)
427 431
428 432
429 433 # when writing a bundle via "hg bundle" command, upload related LFS blobs
430 434 @eh.wrapfunction(bundle2, b'writenewbundle')
431 435 def writenewbundle(
432 436 orig, ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
433 437 ):
434 438 """upload LFS blobs added by outgoing revisions on 'hg bundle'"""
435 439 uploadblobsfromrevs(repo, outgoing.missing)
436 440 return orig(
437 441 ui, repo, source, filename, bundletype, outgoing, *args, **kwargs
438 442 )
439 443
440 444
441 445 def extractpointers(repo, revs):
442 446 """return a list of lfs pointers added by given revs"""
443 447 repo.ui.debug(b'lfs: computing set of blobs to upload\n')
444 448 pointers = {}
445 449
446 450 makeprogress = repo.ui.makeprogress
447 451 with makeprogress(
448 452 _(b'lfs search'), _(b'changesets'), len(revs)
449 453 ) as progress:
450 454 for r in revs:
451 455 ctx = repo[r]
452 456 for p in pointersfromctx(ctx).values():
453 457 pointers[p.oid()] = p
454 458 progress.increment()
455 459 return sorted(pointers.values(), key=lambda p: p.oid())
456 460
457 461
458 462 def pointerfromctx(ctx, f, removed=False):
459 463 """return a pointer for the named file from the given changectx, or None if
460 464 the file isn't LFS.
461 465
462 466 Optionally, the pointer for a file deleted from the context can be returned.
463 467 Since no such pointer is actually stored, and to distinguish from a non-LFS
464 468 file, this pointer is represented by an empty dict.
465 469 """
466 470 _ctx = ctx
467 471 if f not in ctx:
468 472 if not removed:
469 473 return None
470 474 if f in ctx.p1():
471 475 _ctx = ctx.p1()
472 476 elif f in ctx.p2():
473 477 _ctx = ctx.p2()
474 478 else:
475 479 return None
476 480 fctx = _ctx[f]
477 481 if not _islfs(fctx.filelog()._revlog, fctx.filenode()):
478 482 return None
479 483 try:
480 484 p = pointer.deserialize(fctx.rawdata())
481 485 if ctx == _ctx:
482 486 return p
483 487 return {}
484 488 except pointer.InvalidPointer as ex:
485 489 raise error.Abort(
486 490 _(b'lfs: corrupted pointer (%s@%s): %s\n')
487 491 % (f, short(_ctx.node()), ex)
488 492 )
489 493
490 494
491 495 def pointersfromctx(ctx, removed=False):
492 496 """return a dict {path: pointer} for given single changectx.
493 497
494 498 If ``removed`` == True and the LFS file was removed from ``ctx``, the value
495 499 stored for the path is an empty dict.
496 500 """
497 501 result = {}
498 502 m = ctx.repo().narrowmatch()
499 503
500 504 # TODO: consider manifest.fastread() instead
501 505 for f in ctx.files():
502 506 if not m(f):
503 507 continue
504 508 p = pointerfromctx(ctx, f, removed=removed)
505 509 if p is not None:
506 510 result[f] = p
507 511 return result
508 512
509 513
510 514 def uploadblobs(repo, pointers):
511 515 """upload given pointers from local blobstore"""
512 516 if not pointers:
513 517 return
514 518
515 519 remoteblob = repo.svfs.lfsremoteblobstore
516 520 remoteblob.writebatch(pointers, repo.svfs.lfslocalblobstore)
517 521
518 522
519 523 @eh.wrapfunction(upgrade, b'_finishdatamigration')
520 524 def upgradefinishdatamigration(orig, ui, srcrepo, dstrepo, requirements):
521 525 orig(ui, srcrepo, dstrepo, requirements)
522 526
523 527 # Skip if this hasn't been passed to reposetup()
524 528 if util.safehasattr(
525 529 srcrepo.svfs, b'lfslocalblobstore'
526 530 ) and util.safehasattr(dstrepo.svfs, b'lfslocalblobstore'):
527 531 srclfsvfs = srcrepo.svfs.lfslocalblobstore.vfs
528 532 dstlfsvfs = dstrepo.svfs.lfslocalblobstore.vfs
529 533
530 534 for dirpath, dirs, files in srclfsvfs.walk():
531 535 for oid in files:
532 536 ui.write(_(b'copying lfs blob %s\n') % oid)
533 537 lfutil.link(srclfsvfs.join(oid), dstlfsvfs.join(oid))
534 538
535 539
536 540 @eh.wrapfunction(upgrade, b'preservedrequirements')
537 541 @eh.wrapfunction(upgrade, b'supporteddestrequirements')
538 542 def upgraderequirements(orig, repo):
539 543 reqs = orig(repo)
540 544 if b'lfs' in repo.requirements:
541 545 reqs.add(b'lfs')
542 546 return reqs
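The pivotal hunk above is in ``_verify_revision``: when the blob is present in the local store, the ``REVIDX_EXTSTORED`` flag is cleared from ``skipflags`` and the content is verified outright; when it is absent, the wrapped method will still add the node to ``skipread``, but the node is additionally recorded in ``state[b'safe_renamed']``, since the rename metadata lives in the pointer rather than in the missing blob. A minimal sketch of how a verifier could combine the two sets (the consumer side in Mercurial's verify code is not shown here; ``should_check_rename`` is a hypothetical helper):

    def should_check_rename(node, state):
        # Rename checks are safe unless the revision is unreadable AND the
        # storage did not flag its copy metadata as still usable.
        return node not in state[b'skipread'] or node in state[b'safe_renamed']

    state = {b'skipread': {b'n1', b'n2'}, b'safe_renamed': {b'n2'}}
    assert should_check_rename(b'n0', state)      # readable: check normally
    assert should_check_rename(b'n2', state)      # unreadable, flagged safe
    assert not should_check_rename(b'n1', state)  # unreadable, nothing usable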
mercurial/interfaces/repository.py
@@ -1,1984 +1,1986 @@
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # When narrowing is finalized and no longer subject to format changes,
15 15 # we should move this to just "narrow" or similar.
16 16 NARROW_REQUIREMENT = b'narrowhg-experimental'
17 17
18 18 # Local repository feature string.
19 19
20 20 # Revlogs are being used for file storage.
21 21 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
22 22 # The storage part of the repository is shared from an external source.
23 23 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
24 24 # LFS supported for backing file storage.
25 25 REPO_FEATURE_LFS = b'lfs'
26 26 # Repository supports being stream cloned.
27 27 REPO_FEATURE_STREAM_CLONE = b'streamclone'
28 28 # Files storage may lack data for all ancestors.
29 29 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
30 30
31 31 REVISION_FLAG_CENSORED = 1 << 15
32 32 REVISION_FLAG_ELLIPSIS = 1 << 14
33 33 REVISION_FLAG_EXTSTORED = 1 << 13
34 34 REVISION_FLAG_SIDEDATA = 1 << 12
35 35
36 36 REVISION_FLAGS_KNOWN = (
37 37 REVISION_FLAG_CENSORED
38 38 | REVISION_FLAG_ELLIPSIS
39 39 | REVISION_FLAG_EXTSTORED
40 40 | REVISION_FLAG_SIDEDATA
41 41 )
42 42
43 43 CG_DELTAMODE_STD = b'default'
44 44 CG_DELTAMODE_PREV = b'previous'
45 45 CG_DELTAMODE_FULL = b'fulltext'
46 46 CG_DELTAMODE_P1 = b'p1'
47 47
48 48
49 49 class ipeerconnection(interfaceutil.Interface):
50 50 """Represents a "connection" to a repository.
51 51
52 52 This is the base interface for representing a connection to a repository.
53 53 It holds basic properties and methods applicable to all peer types.
54 54
55 55 This is not a complete interface definition and should not be used
56 56 outside of this module.
57 57 """
58 58
59 59 ui = interfaceutil.Attribute("""ui.ui instance""")
60 60
61 61 def url():
62 62 """Returns a URL string representing this peer.
63 63
64 64 Currently, implementations expose the raw URL used to construct the
65 65 instance. It may contain credentials as part of the URL. The
66 66 expectations of the value aren't well-defined and this could lead to
67 67 data leakage.
68 68
69 69 TODO audit/clean consumers and more clearly define the contents of this
70 70 value.
71 71 """
72 72
73 73 def local():
74 74 """Returns a local repository instance.
75 75
76 76 If the peer represents a local repository, returns an object that
77 77 can be used to interface with it. Otherwise returns ``None``.
78 78 """
79 79
80 80 def peer():
81 81 """Returns an object conforming to this interface.
82 82
83 83 Most implementations will ``return self``.
84 84 """
85 85
86 86 def canpush():
87 87 """Returns a boolean indicating if this peer can be pushed to."""
88 88
89 89 def close():
90 90 """Close the connection to this peer.
91 91
92 92 This is called when the peer will no longer be used. Resources
93 93 associated with the peer should be cleaned up.
94 94 """
95 95
96 96
97 97 class ipeercapabilities(interfaceutil.Interface):
98 98 """Peer sub-interface related to capabilities."""
99 99
100 100 def capable(name):
101 101 """Determine support for a named capability.
102 102
103 103 Returns ``False`` if capability not supported.
104 104
105 105 Returns ``True`` if boolean capability is supported. Returns a string
106 106 if capability support is non-boolean.
107 107
108 108 Capability strings may or may not map to wire protocol capabilities.
109 109 """
110 110
111 111 def requirecap(name, purpose):
112 112 """Require a capability to be present.
113 113
114 114 Raises a ``CapabilityError`` if the capability isn't present.
115 115 """
116 116
117 117
118 118 class ipeercommands(interfaceutil.Interface):
119 119 """Client-side interface for communicating over the wire protocol.
120 120
121 121 This interface is used as a gateway to the Mercurial wire protocol.
122 122 Methods commonly call wire protocol commands of the same name.
123 123 """
124 124
125 125 def branchmap():
126 126 """Obtain heads in named branches.
127 127
128 128 Returns a dict mapping branch name to an iterable of nodes that are
129 129 heads on that branch.
130 130 """
131 131
132 132 def capabilities():
133 133 """Obtain capabilities of the peer.
134 134
135 135 Returns a set of string capabilities.
136 136 """
137 137
138 138 def clonebundles():
139 139 """Obtains the clone bundles manifest for the repo.
140 140
141 141 Returns the manifest as unparsed bytes.
142 142 """
143 143
144 144 def debugwireargs(one, two, three=None, four=None, five=None):
145 145 """Used to facilitate debugging of arguments passed over the wire."""
146 146
147 147 def getbundle(source, **kwargs):
148 148 """Obtain remote repository data as a bundle.
149 149
150 150 This command is how the bulk of repository data is transferred from
151 151 the peer to the local repository.
152 152
153 153 Returns a generator of bundle data.
154 154 """
155 155
156 156 def heads():
157 157 """Determine all known head revisions in the peer.
158 158
159 159 Returns an iterable of binary nodes.
160 160 """
161 161
162 162 def known(nodes):
163 163 """Determine whether multiple nodes are known.
164 164
165 165 Accepts an iterable of nodes whose presence to check for.
166 166
167 167 Returns an iterable of booleans indicating whether the corresponding node
168 168 at that index is known to the peer.
169 169 """
170 170
171 171 def listkeys(namespace):
172 172 """Obtain all keys in a pushkey namespace.
173 173
174 174 Returns an iterable of key names.
175 175 """
176 176
177 177 def lookup(key):
178 178 """Resolve a value to a known revision.
179 179
180 180 Returns a binary node of the resolved revision on success.
181 181 """
182 182
183 183 def pushkey(namespace, key, old, new):
184 184 """Set a value using the ``pushkey`` protocol.
185 185
186 186 Arguments correspond to the pushkey namespace and key to operate on and
187 187 the old and new values for that key.
188 188
189 189 Returns a string with the peer result. The value inside varies by the
190 190 namespace.
191 191 """
192 192
193 193 def stream_out():
194 194 """Obtain streaming clone data.
195 195
196 196 Successful result should be a generator of data chunks.
197 197 """
198 198
199 199 def unbundle(bundle, heads, url):
200 200 """Transfer repository data to the peer.
201 201
202 202 This is how the bulk of data during a push is transferred.
203 203
204 204 Returns the integer number of heads added to the peer.
205 205 """
206 206
207 207
208 208 class ipeerlegacycommands(interfaceutil.Interface):
209 209 """Interface for implementing support for legacy wire protocol commands.
210 210
211 211 Wire protocol commands transition to legacy status when they are no longer
212 212 used by modern clients. To facilitate identifying which commands are
213 213 legacy, the interfaces are split.
214 214 """
215 215
216 216 def between(pairs):
217 217 """Obtain nodes between pairs of nodes.
218 218
219 219 ``pairs`` is an iterable of node pairs.
220 220
221 221 Returns an iterable of iterables of nodes corresponding to each
222 222 requested pair.
223 223 """
224 224
225 225 def branches(nodes):
226 226 """Obtain ancestor changesets of specific nodes back to a branch point.
227 227
228 228 For each requested node, the peer finds the first ancestor node that is
229 229 a DAG root or is a merge.
230 230
231 231 Returns an iterable of iterables with the resolved values for each node.
232 232 """
233 233
234 234 def changegroup(nodes, source):
235 235 """Obtain a changegroup with data for descendants of specified nodes."""
236 236
237 237 def changegroupsubset(bases, heads, source):
238 238 """Obtain a changegroup with data for revisions between ``bases`` and ``heads``."""
239 239
240 240
241 241 class ipeercommandexecutor(interfaceutil.Interface):
242 242 """Represents a mechanism to execute remote commands.
243 243
244 244 This is the primary interface for requesting that wire protocol commands
245 245 be executed. Instances of this interface are active in a context manager
246 246 and have a well-defined lifetime. When the context manager exits, all
247 247 outstanding requests are waited on.
248 248 """
249 249
250 250 def callcommand(name, args):
251 251 """Request that a named command be executed.
252 252
253 253 Receives the command name and a dictionary of command arguments.
254 254
255 255 Returns a ``concurrent.futures.Future`` that will resolve to the
256 256 result of that command request. That exact value is left up to
257 257 the implementation and possibly varies by command.
258 258
259 259 Not all commands can coexist with other commands in an executor
260 260 instance: it depends on the underlying wire protocol transport being
261 261 used and the command itself.
262 262
263 263 Implementations MAY call ``sendcommands()`` automatically if the
264 264 requested command can not coexist with other commands in this executor.
265 265
266 266 Implementations MAY call ``sendcommands()`` automatically when the
267 267 future's ``result()`` is called. So, consumers using multiple
268 268 commands with an executor MUST ensure that ``result()`` is not called
269 269 until all command requests have been issued.
270 270 """
271 271
272 272 def sendcommands():
273 273 """Trigger submission of queued command requests.
274 274
275 275 Not all transports submit commands as soon as they are requested to
276 276 run. When called, this method forces queued command requests to be
277 277 issued. It will no-op if all commands have already been sent.
278 278
279 279 When called, no more new commands may be issued with this executor.
280 280 """
281 281
282 282 def close():
283 283 """Signal that this command request is finished.
284 284
285 285 When called, no more new commands may be issued. All outstanding
286 286 commands that have previously been issued are waited on before
287 287 returning. This not only includes waiting for the futures to resolve,
288 288 but also waiting for all response data to arrive. In other words,
289 289 calling this waits for all on-wire state for issued command requests
290 290 to finish.
291 291
292 292 When used as a context manager, this method is called when exiting the
293 293 context manager.
294 294
295 295 This method may call ``sendcommands()`` if there are buffered commands.
296 296 """
297 297
298 298
299 299 class ipeerrequests(interfaceutil.Interface):
300 300 """Interface for executing commands on a peer."""
301 301
302 302 limitedarguments = interfaceutil.Attribute(
303 303 """True if the peer cannot receive large argument value for commands."""
304 304 )
305 305
306 306 def commandexecutor():
307 307 """A context manager that resolves to an ipeercommandexecutor.
308 308
309 309 The object this resolves to can be used to issue command requests
310 310 to the peer.
311 311
312 312 Callers should call its ``callcommand`` method to issue command
313 313 requests.
314 314
315 315 A new executor should be obtained for each distinct set of commands
316 316 (possibly just a single command) that the consumer wants to execute
317 317 as part of a single operation or round trip. This is because some
318 318 peers are half-duplex and/or don't support persistent connections.
319 319 e.g. in the case of HTTP peers, commands sent to an executor represent
320 320 a single HTTP request. While some peers may support multiple command
321 321 sends over the wire per executor, consumers need to code to the least
322 322 capable peer. So it should be assumed that command executors buffer
323 323 called commands until they are told to send them and that each
324 324 command executor could result in a new connection or wire-level request
325 325 being issued.
326 326 """
327 327
328 328
329 329 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
330 330 """Unified interface for peer repositories.
331 331
332 332 All peer instances must conform to this interface.
333 333 """
334 334
335 335
336 336 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
337 337 """Unified peer interface for wire protocol version 2 peers."""
338 338
339 339 apidescriptor = interfaceutil.Attribute(
340 340 """Data structure holding description of server API."""
341 341 )
342 342
343 343
344 344 @interfaceutil.implementer(ipeerbase)
345 345 class peer(object):
346 346 """Base class for peer repositories."""
347 347
348 348 limitedarguments = False
349 349
350 350 def capable(self, name):
351 351 caps = self.capabilities()
352 352 if name in caps:
353 353 return True
354 354
355 355 name = b'%s=' % name
356 356 for cap in caps:
357 357 if cap.startswith(name):
358 358 return cap[len(name) :]
359 359
360 360 return False
361 361
362 362 def requirecap(self, name, purpose):
363 363 if self.capable(name):
364 364 return
365 365
366 366 raise error.CapabilityError(
367 367 _(
368 368 b'cannot %s; remote repository does not support the '
369 369 b'\'%s\' capability'
370 370 )
371 371 % (purpose, name)
372 372 )
373 373
374 374
375 375 class iverifyproblem(interfaceutil.Interface):
376 376 """Represents a problem with the integrity of the repository.
377 377
378 378 Instances of this interface are emitted to describe an integrity issue
379 379 with a repository (e.g. corrupt storage, missing data, etc).
380 380
381 381 Instances are essentially messages associated with severity.
382 382 """
383 383
384 384 warning = interfaceutil.Attribute(
385 385 """Message indicating a non-fatal problem."""
386 386 )
387 387
388 388 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
389 389
390 390 node = interfaceutil.Attribute(
391 391 """Revision encountering the problem.
392 392
393 393 ``None`` means the problem doesn't apply to a single revision.
394 394 """
395 395 )
396 396
397 397
398 398 class irevisiondelta(interfaceutil.Interface):
399 399 """Represents a delta between one revision and another.
400 400
401 401 Instances convey enough information to allow a revision to be exchanged
402 402 with another repository.
403 403
404 404 Instances represent the fulltext revision data or a delta against
405 405 another revision. Therefore the ``revision`` and ``delta`` attributes
406 406 are mutually exclusive.
407 407
408 408 Typically used for changegroup generation.
409 409 """
410 410
411 411 node = interfaceutil.Attribute("""20 byte node of this revision.""")
412 412
413 413 p1node = interfaceutil.Attribute(
414 414 """20 byte node of 1st parent of this revision."""
415 415 )
416 416
417 417 p2node = interfaceutil.Attribute(
418 418 """20 byte node of 2nd parent of this revision."""
419 419 )
420 420
421 421 linknode = interfaceutil.Attribute(
422 422 """20 byte node of the changelog revision this node is linked to."""
423 423 )
424 424
425 425 flags = interfaceutil.Attribute(
426 426 """2 bytes of integer flags that apply to this revision.
427 427
428 428 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
429 429 """
430 430 )
431 431
432 432 basenode = interfaceutil.Attribute(
433 433 """20 byte node of the revision this data is a delta against.
434 434
435 435 ``nullid`` indicates that the revision is a full revision and not
436 436 a delta.
437 437 """
438 438 )
439 439
440 440 baserevisionsize = interfaceutil.Attribute(
441 441 """Size of base revision this delta is against.
442 442
443 443 May be ``None`` if ``basenode`` is ``nullid``.
444 444 """
445 445 )
446 446
447 447 revision = interfaceutil.Attribute(
448 448 """Raw fulltext of revision data for this node."""
449 449 )
450 450
451 451 delta = interfaceutil.Attribute(
452 452 """Delta between ``basenode`` and ``node``.
453 453
454 454 Stored in the bdiff delta format.
455 455 """
456 456 )
457 457
458 458
459 459 class ifilerevisionssequence(interfaceutil.Interface):
460 460 """Contains index data for all revisions of a file.
461 461
462 462 Types implementing this behave like lists of tuples. The index
463 463 in the list corresponds to the revision number. The values contain
464 464 index metadata.
465 465
466 466 The *null* revision (revision number -1) is always the last item
467 467 in the index.
468 468 """
469 469
470 470 def __len__():
471 471 """The total number of revisions."""
472 472
473 473 def __getitem__(rev):
474 474 """Returns the object having a specific revision number.
475 475
476 476 Returns an 8-tuple with the following fields:
477 477
478 478 offset+flags
479 479 Contains the offset and flags for the revision. 64-bit unsigned
480 480 integer where first 6 bytes are the offset and the next 2 bytes
481 481 are flags. The offset can be 0 if it is not used by the store.
482 482 compressed size
483 483 Size of the revision data in the store. It can be 0 if it isn't
484 484 needed by the store.
485 485 uncompressed size
486 486 Fulltext size. It can be 0 if it isn't needed by the store.
487 487 base revision
488 488 Revision number of revision the delta for storage is encoded
489 489 against. -1 indicates not encoded against a base revision.
490 490 link revision
491 491 Revision number of changelog revision this entry is related to.
492 492 p1 revision
493 493 Revision number of 1st parent. -1 if no 1st parent.
494 494 p2 revision
495 495 Revision number of 2nd parent. -1 if no 2nd parent.
496 496 node
497 497 Binary node value for this revision number.
498 498
499 499 Negative values should index off the end of the sequence. ``-1``
500 500 should return the null revision. ``-2`` should return the most
501 501 recent revision.
502 502 """
503 503
504 504 def __contains__(rev):
505 505 """Whether a revision number exists."""
506 506
507 507 def insert(self, i, entry):
508 508 """Add an item to the index at specific revision."""
509 509
510 510
511 511 class ifileindex(interfaceutil.Interface):
512 512 """Storage interface for index data of a single file.
513 513
514 514 File storage data is divided into index metadata and data storage.
515 515 This interface defines the index portion of the interface.
516 516
517 517 The index logically consists of:
518 518
519 519 * A mapping between revision numbers and nodes.
520 520 * DAG data (storing and querying the relationship between nodes).
521 521 * Metadata to facilitate storage.
522 522 """
523 523
524 524 def __len__():
525 525 """Obtain the number of revisions stored for this file."""
526 526
527 527 def __iter__():
528 528 """Iterate over revision numbers for this file."""
529 529
530 530 def hasnode(node):
531 531 """Returns a bool indicating if a node is known to this store.
532 532
533 533 Implementations must only return True for full, binary node values:
534 534 hex nodes, revision numbers, and partial node matches must be
535 535 rejected.
536 536
537 537 The null node is never present.
538 538 """
539 539
540 540 def revs(start=0, stop=None):
541 541 """Iterate over revision numbers for this file, with control."""
542 542
543 543 def parents(node):
544 544 """Returns a 2-tuple of parent nodes for a revision.
545 545
546 546 Values will be ``nullid`` if the parent is empty.
547 547 """
548 548
549 549 def parentrevs(rev):
550 550 """Like parents() but operates on revision numbers."""
551 551
552 552 def rev(node):
553 553 """Obtain the revision number given a node.
554 554
555 555 Raises ``error.LookupError`` if the node is not known.
556 556 """
557 557
558 558 def node(rev):
559 559 """Obtain the node value given a revision number.
560 560
561 561 Raises ``IndexError`` if the revision is not known.
562 562 """
563 563
564 564 def lookup(node):
565 565 """Attempt to resolve a value to a node.
566 566
567 567 Value can be a binary node, hex node, revision number, or a string
568 568 that can be converted to an integer.
569 569
570 570 Raises ``error.LookupError`` if a node could not be resolved.
571 571 """
572 572
573 573 def linkrev(rev):
574 574 """Obtain the changeset revision number a revision is linked to."""
575 575
576 576 def iscensored(rev):
577 577 """Return whether a revision's content has been censored."""
578 578
579 579 def commonancestorsheads(node1, node2):
580 580 """Obtain an iterable of nodes containing heads of common ancestors.
581 581
582 582 See ``ancestor.commonancestorsheads()``.
583 583 """
584 584
585 585 def descendants(revs):
586 586 """Obtain descendant revision numbers for a set of revision numbers.
587 587
588 588 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
589 589 """
590 590
591 591 def heads(start=None, stop=None):
592 592 """Obtain a list of nodes that are DAG heads, with control.
593 593
594 594 The set of revisions examined can be limited by specifying
595 595 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
596 596 iterable of nodes. DAG traversal starts at earlier revision
597 597 ``start`` and iterates forward until any node in ``stop`` is
598 598 encountered.
599 599 """
600 600
601 601 def children(node):
602 602 """Obtain nodes that are children of a node.
603 603
604 604 Returns a list of nodes.
605 605 """
606 606
607 607
608 608 class ifiledata(interfaceutil.Interface):
609 609 """Storage interface for data storage of a specific file.
610 610
611 611 This complements ``ifileindex`` and provides an interface for accessing
612 612 data for a tracked file.
613 613 """
614 614
615 615 def size(rev):
616 616 """Obtain the fulltext size of file data.
617 617
618 618 Any metadata is excluded from size measurements.
619 619 """
620 620
621 621 def revision(node, raw=False):
622 622 """"Obtain fulltext data for a node.
623 623
624 624 By default, any storage transformations are applied before the data
625 625 is returned. If ``raw`` is True, non-raw storage transformations
626 626 are not applied.
627 627
628 628 The fulltext data may contain a header containing metadata. Most
629 629 consumers should use ``read()`` to obtain the actual file data.
630 630 """
631 631
632 632 def rawdata(node):
633 633 """Obtain raw data for a node.
634 634 """
635 635
636 636 def read(node):
637 637 """Resolve file fulltext data.
638 638
639 639 This is similar to ``revision()`` except any metadata in the data
640 640 headers is stripped.
641 641 """
642 642
643 643 def renamed(node):
644 644 """Obtain copy metadata for a node.
645 645
646 646 Returns ``False`` if no copy metadata is stored or a 2-tuple of
647 647 (path, node) from which this revision was copied.
648 648 """
649 649
650 650 def cmp(node, fulltext):
651 651 """Compare fulltext to another revision.
652 652
653 653 Returns True if the fulltext is different from what is stored.
654 654
655 655 This takes copy metadata into account.
656 656
657 657 TODO better document the copy metadata and censoring logic.
658 658 """
659 659
660 660 def emitrevisions(
661 661 nodes,
662 662 nodesorder=None,
663 663 revisiondata=False,
664 664 assumehaveparentrevisions=False,
665 665 deltamode=CG_DELTAMODE_STD,
666 666 ):
667 667 """Produce ``irevisiondelta`` for revisions.
668 668
669 669 Given an iterable of nodes, emits objects conforming to the
670 670 ``irevisiondelta`` interface that describe revisions in storage.
671 671
672 672 This method is a generator.
673 673
674 674 The input nodes may be unordered. Implementations must ensure that a
675 675 node's parents are emitted before the node itself. Transitively, this
676 676 means that a node may only be emitted once all its ancestors in
677 677 ``nodes`` have also been emitted.
678 678
679 679 By default, emits "index" data (the ``node``, ``p1node``, and
680 680 ``p2node`` attributes). If ``revisiondata`` is set, revision data
681 681 will also be present on the emitted objects.
682 682
683 683 With default argument values, implementations can choose to emit
684 684 either fulltext revision data or a delta. When emitting deltas,
685 685 implementations must consider whether the delta's base revision
686 686 fulltext is available to the receiver.
687 687
688 688 The base revision fulltext is guaranteed to be available if any of
689 689 the following are met:
690 690
691 691 * Its fulltext revision was emitted by this method call.
692 692 * A delta for that revision was emitted by this method call.
693 693 * ``assumehaveparentrevisions`` is True and the base revision is a
694 694 parent of the node.
695 695
696 696 ``nodesorder`` can be used to control the order that revisions are
697 697 emitted. By default, revisions can be reordered as long as they are
698 698 in DAG topological order (see above). If the value is ``nodes``,
699 699 the iteration order from ``nodes`` should be used. If the value is
700 700 ``storage``, then the native order from the backing storage layer
701 701 is used. (Not all storage layers will have strong ordering and behavior
702 702 of this mode is storage-dependent.) ``nodes`` ordering can force
703 703 revisions to be emitted before their ancestors, so consumers should
704 704 use it with care.
705 705
706 706 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
707 707 be set and it is the caller's responsibility to resolve it, if needed.
708 708
709 709 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
710 710 all revision data should be emitted as deltas against the revision
711 711 emitted just prior. The initial revision should be a delta against its
712 712 1st parent.
713 713 """
714 714
715 715
716 716 class ifilemutation(interfaceutil.Interface):
717 717 """Storage interface for mutation events of a tracked file."""
718 718
719 719 def add(filedata, meta, transaction, linkrev, p1, p2):
720 720 """Add a new revision to the store.
721 721
722 722 Takes file data, dictionary of metadata, a transaction, linkrev,
723 723 and parent nodes.
724 724
725 725 Returns the node that was added.
726 726
727 727 May no-op if a revision matching the supplied data is already stored.
728 728 """
729 729
730 730 def addrevision(
731 731 revisiondata,
732 732 transaction,
733 733 linkrev,
734 734 p1,
735 735 p2,
736 736 node=None,
737 737 flags=0,
738 738 cachedelta=None,
739 739 ):
740 740 """Add a new revision to the store.
741 741
742 742 This is similar to ``add()`` except it operates at a lower level.
743 743
744 744 The data passed in already contains a metadata header, if any.
745 745
746 746 ``node`` and ``flags`` can be used to define the expected node and
747 747 the flags to use with storage. ``flags`` is a bitwise value composed
748 748 of the various ``REVISION_FLAG_*`` constants.
749 749
750 750 ``add()`` is usually called when adding files from e.g. the working
751 751 directory. ``addrevision()`` is often called by ``add()`` and for
752 752 scenarios where revision data has already been computed, such as when
753 753 applying raw data from a peer repo.
754 754 """
755 755
756 756 def addgroup(
757 757 deltas,
758 758 linkmapper,
759 759 transaction,
760 760 addrevisioncb=None,
761 761 maybemissingparents=False,
762 762 ):
763 763 """Process a series of deltas for storage.
764 764
765 765 ``deltas`` is an iterable of 7-tuples of
766 766 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
767 767 to add.
768 768
769 769 The ``delta`` field contains ``mpatch`` data to apply to a base
770 770 revision, identified by ``deltabase``. The base node can be
771 771 ``nullid``, in which case the header from the delta can be ignored
772 772 and the delta used as the fulltext.
773 773
774 774 ``addrevisioncb`` should be called for each node as it is committed.
775 775
776 776 ``maybemissingparents`` is a bool indicating whether the incoming
777 777 data may reference parents/ancestor revisions that aren't present.
778 778 This flag is set when receiving data into a "shallow" store that
779 779 doesn't hold all history.
780 780
781 781 Returns a list of nodes that were processed. A node will be in the list
782 782 even if it existed in the store previously.
783 783 """
784 784
785 785 def censorrevision(tr, node, tombstone=b''):
786 786 """Remove the content of a single revision.
787 787
788 788 The specified ``node`` will have its content purged from storage.
789 789 Future attempts to access the revision data for this node will
790 790 result in failure.
791 791
792 792 A ``tombstone`` message can optionally be stored. This message may be
793 793 displayed to users when they attempt to access the missing revision
794 794 data.
795 795
796 796 Storage backends may have stored deltas against the previous content
797 797 in this revision. As part of censoring a revision, these storage
798 798 backends are expected to rewrite any internally stored deltas such
799 799 that they no longer reference the deleted content.
800 800 """
801 801
802 802 def getstrippoint(minlink):
803 803 """Find the minimum revision that must be stripped to strip a linkrev.
804 804
805 805 Returns a 2-tuple containing the minimum revision number and a set
806 806 of all revisions numbers that would be broken by this strip.
807 807
808 808 TODO this is highly revlog centric and should be abstracted into
809 809 a higher-level deletion API. ``repair.strip()`` relies on this.
810 810 """
811 811
812 812 def strip(minlink, transaction):
813 813 """Remove storage of items starting at a linkrev.
814 814
815 815 This uses ``getstrippoint()`` to determine the first node to remove.
816 816 Then it effectively truncates storage for all revisions after that.
817 817
818 818 TODO this is highly revlog centric and should be abstracted into a
819 819 higher-level deletion API.
820 820 """
821 821
822 822
823 823 class ifilestorage(ifileindex, ifiledata, ifilemutation):
824 824 """Complete storage interface for a single tracked file."""
825 825
826 826 def files():
827 827 """Obtain paths that are backing storage for this file.
828 828
829 829 TODO this is used heavily by verify code and there should probably
830 830 be a better API for that.
831 831 """
832 832
833 833 def storageinfo(
834 834 exclusivefiles=False,
835 835 sharedfiles=False,
836 836 revisionscount=False,
837 837 trackedsize=False,
838 838 storedsize=False,
839 839 ):
840 840 """Obtain information about storage for this file's data.
841 841
842 842 Returns a dict describing storage for this tracked path. The keys
843 843 in the dict map to arguments of the same. The arguments are bools
844 844 indicating whether to calculate and obtain that data.
845 845
846 846 exclusivefiles
847 847 Iterable of (vfs, path) describing files that are exclusively
848 848 used to back storage for this tracked path.
849 849
850 850 sharedfiles
851 851 Iterable of (vfs, path) describing files that are used to back
852 852 storage for this tracked path. Those files may also provide storage
853 853 for other stored entities.
854 854
855 855 revisionscount
856 856 Number of revisions available for retrieval.
857 857
858 858 trackedsize
859 859 Total size in bytes of all tracked revisions. This is a sum of the
860 860 length of the fulltext of all revisions.
861 861
862 862 storedsize
863 863 Total size in bytes used to store data for all tracked revisions.
864 864 This is commonly less than ``trackedsize`` due to internal usage
865 865 of deltas rather than fulltext revisions.
866 866
867 867 Not all storage backends may support all queries or have a reasonable
868 868 value to use. In that case, the value should be set to ``None`` and
869 869 callers are expected to handle this special value.
870 870 """
871 871
872 872 def verifyintegrity(state):
873 873 """Verifies the integrity of file storage.
874 874
875 875 ``state`` is a dict holding state of the verifier process. It can be
876 876 used to communicate data between invocations of multiple storage
877 877 primitives.
878 878
879 879 If individual revisions cannot have their revision content resolved,
880 880 the method is expected to set the ``skipread`` key to a set of nodes
881 that encountered problems.
881 that encountered problems. If set, the method can also add the node(s)
882 to ``safe_renamed`` to indicate nodes whose rename metadata can still be
883 checked with the currently accessible data.
882 884
883 885 The method yields objects conforming to the ``iverifyproblem``
884 886 interface.
885 887 """
886 888
887 889
888 890 class idirs(interfaceutil.Interface):
889 891 """Interface representing a collection of directories from paths.
890 892
891 893 This interface is essentially a derived data structure representing
892 894 directories from a collection of paths.
893 895 """
894 896
895 897 def addpath(path):
896 898 """Add a path to the collection.
897 899
898 900 All directories in the path will be added to the collection.
899 901 """
900 902
901 903 def delpath(path):
902 904 """Remove a path from the collection.
903 905
904 906 If the removal was the last path in a particular directory, the
905 907 directory is removed from the collection.
906 908 """
907 909
908 910 def __iter__():
909 911 """Iterate over the directories in this collection of paths."""
910 912
911 913 def __contains__(path):
912 914 """Whether a specific directory is in this collection."""
913 915
914 916
915 917 class imanifestdict(interfaceutil.Interface):
916 918 """Interface representing a manifest data structure.
917 919
918 920 A manifest is effectively a dict mapping paths to entries. Each entry
919 921 consists of a binary node and extra flags affecting that entry.
920 922 """
921 923
922 924 def __getitem__(path):
923 925 """Returns the binary node value for a path in the manifest.
924 926
925 927 Raises ``KeyError`` if the path does not exist in the manifest.
926 928
927 929 Equivalent to ``self.find(path)[0]``.
928 930 """
929 931
930 932 def find(path):
931 933 """Returns the entry for a path in the manifest.
932 934
933 935 Returns a 2-tuple of (node, flags).
934 936
935 937 Raises ``KeyError`` if the path does not exist in the manifest.
936 938 """
937 939
938 940 def __len__():
939 941 """Return the number of entries in the manifest."""
940 942
941 943 def __nonzero__():
942 944 """Returns True if the manifest has entries, False otherwise."""
943 945
944 946 __bool__ = __nonzero__
945 947
946 948 def __setitem__(path, node):
947 949 """Define the node value for a path in the manifest.
948 950
949 951 If the path is already in the manifest, its flags will be copied to
950 952 the new entry.
951 953 """
952 954
953 955 def __contains__(path):
954 956 """Whether a path exists in the manifest."""
955 957
956 958 def __delitem__(path):
957 959 """Remove a path from the manifest.
958 960
959 961 Raises ``KeyError`` if the path is not in the manifest.
960 962 """
961 963
962 964 def __iter__():
963 965 """Iterate over paths in the manifest."""
964 966
965 967 def iterkeys():
966 968 """Iterate over paths in the manifest."""
967 969
968 970 def keys():
969 971 """Obtain a list of paths in the manifest."""
970 972
971 973 def filesnotin(other, match=None):
972 974 """Obtain the set of paths in this manifest but not in another.
973 975
974 976 ``match`` is an optional matcher function to be applied to both
975 977 manifests.
976 978
977 979 Returns a set of paths.
978 980 """
979 981
980 982 def dirs():
981 983 """Returns an object implementing the ``idirs`` interface."""
982 984
983 985 def hasdir(dir):
984 986 """Returns a bool indicating if a directory is in this manifest."""
985 987
986 988 def matches(match):
987 989 """Generate a new manifest filtered through a matcher.
988 990
989 991 Returns an object conforming to the ``imanifestdict`` interface.
990 992 """
991 993
992 994 def walk(match):
993 995 """Generator of paths in manifest satisfying a matcher.
994 996
995 997 This is equivalent to ``self.matches(match).iterkeys()`` except a new
996 998 manifest object is not created.
997 999
998 1000 If the matcher has explicit files listed and they don't exist in
999 1001 the manifest, ``match.bad()`` is called for each missing file.
1000 1002 """
1001 1003
1002 1004 def diff(other, match=None, clean=False):
1003 1005 """Find differences between this manifest and another.
1004 1006
1005 1007 This manifest is compared to ``other``.
1006 1008
1007 1009 If ``match`` is provided, the two manifests are filtered against this
1008 1010 matcher and only entries satisfying the matcher are compared.
1009 1011
1010 1012 If ``clean`` is True, unchanged files are included in the returned
1011 1013 object.
1012 1014
1013 1015 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1014 1016 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1015 1017 represents the node and flags for this manifest and ``(node2, flag2)``
1016 1018 are the same for the other manifest.
1017 1019 """
1018 1020
1019 1021 def setflag(path, flag):
1020 1022 """Set the flag value for a given path.
1021 1023
1022 1024 Raises ``KeyError`` if the path is not already in the manifest.
1023 1025 """
1024 1026
1025 1027 def get(path, default=None):
1026 1028 """Obtain the node value for a path or a default value if missing."""
1027 1029
1028 1030 def flags(path, default=b''):
1029 1031 """Return the flags value for a path or a default value if missing."""
1030 1032
1031 1033 def copy():
1032 1034 """Return a copy of this manifest."""
1033 1035
1034 1036 def items():
1035 1037 """Returns an iterable of (path, node) for items in this manifest."""
1036 1038
1037 1039 def iteritems():
1038 1040 """Identical to items()."""
1039 1041
1040 1042 def iterentries():
1041 1043 """Returns an iterable of (path, node, flags) for this manifest.
1042 1044
1043 1045 Similar to ``iteritems()`` except items are a 3-tuple and include
1044 1046 flags.
1045 1047 """
1046 1048
1047 1049 def text():
1048 1050 """Obtain the raw data representation for this manifest.
1049 1051
1050 1052 Result is used to create a manifest revision.
1051 1053 """
1052 1054
1053 1055 def fastdelta(base, changes):
1054 1056 """Obtain a delta between this manifest and another given changes.
1055 1057
1056 1058 ``base`` is the raw data representation for another manifest.
1057 1059
1058 1060 ``changes`` is an iterable of ``(path, to_delete)``.
1059 1061
1060 1062 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1061 1063 delta between ``base`` and this manifest.
1062 1064 """
1063 1065
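
A brief sketch of the ``changes`` shape (illustrative names; it is assumed here that ``base`` is supplied as a bytearray and that ``changes`` is sorted by path):

    # (path, to_delete): False means add/update the path, True means delete it.
    changes = [(b'added.txt', False), (b'removed.txt', True)]
    arraytext, deltatext = m.fastdelta(bytearray(basetext), changes)
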
1064 1066
1065 1067 class imanifestrevisionbase(interfaceutil.Interface):
1066 1068 """Base interface representing a single revision of a manifest.
1067 1069
1068 1070 Should not be used as a primary interface: should always be inherited
1069 1071 as part of a larger interface.
1070 1072 """
1071 1073
1072 1074 def new():
1073 1075 """Obtain a new manifest instance.
1074 1076
1075 1077 Returns an object conforming to the ``imanifestrevisionwritable``
1076 1078 interface. The instance will be associated with the same
1077 1079 ``imanifestlog`` collection as this instance.
1078 1080 """
1079 1081
1080 1082 def copy():
1081 1083 """Obtain a copy of this manifest instance.
1082 1084
1083 1085 Returns an object conforming to the ``imanifestrevisionwritable``
1084 1086 interface. The instance will be associated with the same
1085 1087 ``imanifestlog`` collection as this instance.
1086 1088 """
1087 1089
1088 1090 def read():
1089 1091 """Obtain the parsed manifest data structure.
1090 1092
1091 1093 The returned object conforms to the ``imanifestdict`` interface.
1092 1094 """
1093 1095
1094 1096
1095 1097 class imanifestrevisionstored(imanifestrevisionbase):
1096 1098 """Interface representing a manifest revision committed to storage."""
1097 1099
1098 1100 def node():
1099 1101 """The binary node for this manifest."""
1100 1102
1101 1103 parents = interfaceutil.Attribute(
1102 1104 """List of binary nodes that are parents for this manifest revision."""
1103 1105 )
1104 1106
1105 1107 def readdelta(shallow=False):
1106 1108 """Obtain the manifest data structure representing changes from parent.
1107 1109
1108 1110 This manifest is compared to its first parent. A new manifest representing
1109 1111 those differences is constructed.
1110 1112
1111 1113 The returned object conforms to the ``imanifestdict`` interface.
1112 1114 """
1113 1115
1114 1116 def readfast(shallow=False):
1115 1117 """Calls either ``read()`` or ``readdelta()``.
1116 1118
1117 1119 The faster of the two options is called.
1118 1120 """
1119 1121
1120 1122 def find(key):
1121 1123 """Calls self.read().find(key)``.
1122 1124
1123 1125 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1124 1126 """
1125 1127
1126 1128
1127 1129 class imanifestrevisionwritable(imanifestrevisionbase):
1128 1130 """Interface representing a manifest revision that can be committed."""
1129 1131
1130 1132 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1131 1133 """Add this revision to storage.
1132 1134
1133 1135 Takes a transaction object, the changeset revision number it will
1134 1136 be associated with, its parent nodes, and lists of added and
1135 1137 removed paths.
1136 1138
1137 1139 If match is provided, storage can choose not to inspect or write out
1138 1140 items that do not match. Storage is still required to be able to provide
1139 1141 the full manifest in the future for any directories written (these
1140 1142 manifests should not be "narrowed on disk").
1141 1143
1142 1144 Returns the binary node of the created revision.
1143 1145 """
1144 1146
1145 1147
1146 1148 class imanifeststorage(interfaceutil.Interface):
1147 1149 """Storage interface for manifest data."""
1148 1150
1149 1151 tree = interfaceutil.Attribute(
1150 1152 """The path to the directory this manifest tracks.
1151 1153
1152 1154 The empty bytestring represents the root manifest.
1153 1155 """
1154 1156 )
1155 1157
1156 1158 index = interfaceutil.Attribute(
1157 1159 """An ``ifilerevisionssequence`` instance."""
1158 1160 )
1159 1161
1160 1162 indexfile = interfaceutil.Attribute(
1161 1163 """Path of revlog index file.
1162 1164
1163 1165 TODO this is revlog specific and should not be exposed.
1164 1166 """
1165 1167 )
1166 1168
1167 1169 opener = interfaceutil.Attribute(
1168 1170 """VFS opener to use to access underlying files used for storage.
1169 1171
1170 1172 TODO this is revlog specific and should not be exposed.
1171 1173 """
1172 1174 )
1173 1175
1174 1176 version = interfaceutil.Attribute(
1175 1177 """Revlog version number.
1176 1178
1177 1179 TODO this is revlog specific and should not be exposed.
1178 1180 """
1179 1181 )
1180 1182
1181 1183 _generaldelta = interfaceutil.Attribute(
1182 1184 """Whether generaldelta storage is being used.
1183 1185
1184 1186 TODO this is revlog specific and should not be exposed.
1185 1187 """
1186 1188 )
1187 1189
1188 1190 fulltextcache = interfaceutil.Attribute(
1189 1191 """Dict with cache of fulltexts.
1190 1192
1191 1193 TODO this doesn't feel appropriate for the storage interface.
1192 1194 """
1193 1195 )
1194 1196
1195 1197 def __len__():
1196 1198 """Obtain the number of revisions stored for this manifest."""
1197 1199
1198 1200 def __iter__():
1199 1201 """Iterate over revision numbers for this manifest."""
1200 1202
1201 1203 def rev(node):
1202 1204 """Obtain the revision number given a binary node.
1203 1205
1204 1206 Raises ``error.LookupError`` if the node is not known.
1205 1207 """
1206 1208
1207 1209 def node(rev):
1208 1210 """Obtain the node value given a revision number.
1209 1211
1210 1212 Raises ``error.LookupError`` if the revision is not known.
1211 1213 """
1212 1214
1213 1215 def lookup(value):
1214 1216 """Attempt to resolve a value to a node.
1215 1217
1216 1218 Value can be a binary node, hex node, revision number, or a bytes
1217 1219 that can be converted to an integer.
1218 1220
1219 1221 Raises ``error.LookupError`` if the value cannot be resolved to a node.
1220 1222 """
1221 1223
1222 1224 def parents(node):
1223 1225 """Returns a 2-tuple of parent nodes for a node.
1224 1226
1225 1227 Values will be ``nullid`` if the parent is empty.
1226 1228 """
1227 1229
1228 1230 def parentrevs(rev):
1229 1231 """Like parents() but operates on revision numbers."""
1230 1232
1231 1233 def linkrev(rev):
1232 1234 """Obtain the changeset revision number a revision is linked to."""
1233 1235
1234 1236 def revision(node, _df=None, raw=False):
1235 1237 """Obtain fulltext data for a node."""
1236 1238
1237 1239 def rawdata(node, _df=None):
1238 1240 """Obtain raw data for a node."""
1239 1241
1240 1242 def revdiff(rev1, rev2):
1241 1243 """Obtain a delta between two revision numbers.
1242 1244
1243 1245 The returned data is the result of ``bdiff.bdiff()`` on the raw
1244 1246 revision data.
1245 1247 """
1246 1248
1247 1249 def cmp(node, fulltext):
1248 1250 """Compare fulltext to another revision.
1249 1251
1250 1252 Returns True if the fulltext is different from what is stored.
1251 1253 """
1252 1254
1253 1255 def emitrevisions(
1254 1256 nodes,
1255 1257 nodesorder=None,
1256 1258 revisiondata=False,
1257 1259 assumehaveparentrevisions=False,
1258 1260 ):
1259 1261 """Produce ``irevisiondelta`` describing revisions.
1260 1262
1261 1263 See the documentation for ``ifiledata`` for more.
1262 1264 """
1263 1265
1264 1266 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1265 1267 """Process a series of deltas for storage.
1266 1268
1267 1269 See the documentation in ``ifilemutation`` for more.
1268 1270 """
1269 1271
1270 1272 def rawsize(rev):
1271 1273 """Obtain the size of tracked data.
1272 1274
1273 1275 Is equivalent to ``len(m.rawdata(node))``.
1274 1276
1275 1277 TODO this method is only used by upgrade code and may be removed.
1276 1278 """
1277 1279
1278 1280 def getstrippoint(minlink):
1279 1281 """Find minimum revision that must be stripped to strip a linkrev.
1280 1282
1281 1283 See the documentation in ``ifilemutation`` for more.
1282 1284 """
1283 1285
1284 1286 def strip(minlink, transaction):
1285 1287 """Remove storage of items starting at a linkrev.
1286 1288
1287 1289 See the documentation in ``ifilemutation`` for more.
1288 1290 """
1289 1291
1290 1292 def checksize():
1291 1293 """Obtain the expected sizes of backing files.
1292 1294
1293 1295 TODO this is used by verify and it should not be part of the interface.
1294 1296 """
1295 1297
1296 1298 def files():
1297 1299 """Obtain paths that are backing storage for this manifest.
1298 1300
1299 1301 TODO this is used by verify and there should probably be a better API
1300 1302 for this functionality.
1301 1303 """
1302 1304
1303 1305 def deltaparent(rev):
1304 1306 """Obtain the revision that a revision is delta'd against.
1305 1307
1306 1308 TODO delta encoding is an implementation detail of storage and should
1307 1309 not be exposed to the storage interface.
1308 1310 """
1309 1311
1310 1312 def clone(tr, dest, **kwargs):
1311 1313 """Clone this instance to another."""
1312 1314
1313 1315 def clearcaches(clear_persisted_data=False):
1314 1316 """Clear any caches associated with this instance."""
1315 1317
1316 1318 def dirlog(d):
1317 1319 """Obtain a manifest storage instance for a tree."""
1318 1320
1319 1321 def add(
1320 1322 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1321 1323 ):
1322 1324 """Add a revision to storage.
1323 1325
1324 1326 ``m`` is an object conforming to ``imanifestdict``.
1325 1327
1326 1328 ``link`` is the linkrev revision number.
1327 1329
1328 1330 ``p1`` and ``p2`` are the parent revision numbers.
1329 1331
1330 1332 ``added`` and ``removed`` are iterables of added and removed paths,
1331 1333 respectively.
1332 1334
1333 1335 ``readtree`` is a function that can be used to read the child tree(s)
1334 1336 when recursively writing the full tree structure when using
1335 1337 tree manifests.
1336 1338
1337 1339 ``match`` is a matcher that can be used to hint to storage that not all
1338 1340 paths must be inspected; this is an optimization and can be safely
1339 1341 ignored. Note that the storage must still be able to reproduce a full
1340 1342 manifest including files that did not match.
1341 1343 """
1342 1344
1343 1345 def storageinfo(
1344 1346 exclusivefiles=False,
1345 1347 sharedfiles=False,
1346 1348 revisionscount=False,
1347 1349 trackedsize=False,
1348 1350 storedsize=False,
1349 1351 ):
1350 1352 """Obtain information about storage for this manifest's data.
1351 1353
1352 1354 See ``ifilestorage.storageinfo()`` for a description of this method.
1353 1355 This one behaves the same way, except for manifest data.
1354 1356 """
1355 1357
1356 1358
1357 1359 class imanifestlog(interfaceutil.Interface):
1358 1360 """Interface representing a collection of manifest snapshots.
1359 1361
1360 1362 Represents the root manifest in a repository.
1361 1363
1362 1364 Also serves as a means to access nested tree manifests and to cache
1363 1365 tree manifests.
1364 1366 """
1365 1367
1366 1368 def __getitem__(node):
1367 1369 """Obtain a manifest instance for a given binary node.
1368 1370
1369 1371 Equivalent to calling ``self.get('', node)``.
1370 1372
1371 1373 The returned object conforms to the ``imanifestrevisionstored``
1372 1374 interface.
1373 1375 """
1374 1376
1375 1377 def get(tree, node, verify=True):
1376 1378 """Retrieve the manifest instance for a given directory and binary node.
1377 1379
1378 1380 ``node`` always refers to the node of the root manifest (which will be
1379 1381 the only manifest if flat manifests are being used).
1380 1382
1381 1383 If ``tree`` is the empty string, the root manifest is returned.
1382 1384 Otherwise the manifest for the specified directory will be returned
1383 1385 (requires tree manifests).
1384 1386
1385 1387 If ``verify`` is True, ``LookupError`` is raised if the node is not
1386 1388 known.
1387 1389
1388 1390 The returned object conforms to the ``imanifestrevisionstored``
1389 1391 interface.
1390 1392 """
1391 1393
1392 1394 def getstorage(tree):
1393 1395 """Retrieve an interface to storage for a particular tree.
1394 1396
1395 1397 If ``tree`` is the empty bytestring, storage for the root manifest will
1396 1398 be returned. Otherwise storage for a tree manifest is returned.
1397 1399
1398 1400 TODO formalize interface for returned object.
1399 1401 """
1400 1402
1401 1403 def clearcaches():
1402 1404 """Clear caches associated with this collection."""
1403 1405
1404 1406 def rev(node):
1405 1407 """Obtain the revision number for a binary node.
1406 1408
1407 1409 Raises ``error.LookupError`` if the node is not known.
1408 1410 """
1409 1411
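
A short usage sketch (``repo`` and ``ctx`` are hypothetical; directory access requires tree manifests):

    mlog = repo.manifestlog
    # Root manifest for a changeset, equivalent to mlog.get(b'', node):
    m = mlog[ctx.manifestnode()].read()
    # Storage interface for a subdirectory (tree manifests only):
    store = mlog.getstorage(b'dir/')
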
1410 1412
1411 1413 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1412 1414 """Local repository sub-interface providing access to tracked file storage.
1413 1415
1414 1416 This interface defines how a repository accesses storage for a single
1415 1417 tracked file path.
1416 1418 """
1417 1419
1418 1420 def file(f):
1419 1421 """Obtain a filelog for a tracked path.
1420 1422
1421 1423 The returned type conforms to the ``ifilestorage`` interface.
1422 1424 """
1423 1425
1424 1426
1425 1427 class ilocalrepositorymain(interfaceutil.Interface):
1426 1428 """Main interface for local repositories.
1427 1429
1428 1430 This currently captures the reality of things - not how things should be.
1429 1431 """
1430 1432
1431 1433 supportedformats = interfaceutil.Attribute(
1432 1434 """Set of requirements that apply to stream clone.
1433 1435
1434 1436 This is actually a class attribute and is shared among all instances.
1435 1437 """
1436 1438 )
1437 1439
1438 1440 supported = interfaceutil.Attribute(
1439 1441 """Set of requirements that this repo is capable of opening."""
1440 1442 )
1441 1443
1442 1444 requirements = interfaceutil.Attribute(
1443 1445 """Set of requirements this repo uses."""
1444 1446 )
1445 1447
1446 1448 features = interfaceutil.Attribute(
1447 1449 """Set of "features" this repository supports.
1448 1450
1449 1451 A "feature" is a loosely-defined term. It can refer to a feature
1450 1452 in the classical sense or can describe an implementation detail
1451 1453 of the repository. For example, a ``readonly`` feature may denote
1452 1454 the repository as read-only. Or a ``revlogfilestore`` feature may
1453 1455 denote that the repository is using revlogs for file storage.
1454 1456
1455 1457 The intent of features is to provide a machine-queryable mechanism
1456 1458 for repo consumers to test for various repository characteristics.
1457 1459
1458 1460 Features are similar to ``requirements``. The main difference is that
1459 1461 requirements are stored on-disk and represent requirements to open the
1460 1462 repository. Features are more run-time capabilities of the repository
1461 1463 and more granular capabilities (which may be derived from requirements).
1462 1464 """
1463 1465 )
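
As a run-time capability test, a consumer might write something like the following sketch (``REPO_FEATURE_LFS`` is one of the feature constants defined alongside these interfaces):

    from mercurial.interfaces import repository

    def supportslfs(repo):
        # Query run-time features rather than parsing repo.requirements.
        return repository.REPO_FEATURE_LFS in repo.features
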
1464 1466
1465 1467 filtername = interfaceutil.Attribute(
1466 1468 """Name of the repoview that is active on this repo."""
1467 1469 )
1468 1470
1469 1471 wvfs = interfaceutil.Attribute(
1470 1472 """VFS used to access the working directory."""
1471 1473 )
1472 1474
1473 1475 vfs = interfaceutil.Attribute(
1474 1476 """VFS rooted at the .hg directory.
1475 1477
1476 1478 Used to access repository data not in the store.
1477 1479 """
1478 1480 )
1479 1481
1480 1482 svfs = interfaceutil.Attribute(
1481 1483 """VFS rooted at the store.
1482 1484
1483 1485 Used to access repository data in the store. Typically .hg/store.
1484 1486 But can point elsewhere if the store is shared.
1485 1487 """
1486 1488 )
1487 1489
1488 1490 root = interfaceutil.Attribute(
1489 1491 """Path to the root of the working directory."""
1490 1492 )
1491 1493
1492 1494 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1493 1495
1494 1496 origroot = interfaceutil.Attribute(
1495 1497 """The filesystem path that was used to construct the repo."""
1496 1498 )
1497 1499
1498 1500 auditor = interfaceutil.Attribute(
1499 1501 """A pathauditor for the working directory.
1500 1502
1501 1503 This checks if a path refers to a nested repository.
1502 1504
1503 1505 Operates on the filesystem.
1504 1506 """
1505 1507 )
1506 1508
1507 1509 nofsauditor = interfaceutil.Attribute(
1508 1510 """A pathauditor for the working directory.
1509 1511
1510 1512 This is like ``auditor`` except it doesn't do filesystem checks.
1511 1513 """
1512 1514 )
1513 1515
1514 1516 baseui = interfaceutil.Attribute(
1515 1517 """Original ui instance passed into constructor."""
1516 1518 )
1517 1519
1518 1520 ui = interfaceutil.Attribute("""Main ui instance for this instance.""")
1519 1521
1520 1522 sharedpath = interfaceutil.Attribute(
1521 1523 """Path to the .hg directory of the repo this repo was shared from."""
1522 1524 )
1523 1525
1524 1526 store = interfaceutil.Attribute("""A store instance.""")
1525 1527
1526 1528 spath = interfaceutil.Attribute("""Path to the store.""")
1527 1529
1528 1530 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1529 1531
1530 1532 cachevfs = interfaceutil.Attribute(
1531 1533 """A VFS used to access the cache directory.
1532 1534
1533 1535 Typically .hg/cache.
1534 1536 """
1535 1537 )
1536 1538
1537 1539 wcachevfs = interfaceutil.Attribute(
1538 1540 """A VFS used to access the cache directory dedicated to working copy
1539 1541
1540 1542 Typically .hg/wcache.
1541 1543 """
1542 1544 )
1543 1545
1544 1546 filteredrevcache = interfaceutil.Attribute(
1545 1547 """Holds sets of revisions to be filtered."""
1546 1548 )
1547 1549
1548 1550 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1549 1551
1550 1552 filecopiesmode = interfaceutil.Attribute(
1551 1553 """The way files copies should be dealt with in this repo."""
1552 1554 )
1553 1555
1554 1556 def close():
1555 1557 """Close the handle on this repository."""
1556 1558
1557 1559 def peer():
1558 1560 """Obtain an object conforming to the ``peer`` interface."""
1559 1561
1560 1562 def unfiltered():
1561 1563 """Obtain an unfiltered/raw view of this repo."""
1562 1564
1563 1565 def filtered(name, visibilityexceptions=None):
1564 1566 """Obtain a named view of this repository."""
1565 1567
1566 1568 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1567 1569
1568 1570 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1569 1571
1570 1572 manifestlog = interfaceutil.Attribute(
1571 1573 """An instance conforming to the ``imanifestlog`` interface.
1572 1574
1573 1575 Provides access to manifests for the repository.
1574 1576 """
1575 1577 )
1576 1578
1577 1579 dirstate = interfaceutil.Attribute("""Working directory state.""")
1578 1580
1579 1581 narrowpats = interfaceutil.Attribute(
1580 1582 """Matcher patterns for this repository's narrowspec."""
1581 1583 )
1582 1584
1583 1585 def narrowmatch(match=None, includeexact=False):
1584 1586 """Obtain a matcher for the narrowspec."""
1585 1587
1586 1588 def setnarrowpats(newincludes, newexcludes):
1587 1589 """Define the narrowspec for this repository."""
1588 1590
1589 1591 def __getitem__(changeid):
1590 1592 """Try to resolve a changectx."""
1591 1593
1592 1594 def __contains__(changeid):
1593 1595 """Whether a changeset exists."""
1594 1596
1595 1597 def __nonzero__():
1596 1598 """Always returns True."""
1597 1599 return True
1598 1600
1599 1601 __bool__ = __nonzero__
1600 1602
1601 1603 def __len__():
1602 1604 """Returns the number of changesets in the repo."""
1603 1605
1604 1606 def __iter__():
1605 1607 """Iterate over revisions in the changelog."""
1606 1608
1607 1609 def revs(expr, *args):
1608 1610 """Evaluate a revset.
1609 1611
1610 1612 Emits revisions.
1611 1613 """
1612 1614
1613 1615 def set(expr, *args):
1614 1616 """Evaluate a revset.
1615 1617
1616 1618 Emits changectx instances.
1617 1619 """
1618 1620
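
A usage sketch of both methods (the revsets are illustrative; ``%n`` interpolates a binary node):

    for rev in repo.revs(b'draft()'):
        print(rev)  # integer revision numbers

    for ctx in repo.set(b'ancestors(%n)', node):
        print(ctx.hex())  # changectx instances
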
1619 1621 def anyrevs(specs, user=False, localalias=None):
1620 1622 """Find revisions matching one of the given revsets."""
1621 1623
1622 1624 def url():
1623 1625 """Returns a string representing the location of this repo."""
1624 1626
1625 1627 def hook(name, throw=False, **args):
1626 1628 """Call a hook."""
1627 1629
1628 1630 def tags():
1629 1631 """Return a mapping of tag to node."""
1630 1632
1631 1633 def tagtype(tagname):
1632 1634 """Return the type of a given tag."""
1633 1635
1634 1636 def tagslist():
1635 1637 """Return a list of tags ordered by revision."""
1636 1638
1637 1639 def nodetags(node):
1638 1640 """Return the tags associated with a node."""
1639 1641
1640 1642 def nodebookmarks(node):
1641 1643 """Return the list of bookmarks pointing to the specified node."""
1642 1644
1643 1645 def branchmap():
1644 1646 """Return a mapping of branch to heads in that branch."""
1645 1647
1646 1648 def revbranchcache():
1647 1649 pass
1648 1650
1649 1651 def branchtip(branch, ignoremissing=False):
1650 1652 """Return the tip node for a given branch."""
1651 1653
1652 1654 def lookup(key):
1653 1655 """Resolve the node for a revision."""
1654 1656
1655 1657 def lookupbranch(key):
1656 1658 """Look up the branch name of the given revision or branch name."""
1657 1659
1658 1660 def known(nodes):
1659 1661 """Determine whether a series of nodes is known.
1660 1662
1661 1663 Returns a list of bools.
1662 1664 """
1663 1665
1664 1666 def local():
1665 1667 """Whether the repository is local."""
1666 1668 return True
1667 1669
1668 1670 def publishing():
1669 1671 """Whether the repository is a publishing repository."""
1670 1672
1671 1673 def cancopy():
1672 1674 pass
1673 1675
1674 1676 def shared():
1675 1677 """The type of shared repository or None."""
1676 1678
1677 1679 def wjoin(f, *insidef):
1678 1680 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1679 1681
1680 1682 def setparents(p1, p2):
1681 1683 """Set the parent nodes of the working directory."""
1682 1684
1683 1685 def filectx(path, changeid=None, fileid=None):
1684 1686 """Obtain a filectx for the given file revision."""
1685 1687
1686 1688 def getcwd():
1687 1689 """Obtain the current working directory from the dirstate."""
1688 1690
1689 1691 def pathto(f, cwd=None):
1690 1692 """Obtain the relative path to a file."""
1691 1693
1692 1694 def adddatafilter(name, fltr):
1693 1695 pass
1694 1696
1695 1697 def wread(filename):
1696 1698 """Read a file from wvfs, using data filters."""
1697 1699
1698 1700 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1699 1701 """Write data to a file in the wvfs, using data filters."""
1700 1702
1701 1703 def wwritedata(filename, data):
1702 1704 """Resolve data for writing to the wvfs, using data filters."""
1703 1705
1704 1706 def currenttransaction():
1705 1707 """Obtain the current transaction instance or None."""
1706 1708
1707 1709 def transaction(desc, report=None):
1708 1710 """Open a new transaction to write to the repository."""
1709 1711
1710 1712 def undofiles():
1711 1713 """Returns a list of (vfs, path) for files to undo transactions."""
1712 1714
1713 1715 def recover():
1714 1716 """Roll back an interrupted transaction."""
1715 1717
1716 1718 def rollback(dryrun=False, force=False):
1717 1719 """Undo the last transaction.
1718 1720
1719 1721 DANGEROUS.
1720 1722 """
1721 1723
1722 1724 def updatecaches(tr=None, full=False):
1723 1725 """Warm repo caches."""
1724 1726
1725 1727 def invalidatecaches():
1726 1728 """Invalidate cached data due to the repository mutating."""
1727 1729
1728 1730 def invalidatevolatilesets():
1729 1731 pass
1730 1732
1731 1733 def invalidatedirstate():
1732 1734 """Invalidate the dirstate."""
1733 1735
1734 1736 def invalidate(clearfilecache=False):
1735 1737 pass
1736 1738
1737 1739 def invalidateall():
1738 1740 pass
1739 1741
1740 1742 def lock(wait=True):
1741 1743 """Lock the repository store and return a lock instance."""
1742 1744
1743 1745 def wlock(wait=True):
1744 1746 """Lock the non-store parts of the repository."""
1745 1747
1746 1748 def currentwlock():
1747 1749 """Return the wlock if it's held or None."""
1748 1750
1749 1751 def checkcommitpatterns(wctx, match, status, fail):
1750 1752 pass
1751 1753
1752 1754 def commit(
1753 1755 text=b'',
1754 1756 user=None,
1755 1757 date=None,
1756 1758 match=None,
1757 1759 force=False,
1758 1760 editor=False,
1759 1761 extra=None,
1760 1762 ):
1761 1763 """Add a new revision to the repository."""
1762 1764
1763 1765 def commitctx(ctx, error=False, origctx=None):
1764 1766 """Commit a commitctx instance to the repository."""
1765 1767
1766 1768 def destroying():
1767 1769 """Inform the repository that nodes are about to be destroyed."""
1768 1770
1769 1771 def destroyed():
1770 1772 """Inform the repository that nodes have been destroyed."""
1771 1773
1772 1774 def status(
1773 1775 node1=b'.',
1774 1776 node2=None,
1775 1777 match=None,
1776 1778 ignored=False,
1777 1779 clean=False,
1778 1780 unknown=False,
1779 1781 listsubrepos=False,
1780 1782 ):
1781 1783 """Convenience method to call repo[x].status()."""
1782 1784
1783 1785 def addpostdsstatus(ps):
1784 1786 pass
1785 1787
1786 1788 def postdsstatus():
1787 1789 pass
1788 1790
1789 1791 def clearpostdsstatus():
1790 1792 pass
1791 1793
1792 1794 def heads(start=None):
1793 1795 """Obtain list of nodes that are DAG heads."""
1794 1796
1795 1797 def branchheads(branch=None, start=None, closed=False):
1796 1798 pass
1797 1799
1798 1800 def branches(nodes):
1799 1801 pass
1800 1802
1801 1803 def between(pairs):
1802 1804 pass
1803 1805
1804 1806 def checkpush(pushop):
1805 1807 pass
1806 1808
1807 1809 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1808 1810
1809 1811 def pushkey(namespace, key, old, new):
1810 1812 pass
1811 1813
1812 1814 def listkeys(namespace):
1813 1815 pass
1814 1816
1815 1817 def debugwireargs(one, two, three=None, four=None, five=None):
1816 1818 pass
1817 1819
1818 1820 def savecommitmessage(text):
1819 1821 pass
1820 1822
1821 1823
1822 1824 class completelocalrepository(
1823 1825 ilocalrepositorymain, ilocalrepositoryfilestorage
1824 1826 ):
1825 1827 """Complete interface for a local repository."""
1826 1828
1827 1829
1828 1830 class iwireprotocolcommandcacher(interfaceutil.Interface):
1829 1831 """Represents a caching backend for wire protocol commands.
1830 1832
1831 1833 Wire protocol version 2 supports transparent caching of many commands.
1832 1834 To leverage this caching, servers can activate objects that cache
1833 1835 command responses. Objects handle both cache writing and reading.
1834 1836 This interface defines how that response caching mechanism works.
1835 1837
1836 1838 Wire protocol version 2 commands emit a series of objects that are
1837 1839 serialized and sent to the client. The caching layer exists between
1838 1840 the invocation of the command function and the sending of its output
1839 1841 objects to an output layer.
1840 1842
1841 1843 Instances of this interface represent a binding to a cache that
1842 1844 can serve a response (in place of calling a command function) and/or
1843 1845 write responses to a cache for subsequent use.
1844 1846
1845 1847 When a command request arrives, the following happens with regards
1846 1848 to this interface:
1847 1849
1848 1850 1. The server determines whether the command request is cacheable.
1849 1851 2. If it is, an instance of this interface is spawned.
1850 1852 3. The cacher is activated in a context manager (``__enter__`` is called).
1851 1853 4. A cache *key* for that request is derived. This will call the
1852 1854 instance's ``adjustcachekeystate()`` method so the derivation
1853 1855 can be influenced.
1854 1856 5. The cacher is informed of the derived cache key via a call to
1855 1857 ``setcachekey()``.
1856 1858 6. The cacher's ``lookup()`` method is called to test for presence of
1857 1859 the derived key in the cache.
1858 1860 7. If ``lookup()`` returns a hit, that cached result is used in place
1859 1861 of invoking the command function. ``__exit__`` is called and the instance
1860 1862 is discarded.
1861 1863 8. The command function is invoked.
1862 1864 9. ``onobject()`` is called for each object emitted by the command
1863 1865 function.
1864 1866 10. After the final object is seen, ``onfinished()`` is called.
1865 1867 11. ``__exit__`` is called to signal the end of use of the instance.
1866 1868
1867 1869 Cache *key* derivation can be influenced by the instance.
1868 1870
1869 1871 Cache keys are initially derived by a deterministic representation of
1870 1872 the command request. This includes the command name, arguments, protocol
1871 1873 version, etc. This initial key derivation is performed by CBOR-encoding a
1872 1874 data structure and feeding that output into a hasher.
1873 1875
1874 1876 Instances of this interface can influence this initial key derivation
1875 1877 via ``adjustcachekeystate()``.
1876 1878
1877 1879 The instance is informed of the derived cache key via a call to
1878 1880 ``setcachekey()``. The instance must store the key locally so it can
1879 1881 be consulted on subsequent operations that may require it.
1880 1882
1881 1883 When constructed, the instance has access to a callable that can be used
1882 1884 for encoding response objects. This callable receives as its single
1883 1885 argument an object emitted by a command function. It returns an iterable
1884 1886 of bytes chunks representing the encoded object. Unless the cacher is
1885 1887 caching native Python objects in memory or has a way of reconstructing
1886 1888 the original Python objects, implementations typically call this function
1887 1889 to produce bytes from the output objects and then store those bytes in
1888 1890 the cache. When it comes time to re-emit those bytes, they are wrapped
1889 1891 in a ``wireprototypes.encodedresponse`` instance to tell the output
1890 1892 layer that they are pre-encoded.
1891 1893
1892 1894 When receiving the objects emitted by the command function, instances
1893 1895 can choose what to do with those objects. The simplest thing to do is
1894 1896 re-emit the original objects. They will be forwarded to the output
1895 1897 layer and will be processed as if the cacher did not exist.
1896 1898
1897 1899 Implementations could also choose to not emit objects - instead locally
1898 1900 buffering objects or their encoded representation. They could then emit
1899 1901 a single "coalesced" object when ``onfinished()`` is called. In
1900 1902 this way, the implementation would function as a filtering layer of
1901 1903 sorts.
1902 1904
1903 1905 When caching objects, typically the encoded form of the object will
1904 1906 be stored. Keep in mind that if the original object is forwarded to
1905 1907 the output layer, it will need to be encoded there as well. For large
1906 1908 output, this redundant encoding could add overhead. Implementations
1907 1909 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1908 1910 instances to avoid this overhead.
1909 1911 """
1910 1912
1911 1913 def __enter__():
1912 1914 """Marks the instance as active.
1913 1915
1914 1916 Should return self.
1915 1917 """
1916 1918
1917 1919 def __exit__(exctype, excvalue, exctb):
1918 1920 """Called when cacher is no longer used.
1919 1921
1920 1922 This can be used by implementations to perform cleanup actions (e.g.
1921 1923 disconnecting network sockets, aborting a partially cached response).
1922 1924 """
1923 1925
1924 1926 def adjustcachekeystate(state):
1925 1927 """Influences cache key derivation by adjusting state to derive key.
1926 1928
1927 1929 A dict defining the state used to derive the cache key is passed.
1928 1930
1929 1931 Implementations can modify this dict to record additional state that
1930 1932 should influence key derivation.
1931 1933
1932 1934 Implementations are *highly* encouraged to not modify or delete
1933 1935 existing keys.
1934 1936 """
1935 1937
1936 1938 def setcachekey(key):
1937 1939 """Record the derived cache key for this request.
1938 1940
1939 1941 Instances may mutate the key for internal usage, as desired. e.g.
1940 1942 instances may wish to prepend the repo name, introduce path
1941 1943 components for filesystem or URL addressing, etc. Behavior is up to
1942 1944 the cache.
1943 1945
1944 1946 Returns a bool indicating if the request is cacheable by this
1945 1947 instance.
1946 1948 """
1947 1949
1948 1950 def lookup():
1949 1951 """Attempt to resolve an entry in the cache.
1950 1952
1951 1953 The instance is instructed to look for the cache key that it was
1952 1954 informed about via the call to ``setcachekey()``.
1953 1955
1954 1956 If there's no cache hit or the cacher doesn't wish to use the cached
1955 1957 entry, ``None`` should be returned.
1956 1958
1957 1959 Else, a dict defining the cached result should be returned. The
1958 1960 dict may have the following keys:
1959 1961
1960 1962 objs
1961 1963 An iterable of objects that should be sent to the client. That
1962 1964 iterable of objects is expected to be what the command function
1963 1965 would return if invoked or an equivalent representation thereof.
1964 1966 """
1965 1967
1966 1968 def onobject(obj):
1967 1969 """Called when a new object is emitted from the command function.
1968 1970
1969 1971 Receives as its argument the object that was emitted from the
1970 1972 command function.
1971 1973
1972 1974 This method returns an iterator of objects to forward to the output
1973 1975 layer. The easiest implementation is a generator that just
1974 1976 ``yield obj``.
1975 1977 """
1976 1978
1977 1979 def onfinished():
1978 1980 """Called after all objects have been emitted from the command function.
1979 1981
1980 1982 Implementations should return an iterator of objects to forward to
1981 1983 the output layer.
1982 1984
1983 1985 This method can be a generator.
1984 1986 """
@@ -1,2988 +1,2989 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 REVIDX_SIDEDATA,
59 59 )
60 60 from .thirdparty import attr
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 nodemap as nodemaputil,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # Bare usage of all the names to silence pyflakes warnings.
87 87 # We need these names available in the module for extensions.
88 88 REVLOGV0
89 89 REVLOGV1
90 90 REVLOGV2
91 91 FLAG_INLINE_DATA
92 92 FLAG_GENERALDELTA
93 93 REVLOG_DEFAULT_FLAGS
94 94 REVLOG_DEFAULT_FORMAT
95 95 REVLOG_DEFAULT_VERSION
96 96 REVLOGV1_FLAGS
97 97 REVLOGV2_FLAGS
98 98 REVIDX_ISCENSORED
99 99 REVIDX_ELLIPSIS
100 100 REVIDX_SIDEDATA
101 101 REVIDX_EXTSTORED
102 102 REVIDX_DEFAULT_FLAGS
103 103 REVIDX_FLAGS_ORDER
104 104 REVIDX_RAWTEXT_CHANGING_FLAGS
105 105
106 106 parsers = policy.importmod('parsers')
107 107 rustancestor = policy.importrust('ancestor')
108 108 rustdagop = policy.importrust('dagop')
109 109 rustrevlog = policy.importrust('revlog')
110 110
111 111 # Aliased for performance.
112 112 _zlibdecompress = zlib.decompress
113 113
114 114 # max size of revlog with inline data
115 115 _maxinline = 131072
116 116 _chunksize = 1048576
117 117
118 118 # Flag processors for REVIDX_ELLIPSIS.
119 119 def ellipsisreadprocessor(rl, text):
120 120 return text, False, {}
121 121
122 122
123 123 def ellipsiswriteprocessor(rl, text, sidedata):
124 124 return text, False
125 125
126 126
127 127 def ellipsisrawprocessor(rl, text):
128 128 return False
129 129
130 130
131 131 ellipsisprocessor = (
132 132 ellipsisreadprocessor,
133 133 ellipsiswriteprocessor,
134 134 ellipsisrawprocessor,
135 135 )
136 136
137 137
138 138 def getoffset(q):
139 139 return int(q >> 16)
140 140
141 141
142 142 def gettype(q):
143 143 return int(q & 0xFFFF)
144 144
145 145
146 146 def offset_type(offset, type):
147 147 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 148 raise ValueError(b'unknown revlog index flags')
149 149 return int(int(offset) << 16 | type)
150 150
151 151
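
A worked example of the packing (the flag value is illustrative; ``REVIDX_ISCENSORED`` is among the known flags imported above):

    packed = offset_type(12345, REVIDX_ISCENSORED)
    assert getoffset(packed) == 12345  # upper 48 bits: data-file offset
    assert gettype(packed) == REVIDX_ISCENSORED  # lower 16 bits: flags
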
152 152 def _verify_revision(rl, skipflags, state, node):
153 153 """Verify the integrity of the given revlog ``node`` while providing a hook
154 154 point for extensions to influence the operation."""
155 155 if skipflags:
156 156 state[b'skipread'].add(node)
157 157 else:
158 158 # Side-effect: read content and verify hash.
159 159 rl.revision(node)
160 160
161 161
162 162 @attr.s(slots=True, frozen=True)
163 163 class _revisioninfo(object):
164 164 """Information about a revision that allows building its fulltext
165 165 node: expected hash of the revision
166 166 p1, p2: parent revs of the revision
167 167 btext: built text cache consisting of a one-element list
168 168 cachedelta: (baserev, uncompressed_delta) or None
169 169 flags: flags associated to the revision storage
170 170
171 171 One of btext[0] or cachedelta must be set.
172 172 """
173 173
174 174 node = attr.ib()
175 175 p1 = attr.ib()
176 176 p2 = attr.ib()
177 177 btext = attr.ib()
178 178 textlen = attr.ib()
179 179 cachedelta = attr.ib()
180 180 flags = attr.ib()
181 181
182 182
183 183 @interfaceutil.implementer(repository.irevisiondelta)
184 184 @attr.s(slots=True)
185 185 class revlogrevisiondelta(object):
186 186 node = attr.ib()
187 187 p1node = attr.ib()
188 188 p2node = attr.ib()
189 189 basenode = attr.ib()
190 190 flags = attr.ib()
191 191 baserevisionsize = attr.ib()
192 192 revision = attr.ib()
193 193 delta = attr.ib()
194 194 linknode = attr.ib(default=None)
195 195
196 196
197 197 @interfaceutil.implementer(repository.iverifyproblem)
198 198 @attr.s(frozen=True)
199 199 class revlogproblem(object):
200 200 warning = attr.ib(default=None)
201 201 error = attr.ib(default=None)
202 202 node = attr.ib(default=None)
203 203
204 204
205 205 # index v0:
206 206 # 4 bytes: offset
207 207 # 4 bytes: compressed length
208 208 # 4 bytes: base rev
209 209 # 4 bytes: link rev
210 210 # 20 bytes: parent 1 nodeid
211 211 # 20 bytes: parent 2 nodeid
212 212 # 20 bytes: nodeid
213 213 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 214 indexformatv0_pack = indexformatv0.pack
215 215 indexformatv0_unpack = indexformatv0.unpack
216 216
217 217
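
As a quick sanity check of the layout described above:

    # ">4l20s20s20s": four big-endian 32-bit ints plus three 20-byte nodeids.
    assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per v0 entry
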
218 218 class revlogoldindex(list):
219 219 @property
220 220 def nodemap(self):
221 221 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 222 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 223 return self._nodemap
224 224
225 225 @util.propertycache
226 226 def _nodemap(self):
227 227 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 228 for r in range(0, len(self)):
229 229 n = self[r][7]
230 230 nodemap[n] = r
231 231 return nodemap
232 232
233 233 def has_node(self, node):
234 234 """return True if the node exist in the index"""
235 235 return node in self._nodemap
236 236
237 237 def rev(self, node):
238 238 """return a revision for a node
239 239
240 240 If the node is unknown, raise a RevlogError"""
241 241 return self._nodemap[node]
242 242
243 243 def get_rev(self, node):
244 244 """return a revision for a node
245 245
246 246 If the node is unknown, return None"""
247 247 return self._nodemap.get(node)
248 248
249 249 def append(self, tup):
250 250 self._nodemap[tup[7]] = len(self)
251 251 super(revlogoldindex, self).append(tup)
252 252
253 253 def __delitem__(self, i):
254 254 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 255 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 256 for r in pycompat.xrange(i.start, len(self)):
257 257 del self._nodemap[self[r][7]]
258 258 super(revlogoldindex, self).__delitem__(i)
259 259
260 260 def clearcaches(self):
261 261 self.__dict__.pop('_nodemap', None)
262 262
263 263 def __getitem__(self, i):
264 264 if i == -1:
265 265 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 266 return list.__getitem__(self, i)
267 267
268 268
269 269 class revlogoldio(object):
270 270 def __init__(self):
271 271 self.size = indexformatv0.size
272 272
273 273 def parseindex(self, data, inline):
274 274 s = self.size
275 275 index = []
276 276 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 277 n = off = 0
278 278 l = len(data)
279 279 while off + s <= l:
280 280 cur = data[off : off + s]
281 281 off += s
282 282 e = indexformatv0_unpack(cur)
283 283 # transform to revlogv1 format
284 284 e2 = (
285 285 offset_type(e[0], 0),
286 286 e[1],
287 287 -1,
288 288 e[2],
289 289 e[3],
290 290 nodemap.get(e[4], nullrev),
291 291 nodemap.get(e[5], nullrev),
292 292 e[6],
293 293 )
294 294 index.append(e2)
295 295 nodemap[e[6]] = n
296 296 n += 1
297 297
298 298 index = revlogoldindex(index)
299 299 return index, None
300 300
301 301 def packentry(self, entry, node, version, rev):
302 302 if gettype(entry[0]):
303 303 raise error.RevlogError(
304 304 _(b'index entry flags need revlog version 1')
305 305 )
306 306 e2 = (
307 307 getoffset(entry[0]),
308 308 entry[1],
309 309 entry[3],
310 310 entry[4],
311 311 node(entry[5]),
312 312 node(entry[6]),
313 313 entry[7],
314 314 )
315 315 return indexformatv0_pack(*e2)
316 316
317 317
318 318 # index ng:
319 319 # 6 bytes: offset
320 320 # 2 bytes: flags
321 321 # 4 bytes: compressed length
322 322 # 4 bytes: uncompressed length
323 323 # 4 bytes: base rev
324 324 # 4 bytes: link rev
325 325 # 4 bytes: parent 1 rev
326 326 # 4 bytes: parent 2 rev
327 327 # 32 bytes: nodeid
328 328 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 329 indexformatng_pack = indexformatng.pack
330 330 versionformat = struct.Struct(b">I")
331 331 versionformat_pack = versionformat.pack
332 332 versionformat_unpack = versionformat.unpack
333 333
334 334 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 335 # signed integer)
336 336 _maxentrysize = 0x7FFFFFFF
337 337
338 338
339 339 class revlogio(object):
340 340 def __init__(self):
341 341 self.size = indexformatng.size
342 342
343 343 def parseindex(self, data, inline):
344 344 # call the C implementation to parse the index data
345 345 index, cache = parsers.parse_index2(data, inline)
346 346 return index, cache
347 347
348 348 def packentry(self, entry, node, version, rev):
349 349 p = indexformatng_pack(*entry)
350 350 if rev == 0:
351 351 p = versionformat_pack(version) + p[4:]
352 352 return p
353 353
354 354
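
A sketch of how the version header rides in the first index entry (the entry values are illustrative):

    io = revlogio()
    entry = (offset_type(0, 0), 11, 12, 0, 0, -1, -1, b'\x00' * 20)
    rec = io.packentry(entry, None, REVLOGV1, rev=0)
    assert len(rec) == indexformatng.size == 64
    # For rev 0 the first 4 bytes carry the version flags, not offset/type.
    assert versionformat_unpack(rec[:4])[0] == REVLOGV1
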
355 355 class rustrevlogio(revlogio):
356 356 def parseindex(self, data, inline):
357 357 index, cache = super(rustrevlogio, self).parseindex(data, inline)
358 358 return rustrevlog.MixedIndex(index), cache
359 359
360 360
361 361 class revlog(object):
362 362 """
363 363 the underlying revision storage object
364 364
365 365 A revlog consists of two parts, an index and the revision data.
366 366
367 367 The index is a file with a fixed record size containing
368 368 information on each revision, including its nodeid (hash), the
369 369 nodeids of its parents, the position and offset of its data within
370 370 the data file, and the revision it's based on. Finally, each entry
371 371 contains a linkrev entry that can serve as a pointer to external
372 372 data.
373 373
374 374 The revision data itself is a linear collection of data chunks.
375 375 Each chunk represents a revision and is usually represented as a
376 376 delta against the previous chunk. To bound lookup time, runs of
377 377 deltas are limited to about 2 times the length of the original
378 378 version data. This makes retrieval of a version proportional to
379 379 its size, or O(1) relative to the number of revisions.
380 380
381 381 Both pieces of the revlog are written to in an append-only
382 382 fashion, which means we never need to rewrite a file to insert or
383 383 remove data, and can use some simple techniques to avoid the need
384 384 for locking while reading.
385 385
386 386 If checkambig, indexfile is opened with checkambig=True at
387 387 writing, to avoid file stat ambiguity.
388 388
389 389 If mmaplargeindex is True, and an mmapindexthreshold is set, the
390 390 index will be mmapped rather than read if it is larger than the
391 391 configured threshold.
392 392
393 393 If censorable is True, the revlog can have censored revisions.
394 394
395 395 If `upperboundcomp` is not None, this is the expected maximal gain from
396 396 compression for the data content.
397 397 """
398 398
399 399 _flagserrorclass = error.RevlogError
400 400
401 401 def __init__(
402 402 self,
403 403 opener,
404 404 indexfile,
405 405 datafile=None,
406 406 checkambig=False,
407 407 mmaplargeindex=False,
408 408 censorable=False,
409 409 upperboundcomp=None,
410 410 ):
411 411 """
412 412 create a revlog object
413 413
414 414 opener is a function that abstracts the file opening operation
415 415 and can be used to implement COW semantics or the like.
416 416
417 417 """
418 418 self.upperboundcomp = upperboundcomp
419 419 self.indexfile = indexfile
420 420 self.datafile = datafile or (indexfile[:-2] + b".d")
421 421 self.opener = opener
422 422 # When True, indexfile is opened with checkambig=True at writing, to
423 423 # avoid file stat ambiguity.
424 424 self._checkambig = checkambig
425 425 self._mmaplargeindex = mmaplargeindex
426 426 self._censorable = censorable
427 427 # 3-tuple of (node, rev, text) for a raw revision.
428 428 self._revisioncache = None
429 429 # Maps rev to chain base rev.
430 430 self._chainbasecache = util.lrucachedict(100)
431 431 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
432 432 self._chunkcache = (0, b'')
433 433 # How much data to read and cache into the raw revlog data cache.
434 434 self._chunkcachesize = 65536
435 435 self._maxchainlen = None
436 436 self._deltabothparents = True
437 437 self.index = None
438 438 # Mapping of partial identifiers to full nodes.
439 439 self._pcache = {}
441 441 self._compengine = b'zlib'
442 442 self._compengineopts = {}
443 443 self._maxdeltachainspan = -1
444 444 self._withsparseread = False
445 445 self._sparserevlog = False
446 446 self._srdensitythreshold = 0.50
447 447 self._srmingapsize = 262144
448 448
449 449 # Make copy of flag processors so each revlog instance can support
450 450 # custom flags.
451 451 self._flagprocessors = dict(flagutil.flagprocessors)
452 452
453 453 # 2-tuple of file handles being used for active writing.
454 454 self._writinghandles = None
455 455
456 456 self._loadindex()
457 457
458 458 def _loadindex(self):
459 459 mmapindexthreshold = None
460 460 opts = self.opener.options
461 461
462 462 if b'revlogv2' in opts:
463 463 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
464 464 elif b'revlogv1' in opts:
465 465 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
466 466 if b'generaldelta' in opts:
467 467 newversionflags |= FLAG_GENERALDELTA
468 468 elif b'revlogv0' in self.opener.options:
469 469 newversionflags = REVLOGV0
470 470 else:
471 471 newversionflags = REVLOG_DEFAULT_VERSION
472 472
473 473 if b'chunkcachesize' in opts:
474 474 self._chunkcachesize = opts[b'chunkcachesize']
475 475 if b'maxchainlen' in opts:
476 476 self._maxchainlen = opts[b'maxchainlen']
477 477 if b'deltabothparents' in opts:
478 478 self._deltabothparents = opts[b'deltabothparents']
479 479 self._lazydelta = bool(opts.get(b'lazydelta', True))
480 480 self._lazydeltabase = False
481 481 if self._lazydelta:
482 482 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
483 483 if b'compengine' in opts:
484 484 self._compengine = opts[b'compengine']
485 485 if b'zlib.level' in opts:
486 486 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
487 487 if b'zstd.level' in opts:
488 488 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
489 489 if b'maxdeltachainspan' in opts:
490 490 self._maxdeltachainspan = opts[b'maxdeltachainspan']
491 491 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
492 492 mmapindexthreshold = opts[b'mmapindexthreshold']
493 493 self.hassidedata = bool(opts.get(b'side-data', False))
494 494 if self.hassidedata:
495 495 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
496 496 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
497 497 withsparseread = bool(opts.get(b'with-sparse-read', False))
498 498 # sparse-revlog forces sparse-read
499 499 self._withsparseread = self._sparserevlog or withsparseread
500 500 if b'sparse-read-density-threshold' in opts:
501 501 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
502 502 if b'sparse-read-min-gap-size' in opts:
503 503 self._srmingapsize = opts[b'sparse-read-min-gap-size']
504 504 if opts.get(b'enableellipsis'):
505 505 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
506 506
507 507 # revlog v0 doesn't have flag processors
508 508 for flag, processor in pycompat.iteritems(
509 509 opts.get(b'flagprocessors', {})
510 510 ):
511 511 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
512 512
513 513 if self._chunkcachesize <= 0:
514 514 raise error.RevlogError(
515 515 _(b'revlog chunk cache size %r is not greater than 0')
516 516 % self._chunkcachesize
517 517 )
518 518 elif self._chunkcachesize & (self._chunkcachesize - 1):
519 519 raise error.RevlogError(
520 520 _(b'revlog chunk cache size %r is not a power of 2')
521 521 % self._chunkcachesize
522 522 )
523 523
524 524 indexdata = b''
525 525 self._initempty = True
526 526 try:
527 527 with self._indexfp() as f:
528 528 if (
529 529 mmapindexthreshold is not None
530 530 and self.opener.fstat(f).st_size >= mmapindexthreshold
531 531 ):
532 532 # TODO: should .close() to release resources without
533 533 # relying on Python GC
534 534 indexdata = util.buffer(util.mmapread(f))
535 535 else:
536 536 indexdata = f.read()
537 537 if len(indexdata) > 0:
538 538 versionflags = versionformat_unpack(indexdata[:4])[0]
539 539 self._initempty = False
540 540 else:
541 541 versionflags = newversionflags
542 542 except IOError as inst:
543 543 if inst.errno != errno.ENOENT:
544 544 raise
545 545
546 546 versionflags = newversionflags
547 547
548 548 self.version = versionflags
549 549
550 550 flags = versionflags & ~0xFFFF
551 551 fmt = versionflags & 0xFFFF
552 552
553 553 if fmt == REVLOGV0:
554 554 if flags:
555 555 raise error.RevlogError(
556 556 _(b'unknown flags (%#04x) in version %d revlog %s')
557 557 % (flags >> 16, fmt, self.indexfile)
558 558 )
559 559
560 560 self._inline = False
561 561 self._generaldelta = False
562 562
563 563 elif fmt == REVLOGV1:
564 564 if flags & ~REVLOGV1_FLAGS:
565 565 raise error.RevlogError(
566 566 _(b'unknown flags (%#04x) in version %d revlog %s')
567 567 % (flags >> 16, fmt, self.indexfile)
568 568 )
569 569
570 570 self._inline = versionflags & FLAG_INLINE_DATA
571 571 self._generaldelta = versionflags & FLAG_GENERALDELTA
572 572
573 573 elif fmt == REVLOGV2:
574 574 if flags & ~REVLOGV2_FLAGS:
575 575 raise error.RevlogError(
576 576 _(b'unknown flags (%#04x) in version %d revlog %s')
577 577 % (flags >> 16, fmt, self.indexfile)
578 578 )
579 579
580 580 self._inline = versionflags & FLAG_INLINE_DATA
581 581 # generaldelta implied by version 2 revlogs.
582 582 self._generaldelta = True
583 583
584 584 else:
585 585 raise error.RevlogError(
586 586 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
587 587 )
588 588 # sparse-revlog can't be on without general-delta (issue6056)
589 589 if not self._generaldelta:
590 590 self._sparserevlog = False
591 591
592 592 self._storedeltachains = True
593 593
594 594 self._io = revlogio()
595 595 if self.version == REVLOGV0:
596 596 self._io = revlogoldio()
597 597 elif rustrevlog is not None and self.opener.options.get(b'rust.index'):
598 598 self._io = rustrevlogio()
599 599 try:
600 600 d = self._io.parseindex(indexdata, self._inline)
601 601 except (ValueError, IndexError):
602 602 raise error.RevlogError(
603 603 _(b"index %s is corrupted") % self.indexfile
604 604 )
605 605 self.index, self._chunkcache = d
606 606 if not self._chunkcache:
607 607 self._chunkclear()
608 608 # revnum -> (chain-length, sum-delta-length)
609 609 self._chaininfocache = {}
610 610 # revlog header -> revlog compressor
611 611 self._decompressors = {}
612 612
613 613 @util.propertycache
614 614 def _compressor(self):
615 615 engine = util.compengines[self._compengine]
616 616 return engine.revlogcompressor(self._compengineopts)
617 617
618 618 def _indexfp(self, mode=b'r'):
619 619 """file object for the revlog's index file"""
620 620 args = {'mode': mode}
621 621 if mode != b'r':
622 622 args['checkambig'] = self._checkambig
623 623 if mode == b'w':
624 624 args['atomictemp'] = True
625 625 return self.opener(self.indexfile, **args)
626 626
627 627 def _datafp(self, mode=b'r'):
628 628 """file object for the revlog's data file"""
629 629 return self.opener(self.datafile, mode=mode)
630 630
631 631 @contextlib.contextmanager
632 632 def _datareadfp(self, existingfp=None):
633 633 """file object suitable to read data"""
634 634 # Use explicit file handle, if given.
635 635 if existingfp is not None:
636 636 yield existingfp
637 637
638 638 # Use a file handle being actively used for writes, if available.
639 639 # There is some danger to doing this because reads will seek the
640 640 # file. However, _writeentry() performs a SEEK_END before all writes,
641 641 # so we should be safe.
642 642 elif self._writinghandles:
643 643 if self._inline:
644 644 yield self._writinghandles[0]
645 645 else:
646 646 yield self._writinghandles[1]
647 647
648 648 # Otherwise open a new file handle.
649 649 else:
650 650 if self._inline:
651 651 func = self._indexfp
652 652 else:
653 653 func = self._datafp
654 654 with func() as fp:
655 655 yield fp
656 656
657 657 def tiprev(self):
658 658 return len(self.index) - 1
659 659
660 660 def tip(self):
661 661 return self.node(self.tiprev())
662 662
663 663 def __contains__(self, rev):
664 664 return 0 <= rev < len(self)
665 665
666 666 def __len__(self):
667 667 return len(self.index)
668 668
669 669 def __iter__(self):
670 670 return iter(pycompat.xrange(len(self)))
671 671
672 672 def revs(self, start=0, stop=None):
673 673 """iterate over all rev in this revlog (from start to stop)"""
674 674 return storageutil.iterrevs(len(self), start=start, stop=stop)
675 675
676 676 @property
677 677 def nodemap(self):
678 678 msg = (
679 679 b"revlog.nodemap is deprecated, "
680 680 b"use revlog.index.[has_node|rev|get_rev]"
681 681 )
682 682 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
683 683 return self.index.nodemap
684 684
685 685 @property
686 686 def _nodecache(self):
687 687 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
688 688 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
689 689 return self.index.nodemap
690 690
691 691 def hasnode(self, node):
692 692 try:
693 693 self.rev(node)
694 694 return True
695 695 except KeyError:
696 696 return False
697 697
698 698 def candelta(self, baserev, rev):
699 699 """whether two revisions (baserev, rev) can be delta-ed or not"""
700 700 # Disable delta if either rev requires a content-changing flag
701 701 # processor (ex. LFS). This is because such flag processor can alter
702 702 # the rawtext content that the delta will be based on, and two clients
703 703 # could have a same revlog node with different flags (i.e. different
704 704 # rawtext contents) and the delta could be incompatible.
705 705 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
706 706 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
707 707 ):
708 708 return False
709 709 return True
710 710
711 711 def clearcaches(self):
712 712 self._revisioncache = None
713 713 self._chainbasecache.clear()
714 714 self._chunkcache = (0, b'')
715 715 self._pcache = {}
716 716 self.index.clearcaches()
717 717
718 718 def rev(self, node):
719 719 try:
720 720 return self.index.rev(node)
721 721 except TypeError:
722 722 raise
723 723 except error.RevlogError:
724 724 # parsers.c radix tree lookup failed
725 725 if node == wdirid or node in wdirfilenodeids:
726 726 raise error.WdirUnsupported
727 727 raise error.LookupError(node, self.indexfile, _(b'no node'))
728 728
729 729 # Accessors for index entries.
730 730
731 731 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
732 732 # are flags.
733 733 def start(self, rev):
734 734 return int(self.index[rev][0] >> 16)
735 735
736 736 def flags(self, rev):
737 737 return self.index[rev][0] & 0xFFFF
738 738
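# A minimal editorial sketch (not part of revlog) of the packing described
# above: the first index-entry field multiplexes a 48-bit offset and 16 bits
# of flags, so start() shifts and flags() masks.
def _toy_pack_offset_flags(offset, flags):
    return (offset << 16) | flags


def _toy_unpack_offset_flags(field):
    return field >> 16, field & 0xFFFF


assert _toy_unpack_offset_flags(_toy_pack_offset_flags(1024, 0x2)) == (1024, 0x2)
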
739 739 def length(self, rev):
740 740 return self.index[rev][1]
741 741
742 742 def rawsize(self, rev):
743 743 """return the length of the uncompressed text for a given revision"""
744 744 l = self.index[rev][2]
745 745 if l >= 0:
746 746 return l
747 747
748 748 t = self.rawdata(rev)
749 749 return len(t)
750 750
751 751 def size(self, rev):
752 752 """length of non-raw text (processed by a "read" flag processor)"""
753 753 # fast path: if no "read" flag processor could change the content,
754 754 # size is rawsize. note: ELLIPSIS is known to not change the content.
755 755 flags = self.flags(rev)
756 756 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
757 757 return self.rawsize(rev)
758 758
759 759 return len(self.revision(rev, raw=False))
760 760
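# An editorial sketch with toy flag constants (the real masks live in
# flagutil) of the fast-path test above: any known flag other than
# ELLIPSIS defeats the rawsize shortcut.
_TOY_KNOWN = 0b111      # stands in for flagutil.REVIDX_KNOWN_FLAGS
_TOY_ELLIPSIS = 0b001   # stands in for REVIDX_ELLIPSIS


def _toy_fastpath(flags):
    return flags & (_TOY_KNOWN ^ _TOY_ELLIPSIS) == 0


assert _toy_fastpath(_TOY_ELLIPSIS)   # ellipsis alone: fast path
assert not _toy_fastpath(0b010)       # any other known flag: slow path
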
761 761 def chainbase(self, rev):
762 762 base = self._chainbasecache.get(rev)
763 763 if base is not None:
764 764 return base
765 765
766 766 index = self.index
767 767 iterrev = rev
768 768 base = index[iterrev][3]
769 769 while base != iterrev:
770 770 iterrev = base
771 771 base = index[iterrev][3]
772 772
773 773 self._chainbasecache[rev] = base
774 774 return base
775 775
776 776 def linkrev(self, rev):
777 777 return self.index[rev][4]
778 778
779 779 def parentrevs(self, rev):
780 780 try:
781 781 entry = self.index[rev]
782 782 except IndexError:
783 783 if rev == wdirrev:
784 784 raise error.WdirUnsupported
785 785 raise
786 786
787 787 return entry[5], entry[6]
788 788
789 789 # fast parentrevs(rev) where rev isn't filtered
790 790 _uncheckedparentrevs = parentrevs
791 791
792 792 def node(self, rev):
793 793 try:
794 794 return self.index[rev][7]
795 795 except IndexError:
796 796 if rev == wdirrev:
797 797 raise error.WdirUnsupported
798 798 raise
799 799
800 800 # Derived from index values.
801 801
802 802 def end(self, rev):
803 803 return self.start(rev) + self.length(rev)
804 804
805 805 def parents(self, node):
806 806 i = self.index
807 807 d = i[self.rev(node)]
808 808 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
809 809
810 810 def chainlen(self, rev):
811 811 return self._chaininfo(rev)[0]
812 812
813 813 def _chaininfo(self, rev):
814 814 chaininfocache = self._chaininfocache
815 815 if rev in chaininfocache:
816 816 return chaininfocache[rev]
817 817 index = self.index
818 818 generaldelta = self._generaldelta
819 819 iterrev = rev
820 820 e = index[iterrev]
821 821 clen = 0
822 822 compresseddeltalen = 0
823 823 while iterrev != e[3]:
824 824 clen += 1
825 825 compresseddeltalen += e[1]
826 826 if generaldelta:
827 827 iterrev = e[3]
828 828 else:
829 829 iterrev -= 1
830 830 if iterrev in chaininfocache:
831 831 t = chaininfocache[iterrev]
832 832 clen += t[0]
833 833 compresseddeltalen += t[1]
834 834 break
835 835 e = index[iterrev]
836 836 else:
837 837 # Add text length of base since decompressing that also takes
838 838 # work. For cache hits the length is already included.
839 839 compresseddeltalen += e[1]
840 840 r = (clen, compresseddeltalen)
841 841 chaininfocache[rev] = r
842 842 return r
843 843
844 844 def _deltachain(self, rev, stoprev=None):
845 845 """Obtain the delta chain for a revision.
846 846
847 847 ``stoprev`` specifies a revision to stop at. If not specified, we
848 848 stop at the base of the chain.
849 849
850 850 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
851 851 revs in ascending order and ``stopped`` is a bool indicating whether
852 852 ``stoprev`` was hit.
853 853 """
854 854 # Try C implementation.
855 855 try:
856 856 return self.index.deltachain(rev, stoprev, self._generaldelta)
857 857 except AttributeError:
858 858 pass
859 859
860 860 chain = []
861 861
862 862 # Alias to prevent attribute lookup in tight loop.
863 863 index = self.index
864 864 generaldelta = self._generaldelta
865 865
866 866 iterrev = rev
867 867 e = index[iterrev]
868 868 while iterrev != e[3] and iterrev != stoprev:
869 869 chain.append(iterrev)
870 870 if generaldelta:
871 871 iterrev = e[3]
872 872 else:
873 873 iterrev -= 1
874 874 e = index[iterrev]
875 875
876 876 if iterrev == stoprev:
877 877 stopped = True
878 878 else:
879 879 chain.append(iterrev)
880 880 stopped = False
881 881
882 882 chain.reverse()
883 883 return chain, stopped
884 884
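# A minimal editorial sketch (not part of revlog) of the chain walk above:
# ``bases[rev]`` stands in for index entry field 3; a revision whose base
# is itself is a full snapshot and terminates the chain. With generaldelta
# the chain follows the stored base, otherwise it is simply rev - 1.
def _toy_deltachain(bases, rev, generaldelta=True):
    chain = []
    while bases[rev] != rev:
        chain.append(rev)
        rev = bases[rev] if generaldelta else rev - 1
    chain.append(rev)
    chain.reverse()
    return chain


# rev 0 is a snapshot, rev 1 deltas against 0, rev 2 against 1:
assert _toy_deltachain([0, 0, 1], 2) == [0, 1, 2]
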
885 885 def ancestors(self, revs, stoprev=0, inclusive=False):
886 886 """Generate the ancestors of 'revs' in reverse revision order.
887 887 Does not generate revs lower than stoprev.
888 888
889 889 See the documentation for ancestor.lazyancestors for more details."""
890 890
891 891 # first, make sure start revisions aren't filtered
892 892 revs = list(revs)
893 893 checkrev = self.node
894 894 for r in revs:
895 895 checkrev(r)
896 896 # and we're sure ancestors aren't filtered as well
897 897
898 898 if rustancestor is not None:
899 899 lazyancestors = rustancestor.LazyAncestors
900 900 arg = self.index
901 901 elif util.safehasattr(parsers, b'rustlazyancestors'):
902 902 lazyancestors = ancestor.rustlazyancestors
903 903 arg = self.index
904 904 else:
905 905 lazyancestors = ancestor.lazyancestors
906 906 arg = self._uncheckedparentrevs
907 907 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
908 908
909 909 def descendants(self, revs):
910 910 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
911 911
912 912 def findcommonmissing(self, common=None, heads=None):
913 913 """Return a tuple of the ancestors of common and the ancestors of heads
914 914 that are not ancestors of common. In revset terminology, we return the
915 915 tuple:
916 916
917 917 ::common, (::heads) - (::common)
918 918
919 919 The list is sorted by revision number, meaning it is
920 920 topologically sorted.
921 921
922 922 'heads' and 'common' are both lists of node IDs. If heads is
923 923 not supplied, uses all of the revlog's heads. If common is not
924 924 supplied, uses nullid."""
925 925 if common is None:
926 926 common = [nullid]
927 927 if heads is None:
928 928 heads = self.heads()
929 929
930 930 common = [self.rev(n) for n in common]
931 931 heads = [self.rev(n) for n in heads]
932 932
933 933 # we want the ancestors, but inclusive
934 934 class lazyset(object):
935 935 def __init__(self, lazyvalues):
936 936 self.addedvalues = set()
937 937 self.lazyvalues = lazyvalues
938 938
939 939 def __contains__(self, value):
940 940 return value in self.addedvalues or value in self.lazyvalues
941 941
942 942 def __iter__(self):
943 943 added = self.addedvalues
944 944 for r in added:
945 945 yield r
946 946 for r in self.lazyvalues:
947 947 if not r in added:
948 948 yield r
949 949
950 950 def add(self, value):
951 951 self.addedvalues.add(value)
952 952
953 953 def update(self, values):
954 954 self.addedvalues.update(values)
955 955
956 956 has = lazyset(self.ancestors(common))
957 957 has.add(nullrev)
958 958 has.update(common)
959 959
960 960 # take all ancestors from heads that aren't in has
961 961 missing = set()
962 962 visit = collections.deque(r for r in heads if r not in has)
963 963 while visit:
964 964 r = visit.popleft()
965 965 if r in missing:
966 966 continue
967 967 else:
968 968 missing.add(r)
969 969 for p in self.parentrevs(r):
970 970 if p not in has:
971 971 visit.append(p)
972 972 missing = list(missing)
973 973 missing.sort()
974 974 return has, [self.node(miss) for miss in missing]
975 975
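# An editorial sketch (plain dicts, not the revlog API) of the set algebra
# above: missing is (::heads) - (::common), shown here on a DAG where
# revisions 2 and 3 both branch off revision 1.
def _toy_ancestorset(parents, revs):
    seen = set()
    stack = list(revs)
    while stack:
        r = stack.pop()
        if r not in seen:
            seen.add(r)
            stack.extend(parents[r])
    return seen


_toy_parents = {0: [], 1: [0], 2: [1], 3: [1]}
assert sorted(
    _toy_ancestorset(_toy_parents, [3]) - _toy_ancestorset(_toy_parents, [2])
) == [3]
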
976 976 def incrementalmissingrevs(self, common=None):
977 977 """Return an object that can be used to incrementally compute the
978 978 revision numbers of the ancestors of arbitrary sets that are not
979 979 ancestors of common. This is an ancestor.incrementalmissingancestors
980 980 object.
981 981
982 982 'common' is a list of revision numbers. If common is not supplied, uses
983 983 nullrev.
984 984 """
985 985 if common is None:
986 986 common = [nullrev]
987 987
988 988 if rustancestor is not None:
989 989 return rustancestor.MissingAncestors(self.index, common)
990 990 return ancestor.incrementalmissingancestors(self.parentrevs, common)
991 991
992 992 def findmissingrevs(self, common=None, heads=None):
993 993 """Return the revision numbers of the ancestors of heads that
994 994 are not ancestors of common.
995 995
996 996 More specifically, return a list of revision numbers corresponding to
997 997 nodes N such that every N satisfies the following constraints:
998 998
999 999 1. N is an ancestor of some node in 'heads'
1000 1000 2. N is not an ancestor of any node in 'common'
1001 1001
1002 1002 The list is sorted by revision number, meaning it is
1003 1003 topologically sorted.
1004 1004
1005 1005 'heads' and 'common' are both lists of revision numbers. If heads is
1006 1006 not supplied, uses all of the revlog's heads. If common is not
1007 1007 supplied, uses nullid."""
1008 1008 if common is None:
1009 1009 common = [nullrev]
1010 1010 if heads is None:
1011 1011 heads = self.headrevs()
1012 1012
1013 1013 inc = self.incrementalmissingrevs(common=common)
1014 1014 return inc.missingancestors(heads)
1015 1015
1016 1016 def findmissing(self, common=None, heads=None):
1017 1017 """Return the ancestors of heads that are not ancestors of common.
1018 1018
1019 1019 More specifically, return a list of nodes N such that every N
1020 1020 satisfies the following constraints:
1021 1021
1022 1022 1. N is an ancestor of some node in 'heads'
1023 1023 2. N is not an ancestor of any node in 'common'
1024 1024
1025 1025 The list is sorted by revision number, meaning it is
1026 1026 topologically sorted.
1027 1027
1028 1028 'heads' and 'common' are both lists of node IDs. If heads is
1029 1029 not supplied, uses all of the revlog's heads. If common is not
1030 1030 supplied, uses nullid."""
1031 1031 if common is None:
1032 1032 common = [nullid]
1033 1033 if heads is None:
1034 1034 heads = self.heads()
1035 1035
1036 1036 common = [self.rev(n) for n in common]
1037 1037 heads = [self.rev(n) for n in heads]
1038 1038
1039 1039 inc = self.incrementalmissingrevs(common=common)
1040 1040 return [self.node(r) for r in inc.missingancestors(heads)]
1041 1041
1042 1042 def nodesbetween(self, roots=None, heads=None):
1043 1043 """Return a topological path from 'roots' to 'heads'.
1044 1044
1045 1045 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1046 1046 topologically sorted list of all nodes N that satisfy both of
1047 1047 these constraints:
1048 1048
1049 1049 1. N is a descendant of some node in 'roots'
1050 1050 2. N is an ancestor of some node in 'heads'
1051 1051
1052 1052 Every node is considered to be both a descendant and an ancestor
1053 1053 of itself, so every reachable node in 'roots' and 'heads' will be
1054 1054 included in 'nodes'.
1055 1055
1056 1056 'outroots' is the list of reachable nodes in 'roots', i.e., the
1057 1057 subset of 'roots' that is returned in 'nodes'. Likewise,
1058 1058 'outheads' is the subset of 'heads' that is also in 'nodes'.
1059 1059
1060 1060 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1061 1061 unspecified, uses nullid as the only root. If 'heads' is
1062 1062 unspecified, uses list of all of the revlog's heads."""
1063 1063 nonodes = ([], [], [])
1064 1064 if roots is not None:
1065 1065 roots = list(roots)
1066 1066 if not roots:
1067 1067 return nonodes
1068 1068 lowestrev = min([self.rev(n) for n in roots])
1069 1069 else:
1070 1070 roots = [nullid] # Everybody's a descendant of nullid
1071 1071 lowestrev = nullrev
1072 1072 if (lowestrev == nullrev) and (heads is None):
1073 1073 # We want _all_ the nodes!
1074 1074 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1075 1075 if heads is None:
1076 1076 # All nodes are ancestors, so the latest ancestor is the last
1077 1077 # node.
1078 1078 highestrev = len(self) - 1
1079 1079 # Set ancestors to None to signal that every node is an ancestor.
1080 1080 ancestors = None
1081 1081 # Set heads to an empty dictionary for later discovery of heads
1082 1082 heads = {}
1083 1083 else:
1084 1084 heads = list(heads)
1085 1085 if not heads:
1086 1086 return nonodes
1087 1087 ancestors = set()
1088 1088 # Turn heads into a dictionary so we can remove 'fake' heads.
1089 1089 # Also, later we will be using it to filter out the heads we can't
1090 1090 # find from roots.
1091 1091 heads = dict.fromkeys(heads, False)
1092 1092 # Start at the top and keep marking parents until we're done.
1093 1093 nodestotag = set(heads)
1094 1094 # Remember where the top was so we can use it as a limit later.
1095 1095 highestrev = max([self.rev(n) for n in nodestotag])
1096 1096 while nodestotag:
1097 1097 # grab a node to tag
1098 1098 n = nodestotag.pop()
1099 1099 # Never tag nullid
1100 1100 if n == nullid:
1101 1101 continue
1102 1102 # A node's revision number represents its place in a
1103 1103 # topologically sorted list of nodes.
1104 1104 r = self.rev(n)
1105 1105 if r >= lowestrev:
1106 1106 if n not in ancestors:
1107 1107 # If we are possibly a descendant of one of the roots
1108 1108 # and we haven't already been marked as an ancestor
1109 1109 ancestors.add(n) # Mark as ancestor
1110 1110 # Add non-nullid parents to list of nodes to tag.
1111 1111 nodestotag.update(
1112 1112 [p for p in self.parents(n) if p != nullid]
1113 1113 )
1114 1114 elif n in heads: # We've seen it before, is it a fake head?
1115 1115 # So it is, real heads should not be the ancestors of
1116 1116 # any other heads.
1117 1117 heads.pop(n)
1118 1118 if not ancestors:
1119 1119 return nonodes
1120 1120 # Now that we have our set of ancestors, we want to remove any
1121 1121 # roots that are not ancestors.
1122 1122
1123 1123 # If one of the roots was nullid, everything is included anyway.
1124 1124 if lowestrev > nullrev:
1125 1125 # But, since we weren't, let's recompute the lowest rev to not
1126 1126 # include roots that aren't ancestors.
1127 1127
1128 1128 # Filter out roots that aren't ancestors of heads
1129 1129 roots = [root for root in roots if root in ancestors]
1130 1130 # Recompute the lowest revision
1131 1131 if roots:
1132 1132 lowestrev = min([self.rev(root) for root in roots])
1133 1133 else:
1134 1134 # No more roots? Return empty list
1135 1135 return nonodes
1136 1136 else:
1137 1137 # We are descending from nullid, and don't need to care about
1138 1138 # any other roots.
1139 1139 lowestrev = nullrev
1140 1140 roots = [nullid]
1141 1141 # Transform our roots list into a set.
1142 1142 descendants = set(roots)
1143 1143 # Also, keep the original roots so we can filter out roots that aren't
1144 1144 # 'real' roots (i.e. are descended from other roots).
1145 1145 roots = descendants.copy()
1146 1146 # Our topologically sorted list of output nodes.
1147 1147 orderedout = []
1148 1148 # Don't start at nullid since we don't want nullid in our output list,
1149 1149 # and if nullid shows up in descendants, empty parents will look like
1150 1150 # they're descendants.
1151 1151 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1152 1152 n = self.node(r)
1153 1153 isdescendant = False
1154 1154 if lowestrev == nullrev: # Everybody is a descendant of nullid
1155 1155 isdescendant = True
1156 1156 elif n in descendants:
1157 1157 # n is already a descendant
1158 1158 isdescendant = True
1159 1159 # This check only needs to be done here because all the roots
1160 1160 # will start being marked as descendants before the loop.
1161 1161 if n in roots:
1162 1162 # If n was a root, check if it's a 'real' root.
1163 1163 p = tuple(self.parents(n))
1164 1164 # If any of its parents are descendants, it's not a root.
1165 1165 if (p[0] in descendants) or (p[1] in descendants):
1166 1166 roots.remove(n)
1167 1167 else:
1168 1168 p = tuple(self.parents(n))
1169 1169 # A node is a descendant if either of its parents are
1170 1170 # descendants. (We seeded the descendants set with the roots
1171 1171 # up there, remember?)
1172 1172 if (p[0] in descendants) or (p[1] in descendants):
1173 1173 descendants.add(n)
1174 1174 isdescendant = True
1175 1175 if isdescendant and ((ancestors is None) or (n in ancestors)):
1176 1176 # Only include nodes that are both descendants and ancestors.
1177 1177 orderedout.append(n)
1178 1178 if (ancestors is not None) and (n in heads):
1179 1179 # We're trying to figure out which heads are reachable
1180 1180 # from roots.
1181 1181 # Mark this head as having been reached
1182 1182 heads[n] = True
1183 1183 elif ancestors is None:
1184 1184 # Otherwise, we're trying to discover the heads.
1185 1185 # Assume this is a head because if it isn't, the next step
1186 1186 # will eventually remove it.
1187 1187 heads[n] = True
1188 1188 # But, obviously its parents aren't.
1189 1189 for p in self.parents(n):
1190 1190 heads.pop(p, None)
1191 1191 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1192 1192 roots = list(roots)
1193 1193 assert orderedout
1194 1194 assert roots
1195 1195 assert heads
1196 1196 return (orderedout, roots, heads)
1197 1197
1198 1198 def headrevs(self, revs=None):
1199 1199 if revs is None:
1200 1200 try:
1201 1201 return self.index.headrevs()
1202 1202 except AttributeError:
1203 1203 return self._headrevs()
1204 1204 if rustdagop is not None:
1205 1205 return rustdagop.headrevs(self.index, revs)
1206 1206 return dagop.headrevs(revs, self._uncheckedparentrevs)
1207 1207
1208 1208 def computephases(self, roots):
1209 1209 return self.index.computephasesmapsets(roots)
1210 1210
1211 1211 def _headrevs(self):
1212 1212 count = len(self)
1213 1213 if not count:
1214 1214 return [nullrev]
1215 1215 # we won't iter over filtered rev so nobody is a head at start
1216 1216 ishead = [0] * (count + 1)
1217 1217 index = self.index
1218 1218 for r in self:
1219 1219 ishead[r] = 1 # I may be a head
1220 1220 e = index[r]
1221 1221 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1222 1222 return [r for r, val in enumerate(ishead) if val]
1223 1223
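# An editorial sketch (not part of revlog) of the head computation above:
# every revision starts out as a candidate head and is demoted as soon as
# it appears as somebody's parent (-1 stands in for nullrev).
def _toy_headrevs(parentrevs):
    ishead = [1] * len(parentrevs)
    for p1, p2 in parentrevs:
        for p in (p1, p2):
            if p >= 0:
                ishead[p] = 0
    return [r for r, val in enumerate(ishead) if val]


# a chain 0 <- 1 <- 2 plus a branch 1 <- 3: the heads are 2 and 3
assert _toy_headrevs([(-1, -1), (0, -1), (1, -1), (1, -1)]) == [2, 3]
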
1224 1224 def heads(self, start=None, stop=None):
1225 1225 """return the list of all nodes that have no children
1226 1226
1227 1227 if start is specified, only heads that are descendants of
1228 1228 start will be returned
1229 1229 if stop is specified, it will consider all the revs from stop
1230 1230 as if they had no children
1231 1231 """
1232 1232 if start is None and stop is None:
1233 1233 if not len(self):
1234 1234 return [nullid]
1235 1235 return [self.node(r) for r in self.headrevs()]
1236 1236
1237 1237 if start is None:
1238 1238 start = nullrev
1239 1239 else:
1240 1240 start = self.rev(start)
1241 1241
1242 1242 stoprevs = set(self.rev(n) for n in stop or [])
1243 1243
1244 1244 revs = dagop.headrevssubset(
1245 1245 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1246 1246 )
1247 1247
1248 1248 return [self.node(rev) for rev in revs]
1249 1249
1250 1250 def children(self, node):
1251 1251 """find the children of a given node"""
1252 1252 c = []
1253 1253 p = self.rev(node)
1254 1254 for r in self.revs(start=p + 1):
1255 1255 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1256 1256 if prevs:
1257 1257 for pr in prevs:
1258 1258 if pr == p:
1259 1259 c.append(self.node(r))
1260 1260 elif p == nullrev:
1261 1261 c.append(self.node(r))
1262 1262 return c
1263 1263
1264 1264 def commonancestorsheads(self, a, b):
1265 1265 """calculate all the heads of the common ancestors of nodes a and b"""
1266 1266 a, b = self.rev(a), self.rev(b)
1267 1267 ancs = self._commonancestorsheads(a, b)
1268 1268 return pycompat.maplist(self.node, ancs)
1269 1269
1270 1270 def _commonancestorsheads(self, *revs):
1271 1271 """calculate all the heads of the common ancestors of revs"""
1272 1272 try:
1273 1273 ancs = self.index.commonancestorsheads(*revs)
1274 1274 except (AttributeError, OverflowError): # C implementation failed
1275 1275 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1276 1276 return ancs
1277 1277
1278 1278 def isancestor(self, a, b):
1279 1279 """return True if node a is an ancestor of node b
1280 1280
1281 1281 A revision is considered an ancestor of itself."""
1282 1282 a, b = self.rev(a), self.rev(b)
1283 1283 return self.isancestorrev(a, b)
1284 1284
1285 1285 def isancestorrev(self, a, b):
1286 1286 """return True if revision a is an ancestor of revision b
1287 1287
1288 1288 A revision is considered an ancestor of itself.
1289 1289
1290 1290 The implementation of this is trivial but the use of
1291 1291 reachableroots is not."""
1292 1292 if a == nullrev:
1293 1293 return True
1294 1294 elif a == b:
1295 1295 return True
1296 1296 elif a > b:
1297 1297 return False
1298 1298 return bool(self.reachableroots(a, [b], [a], includepath=False))
1299 1299
1300 1300 def reachableroots(self, minroot, heads, roots, includepath=False):
1301 1301 """return (heads(::(<roots> and <roots>::<heads>)))
1302 1302
1303 1303 If includepath is True, return (<roots>::<heads>)."""
1304 1304 try:
1305 1305 return self.index.reachableroots2(
1306 1306 minroot, heads, roots, includepath
1307 1307 )
1308 1308 except AttributeError:
1309 1309 return dagop._reachablerootspure(
1310 1310 self.parentrevs, minroot, roots, heads, includepath
1311 1311 )
1312 1312
1313 1313 def ancestor(self, a, b):
1314 1314 """calculate the "best" common ancestor of nodes a and b"""
1315 1315
1316 1316 a, b = self.rev(a), self.rev(b)
1317 1317 try:
1318 1318 ancs = self.index.ancestors(a, b)
1319 1319 except (AttributeError, OverflowError):
1320 1320 ancs = ancestor.ancestors(self.parentrevs, a, b)
1321 1321 if ancs:
1322 1322 # choose a consistent winner when there's a tie
1323 1323 return min(map(self.node, ancs))
1324 1324 return nullid
1325 1325
1326 1326 def _match(self, id):
1327 1327 if isinstance(id, int):
1328 1328 # rev
1329 1329 return self.node(id)
1330 1330 if len(id) == 20:
1331 1331 # possibly a binary node
1332 1332 # odds of a binary node being all hex in ASCII are 1 in 10**25
1333 1333 try:
1334 1334 node = id
1335 1335 self.rev(node) # quick search the index
1336 1336 return node
1337 1337 except error.LookupError:
1338 1338 pass # may be partial hex id
1339 1339 try:
1340 1340 # str(rev)
1341 1341 rev = int(id)
1342 1342 if b"%d" % rev != id:
1343 1343 raise ValueError
1344 1344 if rev < 0:
1345 1345 rev = len(self) + rev
1346 1346 if rev < 0 or rev >= len(self):
1347 1347 raise ValueError
1348 1348 return self.node(rev)
1349 1349 except (ValueError, OverflowError):
1350 1350 pass
1351 1351 if len(id) == 40:
1352 1352 try:
1353 1353 # a full hex nodeid?
1354 1354 node = bin(id)
1355 1355 self.rev(node)
1356 1356 return node
1357 1357 except (TypeError, error.LookupError):
1358 1358 pass
1359 1359
1360 1360 def _partialmatch(self, id):
1361 1361 # we don't care about wdirfilenodeids as they should always be full hashes
1362 1362 maybewdir = wdirhex.startswith(id)
1363 1363 try:
1364 1364 partial = self.index.partialmatch(id)
1365 1365 if partial and self.hasnode(partial):
1366 1366 if maybewdir:
1367 1367 # single 'ff...' match in radix tree, ambiguous with wdir
1368 1368 raise error.RevlogError
1369 1369 return partial
1370 1370 if maybewdir:
1371 1371 # no 'ff...' match in radix tree, wdir identified
1372 1372 raise error.WdirUnsupported
1373 1373 return None
1374 1374 except error.RevlogError:
1375 1375 # parsers.c radix tree lookup gave multiple matches
1376 1376 # fast path: for unfiltered changelog, radix tree is accurate
1377 1377 if not getattr(self, 'filteredrevs', None):
1378 1378 raise error.AmbiguousPrefixLookupError(
1379 1379 id, self.indexfile, _(b'ambiguous identifier')
1380 1380 )
1381 1381 # fall through to slow path that filters hidden revisions
1382 1382 except (AttributeError, ValueError):
1383 1383 # we are pure python, or key was too short to search radix tree
1384 1384 pass
1385 1385
1386 1386 if id in self._pcache:
1387 1387 return self._pcache[id]
1388 1388
1389 1389 if len(id) <= 40:
1390 1390 try:
1391 1391 # hex(node)[:...]
1392 1392 l = len(id) // 2 # grab an even number of digits
1393 1393 prefix = bin(id[: l * 2])
1394 1394 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1395 1395 nl = [
1396 1396 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1397 1397 ]
1398 1398 if nullhex.startswith(id):
1399 1399 nl.append(nullid)
1400 1400 if len(nl) > 0:
1401 1401 if len(nl) == 1 and not maybewdir:
1402 1402 self._pcache[id] = nl[0]
1403 1403 return nl[0]
1404 1404 raise error.AmbiguousPrefixLookupError(
1405 1405 id, self.indexfile, _(b'ambiguous identifier')
1406 1406 )
1407 1407 if maybewdir:
1408 1408 raise error.WdirUnsupported
1409 1409 return None
1410 1410 except TypeError:
1411 1411 pass
1412 1412
1413 1413 def lookup(self, id):
1414 1414 """locate a node based on:
1415 1415 - revision number or str(revision number)
1416 1416 - nodeid or subset of hex nodeid
1417 1417 """
1418 1418 n = self._match(id)
1419 1419 if n is not None:
1420 1420 return n
1421 1421 n = self._partialmatch(id)
1422 1422 if n:
1423 1423 return n
1424 1424
1425 1425 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1426 1426
1427 1427 def shortest(self, node, minlength=1):
1428 1428 """Find the shortest unambiguous prefix that matches node."""
1429 1429
1430 1430 def isvalid(prefix):
1431 1431 try:
1432 1432 matchednode = self._partialmatch(prefix)
1433 1433 except error.AmbiguousPrefixLookupError:
1434 1434 return False
1435 1435 except error.WdirUnsupported:
1436 1436 # single 'ff...' match
1437 1437 return True
1438 1438 if matchednode is None:
1439 1439 raise error.LookupError(node, self.indexfile, _(b'no node'))
1440 1440 return True
1441 1441
1442 1442 def maybewdir(prefix):
1443 1443 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1444 1444
1445 1445 hexnode = hex(node)
1446 1446
1447 1447 def disambiguate(hexnode, minlength):
1448 1448 """Disambiguate against wdirid."""
1449 1449 for length in range(minlength, 41):
1450 1450 prefix = hexnode[:length]
1451 1451 if not maybewdir(prefix):
1452 1452 return prefix
1453 1453
1454 1454 if not getattr(self, 'filteredrevs', None):
1455 1455 try:
1456 1456 length = max(self.index.shortest(node), minlength)
1457 1457 return disambiguate(hexnode, length)
1458 1458 except error.RevlogError:
1459 1459 if node != wdirid:
1460 1460 raise error.LookupError(node, self.indexfile, _(b'no node'))
1461 1461 except AttributeError:
1462 1462 # Fall through to pure code
1463 1463 pass
1464 1464
1465 1465 if node == wdirid:
1466 1466 for length in range(minlength, 41):
1467 1467 prefix = hexnode[:length]
1468 1468 if isvalid(prefix):
1469 1469 return prefix
1470 1470
1471 1471 for length in range(minlength, 41):
1472 1472 prefix = hexnode[:length]
1473 1473 if isvalid(prefix):
1474 1474 return disambiguate(hexnode, length)
1475 1475
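# Hedged usage sketch for shortest() (assumes an already-populated revlog
# instance ``rl`` and a ``node`` it contains; not runnable standalone):
#
#   prefix = rl.shortest(node, minlength=4)
#   assert rl.lookup(prefix) == node
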
1476 1476 def cmp(self, node, text):
1477 1477 """compare text with a given file revision
1478 1478
1479 1479 returns True if text is different than what is stored.
1480 1480 """
1481 1481 p1, p2 = self.parents(node)
1482 1482 return storageutil.hashrevisionsha1(text, p1, p2) != node
1483 1483
1484 1484 def _cachesegment(self, offset, data):
1485 1485 """Add a segment to the revlog cache.
1486 1486
1487 1487 Accepts an absolute offset and the data that is at that location.
1488 1488 """
1489 1489 o, d = self._chunkcache
1490 1490 # try to add to existing cache
1491 1491 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1492 1492 self._chunkcache = o, d + data
1493 1493 else:
1494 1494 self._chunkcache = offset, data
1495 1495
1496 1496 def _readsegment(self, offset, length, df=None):
1497 1497 """Load a segment of raw data from the revlog.
1498 1498
1499 1499 Accepts an absolute offset, length to read, and an optional existing
1500 1500 file handle to read from.
1501 1501
1502 1502 If an existing file handle is passed, it will be seeked and the
1503 1503 original seek position will NOT be restored.
1504 1504
1505 1505 Returns a str or buffer of raw byte data.
1506 1506
1507 1507 Raises if the requested number of bytes could not be read.
1508 1508 """
1509 1509 # Cache data both forward and backward around the requested
1510 1510 # data, in a fixed size window. This helps speed up operations
1511 1511 # involving reading the revlog backwards.
1512 1512 cachesize = self._chunkcachesize
1513 1513 realoffset = offset & ~(cachesize - 1)
1514 1514 reallength = (
1515 1515 (offset + length + cachesize) & ~(cachesize - 1)
1516 1516 ) - realoffset
1517 1517 with self._datareadfp(df) as df:
1518 1518 df.seek(realoffset)
1519 1519 d = df.read(reallength)
1520 1520
1521 1521 self._cachesegment(realoffset, d)
1522 1522 if offset != realoffset or reallength != length:
1523 1523 startoffset = offset - realoffset
1524 1524 if len(d) - startoffset < length:
1525 1525 raise error.RevlogError(
1526 1526 _(
1527 1527 b'partial read of revlog %s; expected %d bytes from '
1528 1528 b'offset %d, got %d'
1529 1529 )
1530 1530 % (
1531 1531 self.indexfile if self._inline else self.datafile,
1532 1532 length,
1533 1533 realoffset,
1534 1534 len(d) - startoffset,
1535 1535 )
1536 1536 )
1537 1537
1538 1538 return util.buffer(d, startoffset, length)
1539 1539
1540 1540 if len(d) < length:
1541 1541 raise error.RevlogError(
1542 1542 _(
1543 1543 b'partial read of revlog %s; expected %d bytes from offset '
1544 1544 b'%d, got %d'
1545 1545 )
1546 1546 % (
1547 1547 self.indexfile if self._inline else self.datafile,
1548 1548 length,
1549 1549 offset,
1550 1550 len(d),
1551 1551 )
1552 1552 )
1553 1553
1554 1554 return d
1555 1555
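# An editorial sketch (not part of revlog) of the cache-window alignment
# used above; assumes a power-of-two cachesize, as enforced elsewhere.
def _toy_aligned_window(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)
    reallength = (
        (offset + length + cachesize) & ~(cachesize - 1)
    ) - realoffset
    return realoffset, reallength


# a 100-byte read at offset 70000 expands to the surrounding 64k window
assert _toy_aligned_window(70000, 100, 65536) == (65536, 65536)
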
1556 1556 def _getsegment(self, offset, length, df=None):
1557 1557 """Obtain a segment of raw data from the revlog.
1558 1558
1559 1559 Accepts an absolute offset, length of bytes to obtain, and an
1560 1560 optional file handle to the already-opened revlog. If the file
1561 1561 handle is used, its original seek position will not be preserved.
1562 1562
1563 1563 Requests for data may be returned from a cache.
1564 1564
1565 1565 Returns a str or a buffer instance of raw byte data.
1566 1566 """
1567 1567 o, d = self._chunkcache
1568 1568 l = len(d)
1569 1569
1570 1570 # is it in the cache?
1571 1571 cachestart = offset - o
1572 1572 cacheend = cachestart + length
1573 1573 if cachestart >= 0 and cacheend <= l:
1574 1574 if cachestart == 0 and cacheend == l:
1575 1575 return d # avoid a copy
1576 1576 return util.buffer(d, cachestart, cacheend - cachestart)
1577 1577
1578 1578 return self._readsegment(offset, length, df=df)
1579 1579
1580 1580 def _getsegmentforrevs(self, startrev, endrev, df=None):
1581 1581 """Obtain a segment of raw data corresponding to a range of revisions.
1582 1582
1583 1583 Accepts the start and end revisions and an optional already-open
1584 1584 file handle to be used for reading. If the file handle is used, its
1585 1585 seek position will not be preserved.
1586 1586
1587 1587 Requests for data may be satisfied by a cache.
1588 1588
1589 1589 Returns a 2-tuple of (offset, data) for the requested range of
1590 1590 revisions. Offset is the integer offset from the beginning of the
1591 1591 revlog and data is a str or buffer of the raw byte data.
1592 1592
1593 1593 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1594 1594 to determine where each revision's data begins and ends.
1595 1595 """
1596 1596 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1597 1597 # (functions are expensive).
1598 1598 index = self.index
1599 1599 istart = index[startrev]
1600 1600 start = int(istart[0] >> 16)
1601 1601 if startrev == endrev:
1602 1602 end = start + istart[1]
1603 1603 else:
1604 1604 iend = index[endrev]
1605 1605 end = int(iend[0] >> 16) + iend[1]
1606 1606
1607 1607 if self._inline:
1608 1608 start += (startrev + 1) * self._io.size
1609 1609 end += (endrev + 1) * self._io.size
1610 1610 length = end - start
1611 1611
1612 1612 return start, self._getsegment(start, length, df=df)
1613 1613
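# Editorial note (not part of revlog): in an inline revlog, index entries
# and data chunks are interleaved in a single file, hence the adjustment
# above; the data for rev r starts after (r + 1) index entries.
def _toy_inline_dataoffset(dataoffset, rev, entrysize):
    return dataoffset + (rev + 1) * entrysize


# with 64-byte index entries, rev 0's data begins at byte 64 of the file
assert _toy_inline_dataoffset(0, 0, 64) == 64
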
1614 1614 def _chunk(self, rev, df=None):
1615 1615 """Obtain a single decompressed chunk for a revision.
1616 1616
1617 1617 Accepts an integer revision and an optional already-open file handle
1618 1618 to be used for reading. If used, the seek position of the file will not
1619 1619 be preserved.
1620 1620
1621 1621 Returns a str holding uncompressed data for the requested revision.
1622 1622 """
1623 1623 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1624 1624
1625 1625 def _chunks(self, revs, df=None, targetsize=None):
1626 1626 """Obtain decompressed chunks for the specified revisions.
1627 1627
1628 1628 Accepts an iterable of numeric revisions that are assumed to be in
1629 1629 ascending order. Also accepts an optional already-open file handle
1630 1630 to be used for reading. If used, the seek position of the file will
1631 1631 not be preserved.
1632 1632
1633 1633 This function is similar to calling ``self._chunk()`` multiple times,
1634 1634 but is faster.
1635 1635
1636 1636 Returns a list with decompressed data for each requested revision.
1637 1637 """
1638 1638 if not revs:
1639 1639 return []
1640 1640 start = self.start
1641 1641 length = self.length
1642 1642 inline = self._inline
1643 1643 iosize = self._io.size
1644 1644 buffer = util.buffer
1645 1645
1646 1646 l = []
1647 1647 ladd = l.append
1648 1648
1649 1649 if not self._withsparseread:
1650 1650 slicedchunks = (revs,)
1651 1651 else:
1652 1652 slicedchunks = deltautil.slicechunk(
1653 1653 self, revs, targetsize=targetsize
1654 1654 )
1655 1655
1656 1656 for revschunk in slicedchunks:
1657 1657 firstrev = revschunk[0]
1658 1658 # Skip trailing revisions with empty diff
1659 1659 for lastrev in revschunk[::-1]:
1660 1660 if length(lastrev) != 0:
1661 1661 break
1662 1662
1663 1663 try:
1664 1664 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1665 1665 except OverflowError:
1666 1666 # issue4215 - we can't cache a run of chunks greater than
1667 1667 # 2G on Windows
1668 1668 return [self._chunk(rev, df=df) for rev in revschunk]
1669 1669
1670 1670 decomp = self.decompress
1671 1671 for rev in revschunk:
1672 1672 chunkstart = start(rev)
1673 1673 if inline:
1674 1674 chunkstart += (rev + 1) * iosize
1675 1675 chunklength = length(rev)
1676 1676 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1677 1677
1678 1678 return l
1679 1679
1680 1680 def _chunkclear(self):
1681 1681 """Clear the raw chunk cache."""
1682 1682 self._chunkcache = (0, b'')
1683 1683
1684 1684 def deltaparent(self, rev):
1685 1685 """return deltaparent of the given revision"""
1686 1686 base = self.index[rev][3]
1687 1687 if base == rev:
1688 1688 return nullrev
1689 1689 elif self._generaldelta:
1690 1690 return base
1691 1691 else:
1692 1692 return rev - 1
1693 1693
1694 1694 def issnapshot(self, rev):
1695 1695 """tells whether rev is a snapshot
1696 1696 """
1697 1697 if not self._sparserevlog:
1698 1698 return self.deltaparent(rev) == nullrev
1699 1699 elif util.safehasattr(self.index, b'issnapshot'):
1700 1700 # directly assign the method to cache the testing and access
1701 1701 self.issnapshot = self.index.issnapshot
1702 1702 return self.issnapshot(rev)
1703 1703 if rev == nullrev:
1704 1704 return True
1705 1705 entry = self.index[rev]
1706 1706 base = entry[3]
1707 1707 if base == rev:
1708 1708 return True
1709 1709 if base == nullrev:
1710 1710 return True
1711 1711 p1 = entry[5]
1712 1712 p2 = entry[6]
1713 1713 if base == p1 or base == p2:
1714 1714 return False
1715 1715 return self.issnapshot(base)
1716 1716
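# An editorial sketch (not part of revlog) of the sparse-revlog rule above:
# a revision is a snapshot when it is stored in full (base == rev or the
# base is null, -1 here), and an intermediate snapshot when it deltas
# against something other than one of its parents that is itself a snapshot.
def _toy_issnapshot(bases, parents, rev):
    base = bases[rev]
    if base == rev or base == -1:
        return True
    if base in parents[rev]:
        return False
    return _toy_issnapshot(bases, parents, base)


# rev 1 deltas against its own parent 0, so it is a plain delta chain link
assert _toy_issnapshot([0, 0], [(-1, -1), (0, -1)], 1) is False
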
1717 1717 def snapshotdepth(self, rev):
1718 1718 """number of snapshots in the chain before this one"""
1719 1719 if not self.issnapshot(rev):
1720 1720 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1721 1721 return len(self._deltachain(rev)[0]) - 1
1722 1722
1723 1723 def revdiff(self, rev1, rev2):
1724 1724 """return or calculate a delta between two revisions
1725 1725
1726 1726 The delta calculated is in binary form and is intended to be written to
1727 1727 revlog data directly. So this function needs raw revision data.
1728 1728 """
1729 1729 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1730 1730 return bytes(self._chunk(rev2))
1731 1731
1732 1732 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1733 1733
1734 1734 def _processflags(self, text, flags, operation, raw=False):
1735 1735 """deprecated entry point to access flag processors"""
1736 1736 msg = b'_processflag(...) use the specialized variant'
1737 1737 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1738 1738 if raw:
1739 1739 return text, flagutil.processflagsraw(self, text, flags)
1740 1740 elif operation == b'read':
1741 1741 return flagutil.processflagsread(self, text, flags)
1742 1742 else: # write operation
1743 1743 return flagutil.processflagswrite(self, text, flags)
1744 1744
1745 1745 def revision(self, nodeorrev, _df=None, raw=False):
1746 1746 """return an uncompressed revision of a given node or revision
1747 1747 number.
1748 1748
1749 1749 _df - an existing file handle to read from. (internal-only)
1750 1750 raw - an optional argument specifying if the revision data is to be
1751 1751 treated as raw data when applying flag transforms. 'raw' should be set
1752 1752 to True when generating changegroups or in debug commands.
1753 1753 """
1754 1754 if raw:
1755 1755 msg = (
1756 1756 b'revlog.revision(..., raw=True) is deprecated, '
1757 1757 b'use revlog.rawdata(...)'
1758 1758 )
1759 1759 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1760 1760 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1761 1761
1762 1762 def sidedata(self, nodeorrev, _df=None):
1763 1763 """a map of extra data related to the changeset but not part of the hash
1764 1764
1765 1765 This function currently returns a dictionary. However, a more advanced
1766 1766 mapping object will likely be used in the future for more
1767 1767 efficient/lazy code.
1768 1768 """
1769 1769 return self._revisiondata(nodeorrev, _df)[1]
1770 1770
1771 1771 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1772 1772 # deal with <nodeorrev> argument type
1773 1773 if isinstance(nodeorrev, int):
1774 1774 rev = nodeorrev
1775 1775 node = self.node(rev)
1776 1776 else:
1777 1777 node = nodeorrev
1778 1778 rev = None
1779 1779
1780 1780 # fast path the special `nullid` rev
1781 1781 if node == nullid:
1782 1782 return b"", {}
1783 1783
1784 1784 # ``rawtext`` is the text as stored inside the revlog. Might be the
1785 1785 # revision or might need to be processed to retrieve the revision.
1786 1786 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1787 1787
1788 1788 if raw and validated:
1789 1789 # if we don't want to process the raw text and that raw
1790 1790 # text is cached, we can exit early.
1791 1791 return rawtext, {}
1792 1792 if rev is None:
1793 1793 rev = self.rev(node)
1794 1794 # the revlog's flag for this revision
1795 1795 # (usually alter its state or content)
1796 1796 flags = self.flags(rev)
1797 1797
1798 1798 if validated and flags == REVIDX_DEFAULT_FLAGS:
1799 1799 # no extra flags set, no flag processor runs, text = rawtext
1800 1800 return rawtext, {}
1801 1801
1802 1802 sidedata = {}
1803 1803 if raw:
1804 1804 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1805 1805 text = rawtext
1806 1806 else:
1807 1807 try:
1808 1808 r = flagutil.processflagsread(self, rawtext, flags)
1809 1809 except error.SidedataHashError as exc:
1810 1810 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1811 1811 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1812 1812 raise error.RevlogError(msg)
1813 1813 text, validatehash, sidedata = r
1814 1814 if validatehash:
1815 1815 self.checkhash(text, node, rev=rev)
1816 1816 if not validated:
1817 1817 self._revisioncache = (node, rev, rawtext)
1818 1818
1819 1819 return text, sidedata
1820 1820
1821 1821 def _rawtext(self, node, rev, _df=None):
1822 1822 """return the possibly unvalidated rawtext for a revision
1823 1823
1824 1824 returns (rev, rawtext, validated)
1825 1825 """
1826 1826
1827 1827 # revision in the cache (could be useful to apply delta)
1828 1828 cachedrev = None
1829 1829 # An intermediate text to apply deltas to
1830 1830 basetext = None
1831 1831
1832 1832 # Check if we have the entry in cache
1833 1833 # The cache entry looks like (node, rev, rawtext)
1834 1834 if self._revisioncache:
1835 1835 if self._revisioncache[0] == node:
1836 1836 return (rev, self._revisioncache[2], True)
1837 1837 cachedrev = self._revisioncache[1]
1838 1838
1839 1839 if rev is None:
1840 1840 rev = self.rev(node)
1841 1841
1842 1842 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1843 1843 if stopped:
1844 1844 basetext = self._revisioncache[2]
1845 1845
1846 1846 # drop cache to save memory, the caller is expected to
1847 1847 # update self._revisioncache after validating the text
1848 1848 self._revisioncache = None
1849 1849
1850 1850 targetsize = None
1851 1851 rawsize = self.index[rev][2]
1852 1852 if 0 <= rawsize:
1853 1853 targetsize = 4 * rawsize
1854 1854
1855 1855 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1856 1856 if basetext is None:
1857 1857 basetext = bytes(bins[0])
1858 1858 bins = bins[1:]
1859 1859
1860 1860 rawtext = mdiff.patches(basetext, bins)
1861 1861 del basetext # let us have a chance to free memory early
1862 1862 return (rev, rawtext, False)
1863 1863
1864 1864 def rawdata(self, nodeorrev, _df=None):
1865 1865 """return an uncompressed raw data of a given node or revision number.
1866 1866
1867 1867 _df - an existing file handle to read from. (internal-only)
1868 1868 """
1869 1869 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1870 1870
1871 1871 def hash(self, text, p1, p2):
1872 1872 """Compute a node hash.
1873 1873
1874 1874 Available as a function so that subclasses can replace the hash
1875 1875 as needed.
1876 1876 """
1877 1877 return storageutil.hashrevisionsha1(text, p1, p2)
1878 1878
1879 1879 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1880 1880 """Check node hash integrity.
1881 1881
1882 1882 Available as a function so that subclasses can extend hash mismatch
1883 1883 behaviors as needed.
1884 1884 """
1885 1885 try:
1886 1886 if p1 is None and p2 is None:
1887 1887 p1, p2 = self.parents(node)
1888 1888 if node != self.hash(text, p1, p2):
1889 1889 # Clear the revision cache on hash failure. The revision cache
1890 1890 # only stores the raw revision and clearing the cache does have
1891 1891 # the side-effect that we won't have a cache hit when the raw
1892 1892 # revision data is accessed. But this case should be rare and
1893 1893 # it is extra work to teach the cache about the hash
1894 1894 # verification state.
1895 1895 if self._revisioncache and self._revisioncache[0] == node:
1896 1896 self._revisioncache = None
1897 1897
1898 1898 revornode = rev
1899 1899 if revornode is None:
1900 1900 revornode = templatefilters.short(hex(node))
1901 1901 raise error.RevlogError(
1902 1902 _(b"integrity check failed on %s:%s")
1903 1903 % (self.indexfile, pycompat.bytestr(revornode))
1904 1904 )
1905 1905 except error.RevlogError:
1906 1906 if self._censorable and storageutil.iscensoredtext(text):
1907 1907 raise error.CensoredNodeError(self.indexfile, node, text)
1908 1908 raise
1909 1909
1910 1910 def _enforceinlinesize(self, tr, fp=None):
1911 1911 """Check if the revlog is too big for inline and convert if so.
1912 1912
1913 1913 This should be called after revisions are added to the revlog. If the
1914 1914 revlog has grown too large to be an inline revlog, it will convert it
1915 1915 to use multiple index and data files.
1916 1916 """
1917 1917 tiprev = len(self) - 1
1918 1918 if (
1919 1919 not self._inline
1920 1920 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1921 1921 ):
1922 1922 return
1923 1923
1924 1924 trinfo = tr.find(self.indexfile)
1925 1925 if trinfo is None:
1926 1926 raise error.RevlogError(
1927 1927 _(b"%s not found in the transaction") % self.indexfile
1928 1928 )
1929 1929
1930 1930 trindex = trinfo[2]
1931 1931 if trindex is not None:
1932 1932 dataoff = self.start(trindex)
1933 1933 else:
1934 1934 # revlog was stripped at start of transaction, use all leftover data
1935 1935 trindex = len(self) - 1
1936 1936 dataoff = self.end(tiprev)
1937 1937
1938 1938 tr.add(self.datafile, dataoff)
1939 1939
1940 1940 if fp:
1941 1941 fp.flush()
1942 1942 fp.close()
1943 1943 # We can't use the cached file handle after close(). So prevent
1944 1944 # its usage.
1945 1945 self._writinghandles = None
1946 1946
1947 1947 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1948 1948 for r in self:
1949 1949 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1950 1950
1951 1951 with self._indexfp(b'w') as fp:
1952 1952 self.version &= ~FLAG_INLINE_DATA
1953 1953 self._inline = False
1954 1954 io = self._io
1955 1955 for i in self:
1956 1956 e = io.packentry(self.index[i], self.node, self.version, i)
1957 1957 fp.write(e)
1958 1958
1959 1959 # the temp file replaces the real index when we exit the context
1960 1960 # manager
1961 1961
1962 1962 tr.replace(self.indexfile, trindex * self._io.size)
1963 1963 self._chunkclear()
1964 1964
1965 1965 def _nodeduplicatecallback(self, transaction, node):
1966 1966 """called when trying to add a node already stored.
1967 1967 """
1968 1968
1969 1969 def addrevision(
1970 1970 self,
1971 1971 text,
1972 1972 transaction,
1973 1973 link,
1974 1974 p1,
1975 1975 p2,
1976 1976 cachedelta=None,
1977 1977 node=None,
1978 1978 flags=REVIDX_DEFAULT_FLAGS,
1979 1979 deltacomputer=None,
1980 1980 sidedata=None,
1981 1981 ):
1982 1982 """add a revision to the log
1983 1983
1984 1984 text - the revision data to add
1985 1985 transaction - the transaction object used for rollback
1986 1986 link - the linkrev data to add
1987 1987 p1, p2 - the parent nodeids of the revision
1988 1988 cachedelta - an optional precomputed delta
1989 1989 node - nodeid of revision; typically node is not specified, and it is
1990 1990 computed by default as hash(text, p1, p2), however subclasses might
1991 1991 use a different hashing method (and override checkhash() in that case)
1992 1992 flags - the known flags to set on the revision
1993 1993 deltacomputer - an optional deltacomputer instance shared between
1994 1994 multiple calls
1995 1995 """
1996 1996 if link == nullrev:
1997 1997 raise error.RevlogError(
1998 1998 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1999 1999 )
2000 2000
2001 2001 if sidedata is None:
2002 2002 sidedata = {}
2003 2003 flags = flags & ~REVIDX_SIDEDATA
2004 2004 elif not self.hassidedata:
2005 2005 raise error.ProgrammingError(
2006 2006 _(b"trying to add sidedata to a revlog that does not support it")
2007 2007 )
2008 2008 else:
2009 2009 flags |= REVIDX_SIDEDATA
2010 2010
2011 2011 if flags:
2012 2012 node = node or self.hash(text, p1, p2)
2013 2013
2014 2014 rawtext, validatehash = flagutil.processflagswrite(
2015 2015 self, text, flags, sidedata=sidedata
2016 2016 )
2017 2017
2018 2018 # If the flag processor modifies the revision data, ignore any provided
2019 2019 # cachedelta.
2020 2020 if rawtext != text:
2021 2021 cachedelta = None
2022 2022
2023 2023 if len(rawtext) > _maxentrysize:
2024 2024 raise error.RevlogError(
2025 2025 _(
2026 2026 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2027 2027 )
2028 2028 % (self.indexfile, len(rawtext))
2029 2029 )
2030 2030
2031 2031 node = node or self.hash(rawtext, p1, p2)
2032 2032 if self.index.has_node(node):
2033 2033 return node
2034 2034
2035 2035 if validatehash:
2036 2036 self.checkhash(rawtext, node, p1=p1, p2=p2)
2037 2037
2038 2038 return self.addrawrevision(
2039 2039 rawtext,
2040 2040 transaction,
2041 2041 link,
2042 2042 p1,
2043 2043 p2,
2044 2044 node,
2045 2045 flags,
2046 2046 cachedelta=cachedelta,
2047 2047 deltacomputer=deltacomputer,
2048 2048 )
2049 2049
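# Hedged usage sketch for addrevision() (assumes an open transaction ``tr``
# and parent nodes ``p1``/``p2`` already present in the revlog ``rl``; not
# runnable standalone):
#
#   node = rl.addrevision(b'contents', tr, linkrev, p1, p2)
#   assert rl.revision(node) == b'contents'
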
2050 2050 def addrawrevision(
2051 2051 self,
2052 2052 rawtext,
2053 2053 transaction,
2054 2054 link,
2055 2055 p1,
2056 2056 p2,
2057 2057 node,
2058 2058 flags,
2059 2059 cachedelta=None,
2060 2060 deltacomputer=None,
2061 2061 ):
2062 2062 """add a raw revision with known flags, node and parents
2063 2063 useful when reusing a revision not stored in this revlog (ex: received
2064 2064 over wire, or read from an external bundle).
2065 2065 """
2066 2066 dfh = None
2067 2067 if not self._inline:
2068 2068 dfh = self._datafp(b"a+")
2069 2069 ifh = self._indexfp(b"a+")
2070 2070 try:
2071 2071 return self._addrevision(
2072 2072 node,
2073 2073 rawtext,
2074 2074 transaction,
2075 2075 link,
2076 2076 p1,
2077 2077 p2,
2078 2078 flags,
2079 2079 cachedelta,
2080 2080 ifh,
2081 2081 dfh,
2082 2082 deltacomputer=deltacomputer,
2083 2083 )
2084 2084 finally:
2085 2085 if dfh:
2086 2086 dfh.close()
2087 2087 ifh.close()
2088 2088
2089 2089 def compress(self, data):
2090 2090 """Generate a possibly-compressed representation of data."""
2091 2091 if not data:
2092 2092 return b'', data
2093 2093
2094 2094 compressed = self._compressor.compress(data)
2095 2095
2096 2096 if compressed:
2097 2097 # The revlog compressor added the header in the returned data.
2098 2098 return b'', compressed
2099 2099
2100 2100 if data[0:1] == b'\0':
2101 2101 return b'', data
2102 2102 return b'u', data
2103 2103
2104 2104 def decompress(self, data):
2105 2105 """Decompress a revlog chunk.
2106 2106
2107 2107 The chunk is expected to begin with a header identifying the
2108 2108 format type so it can be routed to an appropriate decompressor.
2109 2109 """
2110 2110 if not data:
2111 2111 return data
2112 2112
2113 2113 # Revlogs are read much more frequently than they are written and many
2114 2114 # chunks only take microseconds to decompress, so performance is
2115 2115 # important here.
2116 2116 #
2117 2117 # We can make a few assumptions about revlogs:
2118 2118 #
2119 2119 # 1) the majority of chunks will be compressed (as opposed to inline
2120 2120 # raw data).
2121 2121 # 2) decompressing *any* data will likely be at least 10x slower than
2122 2122 # returning raw inline data.
2123 2123 # 3) we want to prioritize common and officially supported compression
2124 2124 # engines
2125 2125 #
2126 2126 # It follows that we want to optimize for "decompress compressed data
2127 2127 # when encoded with common and officially supported compression engines"
2128 2128 # case over "raw data" and "data encoded by less common or non-official
2129 2129 # compression engines." That is why we have the inline lookup first
2130 2130 # followed by the compengines lookup.
2131 2131 #
2132 2132 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2133 2133 # compressed chunks. And this matters for changelog and manifest reads.
2134 2134 t = data[0:1]
2135 2135
2136 2136 if t == b'x':
2137 2137 try:
2138 2138 return _zlibdecompress(data)
2139 2139 except zlib.error as e:
2140 2140 raise error.RevlogError(
2141 2141 _(b'revlog decompress error: %s')
2142 2142 % stringutil.forcebytestr(e)
2143 2143 )
2144 2144 # '\0' is more common than 'u' so it goes first.
2145 2145 elif t == b'\0':
2146 2146 return data
2147 2147 elif t == b'u':
2148 2148 return util.buffer(data, 1)
2149 2149
2150 2150 try:
2151 2151 compressor = self._decompressors[t]
2152 2152 except KeyError:
2153 2153 try:
2154 2154 engine = util.compengines.forrevlogheader(t)
2155 2155 compressor = engine.revlogcompressor(self._compengineopts)
2156 2156 self._decompressors[t] = compressor
2157 2157 except KeyError:
2158 2158 raise error.RevlogError(_(b'unknown compression type %r') % t)
2159 2159
2160 2160 return compressor.decompress(data)
2161 2161
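# An editorial sketch (stdlib only, not part of revlog) of the header
# dispatch above: zlib streams begin with 0x78 (b'x'), b'u' marks raw data
# with the marker stripped, and b'\0' marks raw data stored as-is.
import zlib as _toy_zlib


def _toy_decompress(chunk):
    t = chunk[0:1]
    if t == b'x':
        return _toy_zlib.decompress(chunk)
    if t == b'u':
        return chunk[1:]
    if t == b'\0' or not chunk:
        return chunk
    raise ValueError('unknown compression header %r' % t)


assert _toy_decompress(_toy_zlib.compress(b'hello')) == b'hello'
assert _toy_decompress(b'uhello') == b'hello'
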
2162 2162 def _addrevision(
2163 2163 self,
2164 2164 node,
2165 2165 rawtext,
2166 2166 transaction,
2167 2167 link,
2168 2168 p1,
2169 2169 p2,
2170 2170 flags,
2171 2171 cachedelta,
2172 2172 ifh,
2173 2173 dfh,
2174 2174 alwayscache=False,
2175 2175 deltacomputer=None,
2176 2176 ):
2177 2177 """internal function to add revisions to the log
2178 2178
2179 2179 see addrevision for argument descriptions.
2180 2180
2181 2181 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2182 2182
2183 2183 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2184 2184 be used.
2185 2185
2186 2186 invariants:
2187 2187 - rawtext is optional (can be None); if not set, cachedelta must be set.
2188 2188 if both are set, they must correspond to each other.
2189 2189 """
2190 2190 if node == nullid:
2191 2191 raise error.RevlogError(
2192 2192 _(b"%s: attempt to add null revision") % self.indexfile
2193 2193 )
2194 2194 if node == wdirid or node in wdirfilenodeids:
2195 2195 raise error.RevlogError(
2196 2196 _(b"%s: attempt to add wdir revision") % self.indexfile
2197 2197 )
2198 2198
2199 2199 if self._inline:
2200 2200 fh = ifh
2201 2201 else:
2202 2202 fh = dfh
2203 2203
2204 2204 btext = [rawtext]
2205 2205
2206 2206 curr = len(self)
2207 2207 prev = curr - 1
2208 2208 offset = self.end(prev)
2209 2209 p1r, p2r = self.rev(p1), self.rev(p2)
2210 2210
2211 2211 # full versions are inserted when the needed deltas
2212 2212 # become comparable to the uncompressed text
2213 2213 if rawtext is None:
2214 2214 # need rawtext size, before it is changed by flag processors, which is
2215 2215 # the non-raw size. use revlog explicitly to avoid filelog's extra
2216 2216 # logic that might remove metadata size.
2217 2217 textlen = mdiff.patchedsize(
2218 2218 revlog.size(self, cachedelta[0]), cachedelta[1]
2219 2219 )
2220 2220 else:
2221 2221 textlen = len(rawtext)
2222 2222
2223 2223 if deltacomputer is None:
2224 2224 deltacomputer = deltautil.deltacomputer(self)
2225 2225
2226 2226 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2227 2227
2228 2228 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2229 2229
2230 2230 e = (
2231 2231 offset_type(offset, flags),
2232 2232 deltainfo.deltalen,
2233 2233 textlen,
2234 2234 deltainfo.base,
2235 2235 link,
2236 2236 p1r,
2237 2237 p2r,
2238 2238 node,
2239 2239 )
2240 2240 self.index.append(e)
2241 2241
2242 2242 entry = self._io.packentry(e, self.node, self.version, curr)
2243 2243 self._writeentry(
2244 2244 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2245 2245 )
2246 2246
2247 2247 rawtext = btext[0]
2248 2248
2249 2249 if alwayscache and rawtext is None:
2250 2250 rawtext = deltacomputer.buildtext(revinfo, fh)
2251 2251
2252 2252 if type(rawtext) == bytes: # only accept immutable objects
2253 2253 self._revisioncache = (node, curr, rawtext)
2254 2254 self._chainbasecache[curr] = deltainfo.chainbase
2255 2255 return node
2256 2256
2257 2257 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2258 2258 # Files opened in a+ mode have inconsistent behavior on various
2259 2259 # platforms. Windows requires that a file positioning call be made
2260 2260 # when the file handle transitions between reads and writes. See
2261 2261 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2262 2262 # platforms, Python or the platform itself can be buggy. Some versions
2263 2263 # of Solaris have been observed to not append at the end of the file
2264 2264 # if the file was seeked to before the end. See issue4943 for more.
2265 2265 #
2266 2266 # We work around this issue by inserting a seek() before writing.
2267 2267 # Note: This is likely not necessary on Python 3. However, because
2268 2268 # the file handle is reused for reads and may be seeked there, we need
2269 2269 # to be careful before changing this.
2270 2270 ifh.seek(0, os.SEEK_END)
2271 2271 if dfh:
2272 2272 dfh.seek(0, os.SEEK_END)
2273 2273
2274 2274 curr = len(self) - 1
2275 2275 if not self._inline:
2276 2276 transaction.add(self.datafile, offset)
2277 2277 transaction.add(self.indexfile, curr * len(entry))
2278 2278 if data[0]:
2279 2279 dfh.write(data[0])
2280 2280 dfh.write(data[1])
2281 2281 ifh.write(entry)
2282 2282 else:
2283 2283 offset += curr * self._io.size
2284 2284 transaction.add(self.indexfile, offset, curr)
2285 2285 ifh.write(entry)
2286 2286 ifh.write(data[0])
2287 2287 ifh.write(data[1])
2288 2288 self._enforceinlinesize(transaction, ifh)
2289 2289
2290 2290 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2291 2291 """
2292 2292 add a delta group
2293 2293
2294 2294 given a set of deltas, add them to the revision log. the
2295 2295 first delta is against its parent, which should be in our
2296 2296         log; the rest are against the previous delta.
2297 2297
2298 2298 If ``addrevisioncb`` is defined, it will be called with arguments of
2299 2299 this revlog and the node that was added.
2300 2300 """
2301 2301
2302 2302 if self._writinghandles:
2303 2303 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2304 2304
2305 2305 nodes = []
2306 2306
2307 2307 r = len(self)
2308 2308 end = 0
2309 2309 if r:
2310 2310 end = self.end(r - 1)
2311 2311 ifh = self._indexfp(b"a+")
2312 2312 isize = r * self._io.size
2313 2313 if self._inline:
2314 2314 transaction.add(self.indexfile, end + isize, r)
2315 2315 dfh = None
2316 2316 else:
2317 2317 transaction.add(self.indexfile, isize, r)
2318 2318 transaction.add(self.datafile, end)
2319 2319 dfh = self._datafp(b"a+")
2320 2320
2321 2321 def flush():
2322 2322 if dfh:
2323 2323 dfh.flush()
2324 2324 ifh.flush()
2325 2325
2326 2326 self._writinghandles = (ifh, dfh)
2327 2327
2328 2328 try:
2329 2329 deltacomputer = deltautil.deltacomputer(self)
2330 2330 # loop through our set of deltas
2331 2331 for data in deltas:
2332 2332 node, p1, p2, linknode, deltabase, delta, flags = data
2333 2333 link = linkmapper(linknode)
2334 2334 flags = flags or REVIDX_DEFAULT_FLAGS
2335 2335
2336 2336 nodes.append(node)
2337 2337
2338 2338 if self.index.has_node(node):
2339 2339 self._nodeduplicatecallback(transaction, node)
2340 2340 # this can happen if two branches make the same change
2341 2341 continue
2342 2342
2343 2343 for p in (p1, p2):
2344 2344 if not self.index.has_node(p):
2345 2345 raise error.LookupError(
2346 2346 p, self.indexfile, _(b'unknown parent')
2347 2347 )
2348 2348
2349 2349 if not self.index.has_node(deltabase):
2350 2350 raise error.LookupError(
2351 2351 deltabase, self.indexfile, _(b'unknown delta base')
2352 2352 )
2353 2353
2354 2354 baserev = self.rev(deltabase)
2355 2355
2356 2356 if baserev != nullrev and self.iscensored(baserev):
2357 2357 # if base is censored, delta must be full replacement in a
2358 2358 # single patch operation
2359 2359 hlen = struct.calcsize(b">lll")
2360 2360 oldlen = self.rawsize(baserev)
2361 2361 newlen = len(delta) - hlen
2362 2362 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2363 2363 raise error.CensoredBaseError(
2364 2364 self.indexfile, self.node(baserev)
2365 2365 )
2366 2366
2367 2367 if not flags and self._peek_iscensored(baserev, delta, flush):
2368 2368 flags |= REVIDX_ISCENSORED
2369 2369
2370 2370 # We assume consumers of addrevisioncb will want to retrieve
2371 2371 # the added revision, which will require a call to
2372 2372 # revision(). revision() will fast path if there is a cache
2373 2373 # hit. So, we tell _addrevision() to always cache in this case.
2374 2374 # We're only using addgroup() in the context of changegroup
2375 2375 # generation so the revision data can always be handled as raw
2376 2376 # by the flagprocessor.
2377 2377 self._addrevision(
2378 2378 node,
2379 2379 None,
2380 2380 transaction,
2381 2381 link,
2382 2382 p1,
2383 2383 p2,
2384 2384 flags,
2385 2385 (baserev, delta),
2386 2386 ifh,
2387 2387 dfh,
2388 2388 alwayscache=bool(addrevisioncb),
2389 2389 deltacomputer=deltacomputer,
2390 2390 )
2391 2391
2392 2392 if addrevisioncb:
2393 2393 addrevisioncb(self, node)
2394 2394
2395 2395 if not dfh and not self._inline:
2396 2396 # addrevision switched from inline to conventional
2397 2397 # reopen the index
2398 2398 ifh.close()
2399 2399 dfh = self._datafp(b"a+")
2400 2400 ifh = self._indexfp(b"a+")
2401 2401 self._writinghandles = (ifh, dfh)
2402 2402 finally:
2403 2403 self._writinghandles = None
2404 2404
2405 2405 if dfh:
2406 2406 dfh.close()
2407 2407 ifh.close()
2408 2408
2409 2409 return nodes
2410 2410
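For reference, each element of the ``deltas`` iterable unpacked at the top of the loop above is a 7-tuple. A hedged sketch with placeholder values (none of these are real nodes):

    node = b'\x11' * 20         # node id of the revision being added
    p1 = p2 = b'\x00' * 20      # parent node ids (nullid placeholders)
    linknode = b'\x22' * 20     # changelog node; linkmapper turns it into a rev
    deltabase = b'\x00' * 20    # node the delta applies against
    delta = b''                 # binary delta payload
    flags = 0                   # falsy values fall back to REVIDX_DEFAULT_FLAGS

    entry = (node, p1, p2, linknode, deltabase, delta, flags)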
2411 2411 def iscensored(self, rev):
2412 2412 """Check if a file revision is censored."""
2413 2413 if not self._censorable:
2414 2414 return False
2415 2415
2416 2416 return self.flags(rev) & REVIDX_ISCENSORED
2417 2417
2418 2418 def _peek_iscensored(self, baserev, delta, flush):
2419 2419 """Quickly check if a delta produces a censored revision."""
2420 2420 if not self._censorable:
2421 2421 return False
2422 2422
2423 2423 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2424 2424
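The check in ``iscensored()`` is a plain bitwise test against the revision's flags word. A self-contained sketch (the flag value below is assumed for illustration; the real constant is defined alongside Mercurial's other revlog flags):

    REVIDX_ISCENSORED = 1 << 15     # assumed value, for illustration only

    def is_censored(flags):
        """Return True if the censored bit is set in a flags word."""
        return bool(flags & REVIDX_ISCENSORED)

    assert is_censored(REVIDX_ISCENSORED)
    assert not is_censored(0)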
2425 2425 def getstrippoint(self, minlink):
2426 2426 """find the minimum rev that must be stripped to strip the linkrev
2427 2427
2428 2428 Returns a tuple containing the minimum rev and a set of all revs that
2429 2429 have linkrevs that will be broken by this strip.
2430 2430 """
2431 2431 return storageutil.resolvestripinfo(
2432 2432 minlink,
2433 2433 len(self) - 1,
2434 2434 self.headrevs(),
2435 2435 self.linkrev,
2436 2436 self.parentrevs,
2437 2437 )
2438 2438
2439 2439 def strip(self, minlink, transaction):
2440 2440 """truncate the revlog on the first revision with a linkrev >= minlink
2441 2441
2442 2442 This function is called when we're stripping revision minlink and
2443 2443 its descendants from the repository.
2444 2444
2445 2445 We have to remove all revisions with linkrev >= minlink, because
2446 2446 the equivalent changelog revisions will be renumbered after the
2447 2447 strip.
2448 2448
2449 2449 So we truncate the revlog on the first of these revisions, and
2450 2450 trust that the caller has saved the revisions that shouldn't be
2451 2451 removed and that it'll re-add them after this truncation.
2452 2452 """
2453 2453 if len(self) == 0:
2454 2454 return
2455 2455
2456 2456 rev, _ = self.getstrippoint(minlink)
2457 2457 if rev == len(self):
2458 2458 return
2459 2459
2460 2460 # first truncate the files on disk
2461 2461 end = self.start(rev)
2462 2462 if not self._inline:
2463 2463 transaction.add(self.datafile, end)
2464 2464 end = rev * self._io.size
2465 2465 else:
2466 2466 end += rev * self._io.size
2467 2467
2468 2468 transaction.add(self.indexfile, end)
2469 2469
2470 2470 # then reset internal state in memory to forget those revisions
2471 2471 self._revisioncache = None
2472 2472 self._chaininfocache = {}
2473 2473 self._chunkclear()
2474 2474
2475 2475 del self.index[rev:-1]
2476 2476
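The truncation arithmetic in ``strip()`` differs for inline and split revlogs. An illustrative stand-alone helper (not Mercurial API) mirroring the offsets computed above, given the first stripped revision ``rev``, its starting data offset ``start``, and the fixed index entry size:

    def truncation_offsets(rev, start, entry_size, inline):
        """Mirror strip()'s offset math for inline vs. split revlogs."""
        if inline:
            # index entries and data chunks share one file
            return {'index': start + rev * entry_size}
        # separate files are truncated independently
        return {'index': rev * entry_size, 'data': start}

    # stripping from rev 10 of a split revlog with 64-byte index entries:
    assert truncation_offsets(10, 4096, 64, False) == {'index': 640, 'data': 4096}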
2477 2477 def checksize(self):
2478 2478 """Check size of index and data files
2479 2479
2480 2480 return a (dd, di) tuple.
2481 2481 - dd: extra bytes for the "data" file
2482 2482 - di: extra bytes for the "index" file
2483 2483
2484 2484 A healthy revlog will return (0, 0).
2485 2485 """
2486 2486 expected = 0
2487 2487 if len(self):
2488 2488 expected = max(0, self.end(len(self) - 1))
2489 2489
2490 2490 try:
2491 2491 with self._datafp() as f:
2492 2492 f.seek(0, io.SEEK_END)
2493 2493 actual = f.tell()
2494 2494 dd = actual - expected
2495 2495 except IOError as inst:
2496 2496 if inst.errno != errno.ENOENT:
2497 2497 raise
2498 2498 dd = 0
2499 2499
2500 2500 try:
2501 2501 f = self.opener(self.indexfile)
2502 2502 f.seek(0, io.SEEK_END)
2503 2503 actual = f.tell()
2504 2504 f.close()
2505 2505 s = self._io.size
2506 2506 i = max(0, actual // s)
2507 2507 di = actual - (i * s)
2508 2508 if self._inline:
2509 2509 databytes = 0
2510 2510 for r in self:
2511 2511 databytes += max(0, self.length(r))
2512 2512 dd = 0
2513 2513 di = actual - len(self) * s - databytes
2514 2514 except IOError as inst:
2515 2515 if inst.errno != errno.ENOENT:
2516 2516 raise
2517 2517 di = 0
2518 2518
2519 2519 return (dd, di)
2520 2520
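A short usage sketch for ``checksize()``, assuming ``rl`` is an open revlog instance; non-zero values indicate a mismatch between the index's accounting and the bytes actually on disk:

    dd, di = rl.checksize()     # rl is assumed to be a revlog instance
    if dd:
        print('data file is off by %d bytes' % dd)
    if di:
        print('index file has %d extra bytes' % di)
    if not (dd or di):
        print('revlog sizes are consistent')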
2521 2521 def files(self):
2522 2522 res = [self.indexfile]
2523 2523 if not self._inline:
2524 2524 res.append(self.datafile)
2525 2525 return res
2526 2526
2527 2527 def emitrevisions(
2528 2528 self,
2529 2529 nodes,
2530 2530 nodesorder=None,
2531 2531 revisiondata=False,
2532 2532 assumehaveparentrevisions=False,
2533 2533 deltamode=repository.CG_DELTAMODE_STD,
2534 2534 ):
2535 2535 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2536 2536 raise error.ProgrammingError(
2537 2537 b'unhandled value for nodesorder: %s' % nodesorder
2538 2538 )
2539 2539
2540 2540 if nodesorder is None and not self._generaldelta:
2541 2541 nodesorder = b'storage'
2542 2542
2543 2543 if (
2544 2544 not self._storedeltachains
2545 2545 and deltamode != repository.CG_DELTAMODE_PREV
2546 2546 ):
2547 2547 deltamode = repository.CG_DELTAMODE_FULL
2548 2548
2549 2549 return storageutil.emitrevisions(
2550 2550 self,
2551 2551 nodes,
2552 2552 nodesorder,
2553 2553 revlogrevisiondelta,
2554 2554 deltaparentfn=self.deltaparent,
2555 2555 candeltafn=self.candelta,
2556 2556 rawsizefn=self.rawsize,
2557 2557 revdifffn=self.revdiff,
2558 2558 flagsfn=self.flags,
2559 2559 deltamode=deltamode,
2560 2560 revisiondata=revisiondata,
2561 2561 assumehaveparentrevisions=assumehaveparentrevisions,
2562 2562 )
2563 2563
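A hedged usage sketch for ``emitrevisions()``: stream delta objects for a set of nodes in storage order, with revision data included. Here ``rl``, ``nodes``, and the consumer are assumptions of the example, not part of the API:

    for rdelta in rl.emitrevisions(
        nodes,                  # assumed: iterable of binary node ids
        nodesorder=b'storage',
        revisiondata=True,
    ):
        # each emitted object carries node, parents, and either a delta
        # or a full revision, per the revlogrevisiondelta interface
        consume(rdelta)         # hypothetical consumer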
2564 2564 DELTAREUSEALWAYS = b'always'
2565 2565 DELTAREUSESAMEREVS = b'samerevs'
2566 2566 DELTAREUSENEVER = b'never'
2567 2567
2568 2568 DELTAREUSEFULLADD = b'fulladd'
2569 2569
2570 2570 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2571 2571
2572 2572 def clone(
2573 2573 self,
2574 2574 tr,
2575 2575 destrevlog,
2576 2576 addrevisioncb=None,
2577 2577 deltareuse=DELTAREUSESAMEREVS,
2578 2578 forcedeltabothparents=None,
2579 2579 sidedatacompanion=None,
2580 2580 ):
2581 2581 """Copy this revlog to another, possibly with format changes.
2582 2582
2583 2583 The destination revlog will contain the same revisions and nodes.
2584 2584 However, it may not be bit-for-bit identical due to e.g. delta encoding
2585 2585 differences.
2586 2586
2587 2587         The ``deltareuse`` argument controls how deltas from the existing revlog
2588 2588 are preserved in the destination revlog. The argument can have the
2589 2589 following values:
2590 2590
2591 2591 DELTAREUSEALWAYS
2592 2592 Deltas will always be reused (if possible), even if the destination
2593 2593 revlog would not select the same revisions for the delta. This is the
2594 2594 fastest mode of operation.
2595 2595 DELTAREUSESAMEREVS
2596 2596 Deltas will be reused if the destination revlog would pick the same
2597 2597 revisions for the delta. This mode strikes a balance between speed
2598 2598 and optimization.
2599 2599 DELTAREUSENEVER
2600 2600 Deltas will never be reused. This is the slowest mode of execution.
2601 2601 This mode can be used to recompute deltas (e.g. if the diff/delta
2602 2602 algorithm changes).
2603 2603 DELTAREUSEFULLADD
2604 2604           Revisions will be re-added as if they were new content. This is
2605 2605           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2606 2606           e.g. large file detection and handling.
2607 2607
2608 2608 Delta computation can be slow, so the choice of delta reuse policy can
2609 2609 significantly affect run time.
2610 2610
2611 2611 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2612 2612 two extremes. Deltas will be reused if they are appropriate. But if the
2613 2613 delta could choose a better revision, it will do so. This means if you
2614 2614 are converting a non-generaldelta revlog to a generaldelta revlog,
2615 2615 deltas will be recomputed if the delta's parent isn't a parent of the
2616 2616 revision.
2617 2617
2618 2618         In addition to the delta policy, the ``forcedeltabothparents``
2619 2619         argument controls whether to force computing deltas against both
2620 2620         parents for merges. If None, the destination revlog's setting is used.
2621 2621
2622 2622         If not None, `sidedatacompanion` is a callable that accepts two
2623 2623         arguments:
2624 2624
2625 2625 (srcrevlog, rev)
2626 2626
2627 2627         and returns a triplet that controls changes to sidedata content from the
2628 2628 old revision to the new clone result:
2629 2629
2630 2630 (dropall, filterout, update)
2631 2631
2632 2632 * if `dropall` is True, all sidedata should be dropped
2633 2633 * `filterout` is a set of sidedata keys that should be dropped
2634 2634         * `update` is a mapping of additional/new key -> value
2635 2635 """
2636 2636 if deltareuse not in self.DELTAREUSEALL:
2637 2637 raise ValueError(
2638 2638 _(b'value for deltareuse invalid: %s') % deltareuse
2639 2639 )
2640 2640
2641 2641 if len(destrevlog):
2642 2642 raise ValueError(_(b'destination revlog is not empty'))
2643 2643
2644 2644 if getattr(self, 'filteredrevs', None):
2645 2645 raise ValueError(_(b'source revlog has filtered revisions'))
2646 2646 if getattr(destrevlog, 'filteredrevs', None):
2647 2647 raise ValueError(_(b'destination revlog has filtered revisions'))
2648 2648
2649 2649         # lazydelta and lazydeltabase control whether to reuse a cached delta,
2650 2650 # if possible.
2651 2651 oldlazydelta = destrevlog._lazydelta
2652 2652 oldlazydeltabase = destrevlog._lazydeltabase
2653 2653 oldamd = destrevlog._deltabothparents
2654 2654
2655 2655 try:
2656 2656 if deltareuse == self.DELTAREUSEALWAYS:
2657 2657 destrevlog._lazydeltabase = True
2658 2658 destrevlog._lazydelta = True
2659 2659 elif deltareuse == self.DELTAREUSESAMEREVS:
2660 2660 destrevlog._lazydeltabase = False
2661 2661 destrevlog._lazydelta = True
2662 2662 elif deltareuse == self.DELTAREUSENEVER:
2663 2663 destrevlog._lazydeltabase = False
2664 2664 destrevlog._lazydelta = False
2665 2665
2666 2666 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2667 2667
2668 2668 self._clone(
2669 2669 tr,
2670 2670 destrevlog,
2671 2671 addrevisioncb,
2672 2672 deltareuse,
2673 2673 forcedeltabothparents,
2674 2674 sidedatacompanion,
2675 2675 )
2676 2676
2677 2677 finally:
2678 2678 destrevlog._lazydelta = oldlazydelta
2679 2679 destrevlog._lazydeltabase = oldlazydeltabase
2680 2680 destrevlog._deltabothparents = oldamd
2681 2681
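A minimal sketch of a ``sidedatacompanion`` obeying the contract documented in ``clone()`` above; the sidedata key names are placeholders:

    def companion(srcrevlog, rev):
        """Drop one hypothetical sidedata key and add another on every rev."""
        dropall = False                     # keep sidedata in general
        filterout = {b'stale-key'}          # placeholder key to remove
        update = {b'fresh-key': b'value'}   # placeholder key to add
        return (dropall, filterout, update)

    # usage sketch: srcrevlog.clone(tr, destrevlog, sidedatacompanion=companion)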
2682 2682 def _clone(
2683 2683 self,
2684 2684 tr,
2685 2685 destrevlog,
2686 2686 addrevisioncb,
2687 2687 deltareuse,
2688 2688 forcedeltabothparents,
2689 2689 sidedatacompanion,
2690 2690 ):
2691 2691 """perform the core duty of `revlog.clone` after parameter processing"""
2692 2692 deltacomputer = deltautil.deltacomputer(destrevlog)
2693 2693 index = self.index
2694 2694 for rev in self:
2695 2695 entry = index[rev]
2696 2696
2697 2697 # Some classes override linkrev to take filtered revs into
2698 2698 # account. Use raw entry from index.
2699 2699 flags = entry[0] & 0xFFFF
2700 2700 linkrev = entry[4]
2701 2701 p1 = index[entry[5]][7]
2702 2702 p2 = index[entry[6]][7]
2703 2703 node = entry[7]
2704 2704
2705 2705 sidedataactions = (False, [], {})
2706 2706 if sidedatacompanion is not None:
2707 2707 sidedataactions = sidedatacompanion(self, rev)
2708 2708
2709 2709 # (Possibly) reuse the delta from the revlog if allowed and
2710 2710 # the revlog chunk is a delta.
2711 2711 cachedelta = None
2712 2712 rawtext = None
2713 2713 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2714 2714 dropall, filterout, update = sidedataactions
2715 2715 text, sidedata = self._revisiondata(rev)
2716 2716 if dropall:
2717 2717 sidedata = {}
2718 2718 for key in filterout:
2719 2719 sidedata.pop(key, None)
2720 2720 sidedata.update(update)
2721 2721 if not sidedata:
2722 2722 sidedata = None
2723 2723 destrevlog.addrevision(
2724 2724 text,
2725 2725 tr,
2726 2726 linkrev,
2727 2727 p1,
2728 2728 p2,
2729 2729 cachedelta=cachedelta,
2730 2730 node=node,
2731 2731 flags=flags,
2732 2732 deltacomputer=deltacomputer,
2733 2733 sidedata=sidedata,
2734 2734 )
2735 2735 else:
2736 2736 if destrevlog._lazydelta:
2737 2737 dp = self.deltaparent(rev)
2738 2738 if dp != nullrev:
2739 2739 cachedelta = (dp, bytes(self._chunk(rev)))
2740 2740
2741 2741 if not cachedelta:
2742 2742 rawtext = self.rawdata(rev)
2743 2743
2744 2744 ifh = destrevlog.opener(
2745 2745 destrevlog.indexfile, b'a+', checkambig=False
2746 2746 )
2747 2747 dfh = None
2748 2748 if not destrevlog._inline:
2749 2749 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2750 2750 try:
2751 2751 destrevlog._addrevision(
2752 2752 node,
2753 2753 rawtext,
2754 2754 tr,
2755 2755 linkrev,
2756 2756 p1,
2757 2757 p2,
2758 2758 flags,
2759 2759 cachedelta,
2760 2760 ifh,
2761 2761 dfh,
2762 2762 deltacomputer=deltacomputer,
2763 2763 )
2764 2764 finally:
2765 2765 if dfh:
2766 2766 dfh.close()
2767 2767 ifh.close()
2768 2768
2769 2769 if addrevisioncb:
2770 2770 addrevisioncb(self, rev, node)
2771 2771
2772 2772 def censorrevision(self, tr, censornode, tombstone=b''):
2773 2773 if (self.version & 0xFFFF) == REVLOGV0:
2774 2774 raise error.RevlogError(
2775 2775 _(b'cannot censor with version %d revlogs') % self.version
2776 2776 )
2777 2777
2778 2778 censorrev = self.rev(censornode)
2779 2779 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2780 2780
2781 2781 if len(tombstone) > self.rawsize(censorrev):
2782 2782 raise error.Abort(
2783 2783 _(b'censor tombstone must be no longer than censored data')
2784 2784 )
2785 2785
2786 2786 # Rewriting the revlog in place is hard. Our strategy for censoring is
2787 2787 # to create a new revlog, copy all revisions to it, then replace the
2788 2788 # revlogs on transaction close.
2789 2789
2790 2790 newindexfile = self.indexfile + b'.tmpcensored'
2791 2791 newdatafile = self.datafile + b'.tmpcensored'
2792 2792
2793 2793 # This is a bit dangerous. We could easily have a mismatch of state.
2794 2794 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2795 2795 newrl.version = self.version
2796 2796 newrl._generaldelta = self._generaldelta
2797 2797 newrl._io = self._io
2798 2798
2799 2799 for rev in self.revs():
2800 2800 node = self.node(rev)
2801 2801 p1, p2 = self.parents(node)
2802 2802
2803 2803 if rev == censorrev:
2804 2804 newrl.addrawrevision(
2805 2805 tombstone,
2806 2806 tr,
2807 2807 self.linkrev(censorrev),
2808 2808 p1,
2809 2809 p2,
2810 2810 censornode,
2811 2811 REVIDX_ISCENSORED,
2812 2812 )
2813 2813
2814 2814 if newrl.deltaparent(rev) != nullrev:
2815 2815 raise error.Abort(
2816 2816 _(
2817 2817 b'censored revision stored as delta; '
2818 2818 b'cannot censor'
2819 2819 ),
2820 2820 hint=_(
2821 2821 b'censoring of revlogs is not '
2822 2822 b'fully implemented; please report '
2823 2823 b'this bug'
2824 2824 ),
2825 2825 )
2826 2826 continue
2827 2827
2828 2828 if self.iscensored(rev):
2829 2829 if self.deltaparent(rev) != nullrev:
2830 2830 raise error.Abort(
2831 2831 _(
2832 2832 b'cannot censor due to censored '
2833 2833 b'revision having delta stored'
2834 2834 )
2835 2835 )
2836 2836 rawtext = self._chunk(rev)
2837 2837 else:
2838 2838 rawtext = self.rawdata(rev)
2839 2839
2840 2840 newrl.addrawrevision(
2841 2841 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2842 2842 )
2843 2843
2844 2844 tr.addbackup(self.indexfile, location=b'store')
2845 2845 if not self._inline:
2846 2846 tr.addbackup(self.datafile, location=b'store')
2847 2847
2848 2848 self.opener.rename(newrl.indexfile, self.indexfile)
2849 2849 if not self._inline:
2850 2850 self.opener.rename(newrl.datafile, self.datafile)
2851 2851
2852 2852 self.clearcaches()
2853 2853 self._loadindex()
2854 2854
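The size constraint enforced at the top of ``censorrevision()`` can be modelled simply: the packed tombstone (a ``\1\n``-delimited metadata header carrying the censor message) must not exceed the raw size of the revision it replaces. An approximate, stand-alone restatement of that check:

    def tombstone_fits(rawsize, message):
        """Approximate the packed-size test censorrevision() performs."""
        # approximates storageutil.packmeta({b'censored': message}, b'')
        packed = b'\x01\n' + b'censored: ' + message + b'\n' + b'\x01\n'
        return len(packed) <= rawsize

    assert tombstone_fits(100, b'removed for legal reasons')
    assert not tombstone_fits(5, b'removed for legal reasons')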
2855 2855 def verifyintegrity(self, state):
2856 2856 """Verifies the integrity of the revlog.
2857 2857
2858 2858 Yields ``revlogproblem`` instances describing problems that are
2859 2859 found.
2860 2860 """
2861 2861 dd, di = self.checksize()
2862 2862 if dd:
2863 2863 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2864 2864 if di:
2865 2865 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2866 2866
2867 2867 version = self.version & 0xFFFF
2868 2868
2869 2869 # The verifier tells us what version revlog we should be.
2870 2870 if version != state[b'expectedversion']:
2871 2871 yield revlogproblem(
2872 2872 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2873 2873 % (self.indexfile, version, state[b'expectedversion'])
2874 2874 )
2875 2875
2876 2876 state[b'skipread'] = set()
2877 state[b'safe_renamed'] = set()
2877 2878
2878 2879 for rev in self:
2879 2880 node = self.node(rev)
2880 2881
2881 2882 # Verify contents. 4 cases to care about:
2882 2883 #
2883 2884 # common: the most common case
2884 2885 # rename: with a rename
2885 2886 # meta: file content starts with b'\1\n', the metadata
2886 2887 # header defined in filelog.py, but without a rename
2887 2888 # ext: content stored externally
2888 2889 #
2889 2890 # More formally, their differences are shown below:
2890 2891 #
2891 2892 # | common | rename | meta | ext
2892 2893 # -------------------------------------------------------
2893 2894 # flags() | 0 | 0 | 0 | not 0
2894 2895 # renamed() | False | True | False | ?
2895 2896 # rawtext[0:2]=='\1\n'| False | True | True | ?
2896 2897 #
2897 2898 # "rawtext" means the raw text stored in revlog data, which
2898 2899 # could be retrieved by "rawdata(rev)". "text"
2899 2900 # mentioned below is "revision(rev)".
2900 2901 #
2901 2902 # There are 3 different lengths stored physically:
2902 2903 # 1. L1: rawsize, stored in revlog index
2903 2904 # 2. L2: len(rawtext), stored in revlog data
2904 2905 # 3. L3: len(text), stored in revlog data if flags==0, or
2905 2906 # possibly somewhere else if flags!=0
2906 2907 #
2907 2908 # L1 should be equal to L2. L3 could be different from them.
2908 2909 # "text" may or may not affect commit hash depending on flag
2909 2910 # processors (see flagutil.addflagprocessor).
2910 2911 #
2911 2912 # | common | rename | meta | ext
2912 2913 # -------------------------------------------------
2913 2914 # rawsize() | L1 | L1 | L1 | L1
2914 2915 # size() | L1 | L2-LM | L1(*) | L1 (?)
2915 2916 # len(rawtext) | L2 | L2 | L2 | L2
2916 2917 # len(text) | L2 | L2 | L2 | L3
2917 2918 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2918 2919 #
2919 2920 # LM: length of metadata, depending on rawtext
2920 2921 # (*): not ideal, see comment in filelog.size
2921 2922 # (?): could be "- len(meta)" if the resolved content has
2922 2923 # rename metadata
2923 2924 #
2924 2925 # Checks needed to be done:
2925 2926 # 1. length check: L1 == L2, in all cases.
2926 2927 # 2. hash check: depending on flag processor, we may need to
2927 2928 # use either "text" (external), or "rawtext" (in revlog).
2928 2929
2929 2930 try:
2930 2931 skipflags = state.get(b'skipflags', 0)
2931 2932 if skipflags:
2932 2933 skipflags &= self.flags(rev)
2933 2934
2934 2935 _verify_revision(self, skipflags, state, node)
2935 2936
2936 2937 l1 = self.rawsize(rev)
2937 2938 l2 = len(self.rawdata(node))
2938 2939
2939 2940 if l1 != l2:
2940 2941 yield revlogproblem(
2941 2942 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
2942 2943 node=node,
2943 2944 )
2944 2945
2945 2946 except error.CensoredNodeError:
2946 2947 if state[b'erroroncensored']:
2947 2948 yield revlogproblem(
2948 2949 error=_(b'censored file data'), node=node
2949 2950 )
2950 2951 state[b'skipread'].add(node)
2951 2952 except Exception as e:
2952 2953 yield revlogproblem(
2953 2954 error=_(b'unpacking %s: %s')
2954 2955 % (short(node), stringutil.forcebytestr(e)),
2955 2956 node=node,
2956 2957 )
2957 2958 state[b'skipread'].add(node)
2958 2959
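``verifyintegrity()`` seeds two signalling sets that the storage layer may populate: ``skipread`` for nodes whose content could not be hash-checked, and the newly introduced ``safe_renamed`` for nodes whose rename metadata can still be resolved cheaply (the LFS case, where the pointer itself records the copy information). A self-contained sketch of the protocol:

    node = b'\x99' * 20                     # placeholder node id

    state = {b'skipread': set(), b'safe_renamed': set()}
    state[b'skipread'].add(node)            # full content check was skipped...
    state[b'safe_renamed'].add(node)        # ...but renamed() remains safe

    # rename checking is skipped only when both conditions fail
    check_rename = (node not in state[b'skipread']
                    or node in state[b'safe_renamed'])
    assert check_rename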
2959 2960 def storageinfo(
2960 2961 self,
2961 2962 exclusivefiles=False,
2962 2963 sharedfiles=False,
2963 2964 revisionscount=False,
2964 2965 trackedsize=False,
2965 2966 storedsize=False,
2966 2967 ):
2967 2968 d = {}
2968 2969
2969 2970 if exclusivefiles:
2970 2971 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
2971 2972 if not self._inline:
2972 2973 d[b'exclusivefiles'].append((self.opener, self.datafile))
2973 2974
2974 2975 if sharedfiles:
2975 2976 d[b'sharedfiles'] = []
2976 2977
2977 2978 if revisionscount:
2978 2979 d[b'revisionscount'] = len(self)
2979 2980
2980 2981 if trackedsize:
2981 2982 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
2982 2983
2983 2984 if storedsize:
2984 2985 d[b'storedsize'] = sum(
2985 2986 self.opener.stat(path).st_size for path in self.files()
2986 2987 )
2987 2988
2988 2989 return d
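A usage sketch for ``storageinfo()``, assuming ``fl`` is a filelog-like storage object; only the requested keys are computed and returned:

    info = fl.storageinfo(
        revisionscount=True,
        trackedsize=True,
        storedsize=True,
    )
    print('revisions: %d' % info[b'revisionscount'])
    print('tracked bytes: %d' % info[b'trackedsize'])
    print('stored bytes: %d' % info[b'storedsize'])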
@@ -1,626 +1,628 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
25 25 VERIFY_DEFAULT = 0
26 26 VERIFY_FULL = 1
27 27
28 28
29 29 def verify(repo, level=None):
30 30 with repo.lock():
31 31 v = verifier(repo, level)
32 32 return v.verify()
33 33
34 34
35 35 def _normpath(f):
36 36 # under hg < 2.4, convert didn't sanitize paths properly, so a
37 37 # converted repo may contain repeated slashes
38 38 while b'//' in f:
39 39 f = f.replace(b'//', b'/')
40 40 return f
41 41
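``_normpath()``'s loop collapses runs of slashes of any length, not just pairs. A quick self-contained check of the same logic:

    def collapse(f):                # same loop as _normpath above
        while b'//' in f:
            f = f.replace(b'//', b'/')
        return f

    assert collapse(b'dir//sub////file') == b'dir/sub/file'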
42 42
43 43 class verifier(object):
44 44 def __init__(self, repo, level=None):
45 45 self.repo = repo.unfiltered()
46 46 self.ui = repo.ui
47 47 self.match = repo.narrowmatch()
48 48 if level is None:
49 49 level = VERIFY_DEFAULT
50 50 self._level = level
51 51 self.badrevs = set()
52 52 self.errors = 0
53 53 self.warnings = 0
54 54 self.havecl = len(repo.changelog) > 0
55 55 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
56 56 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
57 57 self.lrugetctx = util.lrucachefunc(repo.__getitem__)
58 58 self.refersmf = False
59 59 self.fncachewarned = False
60 60 # developer config: verify.skipflags
61 61 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
62 62 self.warnorphanstorefiles = True
63 63
64 64 def _warn(self, msg):
65 65 """record a "warning" level issue"""
66 66 self.ui.warn(msg + b"\n")
67 67 self.warnings += 1
68 68
69 69 def _err(self, linkrev, msg, filename=None):
70 70         """record an "error" level issue"""
71 71 if linkrev is not None:
72 72 self.badrevs.add(linkrev)
73 73 linkrev = b"%d" % linkrev
74 74 else:
75 75 linkrev = b'?'
76 76 msg = b"%s: %s" % (linkrev, msg)
77 77 if filename:
78 78 msg = b"%s@%s" % (filename, msg)
79 79 self.ui.warn(b" " + msg + b"\n")
80 80 self.errors += 1
81 81
82 82 def _exc(self, linkrev, msg, inst, filename=None):
83 83 """record exception raised during the verify process"""
84 84 fmsg = pycompat.bytestr(inst)
85 85 if not fmsg:
86 86 fmsg = pycompat.byterepr(inst)
87 87 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
88 88
89 89 def _checkrevlog(self, obj, name, linkrev):
90 90 """verify high level property of a revlog
91 91
92 92 - revlog is present,
93 93 - revlog is non-empty,
94 94 - sizes (index and data) are correct,
95 95 - revlog's format version is correct.
96 96 """
97 97 if not len(obj) and (self.havecl or self.havemf):
98 98 self._err(linkrev, _(b"empty or missing %s") % name)
99 99 return
100 100
101 101 d = obj.checksize()
102 102 if d[0]:
103 103 self._err(None, _(b"data length off by %d bytes") % d[0], name)
104 104 if d[1]:
105 105 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
106 106
107 107 if obj.version != revlog.REVLOGV0:
108 108 if not self.revlogv1:
109 109 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
110 110 elif self.revlogv1:
111 111 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
112 112
113 113 def _checkentry(self, obj, i, node, seen, linkrevs, f):
114 114 """verify a single revlog entry
115 115
116 116 arguments are:
117 117 - obj: the source revlog
118 118 - i: the revision number
119 119 - node: the revision node id
120 120 - seen: nodes previously seen for this revlog
121 121 - linkrevs: [changelog-revisions] introducing "node"
122 122 - f: string label ("changelog", "manifest", or filename)
123 123
124 124 Performs the following checks:
125 125 - linkrev points to an existing changelog revision,
126 126 - linkrev points to a changelog revision that introduces this revision,
127 127 - linkrev points to the lowest of these changesets,
128 128 - both parents exist in the revlog,
129 129 - the revision is not duplicated.
130 130
131 131 Return the linkrev of the revision (or None for changelog's revisions).
132 132 """
133 133 lr = obj.linkrev(obj.rev(node))
134 134 if lr < 0 or (self.havecl and lr not in linkrevs):
135 135 if lr < 0 or lr >= len(self.repo.changelog):
136 136 msg = _(b"rev %d points to nonexistent changeset %d")
137 137 else:
138 138 msg = _(b"rev %d points to unexpected changeset %d")
139 139 self._err(None, msg % (i, lr), f)
140 140 if linkrevs:
141 141 if f and len(linkrevs) > 1:
142 142 try:
143 143 # attempt to filter down to real linkrevs
144 144 linkrevs = [
145 145 l
146 146 for l in linkrevs
147 147 if self.lrugetctx(l)[f].filenode() == node
148 148 ]
149 149 except Exception:
150 150 pass
151 151 self._warn(
152 152 _(b" (expected %s)")
153 153 % b" ".join(map(pycompat.bytestr, linkrevs))
154 154 )
155 155 lr = None # can't be trusted
156 156
157 157 try:
158 158 p1, p2 = obj.parents(node)
159 159 if p1 not in seen and p1 != nullid:
160 160 self._err(
161 161 lr,
162 162 _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
163 163 f,
164 164 )
165 165 if p2 not in seen and p2 != nullid:
166 166 self._err(
167 167 lr,
168 168 _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
169 169 f,
170 170 )
171 171 except Exception as inst:
172 172 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
173 173
174 174 if node in seen:
175 175 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
176 176 seen[node] = i
177 177 return lr
178 178
179 179 def verify(self):
180 180 """verify the content of the Mercurial repository
181 181
182 182         This method runs all verifications, displaying issues as they are found.
183 183 
184 184         return 1 if any errors have been encountered, 0 otherwise."""
185 185 # initial validation and generic report
186 186 repo = self.repo
187 187 ui = repo.ui
188 188 if not repo.url().startswith(b'file:'):
189 189 raise error.Abort(_(b"cannot verify bundle or remote repos"))
190 190
191 191 if os.path.exists(repo.sjoin(b"journal")):
192 192 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
193 193
194 194 if ui.verbose or not self.revlogv1:
195 195 ui.status(
196 196 _(b"repository uses revlog format %d\n")
197 197 % (self.revlogv1 and 1 or 0)
198 198 )
199 199
200 200 # data verification
201 201 mflinkrevs, filelinkrevs = self._verifychangelog()
202 202 filenodes = self._verifymanifest(mflinkrevs)
203 203 del mflinkrevs
204 204 self._crosscheckfiles(filelinkrevs, filenodes)
205 205 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
206 206
207 207 # final report
208 208 ui.status(
209 209 _(b"checked %d changesets with %d changes to %d files\n")
210 210 % (len(repo.changelog), filerevisions, totalfiles)
211 211 )
212 212 if self.warnings:
213 213 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
214 214 if self.fncachewarned:
215 215 ui.warn(
216 216 _(
217 217 b'hint: run "hg debugrebuildfncache" to recover from '
218 218 b'corrupt fncache\n'
219 219 )
220 220 )
221 221 if self.errors:
222 222 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
223 223 if self.badrevs:
224 224 ui.warn(
225 225 _(b"(first damaged changeset appears to be %d)\n")
226 226 % min(self.badrevs)
227 227 )
228 228 return 1
229 229 return 0
230 230
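A hedged sketch of driving a full verification from code, assuming ``repo`` is an already-open local repository object:

    from mercurial import verify

    rc = verify.verify(repo, level=verify.VERIFY_FULL)  # also re-reads manifests
    if rc:
        print('integrity errors were found')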
231 231 def _verifychangelog(self):
232 232 """verify the changelog of a repository
233 233
234 234 The following checks are performed:
235 235 - all of `_checkrevlog` checks,
236 236         - all of `_checkentry` checks (for each revision),
237 237 - each revision can be read.
238 238
239 239 The function returns some of the data observed in the changesets as a
240 240         (mflinkrevs, filelinkrevs) tuple:
241 241 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
242 242 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
243 243
244 244         If a matcher was specified, filelinkrevs will only contain matched
245 245 files.
246 246 """
247 247 ui = self.ui
248 248 repo = self.repo
249 249 match = self.match
250 250 cl = repo.changelog
251 251
252 252 ui.status(_(b"checking changesets\n"))
253 253 mflinkrevs = {}
254 254 filelinkrevs = {}
255 255 seen = {}
256 256 self._checkrevlog(cl, b"changelog", 0)
257 257 progress = ui.makeprogress(
258 258 _(b'checking'), unit=_(b'changesets'), total=len(repo)
259 259 )
260 260 for i in repo:
261 261 progress.update(i)
262 262 n = cl.node(i)
263 263 self._checkentry(cl, i, n, seen, [i], b"changelog")
264 264
265 265 try:
266 266 changes = cl.read(n)
267 267 if changes[0] != nullid:
268 268 mflinkrevs.setdefault(changes[0], []).append(i)
269 269 self.refersmf = True
270 270 for f in changes[3]:
271 271 if match(f):
272 272 filelinkrevs.setdefault(_normpath(f), []).append(i)
273 273 except Exception as inst:
274 274 self.refersmf = True
275 275 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
276 276 progress.complete()
277 277 return mflinkrevs, filelinkrevs
278 278
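For orientation, the two mappings returned by ``_verifychangelog()`` have the following shapes (values fabricated for illustration):

    # manifest node -> changelog revisions referencing it
    mflinkrevs = {b'\x11' * 20: [0, 3]}

    # (matched) file path -> changelog revisions touching it
    filelinkrevs = {b'dir/file.txt': [0, 2]}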
279 279 def _verifymanifest(
280 280 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
281 281 ):
282 282 """verify the manifestlog content
283 283
284 284 Inputs:
285 285 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
286 286 - dir: a subdirectory to check (for tree manifest repo)
287 287 - storefiles: set of currently "orphan" files.
288 288 - subdirprogress: a progress object
289 289
290 290 This function checks:
291 291 * all of `_checkrevlog` checks (for all manifest related revlogs)
292 292 * all of `_checkentry` checks (for all manifest related revisions)
293 293         * nodes for subdirectories exist in the sub-directory manifest
294 294         * each manifest entry has a file path
295 295         * each manifest node referred to in mflinkrevs exists in the manifest log
296 296 
297 297         If tree manifests are in use and a matcher is specified, only the
298 298         sub-directories matching it will be verified.
299 299
300 300 return a two level mapping:
301 301 {"path" -> { filenode -> changelog-revision}}
302 302
303 303         This mapping primarily contains entries for every file in the
304 304 repository. In addition, when tree-manifest is used, it also contains
305 305 sub-directory entries.
306 306
307 307 If a matcher is provided, only matching paths will be included.
308 308 """
309 309 repo = self.repo
310 310 ui = self.ui
311 311 match = self.match
312 312 mfl = self.repo.manifestlog
313 313 mf = mfl.getstorage(dir)
314 314
315 315 if not dir:
316 316 self.ui.status(_(b"checking manifests\n"))
317 317
318 318 filenodes = {}
319 319 subdirnodes = {}
320 320 seen = {}
321 321 label = b"manifest"
322 322 if dir:
323 323 label = dir
324 324 revlogfiles = mf.files()
325 325 storefiles.difference_update(revlogfiles)
326 326 if subdirprogress: # should be true since we're in a subdirectory
327 327 subdirprogress.increment()
328 328 if self.refersmf:
329 329 # Do not check manifest if there are only changelog entries with
330 330 # null manifests.
331 331 self._checkrevlog(mf, label, 0)
332 332 progress = ui.makeprogress(
333 333 _(b'checking'), unit=_(b'manifests'), total=len(mf)
334 334 )
335 335 for i in mf:
336 336 if not dir:
337 337 progress.update(i)
338 338 n = mf.node(i)
339 339 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
340 340 if n in mflinkrevs:
341 341 del mflinkrevs[n]
342 342 elif dir:
343 343 self._err(
344 344 lr,
345 345 _(b"%s not in parent-directory manifest") % short(n),
346 346 label,
347 347 )
348 348 else:
349 349 self._err(lr, _(b"%s not in changesets") % short(n), label)
350 350
351 351 try:
352 352 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
353 353 for f, fn, fl in mfdelta.iterentries():
354 354 if not f:
355 355 self._err(lr, _(b"entry without name in manifest"))
356 356 elif f == b"/dev/null": # ignore this in very old repos
357 357 continue
358 358 fullpath = dir + _normpath(f)
359 359 if fl == b't':
360 360 if not match.visitdir(fullpath):
361 361 continue
362 362 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
363 363 fn, []
364 364 ).append(lr)
365 365 else:
366 366 if not match(fullpath):
367 367 continue
368 368 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
369 369 except Exception as inst:
370 370 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
371 371 if self._level >= VERIFY_FULL:
372 372 try:
373 373 # Various issues can affect manifest. So we read each full
374 374 # text from storage. This triggers the checks from the core
375 375 # code (eg: hash verification, filename are ordered, etc.)
376 376 mfdelta = mfl.get(dir, n).read()
377 377 except Exception as inst:
378 378 self._exc(
379 379 lr,
380 380 _(b"reading full manifest %s") % short(n),
381 381 inst,
382 382 label,
383 383 )
384 384
385 385 if not dir:
386 386 progress.complete()
387 387
388 388 if self.havemf:
389 389             # since we delete entries in `mflinkrevs` during iteration, any
390 390 # remaining entries are "missing". We need to issue errors for them.
391 391 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
392 392 for c, m in sorted(changesetpairs):
393 393 if dir:
394 394 self._err(
395 395 c,
396 396 _(
397 397 b"parent-directory manifest refers to unknown"
398 398 b" revision %s"
399 399 )
400 400 % short(m),
401 401 label,
402 402 )
403 403 else:
404 404 self._err(
405 405 c,
406 406 _(b"changeset refers to unknown revision %s")
407 407 % short(m),
408 408 label,
409 409 )
410 410
411 411 if not dir and subdirnodes:
412 412 self.ui.status(_(b"checking directory manifests\n"))
413 413 storefiles = set()
414 414 subdirs = set()
415 415 revlogv1 = self.revlogv1
416 416 for f, f2, size in repo.store.datafiles():
417 417 if not f:
418 418 self._err(None, _(b"cannot decode filename '%s'") % f2)
419 419 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
420 420 storefiles.add(_normpath(f))
421 421 subdirs.add(os.path.dirname(f))
422 422 subdirprogress = ui.makeprogress(
423 423 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
424 424 )
425 425
426 426 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
427 427 subdirfilenodes = self._verifymanifest(
428 428 linkrevs, subdir, storefiles, subdirprogress
429 429 )
430 430 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
431 431 filenodes.setdefault(f, {}).update(onefilenodes)
432 432
433 433 if not dir and subdirnodes:
434 434 subdirprogress.complete()
435 435 if self.warnorphanstorefiles:
436 436 for f in sorted(storefiles):
437 437 self._warn(_(b"warning: orphan data file '%s'") % f)
438 438
439 439 return filenodes
440 440
441 441 def _crosscheckfiles(self, filelinkrevs, filenodes):
442 442 repo = self.repo
443 443 ui = self.ui
444 444 ui.status(_(b"crosschecking files in changesets and manifests\n"))
445 445
446 446 total = len(filelinkrevs) + len(filenodes)
447 447 progress = ui.makeprogress(
448 448 _(b'crosschecking'), unit=_(b'files'), total=total
449 449 )
450 450 if self.havemf:
451 451 for f in sorted(filelinkrevs):
452 452 progress.increment()
453 453 if f not in filenodes:
454 454 lr = filelinkrevs[f][0]
455 455 self._err(lr, _(b"in changeset but not in manifest"), f)
456 456
457 457 if self.havecl:
458 458 for f in sorted(filenodes):
459 459 progress.increment()
460 460 if f not in filelinkrevs:
461 461 try:
462 462 fl = repo.file(f)
463 463 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
464 464 except Exception:
465 465 lr = None
466 466 self._err(lr, _(b"in manifest but not in changeset"), f)
467 467
468 468 progress.complete()
469 469
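The cross-check above boils down to a symmetric difference over two file sets, with each discrepancy reported against the earliest known linkrev. A toy model:

    in_changesets = {'a.txt', 'b.txt'}      # from filelinkrevs
    in_manifests = {'b.txt', 'c.txt'}       # from filenodes

    for f in sorted(in_changesets - in_manifests):
        print('%s: in changeset but not in manifest' % f)
    for f in sorted(in_manifests - in_changesets):
        print('%s: in manifest but not in changeset' % f)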
470 470 def _verifyfiles(self, filenodes, filelinkrevs):
471 471 repo = self.repo
472 472 ui = self.ui
473 473 lrugetctx = self.lrugetctx
474 474 revlogv1 = self.revlogv1
475 475 havemf = self.havemf
476 476 ui.status(_(b"checking files\n"))
477 477
478 478 storefiles = set()
479 479 for f, f2, size in repo.store.datafiles():
480 480 if not f:
481 481 self._err(None, _(b"cannot decode filename '%s'") % f2)
482 482 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
483 483 storefiles.add(_normpath(f))
484 484
485 485 state = {
486 486 # TODO this assumes revlog storage for changelog.
487 487 b'expectedversion': self.repo.changelog.version & 0xFFFF,
488 488 b'skipflags': self.skipflags,
489 489 # experimental config: censor.policy
490 490 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
491 491 }
492 492
493 493 files = sorted(set(filenodes) | set(filelinkrevs))
494 494 revisions = 0
495 495 progress = ui.makeprogress(
496 496 _(b'checking'), unit=_(b'files'), total=len(files)
497 497 )
498 498 for i, f in enumerate(files):
499 499 progress.update(i, item=f)
500 500 try:
501 501 linkrevs = filelinkrevs[f]
502 502 except KeyError:
503 503 # in manifest but not in changelog
504 504 linkrevs = []
505 505
506 506 if linkrevs:
507 507 lr = linkrevs[0]
508 508 else:
509 509 lr = None
510 510
511 511 try:
512 512 fl = repo.file(f)
513 513 except error.StorageError as e:
514 514 self._err(lr, _(b"broken revlog! (%s)") % e, f)
515 515 continue
516 516
517 517 for ff in fl.files():
518 518 try:
519 519 storefiles.remove(ff)
520 520 except KeyError:
521 521 if self.warnorphanstorefiles:
522 522 self._warn(
523 523 _(b" warning: revlog '%s' not in fncache!") % ff
524 524 )
525 525 self.fncachewarned = True
526 526
527 527 if not len(fl) and (self.havecl or self.havemf):
528 528 self._err(lr, _(b"empty or missing %s") % f)
529 529 else:
530 530 # Guard against implementations not setting this.
531 531 state[b'skipread'] = set()
532 state[b'safe_renamed'] = set()
533
532 534 for problem in fl.verifyintegrity(state):
533 535 if problem.node is not None:
534 536 linkrev = fl.linkrev(fl.rev(problem.node))
535 537 else:
536 538 linkrev = None
537 539
538 540 if problem.warning:
539 541 self._warn(problem.warning)
540 542 elif problem.error:
541 543 self._err(
542 544 linkrev if linkrev is not None else lr,
543 545 problem.error,
544 546 f,
545 547 )
546 548 else:
547 549 raise error.ProgrammingError(
548 550 b'problem instance does not set warning or error '
549 551 b'attribute: %s' % problem.msg
550 552 )
551 553
552 554 seen = {}
553 555 for i in fl:
554 556 revisions += 1
555 557 n = fl.node(i)
556 558 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
557 559 if f in filenodes:
558 560 if havemf and n not in filenodes[f]:
559 561 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
560 562 else:
561 563 del filenodes[f][n]
562 564
563 if n in state[b'skipread']:
565 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
564 566 continue
565 567
566 568 # check renames
567 569 try:
568 570 # This requires resolving fulltext (at least on revlogs,
569 571 # though not with LFS revisions). We may want
570 572 # ``verifyintegrity()`` to pass a set of nodes with
571 573 # rename metadata as an optimization.
572 574 rp = fl.renamed(n)
573 575 if rp:
574 576 if lr is not None and ui.verbose:
575 577 ctx = lrugetctx(lr)
576 578 if not any(rp[0] in pctx for pctx in ctx.parents()):
577 579 self._warn(
578 580 _(
579 581 b"warning: copy source of '%s' not"
580 582 b" in parents of %s"
581 583 )
582 584 % (f, ctx)
583 585 )
584 586 fl2 = repo.file(rp[0])
585 587 if not len(fl2):
586 588 self._err(
587 589 lr,
588 590 _(
589 591 b"empty or missing copy source revlog "
590 592 b"%s:%s"
591 593 )
592 594 % (rp[0], short(rp[1])),
593 595 f,
594 596 )
595 597 elif rp[1] == nullid:
596 598 ui.note(
597 599 _(
598 600 b"warning: %s@%s: copy source"
599 601 b" revision is nullid %s:%s\n"
600 602 )
601 603 % (f, lr, rp[0], short(rp[1]))
602 604 )
603 605 else:
604 606 fl2.rev(rp[1])
605 607 except Exception as inst:
606 608 self._exc(
607 609 lr, _(b"checking rename of %s") % short(n), inst, f
608 610 )
609 611
610 612 # cross-check
611 613 if f in filenodes:
612 614 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
613 615 for lr, node in sorted(fns):
614 616 self._err(
615 617 lr,
616 618 _(b"manifest refers to unknown revision %s")
617 619 % short(node),
618 620 f,
619 621 )
620 622 progress.complete()
621 623
622 624 if self.warnorphanstorefiles:
623 625 for f in sorted(storefiles):
624 626 self._warn(_(b"warning: orphan data file '%s'") % f)
625 627
626 628 return len(files), revisions
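The behavioural change of this patch condenses to the predicate below: a node whose content read was skipped is still rename-checked when the storage flagged it as safely renamed. A stand-alone restatement:

    def should_check_rename(node, state):
        """Mirror the new skipread/safe_renamed guard in _verifyfiles()."""
        return (node not in state[b'skipread']
                or node in state[b'safe_renamed'])

    state = {b'skipread': {b'n1', b'n2'}, b'safe_renamed': {b'n2'}}
    assert should_check_rename(b'n0', state)      # fully read: check
    assert should_check_rename(b'n2', state)      # skipped but rename-safe: check
    assert not should_check_rename(b'n1', state)  # skipped and unsafe: skip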
@@ -1,1192 +1,1231 b''
1 1 #require no-reposimplestore no-chg
2 2
3 3 $ hg init requirements
4 4 $ cd requirements
5 5
6 6 # LFS not loaded by default.
7 7
8 8 $ hg config extensions
9 9 [1]
10 10
11 11 # Adding lfs to requires file will auto-load lfs extension.
12 12
13 13 $ echo lfs >> .hg/requires
14 14 $ hg config extensions
15 15 extensions.lfs=
16 16
17 17 # But only if there is no config entry for the extension already.
18 18
19 19 $ cat > .hg/hgrc << EOF
20 20 > [extensions]
21 21 > lfs=!
22 22 > EOF
23 23
24 24 $ hg config extensions
25 25 abort: repository requires features unknown to this Mercurial: lfs!
26 26 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
27 27 [255]
28 28
29 29 $ cat > .hg/hgrc << EOF
30 30 > [extensions]
31 31 > lfs=
32 32 > EOF
33 33
34 34 $ hg config extensions
35 35 extensions.lfs=
36 36
37 37 $ cat > .hg/hgrc << EOF
38 38 > [extensions]
39 39 > lfs = missing.py
40 40 > EOF
41 41
42 42 $ hg config extensions
43 43 \*\*\* failed to import extension lfs from missing.py: [Errno *] $ENOENT$: 'missing.py' (glob)
44 44 abort: repository requires features unknown to this Mercurial: lfs!
45 45 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
46 46 [255]
47 47
48 48 $ cd ..
49 49
50 50 # Initial setup
51 51
52 52 $ cat >> $HGRCPATH << EOF
53 53 > [extensions]
54 54 > lfs=
55 55 > [lfs]
56 56 > # Test deprecated config
57 57 > threshold=1000B
58 58 > EOF
59 59
60 60 $ LONG=AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
61 61
62 62 # Prepare server and enable extension
63 63 $ hg init server
64 64 $ hg clone -q server client
65 65 $ cd client
66 66
67 67 # Commit small file
68 68 $ echo s > smallfile
69 69 $ echo '**.py = LF' > .hgeol
70 70 $ hg --config lfs.track='"size(\">1000B\")"' commit -Aqm "add small file"
71 71 hg: parse error: unsupported file pattern: size(">1000B")
72 72 (paths must be prefixed with "path:")
73 73 [255]
74 74 $ hg --config lfs.track='size(">1000B")' commit -Aqm "add small file"
75 75
76 76 # Commit large file
77 77 $ echo $LONG > largefile
78 78 $ grep lfs .hg/requires
79 79 [1]
80 80 $ hg commit --traceback -Aqm "add large file"
81 81 $ grep lfs .hg/requires
82 82 lfs
83 83
84 84 # Ensure metadata is stored
85 85 $ hg debugdata largefile 0
86 86 version https://git-lfs.github.com/spec/v1
87 87 oid sha256:f11e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
88 88 size 1501
89 89 x-is-binary 0
90 90
91 91 # Check the blobstore is populated
92 92 $ find .hg/store/lfs/objects | sort
93 93 .hg/store/lfs/objects
94 94 .hg/store/lfs/objects/f1
95 95 .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
96 96
97 97 # Check the blob stored contains the actual contents of the file
98 98 $ cat .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
99 99 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
100 100
101 101 # Push changes to the server
102 102
103 103 $ hg push
104 104 pushing to $TESTTMP/server
105 105 searching for changes
106 106 abort: lfs.url needs to be configured
107 107 [255]
108 108
109 109 $ cat >> $HGRCPATH << EOF
110 110 > [lfs]
111 111 > url=file:$TESTTMP/dummy-remote/
112 112 > EOF
113 113
114 114 Push to a local non-lfs repo with the extension enabled will add the
115 115 lfs requirement
116 116
117 117 $ grep lfs $TESTTMP/server/.hg/requires
118 118 [1]
119 119 $ hg push -v | egrep -v '^(uncompressed| )'
120 120 pushing to $TESTTMP/server
121 121 searching for changes
122 122 lfs: found f11e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b in the local lfs store
123 123 2 changesets found
124 124 adding changesets
125 125 adding manifests
126 126 adding file changes
127 127 calling hook pretxnchangegroup.lfs: hgext.lfs.checkrequireslfs
128 128 added 2 changesets with 3 changes to 3 files
129 129 $ grep lfs $TESTTMP/server/.hg/requires
130 130 lfs
131 131
132 132 # Unknown URL scheme
133 133
134 134 $ hg push --config lfs.url=ftp://foobar
135 135 abort: lfs: unknown url scheme: ftp
136 136 [255]
137 137
138 138 $ cd ../
139 139
140 140 # Initialize new client (not cloning) and setup extension
141 141 $ hg init client2
142 142 $ cd client2
143 143 $ cat >> .hg/hgrc <<EOF
144 144 > [paths]
145 145 > default = $TESTTMP/server
146 146 > EOF
147 147
148 148 # Pull from server
149 149
150 150 Pulling a local lfs repo into a local non-lfs repo with the extension
151 151 enabled adds the lfs requirement
152 152
153 153 $ grep lfs .hg/requires $TESTTMP/server/.hg/requires
154 154 $TESTTMP/server/.hg/requires:lfs
155 155 $ hg pull default
156 156 pulling from $TESTTMP/server
157 157 requesting all changes
158 158 adding changesets
159 159 adding manifests
160 160 adding file changes
161 161 added 2 changesets with 3 changes to 3 files
162 162 new changesets 0ead593177f7:b88141481348
163 163 (run 'hg update' to get a working copy)
164 164 $ grep lfs .hg/requires $TESTTMP/server/.hg/requires
165 165 .hg/requires:lfs
166 166 $TESTTMP/server/.hg/requires:lfs
167 167
168 168 # Check the blobstore is not yet populated
169 169 $ [ -d .hg/store/lfs/objects ]
170 170 [1]
171 171
172 172 # Update to the last revision containing the large file
173 173 $ hg update
174 174 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
175 175
176 176 # Check the blobstore has been populated on update
177 177 $ find .hg/store/lfs/objects | sort
178 178 .hg/store/lfs/objects
179 179 .hg/store/lfs/objects/f1
180 180 .hg/store/lfs/objects/f1/1e77c257047a398492d8d6cb9f6acf3aa7c4384bb23080b43546053e183e4b
181 181
182 182 # Check the contents of the file are fetched from blobstore when requested
183 183 $ hg cat -r . largefile
184 184 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
185 185
186 186 # Check the file has been copied in the working copy
187 187 $ cat largefile
188 188 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC
189 189
190 190 $ cd ..
191 191
192 192 # Check rename, and switch between large and small files
193 193
194 194 $ hg init repo3
195 195 $ cd repo3
196 196 $ cat >> .hg/hgrc << EOF
197 197 > [lfs]
198 198 > track=size(">10B")
199 199 > EOF
200 200
201 201 $ echo LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS > large
202 202 $ echo SHORTER > small
203 203 $ hg add . -q
204 204 $ hg commit -m 'commit with lfs content'
205 205
206 206 $ hg files -r . 'set:added()'
207 207 large
208 208 small
209 209 $ hg files -r . 'set:added() & lfs()'
210 210 large
211 211
212 212 $ hg mv large l
213 213 $ hg mv small s
214 214 $ hg status 'set:removed()'
215 215 R large
216 216 R small
217 217 $ hg status 'set:removed() & lfs()'
218 218 R large
219 219 $ hg commit -m 'renames'
220 220
221 $ hg cat -r . l -T '{rawdata}\n'
222 version https://git-lfs.github.com/spec/v1
223 oid sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
224 size 39
225 x-hg-copy large
226 x-hg-copyrev 2c531e0992ff3107c511b53cb82a91b6436de8b2
227 x-is-binary 0
228
229
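The {rawdata} output above is the Git-LFS pointer that the filelog stores in
place of the file content. A minimal sketch of how the oid and size fields are
derived (the layout below just mirrors the fields shown above; it is not the
extension's own serializer):

  import hashlib

  def makepointer(data, copysource=None, copyrev=None):
      # oid is the sha256 of the raw content; size is its byte length
      lines = [
          b'version https://git-lfs.github.com/spec/v1',
          b'oid sha256:' + hashlib.sha256(data).hexdigest().encode('ascii'),
          b'size %d' % len(data),
      ]
      if copysource is not None:
          # rename metadata rides along in x-hg-* keys, as shown above
          lines.append(b'x-hg-copy ' + copysource)
          lines.append(b'x-hg-copyrev ' + copyrev)
      lines.append(b'x-is-binary 1' if b'\0' in data else b'x-is-binary 0')
      return b'\n'.join(lines) + b'\n'

  # 38 characters plus the trailing newline explain the "size 39" above
  print(makepointer(b'LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS\n').decode('ascii'))
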
221 230 $ hg files -r . 'set:copied()'
222 231 l
223 232 s
224 233 $ hg files -r . 'set:copied() & lfs()'
225 234 l
226 235 $ hg status --change . 'set:removed()'
227 236 R large
228 237 R small
229 238 $ hg status --change . 'set:removed() & lfs()'
230 239 R large
231 240
232 241 $ echo SHORT > l
233 242 $ echo BECOME-LARGER-FROM-SHORTER > s
234 243 $ hg commit -m 'large to small, small to large'
235 244
236 245 $ echo 1 >> l
237 246 $ echo 2 >> s
238 247 $ hg commit -m 'random modifications'
239 248
240 249 $ echo RESTORE-TO-BE-LARGE > l
241 250 $ echo SHORTER > s
242 251 $ hg commit -m 'switch large and small again'
243 252
244 253 # Test lfs_files template
245 254
246 255 $ hg log -r 'all()' -T '{rev} {join(lfs_files, ", ")}\n'
247 256 0 large
248 257 1 l, large
249 258 2 s
250 259 3 s
251 260 4 l
252 261
253 262 # Push and pull the above repo
254 263
255 264 $ hg --cwd .. init repo4
256 265 $ hg push ../repo4
257 266 pushing to ../repo4
258 267 searching for changes
259 268 adding changesets
260 269 adding manifests
261 270 adding file changes
262 271 added 5 changesets with 10 changes to 4 files
263 272
264 273 $ hg --cwd .. init repo5
265 274 $ hg --cwd ../repo5 pull ../repo3
266 275 pulling from ../repo3
267 276 requesting all changes
268 277 adding changesets
269 278 adding manifests
270 279 adding file changes
271 280 added 5 changesets with 10 changes to 4 files
272 281 new changesets fd47a419c4f7:5adf850972b9
273 282 (run 'hg update' to get a working copy)
274 283
275 284 $ cd ..
276 285
277 286 # Test clone
278 287
279 288 $ hg init repo6
280 289 $ cd repo6
281 290 $ cat >> .hg/hgrc << EOF
282 291 > [lfs]
283 292 > track=size(">30B")
284 293 > EOF
285 294
286 295 $ echo LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES > large
287 296 $ echo SMALL > small
288 297 $ hg commit -Aqm 'create a lfs file' large small
289 298 $ hg debuglfsupload -r 'all()' -v
290 299 lfs: found 8e92251415339ae9b148c8da89ed5ec665905166a1ab11b09dca8fad83344738 in the local lfs store
291 300
292 301 $ cd ..
293 302
294 303 $ hg clone repo6 repo7
295 304 updating to branch default
296 305 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
297 306 $ cd repo7
298 307 $ cat large
299 308 LARGE-BECAUSE-IT-IS-MORE-THAN-30-BYTES
300 309 $ cat small
301 310 SMALL
302 311
303 312 $ cd ..
304 313
305 314 $ hg --config extensions.share= share repo7 sharedrepo
306 315 updating working directory
307 316 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
308 317 $ grep lfs sharedrepo/.hg/requires
309 318 lfs
310 319
311 320 # Test rename and status
312 321
313 322 $ hg init repo8
314 323 $ cd repo8
315 324 $ cat >> .hg/hgrc << EOF
316 325 > [lfs]
317 326 > track=size(">10B")
318 327 > EOF
319 328
320 329 $ echo THIS-IS-LFS-BECAUSE-10-BYTES > a1
321 330 $ echo SMALL > a2
322 331 $ hg commit -m a -A a1 a2
323 332 $ hg status
324 333 $ hg mv a1 b1
325 334 $ hg mv a2 a1
326 335 $ hg mv b1 a2
327 336 $ hg commit -m b
328 337 $ hg status
329 338 >>> with open('a2', 'wb') as f:
330 339 ... f.write(b'\1\nSTART-WITH-HG-FILELOG-METADATA') and None
331 340 >>> with open('a1', 'wb') as f:
332 341 ... f.write(b'\1\nMETA\n') and None
333 342 $ hg commit -m meta
334 343 $ hg status
335 344 $ hg log -T '{rev}: {file_copies} | {file_dels} | {file_adds}\n'
336 345 2: | |
337 346 1: a1 (a2)a2 (a1) | |
338 347 0: | | a1 a2
339 348
340 349 $ for n in a1 a2; do
341 350 > for r in 0 1 2; do
342 351 > printf '\n%s @ %s\n' $n $r
343 352 > hg debugdata $n $r
344 353 > done
345 354 > done
346 355
347 356 a1 @ 0
348 357 version https://git-lfs.github.com/spec/v1
349 358 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
350 359 size 29
351 360 x-is-binary 0
352 361
353 362 a1 @ 1
354 363 \x01 (esc)
355 364 copy: a2
356 365 copyrev: 50470ad23cf937b1f4b9f80bfe54df38e65b50d9
357 366 \x01 (esc)
358 367 SMALL
359 368
360 369 a1 @ 2
361 370 \x01 (esc)
362 371 \x01 (esc)
363 372 \x01 (esc)
364 373 META
365 374
366 375 a2 @ 0
367 376 SMALL
368 377
369 378 a2 @ 1
370 379 version https://git-lfs.github.com/spec/v1
371 380 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
372 381 size 29
373 382 x-hg-copy a1
374 383 x-hg-copyrev be23af27908a582af43e5cda209a5a9b319de8d4
375 384 x-is-binary 0
376 385
377 386 a2 @ 2
378 387 version https://git-lfs.github.com/spec/v1
379 388 oid sha256:876dadc86a8542f9798048f2c47f51dbf8e4359aed883e8ec80c5db825f0d943
380 389 size 32
381 390 x-is-binary 0
382 391
383 392 # Verify commit hashes include rename metadata
384 393
385 394 $ hg log -T '{rev}:{node|short} {desc}\n'
386 395 2:0fae949de7fa meta
387 396 1:9cd6bdffdac0 b
388 397 0:7f96794915f7 a
389 398
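A changeset hash covers the manifest, which covers the filelog nodes, and a
filelog node is the usual revlog hash: SHA-1 over the sorted parent nodes
followed by the stored text. Because the stored text here is the pointer,
which embeds the x-hg-copy/x-hg-copyrev keys, a rename changes every hash up
the chain. A minimal sketch of that scheme, assuming nothing beyond the
classic revlog rules:

  import hashlib

  NULLID = b'\0' * 20

  def filenode(storedtext, p1=NULLID, p2=NULLID):
      # classic revlog hash: sha1(min(p1, p2) + max(p1, p2) + text); the
      # rename keys inside the stored pointer text feed into this hash,
      # which then propagates into the manifest and changeset hashes
      s = hashlib.sha1(min(p1, p2) + max(p1, p2))
      s.update(storedtext)
      return s.hexdigest()
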
390 399 $ cd ..
391 400
392 401 # Test bundle
393 402
394 403 $ hg init repo9
395 404 $ cd repo9
396 405 $ cat >> .hg/hgrc << EOF
397 406 > [lfs]
398 407 > track=size(">10B")
399 408 > [diff]
400 409 > git=1
401 410 > EOF
402 411
403 412 $ for i in 0 single two three 4; do
404 413 > echo 'THIS-IS-LFS-'$i > a
405 414 > hg commit -m a-$i -A a
406 415 > done
407 416
408 417 $ hg update 2 -q
409 418 $ echo 'THIS-IS-LFS-2-CHILD' > a
410 419 $ hg commit -m branching -q
411 420
412 421 $ hg bundle --base 1 bundle.hg -v
413 422 lfs: found 5ab7a3739a5feec94a562d070a14f36dba7cad17e5484a4a89eea8e5f3166888 in the local lfs store
414 423 lfs: found a9c7d1cd6ce2b9bbdf46ed9a862845228717b921c089d0d42e3bcaed29eb612e in the local lfs store
415 424 lfs: found f693890c49c409ec33673b71e53f297681f76c1166daf33b2ad7ebf8b1d3237e in the local lfs store
416 425 lfs: found fda198fea753eb66a252e9856915e1f5cddbe41723bd4b695ece2604ad3c9f75 in the local lfs store
417 426 4 changesets found
418 427 uncompressed size of bundle content:
419 428 * (changelog) (glob)
420 429 * (manifests) (glob)
421 430 * a (glob)
422 431 $ hg --config extensions.strip= strip -r 2 --no-backup --force -q
423 432 $ hg -R bundle.hg log -p -T '{rev} {desc}\n' a
424 433 5 branching
425 434 diff --git a/a b/a
426 435 --- a/a
427 436 +++ b/a
428 437 @@ -1,1 +1,1 @@
429 438 -THIS-IS-LFS-two
430 439 +THIS-IS-LFS-2-CHILD
431 440
432 441 4 a-4
433 442 diff --git a/a b/a
434 443 --- a/a
435 444 +++ b/a
436 445 @@ -1,1 +1,1 @@
437 446 -THIS-IS-LFS-three
438 447 +THIS-IS-LFS-4
439 448
440 449 3 a-three
441 450 diff --git a/a b/a
442 451 --- a/a
443 452 +++ b/a
444 453 @@ -1,1 +1,1 @@
445 454 -THIS-IS-LFS-two
446 455 +THIS-IS-LFS-three
447 456
448 457 2 a-two
449 458 diff --git a/a b/a
450 459 --- a/a
451 460 +++ b/a
452 461 @@ -1,1 +1,1 @@
453 462 -THIS-IS-LFS-single
454 463 +THIS-IS-LFS-two
455 464
456 465 1 a-single
457 466 diff --git a/a b/a
458 467 --- a/a
459 468 +++ b/a
460 469 @@ -1,1 +1,1 @@
461 470 -THIS-IS-LFS-0
462 471 +THIS-IS-LFS-single
463 472
464 473 0 a-0
465 474 diff --git a/a b/a
466 475 new file mode 100644
467 476 --- /dev/null
468 477 +++ b/a
469 478 @@ -0,0 +1,1 @@
470 479 +THIS-IS-LFS-0
471 480
472 481 $ hg bundle -R bundle.hg --base 1 bundle-again.hg -q
473 482 $ hg -R bundle-again.hg log -p -T '{rev} {desc}\n' a
474 483 5 branching
475 484 diff --git a/a b/a
476 485 --- a/a
477 486 +++ b/a
478 487 @@ -1,1 +1,1 @@
479 488 -THIS-IS-LFS-two
480 489 +THIS-IS-LFS-2-CHILD
481 490
482 491 4 a-4
483 492 diff --git a/a b/a
484 493 --- a/a
485 494 +++ b/a
486 495 @@ -1,1 +1,1 @@
487 496 -THIS-IS-LFS-three
488 497 +THIS-IS-LFS-4
489 498
490 499 3 a-three
491 500 diff --git a/a b/a
492 501 --- a/a
493 502 +++ b/a
494 503 @@ -1,1 +1,1 @@
495 504 -THIS-IS-LFS-two
496 505 +THIS-IS-LFS-three
497 506
498 507 2 a-two
499 508 diff --git a/a b/a
500 509 --- a/a
501 510 +++ b/a
502 511 @@ -1,1 +1,1 @@
503 512 -THIS-IS-LFS-single
504 513 +THIS-IS-LFS-two
505 514
506 515 1 a-single
507 516 diff --git a/a b/a
508 517 --- a/a
509 518 +++ b/a
510 519 @@ -1,1 +1,1 @@
511 520 -THIS-IS-LFS-0
512 521 +THIS-IS-LFS-single
513 522
514 523 0 a-0
515 524 diff --git a/a b/a
516 525 new file mode 100644
517 526 --- /dev/null
518 527 +++ b/a
519 528 @@ -0,0 +1,1 @@
520 529 +THIS-IS-LFS-0
521 530
522 531 $ cd ..
523 532
524 533 # Test isbinary
525 534
526 535 $ hg init repo10
527 536 $ cd repo10
528 537 $ cat >> .hg/hgrc << EOF
529 538 > [extensions]
530 539 > lfs=
531 540 > [lfs]
532 541 > track=all()
533 542 > EOF
534 543 $ "$PYTHON" <<'EOF'
535 544 > def write(path, content):
536 545 > with open(path, 'wb') as f:
537 546 > f.write(content)
538 547 > write('a', b'\0\0')
539 548 > write('b', b'\1\n')
540 549 > write('c', b'\1\n\0')
541 550 > write('d', b'xx')
542 551 > EOF
543 552 $ hg add a b c d
544 553 $ hg diff --stat
545 554 a | Bin
546 555 b | 1 +
547 556 c | Bin
548 557 d | 1 +
549 558 4 files changed, 2 insertions(+), 0 deletions(-)
550 559 $ hg commit -m binarytest
551 560 $ cat > $TESTTMP/dumpbinary.py << EOF
552 561 > from mercurial.utils import (
553 562 > stringutil,
554 563 > )
555 564 > def reposetup(ui, repo):
556 565 > for n in (b'a', b'b', b'c', b'd'):
557 566 > ui.write((b'%s: binary=%s\n')
558 567 > % (n, stringutil.pprint(repo[b'.'][n].isbinary())))
559 568 > EOF
560 569 $ hg --config extensions.dumpbinary=$TESTTMP/dumpbinary.py id --trace
561 570 a: binary=True
562 571 b: binary=False
563 572 c: binary=True
564 573 d: binary=False
565 574 b55353847f02 tip
566 575
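The values above are consistent with Mercurial's usual heuristic: content is
binary iff it contains a NUL byte. With lfs the answer can also be read off
the pointer's x-is-binary key, so the blob itself may not be needed. A
minimal sketch of the heuristic (not the extension's code path):

  def isbinary(data):
      # non-empty content containing a NUL byte is treated as binary
      return bool(data) and b'\0' in data

  for name, data in [('a', b'\0\0'), ('b', b'\1\n'),
                     ('c', b'\1\n\0'), ('d', b'xx')]:
      print(name, isbinary(data))  # matches the binary=... values above
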
567 576 Binary blobs don't need to be present to be skipped in filesets. (And their
568 577 absence doesn't cause an abort.)
569 578
570 579 $ rm .hg/store/lfs/objects/96/a296d224f285c67bee93c30f8a309157f0daa35dc5b87e410b78630a09cfc7
571 580 $ rm .hg/store/lfs/objects/92/f76135a4baf4faccb8586a60faf830c2bdfce147cefa188aaf4b790bd01b7e
572 581
573 582 $ hg files --debug -r . 'set:eol("unix")' --config 'experimental.lfs.disableusercache=True'
574 583 lfs: found c04b5bb1a5b2eb3e9cd4805420dba5a9d133da5b7adeeafb5474c4adae9faa80 in the local lfs store
575 584 2 b
576 585 lfs: found 5dde896887f6754c9b15bfe3a441ae4806df2fde94001311e08bf110622e0bbe in the local lfs store
577 586
578 587 $ hg files --debug -r . 'set:binary()' --config 'experimental.lfs.disableusercache=True'
579 588 2 a
580 589 3 c
581 590
582 591 $ cd ..
583 592
584 593 # Test fctx.cmp fastpath - diff without LFS blobs
585 594
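The fast path under test: two lfs revisions can be compared by their pointers
alone, because equal oids imply equal sha256 digests and therefore equal
contents, so neither blob has to be downloaded. A hypothetical sketch of the
idea; pointerof() is a stand-in helper, not the extension's actual API:

  def fctxcmp(fctx1, fctx2, pointerof):
      """Return True if the two file revisions differ (like fctx.cmp()).

      pointerof() is assumed to return a dict with an 'oid' key for lfs
      revisions and None for normal ones.
      """
      p1, p2 = pointerof(fctx1), pointerof(fctx2)
      if p1 is not None and p2 is not None:
          # same oid -> same sha256 -> same content; no blob fetch
          return p1['oid'] != p2['oid']
      # mixed or non-lfs: fall back to comparing the actual data
      return fctx1.data() != fctx2.data()
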
586 595 $ hg init repo12
587 596 $ cd repo12
588 597 $ cat >> .hg/hgrc <<EOF
589 598 > [lfs]
590 599 > threshold=1
591 600 > EOF
592 601 $ cat > ../patch.diff <<EOF
593 602 > # HG changeset patch
594 603 > 2
595 604 >
596 605 > diff --git a/a b/a
597 606 > old mode 100644
598 607 > new mode 100755
599 608 > EOF
600 609
601 610 $ for i in 1 2 3; do
602 611 > cp ../repo10/a a
603 612 > if [ $i = 3 ]; then
604 613 > # make a content-only change
605 614 > hg import -q --bypass ../patch.diff
606 615 > hg update -q
607 616 > rm ../patch.diff
608 617 > else
609 618 > echo $i >> a
610 619 > hg commit -m $i -A a
611 620 > fi
612 621 > done
613 622 $ [ -d .hg/store/lfs/objects ]
614 623
615 624 $ cd ..
616 625
617 626 $ hg clone repo12 repo13 --noupdate
618 627 $ cd repo13
619 628 $ hg log --removed -p a -T '{desc}\n' --config diff.nobinary=1 --git
620 629 2
621 630 diff --git a/a b/a
622 631 old mode 100644
623 632 new mode 100755
624 633
625 634 2
626 635 diff --git a/a b/a
627 636 Binary file a has changed
628 637
629 638 1
630 639 diff --git a/a b/a
631 640 new file mode 100644
632 641 Binary file a has changed
633 642
634 643 $ [ -d .hg/store/lfs/objects ]
635 644 [1]
636 645
637 646 $ cd ..
638 647
639 648 # Test filter
640 649
641 650 $ hg init repo11
642 651 $ cd repo11
643 652 $ cat >> .hg/hgrc << EOF
644 653 > [lfs]
645 654 > track=(**.a & size(">5B")) | (**.b & !size(">5B"))
646 655 > | (**.c & "path:d" & !"path:d/c.c") | size(">10B")
647 656 > EOF
648 657
649 658 $ mkdir a
650 659 $ echo aaaaaa > a/1.a
651 660 $ echo a > a/2.a
652 661 $ echo aaaaaa > 1.b
653 662 $ echo a > 2.b
654 663 $ echo a > 1.c
655 664 $ mkdir d
656 665 $ echo a > d/c.c
657 666 $ echo a > d/d.c
658 667 $ echo aaaaaaaaaaaa > x
659 668 $ hg add . -q
660 669 $ hg commit -m files
661 670
662 671 $ for p in a/1.a a/2.a 1.b 2.b 1.c d/c.c d/d.c x; do
663 672 > if hg debugdata $p 0 2>&1 | grep git-lfs >/dev/null; then
664 673 > echo "${p}: is lfs"
665 674 > else
666 675 > echo "${p}: not lfs"
667 676 > fi
668 677 > done
669 678 a/1.a: is lfs
670 679 a/2.a: not lfs
671 680 1.b: not lfs
672 681 2.b: is lfs
673 682 1.c: not lfs
674 683 d/c.c: not lfs
675 684 d/d.c: is lfs
676 685 x: is lfs
677 686
678 687 $ cd ..
679 688
680 689 # Verify the repos
681 690
682 691 $ cat > $TESTTMP/dumpflog.py << EOF
683 692 > # print raw revision sizes, flags, and hashes for certain files
684 693 > import hashlib
685 694 > from mercurial.node import short
686 695 > from mercurial import (
687 696 > pycompat,
688 697 > revlog,
689 698 > )
690 699 > from mercurial.utils import (
691 700 > stringutil,
692 701 > )
693 702 > def hash(rawtext):
694 703 > h = hashlib.sha512()
695 704 > h.update(rawtext)
696 705 > return pycompat.sysbytes(h.hexdigest()[:4])
697 706 > def reposetup(ui, repo):
698 707 > # these 2 files are interesting
699 708 > for name in [b'l', b's']:
700 709 > fl = repo.file(name)
701 710 > if len(fl) == 0:
702 711 > continue
703 712 > sizes = [fl._revlog.rawsize(i) for i in fl]
704 713 > texts = [fl.rawdata(i) for i in fl]
705 714 > flags = [int(fl._revlog.flags(i)) for i in fl]
706 715 > hashes = [hash(t) for t in texts]
707 716 > pycompat.stdout.write(b' %s: rawsizes=%r flags=%r hashes=%s\n'
708 717 > % (name, sizes, flags, stringutil.pprint(hashes)))
709 718 > EOF
710 719
711 720 $ for i in client client2 server repo3 repo4 repo5 repo6 repo7 repo8 repo9 \
712 721 > repo10; do
713 722 > echo 'repo:' $i
714 723 > hg --cwd $i verify --config extensions.dumpflog=$TESTTMP/dumpflog.py -q
715 724 > done
716 725 repo: client
717 726 repo: client2
718 727 repo: server
719 728 repo: repo3
720 729 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
721 730 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
722 731 repo: repo4
723 732 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
724 733 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
725 734 repo: repo5
726 735 l: rawsizes=[211, 6, 8, 141] flags=[8192, 0, 0, 8192] hashes=['d2b8', '948c', 'cc88', '724d']
727 736 s: rawsizes=[74, 141, 141, 8] flags=[0, 8192, 8192, 0] hashes=['3c80', 'fce0', '874a', '826b']
728 737 repo: repo6
729 738 repo: repo7
730 739 repo: repo8
731 740 repo: repo9
732 741 repo: repo10
733 742
734 743 repo13 doesn't have any cached lfs files and its source never pushed its
735 744 files. Therefore, the files don't exist in the remote store. Use the files in
736 745 the user cache.
737 746
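A sketch of the lookup order this relies on: the repo-local store first, then
the shared user cache, hardlinking (or copying) a cache hit back into the
local store. The <store>/xx/<rest-of-oid> layout matches the paths used in
this test; the function itself is illustrative, not the extension's API:

  import os
  import shutil

  def findblob(oid, localstore, usercache):
      rel = os.path.join(oid[:2], oid[2:])
      local = os.path.join(localstore, rel)
      if os.path.exists(local):
          return local
      cached = os.path.join(usercache, rel)
      if os.path.exists(cached):
          os.makedirs(os.path.dirname(local), exist_ok=True)
          try:
              os.link(cached, local)  # hardlink when the fs allows it...
          except OSError:
              shutil.copyfile(cached, local)  # ...otherwise copy
          return local
      raise LookupError('lfs blob %s not found locally' % oid)
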
738 747 $ test -d $TESTTMP/repo13/.hg/store/lfs/objects
739 748 [1]
740 749
741 750 $ hg --config extensions.share= share repo13 repo14
742 751 updating working directory
743 752 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
744 753 $ hg -R repo14 -q verify
745 754
746 755 $ hg clone repo13 repo15
747 756 updating to branch default
748 757 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
749 758 $ hg -R repo15 -q verify
750 759
751 760 If the source repo doesn't have the blob (maybe it was pulled or cloned with
752 761 --noupdate), the blob is still accessible via the global cache to send to the
753 762 remote store.
754 763
755 764 $ rm -rf $TESTTMP/repo15/.hg/store/lfs
756 765 $ hg init repo16
757 766 $ hg -R repo15 push repo16
758 767 pushing to repo16
759 768 searching for changes
760 769 adding changesets
761 770 adding manifests
762 771 adding file changes
763 772 added 3 changesets with 2 changes to 1 files
764 773 $ hg -R repo15 -q verify
765 774
766 775 Test damaged file scenarios. (This also damages the usercache because of the
767 776 hardlinks.)
768 777
769 778 $ echo 'damage' >> repo5/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
770 779
771 780 Repo with damaged lfs objects in any revision will fail verification.
772 781
773 782 $ hg -R repo5 verify
774 783 checking changesets
775 784 checking manifests
776 785 crosschecking files in changesets and manifests
777 786 checking files
778 787 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
779 788 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
780 789 checked 5 changesets with 10 changes to 4 files
781 790 2 integrity errors encountered!
782 791 (first damaged changeset appears to be 0)
783 792 [1]
784 793
785 794 Updates work after cloning a damaged repo, if the damaged lfs objects aren't in
786 795 the update destination. Those objects won't be added to the new repo's store
787 796 because they aren't accessed.
788 797
789 798 $ hg clone -v repo5 fromcorrupt
790 799 updating to branch default
791 800 resolving manifests
792 801 getting l
793 802 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the usercache
794 803 getting s
795 804 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
796 805 $ test -f fromcorrupt/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
797 806 [1]
798 807
799 Verify will not try to download lfs blobs, if told not to process lfs content
808 Verify will not try to download lfs blobs if told not to process lfs content.
809 The extension makes sure that the filelog.renamed() path is taken on a missing
810 blob, and the output shows that it isn't fetched.
800 811
801 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v --no-lfs
812 $ cat > $TESTTMP/lfsrename.py <<EOF
813 > from mercurial import (
814 > exthelper,
815 > )
816 >
817 > from hgext.lfs import (
818 > pointer,
819 > wrapper,
820 > )
821 >
822 > eh = exthelper.exthelper()
823 > uisetup = eh.finaluisetup
824 >
825 > @eh.wrapfunction(wrapper, b'filelogrenamed')
826 > def filelogrenamed(orig, orig1, self, node):
827 > ret = orig(orig1, self, node)
828 > if wrapper._islfs(self._revlog, node) and ret:
829 > rawtext = self._revlog.rawdata(node)
830 > metadata = pointer.deserialize(rawtext)
831 >         print('lfs blob %s renamed %s -> %s' % (metadata[b'oid'].decode('utf-8'),
832 >               ret[0].decode('utf-8'), self._revlog.filename.decode('utf-8')))
833 > return ret
834 > EOF
835
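(The uisetup = eh.finaluisetup line is what arms the hook: exthelper collects
the @eh.wrapfunction registration and installs the wrapper when Mercurial
loads this extension.)
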
836 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v --no-lfs \
837 > --config extensions.x=$TESTTMP/lfsrename.py
802 838 repository uses revlog format 1
803 839 checking changesets
804 840 checking manifests
805 841 crosschecking files in changesets and manifests
806 842 checking files
807 843 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
844 lfs blob sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e renamed large -> l
808 845 checked 5 changesets with 10 changes to 4 files
809 846
810 847 Verify will not try to download lfs blobs if told not to by the config option
(verify.skipflags=8192 skips REVIDX_EXTSTORED revisions, i.e. lfs pointers)
811 848
812 849 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v \
813 > --config verify.skipflags=8192
850 > --config verify.skipflags=8192 \
851 > --config extensions.x=$TESTTMP/lfsrename.py
814 852 repository uses revlog format 1
815 853 checking changesets
816 854 checking manifests
817 855 crosschecking files in changesets and manifests
818 856 checking files
819 857 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
858 lfs blob sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e renamed large -> l
820 859 checked 5 changesets with 10 changes to 4 files
821 860
822 861 Verify will copy/link all lfs objects into the local store that aren't already
823 862 present. Bypass the corrupted usercache to show that verify works when fed by
824 863 the (uncorrupted) remote store.
825 864
826 865 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v
827 866 repository uses revlog format 1
828 867 checking changesets
829 868 checking manifests
830 869 crosschecking files in changesets and manifests
831 870 checking files
832 871 lfs: adding 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e to the usercache
833 872 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
834 873 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
835 874 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
836 875 lfs: adding 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 to the usercache
837 876 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
838 877 lfs: adding b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c to the usercache
839 878 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
840 879 checked 5 changesets with 10 changes to 4 files
841 880
842 881 Verify will not copy/link a corrupted file from the usercache into the local
843 882 store, poisoning it. (The verify with a good remote now works.)
844 883
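The reason the corruption cannot spread: a candidate blob is only admitted
into a store after its sha256 is recomputed and compared against the oid it
is filed under. A minimal sketch of that check (the abort messages seen in
this test come from the real extension, not from this function):

  import hashlib

  def checkedread(path, oid):
      # re-hash the candidate blob; on a mismatch it must not be linked
      # into the local store or sent to a remote store
      with open(path, 'rb') as f:
          data = f.read()
      if hashlib.sha256(data).hexdigest() != oid:
          raise IOError('corrupt lfs object: %s' % oid)
      return data
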
845 884 $ rm -r fromcorrupt/.hg/store/lfs/objects/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
846 885 $ hg -R fromcorrupt verify -v
847 886 repository uses revlog format 1
848 887 checking changesets
849 888 checking manifests
850 889 crosschecking files in changesets and manifests
851 890 checking files
852 891 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
853 892 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
854 893 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
855 894 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
856 895 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
857 896 checked 5 changesets with 10 changes to 4 files
858 897 2 integrity errors encountered!
859 898 (first damaged changeset appears to be 0)
860 899 [1]
861 900 $ hg -R fromcorrupt --config lfs.usercache=emptycache verify -v
862 901 repository uses revlog format 1
863 902 checking changesets
864 903 checking manifests
865 904 crosschecking files in changesets and manifests
866 905 checking files
867 906 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the usercache
868 907 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
869 908 lfs: found 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e in the local lfs store
870 909 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
871 910 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
872 911 checked 5 changesets with 10 changes to 4 files
873 912
874 913 Damaging a file required by the update destination fails the update.
875 914
876 915 $ echo 'damage' >> $TESTTMP/dummy-remote/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
877 916 $ hg --config lfs.usercache=emptycache clone -v repo5 fromcorrupt2
878 917 updating to branch default
879 918 resolving manifests
880 919 abort: corrupt remote lfs object: 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
881 920 [255]
882 921
883 922 A corrupted lfs blob is not transferred from a file://remotestore to the
884 923 usercache or local store.
885 924
886 925 $ test -f emptycache/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
887 926 [1]
888 927 $ test -f fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
889 928 [1]
890 929
891 930 $ hg -R fromcorrupt2 verify
892 931 checking changesets
893 932 checking manifests
894 933 crosschecking files in changesets and manifests
895 934 checking files
896 935 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
897 936 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
898 937 checked 5 changesets with 10 changes to 4 files
899 938 2 integrity errors encountered!
900 939 (first damaged changeset appears to be 0)
901 940 [1]
902 941
903 942 Corrupt local files are not sent upstream. (The alternate dummy remote
904 943 avoids the corrupt lfs object in the original remote.)
905 944
906 945 $ mkdir $TESTTMP/dummy-remote2
907 946 $ hg init dest
908 947 $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 push -v dest
909 948 pushing to dest
910 949 searching for changes
911 950 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
912 951 abort: detected corrupt lfs object: 66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
913 952 (run hg verify)
914 953 [255]
915 954
916 955 $ hg -R fromcorrupt2 --config lfs.url=file:///$TESTTMP/dummy-remote2 verify -v
917 956 repository uses revlog format 1
918 957 checking changesets
919 958 checking manifests
920 959 crosschecking files in changesets and manifests
921 960 checking files
922 961 l@1: unpacking 46a2f24864bc: integrity check failed on data/l.i:0
923 962 lfs: found 22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b in the local lfs store
924 963 large@0: unpacking 2c531e0992ff: integrity check failed on data/large.i:0
925 964 lfs: found 89b6070915a3d573ff3599d1cda305bc5e38549b15c4847ab034169da66e1ca8 in the local lfs store
926 965 lfs: found b1a6ea88da0017a0e77db139a54618986e9a2489bee24af9fe596de9daac498c in the local lfs store
927 966 checked 5 changesets with 10 changes to 4 files
928 967 2 integrity errors encountered!
929 968 (first damaged changeset appears to be 0)
930 969 [1]
931 970
932 971 $ cat $TESTTMP/dummy-remote2/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256
933 972 sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
934 973 $ cat fromcorrupt2/.hg/store/lfs/objects/22/f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b | $TESTDIR/f --sha256
935 974 sha256=22f66a3fc0b9bf3f012c814303995ec07099b3a9ce02a7af84b5970811074a3b
936 975 $ test -f $TESTTMP/dummy-remote2/66/100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
937 976 [1]
938 977
939 978 Accessing a corrupt file will complain
940 979
941 980 $ hg --cwd fromcorrupt2 cat -r 0 large
942 981 abort: integrity check failed on data/large.i:0!
943 982 [255]
944 983
945 984 lfs -> normal -> lfs round trip conversions are possible. The 'none()'
946 985 predicate on the command line will override whatever is configured globally and
947 986 locally, and ensures everything converts to a regular file. For lfs -> normal,
948 987 there's no 'lfs' destination repo requirement. For normal -> lfs, there is.
949 988
950 989 $ hg --config extensions.convert= --config 'lfs.track=none()' \
951 990 > convert repo8 convert_normal
952 991 initializing destination convert_normal repository
953 992 scanning source...
954 993 sorting...
955 994 converting...
956 995 2 a
957 996 1 b
958 997 0 meta
959 998 $ grep 'lfs' convert_normal/.hg/requires
960 999 [1]
961 1000 $ hg --cwd convert_normal cat a1 -r 0 -T '{rawdata}'
962 1001 THIS-IS-LFS-BECAUSE-10-BYTES
963 1002
964 1003 $ hg --config extensions.convert= --config lfs.threshold=10B \
965 1004 > convert convert_normal convert_lfs
966 1005 initializing destination convert_lfs repository
967 1006 scanning source...
968 1007 sorting...
969 1008 converting...
970 1009 2 a
971 1010 1 b
972 1011 0 meta
973 1012
974 1013 $ hg --cwd convert_lfs cat -r 0 a1 -T '{rawdata}'
975 1014 version https://git-lfs.github.com/spec/v1
976 1015 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
977 1016 size 29
978 1017 x-is-binary 0
979 1018 $ hg --cwd convert_lfs debugdata a1 0
980 1019 version https://git-lfs.github.com/spec/v1
981 1020 oid sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
982 1021 size 29
983 1022 x-is-binary 0
984 1023 $ hg --cwd convert_lfs log -r 0 -T "{lfs_files % '{lfspointer % '{key}={value}\n'}'}"
985 1024 version=https://git-lfs.github.com/spec/v1
986 1025 oid=sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
987 1026 size=29
988 1027 x-is-binary=0
989 1028 $ hg --cwd convert_lfs log -r 0 \
990 1029 > -T '{lfs_files % "{get(lfspointer, "oid")}\n"}{lfs_files % "{lfspointer.oid}\n"}'
991 1030 sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
992 1031 sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
993 1032 $ hg --cwd convert_lfs log -r 0 -T '{lfs_files % "{lfspointer}\n"}'
994 1033 version=https://git-lfs.github.com/spec/v1 oid=sha256:5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024 size=29 x-is-binary=0
995 1034 $ hg --cwd convert_lfs \
996 1035 > log -r 'all()' -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}'
997 1036 0: a1: 5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
998 1037 1: a2: 5bb8341bee63b3649f222b2215bde37322bea075a30575aa685d8f8d21c77024
999 1038 2: a2: 876dadc86a8542f9798048f2c47f51dbf8e4359aed883e8ec80c5db825f0d943
1000 1039
1001 1040 $ grep 'lfs' convert_lfs/.hg/requires
1002 1041 lfs
1003 1042
1004 1043 The hashes in all stages of the conversion are unchanged.
1005 1044
1006 1045 $ hg -R repo8 log -T '{node|short}\n'
1007 1046 0fae949de7fa
1008 1047 9cd6bdffdac0
1009 1048 7f96794915f7
1010 1049 $ hg -R convert_normal log -T '{node|short}\n'
1011 1050 0fae949de7fa
1012 1051 9cd6bdffdac0
1013 1052 7f96794915f7
1014 1053 $ hg -R convert_lfs log -T '{node|short}\n'
1015 1054 0fae949de7fa
1016 1055 9cd6bdffdac0
1017 1056 7f96794915f7
1018 1057
1019 1058 This convert is trickier, because it contains deleted files (via `hg mv`)
1020 1059
1021 1060 $ hg --config extensions.convert= --config lfs.threshold=1000M \
1022 1061 > convert repo3 convert_normal2
1023 1062 initializing destination convert_normal2 repository
1024 1063 scanning source...
1025 1064 sorting...
1026 1065 converting...
1027 1066 4 commit with lfs content
1028 1067 3 renames
1029 1068 2 large to small, small to large
1030 1069 1 random modifications
1031 1070 0 switch large and small again
1032 1071 $ grep 'lfs' convert_normal2/.hg/requires
1033 1072 [1]
1034 1073 $ hg --cwd convert_normal2 debugdata large 0
1035 1074 LONGER-THAN-TEN-BYTES-WILL-TRIGGER-LFS
1036 1075
1037 1076 $ hg --config extensions.convert= --config lfs.threshold=10B \
1038 1077 > convert convert_normal2 convert_lfs2
1039 1078 initializing destination convert_lfs2 repository
1040 1079 scanning source...
1041 1080 sorting...
1042 1081 converting...
1043 1082 4 commit with lfs content
1044 1083 3 renames
1045 1084 2 large to small, small to large
1046 1085 1 random modifications
1047 1086 0 switch large and small again
1048 1087 $ grep 'lfs' convert_lfs2/.hg/requires
1049 1088 lfs
1050 1089 $ hg --cwd convert_lfs2 debugdata large 0
1051 1090 version https://git-lfs.github.com/spec/v1
1052 1091 oid sha256:66100b384bf761271b407d79fc30cdd0554f3b2c5d944836e936d584b88ce88e
1053 1092 size 39
1054 1093 x-is-binary 0
1055 1094
1056 1095 Committing deleted files works:
1057 1096
1058 1097 $ hg init $TESTTMP/repo-del
1059 1098 $ cd $TESTTMP/repo-del
1060 1099 $ echo 1 > A
1061 1100 $ hg commit -m 'add A' -A A
1062 1101 $ hg rm A
1063 1102 $ hg commit -m 'rm A'
1064 1103
1065 1104 Bad .hglfs files will block the commit with a useful message
1066 1105
1067 1106 $ cat > .hglfs << EOF
1068 1107 > [track]
1069 1108 > **.test = size(">5B")
1070 1109 > bad file ... no commit
1071 1110 > EOF
1072 1111
1073 1112 $ echo x > file.txt
1074 1113 $ hg ci -Aqm 'should fail'
1075 1114 hg: parse error at .hglfs:3: bad file ... no commit
1076 1115 [255]
1077 1116
1078 1117 $ cat > .hglfs << EOF
1079 1118 > [track]
1080 1119 > **.test = size(">5B")
1081 1120 > ** = nonexistent()
1082 1121 > EOF
1083 1122
1084 1123 $ hg ci -Aqm 'should fail'
1085 1124 abort: parse error in .hglfs: unknown identifier: nonexistent
1086 1125 [255]
1087 1126
1088 1127 '**' works out to mean all files.
1089 1128
1090 1129 $ cat > .hglfs << EOF
1091 1130 > [track]
1092 1131 > path:.hglfs = none()
1093 1132 > **.test = size(">5B")
1094 1133 > **.exclude = none()
1095 1134 > ** = size(">10B")
1096 1135 > EOF
1097 1136
1098 1137 The LFS policy takes effect without tracking the .hglfs file
1099 1138
1100 1139 $ echo 'largefile' > lfs.test
1101 1140 $ echo '012345678901234567890' > nolfs.exclude
1102 1141 $ echo '01234567890123456' > lfs.catchall
1103 1142 $ hg add *
1104 1143 $ hg ci -qm 'before add .hglfs'
1105 1144 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1106 1145 2: lfs.catchall: d4ec46c2869ba22eceb42a729377432052d9dd75d82fc40390ebaadecee87ee9
1107 1146 lfs.test: 5489e6ced8c36a7b267292bde9fd5242a5f80a7482e8f23fa0477393dfaa4d6c
1108 1147
1109 1148 The .hglfs file works when tracked
1110 1149
1111 1150 $ echo 'largefile2' > lfs.test
1112 1151 $ echo '012345678901234567890a' > nolfs.exclude
1113 1152 $ echo '01234567890123456a' > lfs.catchall
1114 1153 $ hg ci -Aqm 'after adding .hglfs'
1115 1154 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1116 1155 3: lfs.catchall: 31f43b9c62b540126b0ad5884dc013d21a61c9329b77de1fceeae2fc58511573
1117 1156 lfs.test: 8acd23467967bc7b8cc5a280056589b0ba0b17ff21dbd88a7b6474d6290378a6
1118 1157
1119 1158 The LFS policy stops when the .hglfs is gone
1120 1159
1121 1160 $ mv .hglfs .hglfs_
1122 1161 $ echo 'largefile3' > lfs.test
1123 1162 $ echo '012345678901234567890abc' > nolfs.exclude
1124 1163 $ echo '01234567890123456abc' > lfs.catchall
1125 1164 $ hg ci -qm 'file test' -X .hglfs
1126 1165 $ hg log -r . -T '{rev}: {lfs_files % "{file}: {lfsoid}\n"}\n'
1127 1166 4:
1128 1167
1129 1168 $ mv .hglfs_ .hglfs
1130 1169 $ echo '012345678901234567890abc' > lfs.test
1131 1170 $ hg ci -m 'back to lfs'
1132 1171 $ hg rm lfs.test
1133 1172 $ hg ci -qm 'remove lfs'
1134 1173
1135 1174 {lfs_files} will list deleted files too
1136 1175
1137 1176 $ hg log -T "{lfs_files % '{rev} {file}: {lfspointer.oid}\n'}"
1138 1177 6 lfs.test:
1139 1178 5 lfs.test: sha256:43f8f41171b6f62a6b61ba4ce98a8a6c1649240a47ebafd43120aa215ac9e7f6
1140 1179 3 lfs.catchall: sha256:31f43b9c62b540126b0ad5884dc013d21a61c9329b77de1fceeae2fc58511573
1141 1180 3 lfs.test: sha256:8acd23467967bc7b8cc5a280056589b0ba0b17ff21dbd88a7b6474d6290378a6
1142 1181 2 lfs.catchall: sha256:d4ec46c2869ba22eceb42a729377432052d9dd75d82fc40390ebaadecee87ee9
1143 1182 2 lfs.test: sha256:5489e6ced8c36a7b267292bde9fd5242a5f80a7482e8f23fa0477393dfaa4d6c
1144 1183
1145 1184 $ hg log -r 'file("set:lfs()")' -T '{rev} {join(lfs_files, ", ")}\n'
1146 1185 2 lfs.catchall, lfs.test
1147 1186 3 lfs.catchall, lfs.test
1148 1187 5 lfs.test
1149 1188 6 lfs.test
1150 1189
1151 1190 $ cd ..
1152 1191
1153 1192 Unbundling adds a requirement to a non-lfs repo, if necessary.
1154 1193
1155 1194 $ hg bundle -R $TESTTMP/repo-del -qr 0 --base null nolfs.hg
1156 1195 $ hg bundle -R convert_lfs2 -qr tip --base null lfs.hg
1157 1196 $ hg init unbundle
1158 1197 $ hg pull -R unbundle -q nolfs.hg
1159 1198 $ grep lfs unbundle/.hg/requires
1160 1199 [1]
1161 1200 $ hg pull -R unbundle -q lfs.hg
1162 1201 $ grep lfs unbundle/.hg/requires
1163 1202 lfs
1164 1203
1165 1204 $ hg init no_lfs
1166 1205 $ cat >> no_lfs/.hg/hgrc <<EOF
1167 1206 > [experimental]
1168 1207 > changegroup3 = True
1169 1208 > [extensions]
1170 1209 > lfs=!
1171 1210 > EOF
1172 1211 $ cp -R no_lfs no_lfs2
1173 1212
1174 1213 Pushing from a local lfs repo to a local repo without an lfs requirement and
1175 1214 with lfs disabled fails.
1176 1215
1177 1216 $ hg push -R convert_lfs2 no_lfs
1178 1217 pushing to no_lfs
1179 1218 abort: required features are not supported in the destination: lfs
1180 1219 [255]
1181 1220 $ grep lfs no_lfs/.hg/requires
1182 1221 [1]
1183 1222
1184 1223 Pulling from a local lfs repo to a local repo without an lfs requirement and
1185 1224 with lfs disabled fails.
1186 1225
1187 1226 $ hg pull -R no_lfs2 convert_lfs2
1188 1227 pulling from convert_lfs2
1189 1228 abort: required features are not supported in the destination: lfs
1190 1229 [255]
1191 1230 $ grep lfs no_lfs2/.hg/requires
1192 1231 [1]