copies: add a HASCOPIESINFO flag to highlight rev with useful data...
marmoute
r46245:c398c798 default draft
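
This changeset introduces a per-revision HASCOPIESINFO flag so readers can tell from the flag word alone whether a changelog revision carries file-change and copy metadata in its sidedata, without fetching and decoding the sidedata itself. As a minimal sketch (not part of the patch; `has_copies_info` is a name invented here), assuming a changelog object `cl` and the `flagutil` module this patch imports:

    from mercurial.revlogutils import flagutil

    def has_copies_info(cl, rev):
        # revlog.flags(rev) returns the flag word for a revision; the bit
        # tested below is the one this changeset adds (mirrored in the
        # interfaces as REVISION_FLAG_HASCOPIESINFO = 1 << 11).
        return bool(cl.flags(rev) & flagutil.REVIDX_HASCOPIESINFO)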
@@ -1,608 +1,618 @@ mercurial/changelog.py
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 from .revlogutils import flagutil
29 30
30 31 _defaultextra = {b'branch': b'default'}
31 32
32 33
33 34 def _string_escape(text):
34 35 """
35 36 >>> from .pycompat import bytechr as chr
36 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
38 39 >>> s
39 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
40 41 >>> res = _string_escape(s)
41 42 >>> s == _string_unescape(res)
42 43 True
43 44 """
44 45 # subset of the string_escape codec
45 46 text = (
46 47 text.replace(b'\\', b'\\\\')
47 48 .replace(b'\n', b'\\n')
48 49 .replace(b'\r', b'\\r')
49 50 )
50 51 return text.replace(b'\0', b'\\0')
51 52
52 53
53 54 def _string_unescape(text):
54 55 if b'\\0' in text:
55 56 # fix up \0 without getting into trouble with \\0
56 57 text = text.replace(b'\\\\', b'\\\\\n')
57 58 text = text.replace(b'\\0', b'\0')
58 59 text = text.replace(b'\n', b'')
59 60 return stringutil.unescapestr(text)
60 61
61 62
62 63 def decodeextra(text):
63 64 """
64 65 >>> from .pycompat import bytechr as chr
65 66 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
66 67 ... ).items())
67 68 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
68 69 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
69 70 ... b'baz': chr(92) + chr(0) + b'2'})
70 71 ... ).items())
71 72 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
72 73 """
73 74 extra = _defaultextra.copy()
74 75 for l in text.split(b'\0'):
75 76 if l:
76 77 k, v = _string_unescape(l).split(b':', 1)
77 78 extra[k] = v
78 79 return extra
79 80
80 81
81 82 def encodeextra(d):
82 83 # keys must be sorted to produce a deterministic changelog entry
83 84 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
84 85 return b"\0".join(items)
85 86
86 87
87 88 def stripdesc(desc):
88 89 """strip trailing whitespace and leading and trailing empty lines"""
89 90 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
90 91
91 92
92 93 class appender(object):
93 94 '''the changelog index must be updated last on disk, so we use this class
94 95 to delay writes to it'''
95 96
96 97 def __init__(self, vfs, name, mode, buf):
97 98 self.data = buf
98 99 fp = vfs(name, mode)
99 100 self.fp = fp
100 101 self.offset = fp.tell()
101 102 self.size = vfs.fstat(fp).st_size
102 103 self._end = self.size
103 104
104 105 def end(self):
105 106 return self._end
106 107
107 108 def tell(self):
108 109 return self.offset
109 110
110 111 def flush(self):
111 112 pass
112 113
113 114 @property
114 115 def closed(self):
115 116 return self.fp.closed
116 117
117 118 def close(self):
118 119 self.fp.close()
119 120
120 121 def seek(self, offset, whence=0):
121 122 '''virtual file offset spans real file and data'''
122 123 if whence == 0:
123 124 self.offset = offset
124 125 elif whence == 1:
125 126 self.offset += offset
126 127 elif whence == 2:
127 128 self.offset = self.end() + offset
128 129 if self.offset < self.size:
129 130 self.fp.seek(self.offset)
130 131
131 132 def read(self, count=-1):
132 133 '''only trick here is reads that span real file and data'''
133 134 ret = b""
134 135 if self.offset < self.size:
135 136 s = self.fp.read(count)
136 137 ret = s
137 138 self.offset += len(s)
138 139 if count > 0:
139 140 count -= len(s)
140 141 if count != 0:
141 142 doff = self.offset - self.size
142 143 self.data.insert(0, b"".join(self.data))
143 144 del self.data[1:]
144 145 s = self.data[0][doff : doff + count]
145 146 self.offset += len(s)
146 147 ret += s
147 148 return ret
148 149
149 150 def write(self, s):
150 151 self.data.append(bytes(s))
151 152 self.offset += len(s)
152 153 self._end += len(s)
153 154
154 155 def __enter__(self):
155 156 self.fp.__enter__()
156 157 return self
157 158
158 159 def __exit__(self, *args):
159 160 return self.fp.__exit__(*args)
160 161
161 162
162 163 class _divertopener(object):
163 164 def __init__(self, opener, target):
164 165 self._opener = opener
165 166 self._target = target
166 167
167 168 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
168 169 if name != self._target:
169 170 return self._opener(name, mode, **kwargs)
170 171 return self._opener(name + b".a", mode, **kwargs)
171 172
172 173 def __getattr__(self, attr):
173 174 return getattr(self._opener, attr)
174 175
175 176
176 177 def _delayopener(opener, target, buf):
177 178 """build an opener that stores chunks in 'buf' instead of 'target'"""
178 179
179 180 def _delay(name, mode=b'r', checkambig=False, **kwargs):
180 181 if name != target:
181 182 return opener(name, mode, **kwargs)
182 183 assert not kwargs
183 184 return appender(opener, name, mode, buf)
184 185
185 186 return _delay
186 187
187 188
188 189 @attr.s
189 190 class _changelogrevision(object):
190 191 # Extensions might modify _defaultextra, so let the constructor below pass
191 192 # it in
192 193 extra = attr.ib()
193 194 manifest = attr.ib(default=nullid)
194 195 user = attr.ib(default=b'')
195 196 date = attr.ib(default=(0, 0))
196 197 files = attr.ib(default=attr.Factory(list))
197 198 filesadded = attr.ib(default=None)
198 199 filesremoved = attr.ib(default=None)
199 200 p1copies = attr.ib(default=None)
200 201 p2copies = attr.ib(default=None)
201 202 description = attr.ib(default=b'')
202 203
203 204
204 205 class changelogrevision(object):
205 206 """Holds results of a parsed changelog revision.
206 207
207 208 Changelog revisions consist of multiple pieces of data, including
208 209 the manifest node, user, and date. This object exposes a view into
209 210 the parsed object.
210 211 """
211 212
212 213 __slots__ = (
213 214 '_offsets',
214 215 '_text',
215 216 '_sidedata',
216 217 '_cpsd',
217 218 '_changes',
218 219 )
219 220
220 221 def __new__(cls, text, sidedata, cpsd):
221 222 if not text:
222 223 return _changelogrevision(extra=_defaultextra)
223 224
224 225 self = super(changelogrevision, cls).__new__(cls)
225 226 # We could return here and implement the following as an __init__.
226 227 # But doing it here is equivalent and saves an extra function call.
227 228
228 229 # format used:
229 230 # nodeid\n : manifest node in ascii
230 231 # user\n : user, no \n or \r allowed
231 232 # time tz extra\n : date (time is int or float, timezone is int)
232 233 # : extra is metadata, encoded and separated by '\0'
233 234 # : older versions ignore it
234 235 # files\n\n : files modified by the cset, no \n or \r allowed
235 236 # (.*) : comment (free text, ideally utf-8)
236 237 #
237 238 # changelog v0 doesn't use extra
238 239
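
To make the format comment above concrete, a hypothetical entry (all values invented for illustration) would serialize as:

    9cd2f5f79bd6ae33a26cbbf54f4d0c4f04f8c9dd
    Alice <alice@example.com>
    1600000000 0 branch:stable
    file-a
    file-b

    commit description (free text)

The empty line between the file list and the description is the double newline the parser locates below; when no files were touched, it directly follows the date line.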
239 240 nl1 = text.index(b'\n')
240 241 nl2 = text.index(b'\n', nl1 + 1)
241 242 nl3 = text.index(b'\n', nl2 + 1)
242 243
243 244 # The list of files may be empty. Which means nl3 is the first of the
244 245 # double newline that precedes the description.
245 246 if text[nl3 + 1 : nl3 + 2] == b'\n':
246 247 doublenl = nl3
247 248 else:
248 249 doublenl = text.index(b'\n\n', nl3 + 1)
249 250
250 251 self._offsets = (nl1, nl2, nl3, doublenl)
251 252 self._text = text
252 253 self._sidedata = sidedata
253 254 self._cpsd = cpsd
254 255 self._changes = None
255 256
256 257 return self
257 258
258 259 @property
259 260 def manifest(self):
260 261 return bin(self._text[0 : self._offsets[0]])
261 262
262 263 @property
263 264 def user(self):
264 265 off = self._offsets
265 266 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
266 267
267 268 @property
268 269 def _rawdate(self):
269 270 off = self._offsets
270 271 dateextra = self._text[off[1] + 1 : off[2]]
271 272 return dateextra.split(b' ', 2)[0:2]
272 273
273 274 @property
274 275 def _rawextra(self):
275 276 off = self._offsets
276 277 dateextra = self._text[off[1] + 1 : off[2]]
277 278 fields = dateextra.split(b' ', 2)
278 279 if len(fields) != 3:
279 280 return None
280 281
281 282 return fields[2]
282 283
283 284 @property
284 285 def date(self):
285 286 raw = self._rawdate
286 287 time = float(raw[0])
287 288 # Various tools did silly things with the timezone.
288 289 try:
289 290 timezone = int(raw[1])
290 291 except ValueError:
291 292 timezone = 0
292 293
293 294 return time, timezone
294 295
295 296 @property
296 297 def extra(self):
297 298 raw = self._rawextra
298 299 if raw is None:
299 300 return _defaultextra
300 301
301 302 return decodeextra(raw)
302 303
303 304 @property
304 305 def changes(self):
305 306 if self._changes is not None:
306 307 return self._changes
307 308 if self._cpsd:
308 309 changes = metadata.decode_files_sidedata(self._sidedata)
309 310 else:
310 311 changes = metadata.ChangingFiles(
311 312 touched=self.files or (),
312 313 added=self.filesadded or (),
313 314 removed=self.filesremoved or (),
314 315 p1_copies=self.p1copies or {},
315 316 p2_copies=self.p2copies or {},
316 317 )
317 318 self._changes = changes
318 319 return changes
319 320
320 321 @property
321 322 def files(self):
322 323 if self._cpsd:
323 324 return sorted(self.changes.touched)
324 325 off = self._offsets
325 326 if off[2] == off[3]:
326 327 return []
327 328
328 329 return self._text[off[2] + 1 : off[3]].split(b'\n')
329 330
330 331 @property
331 332 def filesadded(self):
332 333 if self._cpsd:
333 334 return self.changes.added
334 335 else:
335 336 rawindices = self.extra.get(b'filesadded')
336 337 if rawindices is None:
337 338 return None
338 339 return metadata.decodefileindices(self.files, rawindices)
339 340
340 341 @property
341 342 def filesremoved(self):
342 343 if self._cpsd:
343 344 return self.changes.removed
344 345 else:
345 346 rawindices = self.extra.get(b'filesremoved')
346 347 if rawindices is None:
347 348 return None
348 349 return metadata.decodefileindices(self.files, rawindices)
349 350
350 351 @property
351 352 def p1copies(self):
352 353 if self._cpsd:
353 354 return self.changes.copied_from_p1
354 355 else:
355 356 rawcopies = self.extra.get(b'p1copies')
356 357 if rawcopies is None:
357 358 return None
358 359 return metadata.decodecopies(self.files, rawcopies)
359 360
360 361 @property
361 362 def p2copies(self):
362 363 if self._cpsd:
363 364 return self.changes.copied_from_p2
364 365 else:
365 366 rawcopies = self.extra.get(b'p2copies')
366 367 if rawcopies is None:
367 368 return None
368 369 return metadata.decodecopies(self.files, rawcopies)
369 370
370 371 @property
371 372 def description(self):
372 373 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
373 374
374 375
375 376 class changelog(revlog.revlog):
376 377 def __init__(self, opener, trypending=False):
377 378 """Load a changelog revlog using an opener.
378 379
379 380 If ``trypending`` is true, we attempt to load the index from a
380 381 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
381 382 The ``00changelog.i.a`` file contains index (and possibly inline
382 383 revision) data for a transaction that hasn't been finalized yet.
383 384 It exists in a separate file to facilitate readers (such as
384 385 hooks processes) accessing data before a transaction is finalized.
385 386 """
386 387 if trypending and opener.exists(b'00changelog.i.a'):
387 388 indexfile = b'00changelog.i.a'
388 389 else:
389 390 indexfile = b'00changelog.i'
390 391
391 392 datafile = b'00changelog.d'
392 393 revlog.revlog.__init__(
393 394 self,
394 395 opener,
395 396 indexfile,
396 397 datafile=datafile,
397 398 checkambig=True,
398 399 mmaplargeindex=True,
399 400 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
400 401 )
401 402
402 403 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
403 404 # changelogs don't benefit from generaldelta.
404 405
405 406 self.version &= ~revlog.FLAG_GENERALDELTA
406 407 self._generaldelta = False
407 408
408 409 # Delta chains for changelogs tend to be very small because entries
410 411	 tend to be small and don't delta well with each other. So disable delta
410 411 # chains.
411 412 self._storedeltachains = False
412 413
413 414 self._realopener = opener
414 415 self._delayed = False
415 416 self._delaybuf = None
416 417 self._divert = False
417 418 self._filteredrevs = frozenset()
418 419 self._filteredrevs_hashcache = {}
419 420 self._copiesstorage = opener.options.get(b'copies-storage')
420 421
421 422 @property
422 423 def filteredrevs(self):
423 424 return self._filteredrevs
424 425
425 426 @filteredrevs.setter
426 427 def filteredrevs(self, val):
427 428 # Ensure all updates go through this function
428 429 assert isinstance(val, frozenset)
429 430 self._filteredrevs = val
430 431 self._filteredrevs_hashcache = {}
431 432
432 433 def delayupdate(self, tr):
433 434 """delay visibility of index updates to other readers"""
434 435
435 436 if not self._delayed:
436 437 if len(self) == 0:
437 438 self._divert = True
438 439 if self._realopener.exists(self.indexfile + b'.a'):
439 440 self._realopener.unlink(self.indexfile + b'.a')
440 441 self.opener = _divertopener(self._realopener, self.indexfile)
441 442 else:
442 443 self._delaybuf = []
443 444 self.opener = _delayopener(
444 445 self._realopener, self.indexfile, self._delaybuf
445 446 )
446 447 self._delayed = True
447 448 tr.addpending(b'cl-%i' % id(self), self._writepending)
448 449 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
449 450
450 451 def _finalize(self, tr):
451 452 """finalize index updates"""
452 453 self._delayed = False
453 454 self.opener = self._realopener
454 455 # move redirected index data back into place
455 456 if self._divert:
456 457 assert not self._delaybuf
457 458 tmpname = self.indexfile + b".a"
458 459 nfile = self.opener.open(tmpname)
459 460 nfile.close()
460 461 self.opener.rename(tmpname, self.indexfile, checkambig=True)
461 462 elif self._delaybuf:
462 463 fp = self.opener(self.indexfile, b'a', checkambig=True)
463 464 fp.write(b"".join(self._delaybuf))
464 465 fp.close()
465 466 self._delaybuf = None
466 467 self._divert = False
467 468 # split when we're done
468 469 self._enforceinlinesize(tr)
469 470
470 471 def _writepending(self, tr):
471 472 """create a file containing the unfinalized state for
472 473 pretxnchangegroup"""
473 474 if self._delaybuf:
474 475 # make a temporary copy of the index
475 476 fp1 = self._realopener(self.indexfile)
476 477 pendingfilename = self.indexfile + b".a"
477 478 # register as a temp file to ensure cleanup on failure
478 479 tr.registertmp(pendingfilename)
479 480 # write existing data
480 481 fp2 = self._realopener(pendingfilename, b"w")
481 482 fp2.write(fp1.read())
482 483 # add pending data
483 484 fp2.write(b"".join(self._delaybuf))
484 485 fp2.close()
485 486 # switch modes so finalize can simply rename
486 487 self._delaybuf = None
487 488 self._divert = True
488 489 self.opener = _divertopener(self._realopener, self.indexfile)
489 490
490 491 if self._divert:
491 492 return True
492 493
493 494 return False
494 495
495 496 def _enforceinlinesize(self, tr, fp=None):
496 497 if not self._delayed:
497 498 revlog.revlog._enforceinlinesize(self, tr, fp)
498 499
499 500 def read(self, node):
500 501 """Obtain data from a parsed changelog revision.
501 502
502 503 Returns a 6-tuple of:
503 504
504 505 - manifest node in binary
505 506 - author/user as a localstr
506 507 - date as a 2-tuple of (time, timezone)
507 508 - list of files
508 509 - commit message as a localstr
509 510 - dict of extra metadata
510 511
511 512 Unless you need to access all fields, consider calling
512 513 ``changelogrevision`` instead, as it is faster for partial object
513 514 access.
514 515 """
515 516 d, s = self._revisiondata(node)
516 517 c = changelogrevision(
517 518 d, s, self._copiesstorage == b'changeset-sidedata'
518 519 )
519 520 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
520 521
521 522 def changelogrevision(self, nodeorrev):
522 523 """Obtain a ``changelogrevision`` for a node or revision."""
523 524 text, sidedata = self._revisiondata(nodeorrev)
524 525 return changelogrevision(
525 526 text, sidedata, self._copiesstorage == b'changeset-sidedata'
526 527 )
527 528
528 529 def readfiles(self, node):
529 530 """
530 531 short version of read that only returns the files modified by the cset
531 532 """
532 533 text = self.revision(node)
533 534 if not text:
534 535 return []
535 536 last = text.index(b"\n\n")
536 537 l = text[:last].split(b'\n')
537 538 return l[3:]
538 539
539 540 def add(
540 541 self,
541 542 manifest,
542 543 files,
543 544 desc,
544 545 transaction,
545 546 p1,
546 547 p2,
547 548 user,
548 549 date=None,
549 550 extra=None,
550 551 ):
551 552 # Convert to UTF-8 encoded bytestrings as the very first
552 553 # thing: calling any method on a localstr object will turn it
553 554 # into a str object and the cached UTF-8 string is thus lost.
554 555 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
555 556
556 557 user = user.strip()
557 558 # An empty username or a username with a "\n" will make the
558 559 # revision text contain two "\n\n" sequences -> corrupt
559 560 # repository since read cannot unpack the revision.
560 561 if not user:
561 562 raise error.StorageError(_(b"empty username"))
562 563 if b"\n" in user:
563 564 raise error.StorageError(
564 565 _(b"username %r contains a newline") % pycompat.bytestr(user)
565 566 )
566 567
567 568 desc = stripdesc(desc)
568 569
569 570 if date:
570 571 parseddate = b"%d %d" % dateutil.parsedate(date)
571 572 else:
572 573 parseddate = b"%d %d" % dateutil.makedate()
573 574 if extra:
574 575 branch = extra.get(b"branch")
575 576 if branch in (b"default", b""):
576 577 del extra[b"branch"]
577 578 elif branch in (b".", b"null", b"tip"):
578 579 raise error.StorageError(
579 580 _(b'the name \'%s\' is reserved') % branch
580 581 )
581 582 sortedfiles = sorted(files.touched)
583 flags = 0
582 584 sidedata = None
583 585 if self._copiesstorage == b'changeset-sidedata':
586 if (
587 files.removed
588 or files.merged
589 or files.salvaged
590 or files.copied_from_p1
591 or files.copied_from_p2
592 ):
593 flags |= flagutil.REVIDX_HASCOPIESINFO
584 594 sidedata = metadata.encode_files_sidedata(files)
585 595
586 596 if extra:
587 597 extra = encodeextra(extra)
588 598 parseddate = b"%s %s" % (parseddate, extra)
589 599 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
590 600 text = b"\n".join(l)
591 601 return self.addrevision(
592 602 text, transaction, len(self), p1, p2, sidedata=sidedata
593 603 )
594 604
595 605 def branchinfo(self, rev):
596 606 """return the branch name and open/close state of a revision
597 607
598 608 This function exists because creating a changectx object
599 609 just to access this is costly."""
600 610 extra = self.read(rev)[5]
601 611 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
602 612
603 613 def _nodeduplicatecallback(self, transaction, node):
604 614	 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
605 615 #
606 616 # We track them in a list to preserve their order from the source bundle
607 617 duplicates = transaction.changes.setdefault(b'revduplicates', [])
608 618 duplicates.append(self.rev(node))
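
The functional core of the hunk above is in changelog.add(): when copy data is stored in changeset sidedata, the new flag is raised whenever the ChangingFiles object records anything beyond plain touched files. Restated as a standalone helper (a sketch that mirrors the patch's condition; `_has_copies_info` is not a name from the patch):

    def _has_copies_info(files):
        # `files` is a mercurial.metadata.ChangingFiles; any of these
        # attributes being non-empty is exactly the condition under which
        # add() sets flagutil.REVIDX_HASCOPIESINFO.
        return bool(
            files.removed
            or files.merged
            or files.salvaged
            or files.copied_from_p1
            or files.copied_from_p2
        )

Note that the addrevision() call in the hunk is unchanged: this patch computes `flags` but does not yet pass it to storage, so wiring the flag through presumably happens elsewhere in the series.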
@@ -1,1971 +1,1973 @@ mercurial/interfaces/repository.py
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_SIDEDATA = 1 << 12
31 REVISION_FLAG_HASCOPIESINFO = 1 << 11
31 32
32 33 REVISION_FLAGS_KNOWN = (
33 34 REVISION_FLAG_CENSORED
34 35 | REVISION_FLAG_ELLIPSIS
35 36 | REVISION_FLAG_EXTSTORED
36 37 | REVISION_FLAG_SIDEDATA
38 | REVISION_FLAG_HASCOPIESINFO
37 39 )
38 40
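
With the addition, the known revision flags occupy five contiguous high bits of the 16-bit flag field. A quick sanity check against the constants above (values copied from this hunk):

    # Bits 11-15 of the 16-bit flag word:
    #   CENSORED      = 1 << 15  (0x8000)
    #   ELLIPSIS      = 1 << 14  (0x4000)
    #   EXTSTORED     = 1 << 13  (0x2000)
    #   SIDEDATA      = 1 << 12  (0x1000)
    #   HASCOPIESINFO = 1 << 11  (0x0800)  <- added by this changeset
    # Their union, REVISION_FLAGS_KNOWN, is therefore 0xF800.
    assert REVISION_FLAGS_KNOWN == 0xF800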
39 41 CG_DELTAMODE_STD = b'default'
40 42 CG_DELTAMODE_PREV = b'previous'
41 43 CG_DELTAMODE_FULL = b'fulltext'
42 44 CG_DELTAMODE_P1 = b'p1'
43 45
44 46
45 47 class ipeerconnection(interfaceutil.Interface):
46 48 """Represents a "connection" to a repository.
47 49
48 50 This is the base interface for representing a connection to a repository.
49 51 It holds basic properties and methods applicable to all peer types.
50 52
51 53 This is not a complete interface definition and should not be used
52 54 outside of this module.
53 55 """
54 56
55 57 ui = interfaceutil.Attribute("""ui.ui instance""")
56 58
57 59 def url():
58 60 """Returns a URL string representing this peer.
59 61
60 62 Currently, implementations expose the raw URL used to construct the
61 63 instance. It may contain credentials as part of the URL. The
62 64 expectations of the value aren't well-defined and this could lead to
63 65 data leakage.
64 66
65 67 TODO audit/clean consumers and more clearly define the contents of this
66 68 value.
67 69 """
68 70
69 71 def local():
70 72 """Returns a local repository instance.
71 73
72 74 If the peer represents a local repository, returns an object that
73 75 can be used to interface with it. Otherwise returns ``None``.
74 76 """
75 77
76 78 def peer():
77 79 """Returns an object conforming to this interface.
78 80
79 81 Most implementations will ``return self``.
80 82 """
81 83
82 84 def canpush():
83 85 """Returns a boolean indicating if this peer can be pushed to."""
84 86
85 87 def close():
86 88 """Close the connection to this peer.
87 89
88 90 This is called when the peer will no longer be used. Resources
89 91 associated with the peer should be cleaned up.
90 92 """
91 93
92 94
93 95 class ipeercapabilities(interfaceutil.Interface):
94 96 """Peer sub-interface related to capabilities."""
95 97
96 98 def capable(name):
97 99 """Determine support for a named capability.
98 100
99 101 Returns ``False`` if capability not supported.
100 102
101 103 Returns ``True`` if boolean capability is supported. Returns a string
102 104 if capability support is non-boolean.
103 105
104 106 Capability strings may or may not map to wire protocol capabilities.
105 107 """
106 108
107 109 def requirecap(name, purpose):
108 110 """Require a capability to be present.
109 111
110 112 Raises a ``CapabilityError`` if the capability isn't present.
111 113 """
112 114
113 115
114 116 class ipeercommands(interfaceutil.Interface):
115 117 """Client-side interface for communicating over the wire protocol.
116 118
117 119 This interface is used as a gateway to the Mercurial wire protocol.
118 120	 Methods commonly call wire protocol commands of the same name.
119 121 """
120 122
121 123 def branchmap():
122 124 """Obtain heads in named branches.
123 125
124 126 Returns a dict mapping branch name to an iterable of nodes that are
125 127 heads on that branch.
126 128 """
127 129
128 130 def capabilities():
129 131 """Obtain capabilities of the peer.
130 132
131 133 Returns a set of string capabilities.
132 134 """
133 135
134 136 def clonebundles():
135 137 """Obtains the clone bundles manifest for the repo.
136 138
137 139 Returns the manifest as unparsed bytes.
138 140 """
139 141
140 142 def debugwireargs(one, two, three=None, four=None, five=None):
141 143 """Used to facilitate debugging of arguments passed over the wire."""
142 144
143 145 def getbundle(source, **kwargs):
144 146 """Obtain remote repository data as a bundle.
145 147
146 148 This command is how the bulk of repository data is transferred from
147 149	 the peer to the local repository.
148 150
149 151 Returns a generator of bundle data.
150 152 """
151 153
152 154 def heads():
153 155 """Determine all known head revisions in the peer.
154 156
155 157 Returns an iterable of binary nodes.
156 158 """
157 159
158 160 def known(nodes):
159 161 """Determine whether multiple nodes are known.
160 162
161 163 Accepts an iterable of nodes whose presence to check for.
162 164
163 165	 Returns an iterable of booleans indicating whether the corresponding node
164 166 at that index is known to the peer.
165 167 """
166 168
167 169 def listkeys(namespace):
168 170 """Obtain all keys in a pushkey namespace.
169 171
170 172 Returns an iterable of key names.
171 173 """
172 174
173 175 def lookup(key):
174 176 """Resolve a value to a known revision.
175 177
176 178 Returns a binary node of the resolved revision on success.
177 179 """
178 180
179 181 def pushkey(namespace, key, old, new):
180 182 """Set a value using the ``pushkey`` protocol.
181 183
182 184 Arguments correspond to the pushkey namespace and key to operate on and
183 185 the old and new values for that key.
184 186
185 187 Returns a string with the peer result. The value inside varies by the
186 188 namespace.
187 189 """
188 190
189 191 def stream_out():
190 192 """Obtain streaming clone data.
191 193
192 194 Successful result should be a generator of data chunks.
193 195 """
194 196
195 197 def unbundle(bundle, heads, url):
196 198 """Transfer repository data to the peer.
197 199
198 200 This is how the bulk of data during a push is transferred.
199 201
200 202 Returns the integer number of heads added to the peer.
201 203 """
202 204
203 205
204 206 class ipeerlegacycommands(interfaceutil.Interface):
205 207 """Interface for implementing support for legacy wire protocol commands.
206 208
207 209 Wire protocol commands transition to legacy status when they are no longer
208 210 used by modern clients. To facilitate identifying which commands are
209 211 legacy, the interfaces are split.
210 212 """
211 213
212 214 def between(pairs):
213 215 """Obtain nodes between pairs of nodes.
214 216
215 217 ``pairs`` is an iterable of node pairs.
216 218
217 219 Returns an iterable of iterables of nodes corresponding to each
218 220 requested pair.
219 221 """
220 222
221 223 def branches(nodes):
222 224 """Obtain ancestor changesets of specific nodes back to a branch point.
223 225
224 226 For each requested node, the peer finds the first ancestor node that is
225 227 a DAG root or is a merge.
226 228
227 229 Returns an iterable of iterables with the resolved values for each node.
228 230 """
229 231
230 232 def changegroup(nodes, source):
231 233 """Obtain a changegroup with data for descendants of specified nodes."""
232 234
233 235 def changegroupsubset(bases, heads, source):
234 236 pass
235 237
236 238
237 239 class ipeercommandexecutor(interfaceutil.Interface):
238 240 """Represents a mechanism to execute remote commands.
239 241
240 242 This is the primary interface for requesting that wire protocol commands
241 243 be executed. Instances of this interface are active in a context manager
242 244 and have a well-defined lifetime. When the context manager exits, all
243 245 outstanding requests are waited on.
244 246 """
245 247
246 248 def callcommand(name, args):
247 249 """Request that a named command be executed.
248 250
249 251 Receives the command name and a dictionary of command arguments.
250 252
251 253 Returns a ``concurrent.futures.Future`` that will resolve to the
252 254 result of that command request. That exact value is left up to
253 255 the implementation and possibly varies by command.
254 256
255 257 Not all commands can coexist with other commands in an executor
256 258 instance: it depends on the underlying wire protocol transport being
257 259 used and the command itself.
258 260
259 261 Implementations MAY call ``sendcommands()`` automatically if the
260 262 requested command can not coexist with other commands in this executor.
261 263
262 264 Implementations MAY call ``sendcommands()`` automatically when the
263 265 future's ``result()`` is called. So, consumers using multiple
264 266 commands with an executor MUST ensure that ``result()`` is not called
265 267 until all command requests have been issued.
266 268 """
267 269
268 270 def sendcommands():
269 271 """Trigger submission of queued command requests.
270 272
271 273 Not all transports submit commands as soon as they are requested to
272 274 run. When called, this method forces queued command requests to be
273 275 issued. It will no-op if all commands have already been sent.
274 276
275 277 When called, no more new commands may be issued with this executor.
276 278 """
277 279
278 280 def close():
279 281 """Signal that this command request is finished.
280 282
281 283 When called, no more new commands may be issued. All outstanding
282 284 commands that have previously been issued are waited on before
283 285 returning. This not only includes waiting for the futures to resolve,
284 286 but also waiting for all response data to arrive. In other words,
285 287 calling this waits for all on-wire state for issued command requests
286 288 to finish.
287 289
288 290 When used as a context manager, this method is called when exiting the
289 291 context manager.
290 292
291 293 This method may call ``sendcommands()`` if there are buffered commands.
292 294 """
293 295
294 296
295 297 class ipeerrequests(interfaceutil.Interface):
296 298 """Interface for executing commands on a peer."""
297 299
298 300 limitedarguments = interfaceutil.Attribute(
299 301 """True if the peer cannot receive large argument value for commands."""
300 302 )
301 303
302 304 def commandexecutor():
303 305 """A context manager that resolves to an ipeercommandexecutor.
304 306
305 307 The object this resolves to can be used to issue command requests
306 308 to the peer.
307 309
308 310 Callers should call its ``callcommand`` method to issue command
309 311 requests.
310 312
311 313 A new executor should be obtained for each distinct set of commands
312 314 (possibly just a single command) that the consumer wants to execute
313 315 as part of a single operation or round trip. This is because some
314 316 peers are half-duplex and/or don't support persistent connections.
315 317 e.g. in the case of HTTP peers, commands sent to an executor represent
316 318 a single HTTP request. While some peers may support multiple command
317 319 sends over the wire per executor, consumers need to code to the least
318 320 capable peer. So it should be assumed that command executors buffer
319 321 called commands until they are told to send them and that each
320 322 command executor could result in a new connection or wire-level request
321 323 being issued.
322 324 """
323 325
324 326
325 327 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
326 328 """Unified interface for peer repositories.
327 329
328 330 All peer instances must conform to this interface.
329 331 """
330 332
331 333
332 334 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
333 335 """Unified peer interface for wire protocol version 2 peers."""
334 336
335 337 apidescriptor = interfaceutil.Attribute(
336 338 """Data structure holding description of server API."""
337 339 )
338 340
339 341
340 342 @interfaceutil.implementer(ipeerbase)
341 343 class peer(object):
342 344 """Base class for peer repositories."""
343 345
344 346 limitedarguments = False
345 347
346 348 def capable(self, name):
347 349 caps = self.capabilities()
348 350 if name in caps:
349 351 return True
350 352
351 353 name = b'%s=' % name
352 354 for cap in caps:
353 355 if cap.startswith(name):
354 356 return cap[len(name) :]
355 357
356 358 return False
357 359
358 360 def requirecap(self, name, purpose):
359 361 if self.capable(name):
360 362 return
361 363
362 364 raise error.CapabilityError(
363 365 _(
364 366 b'cannot %s; remote repository does not support the '
365 367 b'\'%s\' capability'
366 368 )
367 369 % (purpose, name)
368 370 )
369 371
370 372
371 373 class iverifyproblem(interfaceutil.Interface):
372 374 """Represents a problem with the integrity of the repository.
373 375
374 376 Instances of this interface are emitted to describe an integrity issue
375 377 with a repository (e.g. corrupt storage, missing data, etc).
376 378
377 379 Instances are essentially messages associated with severity.
378 380 """
379 381
380 382 warning = interfaceutil.Attribute(
381 383 """Message indicating a non-fatal problem."""
382 384 )
383 385
384 386 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
385 387
386 388 node = interfaceutil.Attribute(
387 389 """Revision encountering the problem.
388 390
389 391 ``None`` means the problem doesn't apply to a single revision.
390 392 """
391 393 )
392 394
393 395
394 396 class irevisiondelta(interfaceutil.Interface):
395 397 """Represents a delta between one revision and another.
396 398
397 399 Instances convey enough information to allow a revision to be exchanged
398 400 with another repository.
399 401
400 402 Instances represent the fulltext revision data or a delta against
401 403 another revision. Therefore the ``revision`` and ``delta`` attributes
402 404 are mutually exclusive.
403 405
404 406 Typically used for changegroup generation.
405 407 """
406 408
407 409 node = interfaceutil.Attribute("""20 byte node of this revision.""")
408 410
409 411 p1node = interfaceutil.Attribute(
410 412 """20 byte node of 1st parent of this revision."""
411 413 )
412 414
413 415 p2node = interfaceutil.Attribute(
414 416 """20 byte node of 2nd parent of this revision."""
415 417 )
416 418
417 419 linknode = interfaceutil.Attribute(
418 420 """20 byte node of the changelog revision this node is linked to."""
419 421 )
420 422
421 423 flags = interfaceutil.Attribute(
422 424 """2 bytes of integer flags that apply to this revision.
423 425
424 426 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
425 427 """
426 428 )
427 429
428 430 basenode = interfaceutil.Attribute(
429 431 """20 byte node of the revision this data is a delta against.
430 432
431 433 ``nullid`` indicates that the revision is a full revision and not
432 434 a delta.
433 435 """
434 436 )
435 437
436 438 baserevisionsize = interfaceutil.Attribute(
437 439 """Size of base revision this delta is against.
438 440
439 441 May be ``None`` if ``basenode`` is ``nullid``.
440 442 """
441 443 )
442 444
443 445 revision = interfaceutil.Attribute(
444 446 """Raw fulltext of revision data for this node."""
445 447 )
446 448
447 449 delta = interfaceutil.Attribute(
448 450 """Delta between ``basenode`` and ``node``.
449 451
450 452 Stored in the bdiff delta format.
451 453 """
452 454 )
453 455
454 456
455 457 class ifilerevisionssequence(interfaceutil.Interface):
456 458 """Contains index data for all revisions of a file.
457 459
458 460 Types implementing this behave like lists of tuples. The index
459 461 in the list corresponds to the revision number. The values contain
460 462 index metadata.
461 463
462 464 The *null* revision (revision number -1) is always the last item
463 465 in the index.
464 466 """
465 467
466 468 def __len__():
467 469 """The total number of revisions."""
468 470
469 471 def __getitem__(rev):
470 472 """Returns the object having a specific revision number.
471 473
472 474 Returns an 8-tuple with the following fields:
473 475
474 476 offset+flags
475 477 Contains the offset and flags for the revision. 64-bit unsigned
476 478 integer where first 6 bytes are the offset and the next 2 bytes
477 479 are flags. The offset can be 0 if it is not used by the store.
478 480 compressed size
479 481 Size of the revision data in the store. It can be 0 if it isn't
480 482 needed by the store.
481 483 uncompressed size
482 484 Fulltext size. It can be 0 if it isn't needed by the store.
483 485 base revision
484 486 Revision number of revision the delta for storage is encoded
485 487 against. -1 indicates not encoded against a base revision.
486 488 link revision
487 489 Revision number of changelog revision this entry is related to.
488 490 p1 revision
489 491 Revision number of 1st parent. -1 if no 1st parent.
490 492 p2 revision
491 493	 Revision number of 2nd parent. -1 if no 2nd parent.
492 494 node
493 495 Binary node value for this revision number.
494 496
495 497 Negative values should index off the end of the sequence. ``-1``
496 498 should return the null revision. ``-2`` should return the most
497 499 recent revision.
498 500 """
499 501
500 502 def __contains__(rev):
501 503 """Whether a revision number exists."""
502 504
503 505 def insert(self, i, entry):
504 506 """Add an item to the index at specific revision."""
505 507
506 508
507 509 class ifileindex(interfaceutil.Interface):
508 510 """Storage interface for index data of a single file.
509 511
510 512 File storage data is divided into index metadata and data storage.
511 513 This interface defines the index portion of the interface.
512 514
513 515 The index logically consists of:
514 516
515 517 * A mapping between revision numbers and nodes.
516 518 * DAG data (storing and querying the relationship between nodes).
517 519 * Metadata to facilitate storage.
518 520 """
519 521
520 522 def __len__():
521 523 """Obtain the number of revisions stored for this file."""
522 524
523 525 def __iter__():
524 526 """Iterate over revision numbers for this file."""
525 527
526 528 def hasnode(node):
527 529 """Returns a bool indicating if a node is known to this store.
528 530
529 531 Implementations must only return True for full, binary node values:
530 532 hex nodes, revision numbers, and partial node matches must be
531 533 rejected.
532 534
533 535 The null node is never present.
534 536 """
535 537
536 538 def revs(start=0, stop=None):
537 539 """Iterate over revision numbers for this file, with control."""
538 540
539 541 def parents(node):
540 542 """Returns a 2-tuple of parent nodes for a revision.
541 543
542 544 Values will be ``nullid`` if the parent is empty.
543 545 """
544 546
545 547 def parentrevs(rev):
546 548 """Like parents() but operates on revision numbers."""
547 549
548 550 def rev(node):
549 551 """Obtain the revision number given a node.
550 552
551 553 Raises ``error.LookupError`` if the node is not known.
552 554 """
553 555
554 556 def node(rev):
555 557 """Obtain the node value given a revision number.
556 558
557 559 Raises ``IndexError`` if the node is not known.
558 560 """
559 561
560 562 def lookup(node):
561 563 """Attempt to resolve a value to a node.
562 564
563 565 Value can be a binary node, hex node, revision number, or a string
564 566 that can be converted to an integer.
565 567
566 568 Raises ``error.LookupError`` if a node could not be resolved.
567 569 """
568 570
569 571 def linkrev(rev):
570 572 """Obtain the changeset revision number a revision is linked to."""
571 573
572 574 def iscensored(rev):
573 575 """Return whether a revision's content has been censored."""
574 576
575 577 def commonancestorsheads(node1, node2):
576 578 """Obtain an iterable of nodes containing heads of common ancestors.
577 579
578 580 See ``ancestor.commonancestorsheads()``.
579 581 """
580 582
581 583 def descendants(revs):
582 584 """Obtain descendant revision numbers for a set of revision numbers.
583 585
584 586 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
585 587 """
586 588
587 589 def heads(start=None, stop=None):
588 590 """Obtain a list of nodes that are DAG heads, with control.
589 591
590 592 The set of revisions examined can be limited by specifying
591 593 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
592 594 iterable of nodes. DAG traversal starts at earlier revision
593 595 ``start`` and iterates forward until any node in ``stop`` is
594 596 encountered.
595 597 """
596 598
597 599 def children(node):
598 600 """Obtain nodes that are children of a node.
599 601
600 602 Returns a list of nodes.
601 603 """
602 604
603 605
604 606 class ifiledata(interfaceutil.Interface):
605 607 """Storage interface for data storage of a specific file.
606 608
607 609 This complements ``ifileindex`` and provides an interface for accessing
608 610 data for a tracked file.
609 611 """
610 612
611 613 def size(rev):
612 614 """Obtain the fulltext size of file data.
613 615
614 616 Any metadata is excluded from size measurements.
615 617 """
616 618
617 619 def revision(node, raw=False):
618 620 """"Obtain fulltext data for a node.
619 621
620 622 By default, any storage transformations are applied before the data
621 623 is returned. If ``raw`` is True, non-raw storage transformations
622 624 are not applied.
623 625
624 626 The fulltext data may contain a header containing metadata. Most
625 627 consumers should use ``read()`` to obtain the actual file data.
626 628 """
627 629
628 630 def rawdata(node):
629 631 """Obtain raw data for a node.
630 632 """
631 633
632 634 def read(node):
633 635 """Resolve file fulltext data.
634 636
635 637 This is similar to ``revision()`` except any metadata in the data
636 638 headers is stripped.
637 639 """
638 640
639 641 def renamed(node):
640 642 """Obtain copy metadata for a node.
641 643
642 644 Returns ``False`` if no copy metadata is stored or a 2-tuple of
643 645 (path, node) from which this revision was copied.
644 646 """
645 647
646 648 def cmp(node, fulltext):
647 649 """Compare fulltext to another revision.
648 650
649 651 Returns True if the fulltext is different from what is stored.
650 652
651 653 This takes copy metadata into account.
652 654
653 655 TODO better document the copy metadata and censoring logic.
654 656 """
655 657
656 658 def emitrevisions(
657 659 nodes,
658 660 nodesorder=None,
659 661 revisiondata=False,
660 662 assumehaveparentrevisions=False,
661 663 deltamode=CG_DELTAMODE_STD,
662 664 ):
663 665 """Produce ``irevisiondelta`` for revisions.
664 666
665 667 Given an iterable of nodes, emits objects conforming to the
666 668 ``irevisiondelta`` interface that describe revisions in storage.
667 669
668 670 This method is a generator.
669 671
670 672 The input nodes may be unordered. Implementations must ensure that a
671 673 node's parents are emitted before the node itself. Transitively, this
672 674 means that a node may only be emitted once all its ancestors in
673 675 ``nodes`` have also been emitted.
674 676
675 677 By default, emits "index" data (the ``node``, ``p1node``, and
676 678 ``p2node`` attributes). If ``revisiondata`` is set, revision data
677 679 will also be present on the emitted objects.
678 680
679 681 With default argument values, implementations can choose to emit
680 682 either fulltext revision data or a delta. When emitting deltas,
681 683 implementations must consider whether the delta's base revision
682 684 fulltext is available to the receiver.
683 685
684 686 The base revision fulltext is guaranteed to be available if any of
685 687 the following are met:
686 688
687 689 * Its fulltext revision was emitted by this method call.
688 690 * A delta for that revision was emitted by this method call.
689 691 * ``assumehaveparentrevisions`` is True and the base revision is a
690 692 parent of the node.
691 693
692 694 ``nodesorder`` can be used to control the order that revisions are
693 695 emitted. By default, revisions can be reordered as long as they are
694 696 in DAG topological order (see above). If the value is ``nodes``,
695 697 the iteration order from ``nodes`` should be used. If the value is
696 698 ``storage``, then the native order from the backing storage layer
697 699 is used. (Not all storage layers will have strong ordering and behavior
698 700 of this mode is storage-dependent.) ``nodes`` ordering can force
699 701 revisions to be emitted before their ancestors, so consumers should
700 702 use it with care.
701 703
702 704 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
703 705 be set and it is the caller's responsibility to resolve it, if needed.
704 706
705 707 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
706 708 all revision data should be emitted as deltas against the revision
707 709 emitted just prior. The initial revision should be a delta against its
708 710 1st parent.
709 711 """
710 712
711 713
712 714 class ifilemutation(interfaceutil.Interface):
713 715 """Storage interface for mutation events of a tracked file."""
714 716
715 717 def add(filedata, meta, transaction, linkrev, p1, p2):
716 718 """Add a new revision to the store.
717 719
718 720 Takes file data, dictionary of metadata, a transaction, linkrev,
719 721 and parent nodes.
720 722
721 723 Returns the node that was added.
722 724
723 725 May no-op if a revision matching the supplied data is already stored.
724 726 """
725 727
726 728 def addrevision(
727 729 revisiondata,
728 730 transaction,
729 731 linkrev,
730 732 p1,
731 733 p2,
732 734 node=None,
733 735 flags=0,
734 736 cachedelta=None,
735 737 ):
736 738 """Add a new revision to the store.
737 739
738 740 This is similar to ``add()`` except it operates at a lower level.
739 741
740 742 The data passed in already contains a metadata header, if any.
741 743
742 744 ``node`` and ``flags`` can be used to define the expected node and
743 745 the flags to use with storage. ``flags`` is a bitwise value composed
744 746 of the various ``REVISION_FLAG_*`` constants.
745 747
746 748 ``add()`` is usually called when adding files from e.g. the working
747 749 directory. ``addrevision()`` is often called by ``add()`` and for
748 750 scenarios where revision data has already been computed, such as when
749 751 applying raw data from a peer repo.
750 752 """
751 753
752 754 def addgroup(
753 755 deltas,
754 756 linkmapper,
755 757 transaction,
756 758 addrevisioncb=None,
757 759 maybemissingparents=False,
758 760 ):
759 761 """Process a series of deltas for storage.
760 762
761 763 ``deltas`` is an iterable of 7-tuples of
762 764 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
763 765 to add.
764 766
765 767 The ``delta`` field contains ``mpatch`` data to apply to a base
766 768 revision, identified by ``deltabase``. The base node can be
767 769 ``nullid``, in which case the header from the delta can be ignored
768 770 and the delta used as the fulltext.
769 771
770 772 ``addrevisioncb`` should be called for each node as it is committed.
771 773
772 774 ``maybemissingparents`` is a bool indicating whether the incoming
773 775 data may reference parents/ancestor revisions that aren't present.
774 776 This flag is set when receiving data into a "shallow" store that
775 777 doesn't hold all history.
776 778
777 779 Returns a list of nodes that were processed. A node will be in the list
778 780 even if it existed in the store previously.
779 781 """
780 782
781 783 def censorrevision(tr, node, tombstone=b''):
782 784 """Remove the content of a single revision.
783 785
784 786 The specified ``node`` will have its content purged from storage.
785 787 Future attempts to access the revision data for this node will
786 788 result in failure.
787 789
788 790 A ``tombstone`` message can optionally be stored. This message may be
789 791 displayed to users when they attempt to access the missing revision
790 792 data.
791 793
792 794 Storage backends may have stored deltas against the previous content
793 795 in this revision. As part of censoring a revision, these storage
794 796 backends are expected to rewrite any internally stored deltas such
795 797 that they no longer reference the deleted content.
796 798 """
797 799
798 800 def getstrippoint(minlink):
799 801 """Find the minimum revision that must be stripped to strip a linkrev.
800 802
801 803 Returns a 2-tuple containing the minimum revision number and a set
802 804	 of all revision numbers that would be broken by this strip.
803 805
804 806 TODO this is highly revlog centric and should be abstracted into
805 807 a higher-level deletion API. ``repair.strip()`` relies on this.
806 808 """
807 809
808 810 def strip(minlink, transaction):
809 811 """Remove storage of items starting at a linkrev.
810 812
811 813 This uses ``getstrippoint()`` to determine the first node to remove.
812 814 Then it effectively truncates storage for all revisions after that.
813 815
814 816 TODO this is highly revlog centric and should be abstracted into a
815 817 higher-level deletion API.
816 818 """
817 819
818 820
819 821 class ifilestorage(ifileindex, ifiledata, ifilemutation):
820 822 """Complete storage interface for a single tracked file."""
821 823
822 824 def files():
823 825 """Obtain paths that are backing storage for this file.
824 826
825 827 TODO this is used heavily by verify code and there should probably
826 828 be a better API for that.
827 829 """
828 830
829 831 def storageinfo(
830 832 exclusivefiles=False,
831 833 sharedfiles=False,
832 834 revisionscount=False,
833 835 trackedsize=False,
834 836 storedsize=False,
835 837 ):
836 838 """Obtain information about storage for this file's data.
837 839
838 840 Returns a dict describing storage for this tracked path. The keys
839 841	 in the dict map to arguments of the same name. The arguments are bools
840 842 indicating whether to calculate and obtain that data.
841 843
842 844 exclusivefiles
843 845 Iterable of (vfs, path) describing files that are exclusively
844 846 used to back storage for this tracked path.
845 847
846 848 sharedfiles
847 849 Iterable of (vfs, path) describing files that are used to back
848 850 storage for this tracked path. Those files may also provide storage
849 851 for other stored entities.
850 852
851 853 revisionscount
852 854 Number of revisions available for retrieval.
853 855
854 856 trackedsize
855 857 Total size in bytes of all tracked revisions. This is a sum of the
856 858 length of the fulltext of all revisions.
857 859
858 860 storedsize
859 861 Total size in bytes used to store data for all tracked revisions.
860 862 This is commonly less than ``trackedsize`` due to internal usage
861 863 of deltas rather than fulltext revisions.
862 864
863 865	 Not all storage backends may support all queries or have a reasonable
864 866 value to use. In that case, the value should be set to ``None`` and
865 867 callers are expected to handle this special value.
866 868 """
867 869
868 870 def verifyintegrity(state):
869 871 """Verifies the integrity of file storage.
870 872
871 873 ``state`` is a dict holding state of the verifier process. It can be
872 874 used to communicate data between invocations of multiple storage
873 875 primitives.
874 876
875 877 If individual revisions cannot have their revision content resolved,
876 878 the method is expected to set the ``skipread`` key to a set of nodes
877 879 that encountered problems. If set, the method can also add the node(s)
878 880 to ``safe_renamed`` in order to indicate nodes that may perform the
879 881 rename checks with currently accessible data.
880 882
881 883 The method yields objects conforming to the ``iverifyproblem``
882 884 interface.
883 885 """
884 886
885 887
886 888 class idirs(interfaceutil.Interface):
887 889 """Interface representing a collection of directories from paths.
888 890
889 891 This interface is essentially a derived data structure representing
890 892 directories from a collection of paths.
891 893 """
892 894
893 895 def addpath(path):
894 896 """Add a path to the collection.
895 897
896 898 All directories in the path will be added to the collection.
897 899 """
898 900
899 901 def delpath(path):
900 902 """Remove a path from the collection.
901 903
902 904 If the removal was the last path in a particular directory, the
903 905 directory is removed from the collection.
904 906 """
905 907
906 908 def __iter__():
907 909 """Iterate over the directories in this collection of paths."""
908 910
909 911 def __contains__(path):
910 912 """Whether a specific directory is in this collection."""
911 913
912 914
913 915 class imanifestdict(interfaceutil.Interface):
914 916 """Interface representing a manifest data structure.
915 917
916 918 A manifest is effectively a dict mapping paths to entries. Each entry
917 919 consists of a binary node and extra flags affecting that entry.
918 920 """
919 921
920 922 def __getitem__(path):
921 923 """Returns the binary node value for a path in the manifest.
922 924
923 925 Raises ``KeyError`` if the path does not exist in the manifest.
924 926
925 927 Equivalent to ``self.find(path)[0]``.
926 928 """
927 929
928 930 def find(path):
929 931 """Returns the entry for a path in the manifest.
930 932
931 933 Returns a 2-tuple of (node, flags).
932 934
933 935 Raises ``KeyError`` if the path does not exist in the manifest.
934 936 """
935 937
936 938 def __len__():
937 939 """Return the number of entries in the manifest."""
938 940
939 941 def __nonzero__():
940 942 """Returns True if the manifest has entries, False otherwise."""
941 943
942 944 __bool__ = __nonzero__
943 945
944 946 def __setitem__(path, node):
945 947 """Define the node value for a path in the manifest.
946 948
947 949 If the path is already in the manifest, its flags will be copied to
948 950 the new entry.
949 951 """
950 952
951 953 def __contains__(path):
952 954 """Whether a path exists in the manifest."""
953 955
954 956 def __delitem__(path):
955 957 """Remove a path from the manifest.
956 958
957 959 Raises ``KeyError`` if the path is not in the manifest.
958 960 """
959 961
960 962 def __iter__():
961 963 """Iterate over paths in the manifest."""
962 964
963 965 def iterkeys():
964 966 """Iterate over paths in the manifest."""
965 967
966 968 def keys():
967 969 """Obtain a list of paths in the manifest."""
968 970
969 971 def filesnotin(other, match=None):
970 972 """Obtain the set of paths in this manifest but not in another.
971 973
972 974 ``match`` is an optional matcher function to be applied to both
973 975 manifests.
974 976
975 977 Returns a set of paths.
976 978 """
977 979
978 980 def dirs():
979 981 """Returns an object implementing the ``idirs`` interface."""
980 982
981 983 def hasdir(dir):
982 984 """Returns a bool indicating if a directory is in this manifest."""
983 985
984 986 def walk(match):
985 987 """Generator of paths in manifest satisfying a matcher.
986 988
987 989 If the matcher has explicit files listed and they don't exist in
988 990 the manifest, ``match.bad()`` is called for each missing file.
989 991 """
990 992
991 993 def diff(other, match=None, clean=False):
992 994 """Find differences between this manifest and another.
993 995
994 996 This manifest is compared to ``other``.
995 997
996 998 If ``match`` is provided, the two manifests are filtered against this
997 999 matcher and only entries satisfying the matcher are compared.
998 1000
999 1001 If ``clean`` is True, unchanged files are included in the returned
1000 1002 object.
1001 1003
1002 1004 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1003 1005 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1004 1006 represents the node and flags for this manifest and ``(node2, flag2)``
1005 1007 are the same for the other manifest.
1006 1008 """
1007 1009
1008 1010 def setflag(path, flag):
1009 1011 """Set the flag value for a given path.
1010 1012
1011 1013 Raises ``KeyError`` if the path is not already in the manifest.
1012 1014 """
1013 1015
1014 1016 def get(path, default=None):
1015 1017 """Obtain the node value for a path or a default value if missing."""
1016 1018
1017 1019 def flags(path):
1018 1020 """Return the flags value for a path (default: empty bytestring)."""
1019 1021
1020 1022 def copy():
1021 1023 """Return a copy of this manifest."""
1022 1024
1023 1025 def items():
1024 1026 """Returns an iterable of (path, node) for items in this manifest."""
1025 1027
1026 1028 def iteritems():
1027 1029 """Identical to items()."""
1028 1030
1029 1031 def iterentries():
1030 1032 """Returns an iterable of (path, node, flags) for this manifest.
1031 1033
1032 1034 Similar to ``iteritems()`` except items are a 3-tuple and include
1033 1035 flags.
1034 1036 """
1035 1037
1036 1038 def text():
1037 1039 """Obtain the raw data representation for this manifest.
1038 1040
1039 1041 Result is used to create a manifest revision.
1040 1042 """
1041 1043
1042 1044 def fastdelta(base, changes):
1043 1045 """Obtain a delta between this manifest and another given changes.
1044 1046
1045 1047 ``base`` is the raw data representation for another manifest.
1046 1048
1047 1049 ``changes`` is an iterable of ``(path, to_delete)``.
1048 1050
1049 1051 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1050 1052 delta between ``base`` and this manifest.
1051 1053
1052 1054 If this manifest implementation can't support ``fastdelta()``,
1053 1055 raise ``mercurial.manifest.FastdeltaUnavailable``.
1054 1056 """
1055 1057
1056 1058
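# Illustrative sketch only (not part of this module): consuming the
# ``diff()`` result format documented above. ``mf_old`` and ``mf_new`` are
# assumed to conform to ``imanifestdict``, and a missing side is assumed to
# be reported with a ``None`` node, as in Mercurial's concrete manifests.
def _demo_summarize_diff(mf_old, mf_new):
    added, removed, modified = [], [], []
    for path, ((n1, fl1), (n2, fl2)) in mf_old.diff(mf_new).items():
        if n1 is None:
            added.append(path)  # present only in mf_new
        elif n2 is None:
            removed.append(path)  # present only in mf_old
        else:
            modified.append(path)  # node and/or flags differ
    return added, removed, modified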
1057 1059 class imanifestrevisionbase(interfaceutil.Interface):
1058 1060 """Base interface representing a single revision of a manifest.
1059 1061
1060 1062 Should not be used as a primary interface: should always be inherited
1061 1063 as part of a larger interface.
1062 1064 """
1063 1065
1064 1066 def copy():
1065 1067 """Obtain a copy of this manifest instance.
1066 1068
1067 1069 Returns an object conforming to the ``imanifestrevisionwritable``
1068 1070 interface. The instance will be associated with the same
1069 1071 ``imanifestlog`` collection as this instance.
1070 1072 """
1071 1073
1072 1074 def read():
1073 1075 """Obtain the parsed manifest data structure.
1074 1076
1075 1077 The returned object conforms to the ``imanifestdict`` interface.
1076 1078 """
1077 1079
1078 1080
1079 1081 class imanifestrevisionstored(imanifestrevisionbase):
1080 1082 """Interface representing a manifest revision committed to storage."""
1081 1083
1082 1084 def node():
1083 1085 """The binary node for this manifest."""
1084 1086
1085 1087 parents = interfaceutil.Attribute(
1086 1088 """List of binary nodes that are parents for this manifest revision."""
1087 1089 )
1088 1090
1089 1091 def readdelta(shallow=False):
1090 1092 """Obtain the manifest data structure representing changes from parent.
1091 1093
1092 1094 This manifest is compared to its first parent. A new manifest
1093 1095 representing those differences is constructed.
1094 1096
1095 1097 The returned object conforms to the ``imanifestdict`` interface.
1096 1098 """
1097 1099
1098 1100 def readfast(shallow=False):
1099 1101 """Calls either ``read()`` or ``readdelta()``.
1100 1102
1101 1103 The faster of the two options is called.
1102 1104 """
1103 1105
1104 1106 def find(key):
1105 1107 """Calls self.read().find(key)``.
1106 1108
1107 1109 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1108 1110 """
1109 1111
1110 1112
1111 1113 class imanifestrevisionwritable(imanifestrevisionbase):
1112 1114 """Interface representing a manifest revision that can be committed."""
1113 1115
1114 1116 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1115 1117 """Add this revision to storage.
1116 1118
1117 1119 Takes a transaction object, the changeset revision number it will
1118 1120 be associated with, its parent nodes, and lists of added and
1119 1121 removed paths.
1120 1122
1121 1123 If match is provided, storage can choose not to inspect or write out
1122 1124 items that do not match. Storage is still required to be able to provide
1123 1125 the full manifest in the future for any directories written (these
1124 1126 manifests should not be "narrowed on disk").
1125 1127
1126 1128 Returns the binary node of the created revision.
1127 1129 """
1128 1130
1129 1131
1130 1132 class imanifeststorage(interfaceutil.Interface):
1131 1133 """Storage interface for manifest data."""
1132 1134
1133 1135 tree = interfaceutil.Attribute(
1134 1136 """The path to the directory this manifest tracks.
1135 1137
1136 1138 The empty bytestring represents the root manifest.
1137 1139 """
1138 1140 )
1139 1141
1140 1142 index = interfaceutil.Attribute(
1141 1143 """An ``ifilerevisionssequence`` instance."""
1142 1144 )
1143 1145
1144 1146 indexfile = interfaceutil.Attribute(
1145 1147 """Path of revlog index file.
1146 1148
1147 1149 TODO this is revlog specific and should not be exposed.
1148 1150 """
1149 1151 )
1150 1152
1151 1153 opener = interfaceutil.Attribute(
1152 1154 """VFS opener to use to access underlying files used for storage.
1153 1155
1154 1156 TODO this is revlog specific and should not be exposed.
1155 1157 """
1156 1158 )
1157 1159
1158 1160 version = interfaceutil.Attribute(
1159 1161 """Revlog version number.
1160 1162
1161 1163 TODO this is revlog specific and should not be exposed.
1162 1164 """
1163 1165 )
1164 1166
1165 1167 _generaldelta = interfaceutil.Attribute(
1166 1168 """Whether generaldelta storage is being used.
1167 1169
1168 1170 TODO this is revlog specific and should not be exposed.
1169 1171 """
1170 1172 )
1171 1173
1172 1174 fulltextcache = interfaceutil.Attribute(
1173 1175 """Dict with cache of fulltexts.
1174 1176
1175 1177 TODO this doesn't feel appropriate for the storage interface.
1176 1178 """
1177 1179 )
1178 1180
1179 1181 def __len__():
1180 1182 """Obtain the number of revisions stored for this manifest."""
1181 1183
1182 1184 def __iter__():
1183 1185 """Iterate over revision numbers for this manifest."""
1184 1186
1185 1187 def rev(node):
1186 1188 """Obtain the revision number given a binary node.
1187 1189
1188 1190 Raises ``error.LookupError`` if the node is not known.
1189 1191 """
1190 1192
1191 1193 def node(rev):
1192 1194 """Obtain the node value given a revision number.
1193 1195
1194 1196 Raises ``error.LookupError`` if the revision is not known.
1195 1197 """
1196 1198
1197 1199 def lookup(value):
1198 1200 """Attempt to resolve a value to a node.
1199 1201
1200 1202 Value can be a binary node, hex node, revision number, or bytes
1201 1203 that can be converted to an integer.
1202 1204
1203 1205 Raises ``error.LookupError`` if a node cannot be resolved.
1204 1206 """
1205 1207
1206 1208 def parents(node):
1207 1209 """Returns a 2-tuple of parent nodes for a node.
1208 1210
1209 1211 Values will be ``nullid`` if the parent is empty.
1210 1212 """
1211 1213
1212 1214 def parentrevs(rev):
1213 1215 """Like parents() but operates on revision numbers."""
1214 1216
1215 1217 def linkrev(rev):
1216 1218 """Obtain the changeset revision number a revision is linked to."""
1217 1219
1218 1220 def revision(node, _df=None, raw=False):
1219 1221 """Obtain fulltext data for a node."""
1220 1222
1221 1223 def rawdata(node, _df=None):
1222 1224 """Obtain raw data for a node."""
1223 1225
1224 1226 def revdiff(rev1, rev2):
1225 1227 """Obtain a delta between two revision numbers.
1226 1228
1227 1229 The returned data is the result of ``bdiff.bdiff()`` on the raw
1228 1230 revision data.
1229 1231 """
1230 1232
1231 1233 def cmp(node, fulltext):
1232 1234 """Compare fulltext to another revision.
1233 1235
1234 1236 Returns True if the fulltext is different from what is stored.
1235 1237 """
1236 1238
1237 1239 def emitrevisions(
1238 1240 nodes,
1239 1241 nodesorder=None,
1240 1242 revisiondata=False,
1241 1243 assumehaveparentrevisions=False,
1242 1244 ):
1243 1245 """Produce ``irevisiondelta`` describing revisions.
1244 1246
1245 1247 See the documentation for ``ifiledata`` for more.
1246 1248 """
1247 1249
1248 1250 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1249 1251 """Process a series of deltas for storage.
1250 1252
1251 1253 See the documentation in ``ifilemutation`` for more.
1252 1254 """
1253 1255
1254 1256 def rawsize(rev):
1255 1257 """Obtain the size of tracked data.
1256 1258
1257 1259 Is equivalent to ``len(m.rawdata(node))``.
1258 1260
1259 1261 TODO this method is only used by upgrade code and may be removed.
1260 1262 """
1261 1263
1262 1264 def getstrippoint(minlink):
1263 1265 """Find minimum revision that must be stripped to strip a linkrev.
1264 1266
1265 1267 See the documentation in ``ifilemutation`` for more.
1266 1268 """
1267 1269
1268 1270 def strip(minlink, transaction):
1269 1271 """Remove storage of items starting at a linkrev.
1270 1272
1271 1273 See the documentation in ``ifilemutation`` for more.
1272 1274 """
1273 1275
1274 1276 def checksize():
1275 1277 """Obtain the expected sizes of backing files.
1276 1278
1277 1279 TODO this is used by verify and it should not be part of the interface.
1278 1280 """
1279 1281
1280 1282 def files():
1281 1283 """Obtain paths that are backing storage for this manifest.
1282 1284
1283 1285 TODO this is used by verify and there should probably be a better API
1284 1286 for this functionality.
1285 1287 """
1286 1288
1287 1289 def deltaparent(rev):
1288 1290 """Obtain the revision that a revision is delta'd against.
1289 1291
1290 1292 TODO delta encoding is an implementation detail of storage and should
1291 1293 not be exposed to the storage interface.
1292 1294 """
1293 1295
1294 1296 def clone(tr, dest, **kwargs):
1295 1297 """Clone this instance to another."""
1296 1298
1297 1299 def clearcaches(clear_persisted_data=False):
1298 1300 """Clear any caches associated with this instance."""
1299 1301
1300 1302 def dirlog(d):
1301 1303 """Obtain a manifest storage instance for a tree."""
1302 1304
1303 1305 def add(
1304 1306 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1305 1307 ):
1306 1308 """Add a revision to storage.
1307 1309
1308 1310 ``m`` is an object conforming to ``imanifestdict``.
1309 1311
1310 1312 ``link`` is the linkrev revision number.
1311 1313
1312 1314 ``p1`` and ``p2`` are the parent revision numbers.
1313 1315
1314 1316 ``added`` and ``removed`` are iterables of added and removed paths,
1315 1317 respectively.
1316 1318
1317 1319 ``readtree`` is a function that can be used to read the child tree(s)
1318 1320 when recursively writing the full tree structure when using
1319 1321 treemanifests.
1320 1322
1321 1323 ``match`` is a matcher that can be used to hint to storage that not all
1322 1324 paths must be inspected; this is an optimization and can be safely
1323 1325 ignored. Note that the storage must still be able to reproduce a full
1324 1326 manifest including files that did not match.
1325 1327 """
1326 1328
1327 1329 def storageinfo(
1328 1330 exclusivefiles=False,
1329 1331 sharedfiles=False,
1330 1332 revisionscount=False,
1331 1333 trackedsize=False,
1332 1334 storedsize=False,
1333 1335 ):
1334 1336 """Obtain information about storage for this manifest's data.
1335 1337
1336 1338 See ``ifilestorage.storageinfo()`` for a description of this method.
1337 1339 This one behaves the same way, except for manifest data.
1338 1340 """
1339 1341
1340 1342
1341 1343 class imanifestlog(interfaceutil.Interface):
1342 1344 """Interface representing a collection of manifest snapshots.
1343 1345
1344 1346 Represents the root manifest in a repository.
1345 1347
1346 1348 Also serves as a means to access nested tree manifests and to cache
1347 1349 tree manifests.
1348 1350 """
1349 1351
1350 1352 def __getitem__(node):
1351 1353 """Obtain a manifest instance for a given binary node.
1352 1354
1353 1355 Equivalent to calling ``self.get('', node)``.
1354 1356
1355 1357 The returned object conforms to the ``imanifestrevisionstored``
1356 1358 interface.
1357 1359 """
1358 1360
1359 1361 def get(tree, node, verify=True):
1360 1362 """Retrieve the manifest instance for a given directory and binary node.
1361 1363
1362 1364 ``node`` always refers to the node of the root manifest (which will be
1363 1365 the only manifest if flat manifests are being used).
1364 1366
1365 1367 If ``tree`` is the empty string, the root manifest is returned.
1366 1368 Otherwise the manifest for the specified directory will be returned
1367 1369 (requires tree manifests).
1368 1370
1369 1371 If ``verify`` is True, ``LookupError`` is raised if the node is not
1370 1372 known.
1371 1373
1372 1374 The returned object conforms to the ``imanifestrevisionstored``
1373 1375 interface.
1374 1376 """
1375 1377
1376 1378 def getstorage(tree):
1377 1379 """Retrieve an interface to storage for a particular tree.
1378 1380
1379 1381 If ``tree`` is the empty bytestring, storage for the root manifest will
1380 1382 be returned. Otherwise storage for a tree manifest is returned.
1381 1383
1382 1384 TODO formalize interface for returned object.
1383 1385 """
1384 1386
1385 1387 def clearcaches():
1386 1388 """Clear caches associated with this collection."""
1387 1389
1388 1390 def rev(node):
1389 1391 """Obtain the revision number for a binary node.
1390 1392
1391 1393 Raises ``error.LookupError`` if the node is not known.
1392 1394 """
1393 1395
1394 1396 def update_caches(transaction):
1395 1397 """update whatever cache are relevant for the used storage."""
1396 1398
1397 1399
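# Illustrative usage sketch (not part of this module): reaching a manifest
# entry through the interfaces above. ``repo``, ``manifest_node`` and
# ``path`` are hypothetical placeholders.
def _demo_manifest_entry(repo, manifest_node, path):
    mctx = repo.manifestlog[manifest_node]  # imanifestrevisionstored
    mfdict = mctx.read()  # imanifestdict
    return mfdict.find(path)  # (node, flags); raises KeyError if absent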
1398 1400 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1399 1401 """Local repository sub-interface providing access to tracked file storage.
1400 1402
1401 1403 This interface defines how a repository accesses storage for a single
1402 1404 tracked file path.
1403 1405 """
1404 1406
1405 1407 def file(f):
1406 1408 """Obtain a filelog for a tracked path.
1407 1409
1408 1410 The returned type conforms to the ``ifilestorage`` interface.
1409 1411 """
1410 1412
1411 1413
1412 1414 class ilocalrepositorymain(interfaceutil.Interface):
1413 1415 """Main interface for local repositories.
1414 1416
1415 1417 This currently captures the reality of things - not how things should be.
1416 1418 """
1417 1419
1418 1420 supportedformats = interfaceutil.Attribute(
1419 1421 """Set of requirements that apply to stream clone.
1420 1422
1421 1423 This is actually a class attribute and is shared among all instances.
1422 1424 """
1423 1425 )
1424 1426
1425 1427 supported = interfaceutil.Attribute(
1426 1428 """Set of requirements that this repo is capable of opening."""
1427 1429 )
1428 1430
1429 1431 requirements = interfaceutil.Attribute(
1430 1432 """Set of requirements this repo uses."""
1431 1433 )
1432 1434
1433 1435 features = interfaceutil.Attribute(
1434 1436 """Set of "features" this repository supports.
1435 1437
1436 1438 A "feature" is a loosely-defined term. It can refer to a feature
1437 1439 in the classical sense or can describe an implementation detail
1438 1440 of the repository. For example, a ``readonly`` feature may denote
1439 1441 the repository as read-only. Or a ``revlogfilestore`` feature may
1440 1442 denote that the repository is using revlogs for file storage.
1441 1443
1442 1444 The intent of features is to provide a machine-queryable mechanism
1443 1445 for repo consumers to test for various repository characteristics.
1444 1446
1445 1447 Features are similar to ``requirements``. The main difference is that
1446 1448 requirements are stored on-disk and represent requirements to open the
1447 1449 repository. Features describe run-time capabilities of the repository
1448 1450 and are more granular (and may be derived from requirements).
1449 1451 """
1450 1452 )
1451 1453
1452 1454 filtername = interfaceutil.Attribute(
1453 1455 """Name of the repoview that is active on this repo."""
1454 1456 )
1455 1457
1456 1458 wvfs = interfaceutil.Attribute(
1457 1459 """VFS used to access the working directory."""
1458 1460 )
1459 1461
1460 1462 vfs = interfaceutil.Attribute(
1461 1463 """VFS rooted at the .hg directory.
1462 1464
1463 1465 Used to access repository data not in the store.
1464 1466 """
1465 1467 )
1466 1468
1467 1469 svfs = interfaceutil.Attribute(
1468 1470 """VFS rooted at the store.
1469 1471
1470 1472 Used to access repository data in the store. Typically .hg/store.
1471 1473 But can point elsewhere if the store is shared.
1472 1474 """
1473 1475 )
1474 1476
1475 1477 root = interfaceutil.Attribute(
1476 1478 """Path to the root of the working directory."""
1477 1479 )
1478 1480
1479 1481 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1480 1482
1481 1483 origroot = interfaceutil.Attribute(
1482 1484 """The filesystem path that was used to construct the repo."""
1483 1485 )
1484 1486
1485 1487 auditor = interfaceutil.Attribute(
1486 1488 """A pathauditor for the working directory.
1487 1489
1488 1490 This checks if a path refers to a nested repository.
1489 1491
1490 1492 Operates on the filesystem.
1491 1493 """
1492 1494 )
1493 1495
1494 1496 nofsauditor = interfaceutil.Attribute(
1495 1497 """A pathauditor for the working directory.
1496 1498
1497 1499 This is like ``auditor`` except it doesn't do filesystem checks.
1498 1500 """
1499 1501 )
1500 1502
1501 1503 baseui = interfaceutil.Attribute(
1502 1504 """Original ui instance passed into constructor."""
1503 1505 )
1504 1506
1505 1507 ui = interfaceutil.Attribute("""Main ui instance for this repository.""")
1506 1508
1507 1509 sharedpath = interfaceutil.Attribute(
1508 1510 """Path to the .hg directory of the repo this repo was shared from."""
1509 1511 )
1510 1512
1511 1513 store = interfaceutil.Attribute("""A store instance.""")
1512 1514
1513 1515 spath = interfaceutil.Attribute("""Path to the store.""")
1514 1516
1515 1517 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1516 1518
1517 1519 cachevfs = interfaceutil.Attribute(
1518 1520 """A VFS used to access the cache directory.
1519 1521
1520 1522 Typically .hg/cache.
1521 1523 """
1522 1524 )
1523 1525
1524 1526 wcachevfs = interfaceutil.Attribute(
1525 1527 """A VFS used to access the cache directory dedicated to working copy
1526 1528
1527 1529 Typically .hg/wcache.
1528 1530 """
1529 1531 )
1530 1532
1531 1533 filteredrevcache = interfaceutil.Attribute(
1532 1534 """Holds sets of revisions to be filtered."""
1533 1535 )
1534 1536
1535 1537 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1536 1538
1537 1539 filecopiesmode = interfaceutil.Attribute(
1538 1540 """The way files copies should be dealt with in this repo."""
1539 1541 )
1540 1542
1541 1543 def close():
1542 1544 """Close the handle on this repository."""
1543 1545
1544 1546 def peer():
1545 1547 """Obtain an object conforming to the ``peer`` interface."""
1546 1548
1547 1549 def unfiltered():
1548 1550 """Obtain an unfiltered/raw view of this repo."""
1549 1551
1550 1552 def filtered(name, visibilityexceptions=None):
1551 1553 """Obtain a named view of this repository."""
1552 1554
1553 1555 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1554 1556
1555 1557 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1556 1558
1557 1559 manifestlog = interfaceutil.Attribute(
1558 1560 """An instance conforming to the ``imanifestlog`` interface.
1559 1561
1560 1562 Provides access to manifests for the repository.
1561 1563 """
1562 1564 )
1563 1565
1564 1566 dirstate = interfaceutil.Attribute("""Working directory state.""")
1565 1567
1566 1568 narrowpats = interfaceutil.Attribute(
1567 1569 """Matcher patterns for this repository's narrowspec."""
1568 1570 )
1569 1571
1570 1572 def narrowmatch(match=None, includeexact=False):
1571 1573 """Obtain a matcher for the narrowspec."""
1572 1574
1573 1575 def setnarrowpats(newincludes, newexcludes):
1574 1576 """Define the narrowspec for this repository."""
1575 1577
1576 1578 def __getitem__(changeid):
1577 1579 """Try to resolve a changectx."""
1578 1580
1579 1581 def __contains__(changeid):
1580 1582 """Whether a changeset exists."""
1581 1583
1582 1584 def __nonzero__():
1583 1585 """Always returns True."""
1584 1586 return True
1585 1587
1586 1588 __bool__ = __nonzero__
1587 1589
1588 1590 def __len__():
1589 1591 """Returns the number of changesets in the repo."""
1590 1592
1591 1593 def __iter__():
1592 1594 """Iterate over revisions in the changelog."""
1593 1595
1594 1596 def revs(expr, *args):
1595 1597 """Evaluate a revset.
1596 1598
1597 1599 Emits revisions.
1598 1600 """
1599 1601
1600 1602 def set(expr, *args):
1601 1603 """Evaluate a revset.
1602 1604
1603 1605 Emits changectx instances.
1604 1606 """
1605 1607
1606 1608 def anyrevs(specs, user=False, localalias=None):
1607 1609 """Find revisions matching one of the given revsets."""
1608 1610
1609 1611 def url():
1610 1612 """Returns a string representing the location of this repo."""
1611 1613
1612 1614 def hook(name, throw=False, **args):
1613 1615 """Call a hook."""
1614 1616
1615 1617 def tags():
1616 1618 """Return a mapping of tag to node."""
1617 1619
1618 1620 def tagtype(tagname):
1619 1621 """Return the type of a given tag."""
1620 1622
1621 1623 def tagslist():
1622 1624 """Return a list of tags ordered by revision."""
1623 1625
1624 1626 def nodetags(node):
1625 1627 """Return the tags associated with a node."""
1626 1628
1627 1629 def nodebookmarks(node):
1628 1630 """Return the list of bookmarks pointing to the specified node."""
1629 1631
1630 1632 def branchmap():
1631 1633 """Return a mapping of branch to heads in that branch."""
1632 1634
1633 1635 def revbranchcache():
1634 1636 pass
1635 1637
1636 1638 def branchtip(branch, ignoremissing=False):
1637 1639 """Return the tip node for a given branch."""
1638 1640
1639 1641 def lookup(key):
1640 1642 """Resolve the node for a revision."""
1641 1643
1642 1644 def lookupbranch(key):
1643 1645 """Look up the branch name of the given revision or branch name."""
1644 1646
1645 1647 def known(nodes):
1646 1648 """Determine whether a series of nodes is known.
1647 1649
1648 1650 Returns a list of bools.
1649 1651 """
1650 1652
1651 1653 def local():
1652 1654 """Whether the repository is local."""
1653 1655 return True
1654 1656
1655 1657 def publishing():
1656 1658 """Whether the repository is a publishing repository."""
1657 1659
1658 1660 def cancopy():
1659 1661 pass
1660 1662
1661 1663 def shared():
1662 1664 """The type of shared repository or None."""
1663 1665
1664 1666 def wjoin(f, *insidef):
1665 1667 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1666 1668
1667 1669 def setparents(p1, p2):
1668 1670 """Set the parent nodes of the working directory."""
1669 1671
1670 1672 def filectx(path, changeid=None, fileid=None):
1671 1673 """Obtain a filectx for the given file revision."""
1672 1674
1673 1675 def getcwd():
1674 1676 """Obtain the current working directory from the dirstate."""
1675 1677
1676 1678 def pathto(f, cwd=None):
1677 1679 """Obtain the relative path to a file."""
1678 1680
1679 1681 def adddatafilter(name, fltr):
1680 1682 pass
1681 1683
1682 1684 def wread(filename):
1683 1685 """Read a file from wvfs, using data filters."""
1684 1686
1685 1687 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1686 1688 """Write data to a file in the wvfs, using data filters."""
1687 1689
1688 1690 def wwritedata(filename, data):
1689 1691 """Resolve data for writing to the wvfs, using data filters."""
1690 1692
1691 1693 def currenttransaction():
1692 1694 """Obtain the current transaction instance or None."""
1693 1695
1694 1696 def transaction(desc, report=None):
1695 1697 """Open a new transaction to write to the repository."""
1696 1698
1697 1699 def undofiles():
1698 1700 """Returns a list of (vfs, path) for files to undo transactions."""
1699 1701
1700 1702 def recover():
1701 1703 """Roll back an interrupted transaction."""
1702 1704
1703 1705 def rollback(dryrun=False, force=False):
1704 1706 """Undo the last transaction.
1705 1707
1706 1708 DANGEROUS.
1707 1709 """
1708 1710
1709 1711 def updatecaches(tr=None, full=False):
1710 1712 """Warm repo caches."""
1711 1713
1712 1714 def invalidatecaches():
1713 1715 """Invalidate cached data due to the repository mutating."""
1714 1716
1715 1717 def invalidatevolatilesets():
1716 1718 pass
1717 1719
1718 1720 def invalidatedirstate():
1719 1721 """Invalidate the dirstate."""
1720 1722
1721 1723 def invalidate(clearfilecache=False):
1722 1724 pass
1723 1725
1724 1726 def invalidateall():
1725 1727 pass
1726 1728
1727 1729 def lock(wait=True):
1728 1730 """Lock the repository store and return a lock instance."""
1729 1731
1730 1732 def wlock(wait=True):
1731 1733 """Lock the non-store parts of the repository."""
1732 1734
1733 1735 def currentwlock():
1734 1736 """Return the wlock if it's held or None."""
1735 1737
1736 1738 def checkcommitpatterns(wctx, match, status, fail):
1737 1739 pass
1738 1740
1739 1741 def commit(
1740 1742 text=b'',
1741 1743 user=None,
1742 1744 date=None,
1743 1745 match=None,
1744 1746 force=False,
1745 1747 editor=False,
1746 1748 extra=None,
1747 1749 ):
1748 1750 """Add a new revision to the repository."""
1749 1751
1750 1752 def commitctx(ctx, error=False, origctx=None):
1751 1753 """Commit a commitctx instance to the repository."""
1752 1754
1753 1755 def destroying():
1754 1756 """Inform the repository that nodes are about to be destroyed."""
1755 1757
1756 1758 def destroyed():
1757 1759 """Inform the repository that nodes have been destroyed."""
1758 1760
1759 1761 def status(
1760 1762 node1=b'.',
1761 1763 node2=None,
1762 1764 match=None,
1763 1765 ignored=False,
1764 1766 clean=False,
1765 1767 unknown=False,
1766 1768 listsubrepos=False,
1767 1769 ):
1768 1770 """Convenience method to call repo[x].status()."""
1769 1771
1770 1772 def addpostdsstatus(ps):
1771 1773 pass
1772 1774
1773 1775 def postdsstatus():
1774 1776 pass
1775 1777
1776 1778 def clearpostdsstatus():
1777 1779 pass
1778 1780
1779 1781 def heads(start=None):
1780 1782 """Obtain list of nodes that are DAG heads."""
1781 1783
1782 1784 def branchheads(branch=None, start=None, closed=False):
1783 1785 pass
1784 1786
1785 1787 def branches(nodes):
1786 1788 pass
1787 1789
1788 1790 def between(pairs):
1789 1791 pass
1790 1792
1791 1793 def checkpush(pushop):
1792 1794 pass
1793 1795
1794 1796 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1795 1797
1796 1798 def pushkey(namespace, key, old, new):
1797 1799 pass
1798 1800
1799 1801 def listkeys(namespace):
1800 1802 pass
1801 1803
1802 1804 def debugwireargs(one, two, three=None, four=None, five=None):
1803 1805 pass
1804 1806
1805 1807 def savecommitmessage(text):
1806 1808 pass
1807 1809
1808 1810
1809 1811 class completelocalrepository(
1810 1812 ilocalrepositorymain, ilocalrepositoryfilestorage
1811 1813 ):
1812 1814 """Complete interface for a local repository."""
1813 1815
1814 1816
1815 1817 class iwireprotocolcommandcacher(interfaceutil.Interface):
1816 1818 """Represents a caching backend for wire protocol commands.
1817 1819
1818 1820 Wire protocol version 2 supports transparent caching of many commands.
1819 1821 To leverage this caching, servers can activate objects that cache
1820 1822 command responses. Objects handle both cache writing and reading.
1821 1823 This interface defines how that response caching mechanism works.
1822 1824
1823 1825 Wire protocol version 2 commands emit a series of objects that are
1824 1826 serialized and sent to the client. The caching layer exists between
1825 1827 the invocation of the command function and the sending of its output
1826 1828 objects to an output layer.
1827 1829
1828 1830 Instances of this interface represent a binding to a cache that
1829 1831 can serve a response (in place of calling a command function) and/or
1830 1832 write responses to a cache for subsequent use.
1831 1833
1832 1834 When a command request arrives, the following happens with regard
1833 1835 to this interface:
1834 1836
1835 1837 1. The server determines whether the command request is cacheable.
1836 1838 2. If it is, an instance of this interface is spawned.
1837 1839 3. The cacher is activated in a context manager (``__enter__`` is called).
1838 1840 4. A cache *key* for that request is derived. This will call the
1839 1841 instance's ``adjustcachekeystate()`` method so the derivation
1840 1842 can be influenced.
1841 1843 5. The cacher is informed of the derived cache key via a call to
1842 1844 ``setcachekey()``.
1843 1845 6. The cacher's ``lookup()`` method is called to test for presence of
1844 1846 the derived key in the cache.
1845 1847 7. If ``lookup()`` returns a hit, that cached result is used in place
1846 1848 of invoking the command function. ``__exit__`` is called and the instance
1847 1849 is discarded.
1848 1850 8. The command function is invoked.
1849 1851 9. ``onobject()`` is called for each object emitted by the command
1850 1852 function.
1851 1853 10. After the final object is seen, ``onfinished()`` is called.
1852 1854 11. ``__exit__`` is called to signal the end of use of the instance.
1853 1855
1854 1856 Cache *key* derivation can be influenced by the instance.
1855 1857
1856 1858 Cache keys are initially derived by a deterministic representation of
1857 1859 the command request. This includes the command name, arguments, protocol
1858 1860 version, etc. This initial key derivation is performed by CBOR-encoding a
1859 1861 data structure and feeding that output into a hasher.
1860 1862
1861 1863 Instances of this interface can influence this initial key derivation
1862 1864 via ``adjustcachekeystate()``.
1863 1865
1864 1866 The instance is informed of the derived cache key via a call to
1865 1867 ``setcachekey()``. The instance must store the key locally so it can
1866 1868 be consulted on subsequent operations that may require it.
1867 1869
1868 1870 When constructed, the instance has access to a callable that can be used
1869 1871 for encoding response objects. This callable receives as its single
1870 1872 argument an object emitted by a command function. It returns an iterable
1871 1873 of bytes chunks representing the encoded object. Unless the cacher is
1872 1874 caching native Python objects in memory or has a way of reconstructing
1873 1875 the original Python objects, implementations typically call this function
1874 1876 to produce bytes from the output objects and then store those bytes in
1875 1877 the cache. When it comes time to re-emit those bytes, they are wrapped
1876 1878 in a ``wireprototypes.encodedresponse`` instance to tell the output
1877 1879 layer that they are pre-encoded.
1878 1880
1879 1881 When receiving the objects emitted by the command function, instances
1880 1882 can choose what to do with those objects. The simplest thing to do is
1881 1883 re-emit the original objects. They will be forwarded to the output
1882 1884 layer and will be processed as if the cacher did not exist.
1883 1885
1884 1886 Implementations could also choose not to emit objects, instead locally
1885 1887 buffering objects or their encoded representation. They could then emit
1886 1888 a single "coalesced" object when ``onfinished()`` is called. In
1887 1889 this way, the implementation would function as a filtering layer of
1888 1890 sorts.
1889 1891
1890 1892 When caching objects, typically the encoded form of the object will
1891 1893 be stored. Keep in mind that if the original object is forwarded to
1892 1894 the output layer, it will need to be encoded there as well. For large
1893 1895 output, this redundant encoding could add overhead. Implementations
1894 1896 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1895 1897 instances to avoid this overhead.
1896 1898 """
1897 1899
1898 1900 def __enter__():
1899 1901 """Marks the instance as active.
1900 1902
1901 1903 Should return self.
1902 1904 """
1903 1905
1904 1906 def __exit__(exctype, excvalue, exctb):
1905 1907 """Called when cacher is no longer used.
1906 1908
1907 1909 This can be used by implementations to perform cleanup actions (e.g.
1908 1910 disconnecting network sockets, aborting a partially cached response).
1909 1911 """
1910 1912
1911 1913 def adjustcachekeystate(state):
1912 1914 """Influences cache key derivation by adjusting state to derive key.
1913 1915
1914 1916 A dict defining the state used to derive the cache key is passed.
1915 1917
1916 1918 Implementations can modify this dict to record additional state
1917 1919 that should influence key derivation.
1918 1920
1919 1921 Implementations are *highly* encouraged to not modify or delete
1920 1922 existing keys.
1921 1923 """
1922 1924
1923 1925 def setcachekey(key):
1924 1926 """Record the derived cache key for this request.
1925 1927
1926 1928 Instances may mutate the key for internal usage as desired, e.g.
1927 1929 instances may wish to prepend the repo name, introduce path
1928 1930 components for filesystem or URL addressing, etc. Behavior is up to
1929 1931 the cache.
1930 1932
1931 1933 Returns a bool indicating if the request is cacheable by this
1932 1934 instance.
1933 1935 """
1934 1936
1935 1937 def lookup():
1936 1938 """Attempt to resolve an entry in the cache.
1937 1939
1938 1940 The instance is instructed to look for the cache key that it was
1939 1941 informed about via the call to ``setcachekey()``.
1940 1942
1941 1943 If there's no cache hit or the cacher doesn't wish to use the cached
1942 1944 entry, ``None`` should be returned.
1943 1945
1944 1946 Otherwise, a dict defining the cached result should be returned. The
1945 1947 dict may have the following keys:
1946 1948
1947 1949 objs
1948 1950 An iterable of objects that should be sent to the client. That
1949 1951 iterable of objects is expected to be what the command function
1950 1952 would return if invoked or an equivalent representation thereof.
1951 1953 """
1952 1954
1953 1955 def onobject(obj):
1954 1956 """Called when a new object is emitted from the command function.
1955 1957
1956 1958 Receives as its argument the object that was emitted from the
1957 1959 command function.
1958 1960
1959 1961 This method returns an iterator of objects to forward to the output
1960 1962 layer. The easiest implementation is a generator that just
1961 1963 ``yield obj``.
1962 1964 """
1963 1965
1964 1966 def onfinished():
1965 1967 """Called after all objects have been emitted from the command function.
1966 1968
1967 1969 Implementations should return an iterator of objects to forward to
1968 1970 the output layer.
1969 1971
1970 1972 This method can be a generator.
1971 1973 """
@@ -1,3068 +1,3070 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 REVIDX_HASCOPIESINFO,
56 57 REVIDX_ISCENSORED,
57 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 59 REVIDX_SIDEDATA,
59 60 )
60 61 from .thirdparty import attr
61 62 from . import (
62 63 ancestor,
63 64 dagop,
64 65 error,
65 66 mdiff,
66 67 policy,
67 68 pycompat,
68 69 templatefilters,
69 70 util,
70 71 )
71 72 from .interfaces import (
72 73 repository,
73 74 util as interfaceutil,
74 75 )
75 76 from .revlogutils import (
76 77 deltas as deltautil,
77 78 flagutil,
78 79 nodemap as nodemaputil,
79 80 sidedata as sidedatautil,
80 81 )
81 82 from .utils import (
82 83 storageutil,
83 84 stringutil,
84 85 )
85 86
86 87 # bare references to all the names to silence pyflakes warnings
87 88 # We need these names available in the module for extensions.
88 89 REVLOGV0
89 90 REVLOGV1
90 91 REVLOGV2
91 92 FLAG_INLINE_DATA
92 93 FLAG_GENERALDELTA
93 94 REVLOG_DEFAULT_FLAGS
94 95 REVLOG_DEFAULT_FORMAT
95 96 REVLOG_DEFAULT_VERSION
96 97 REVLOGV1_FLAGS
97 98 REVLOGV2_FLAGS
98 99 REVIDX_ISCENSORED
99 100 REVIDX_ELLIPSIS
100 101 REVIDX_SIDEDATA
102 REVIDX_HASCOPIESINFO
101 103 REVIDX_EXTSTORED
102 104 REVIDX_DEFAULT_FLAGS
103 105 REVIDX_FLAGS_ORDER
104 106 REVIDX_RAWTEXT_CHANGING_FLAGS
105 107
106 108 parsers = policy.importmod('parsers')
107 109 rustancestor = policy.importrust('ancestor')
108 110 rustdagop = policy.importrust('dagop')
109 111 rustrevlog = policy.importrust('revlog')
110 112
111 113 # Aliased for performance.
112 114 _zlibdecompress = zlib.decompress
113 115
114 116 # max size of revlog with inline data
115 117 _maxinline = 131072
116 118 _chunksize = 1048576
117 119
118 120 # Flag processors for REVIDX_ELLIPSIS.
119 121 def ellipsisreadprocessor(rl, text):
120 122 return text, False, {}
121 123
122 124
123 125 def ellipsiswriteprocessor(rl, text, sidedata):
124 126 return text, False
125 127
126 128
127 129 def ellipsisrawprocessor(rl, text):
128 130 return False
129 131
130 132
131 133 ellipsisprocessor = (
132 134 ellipsisreadprocessor,
133 135 ellipsiswriteprocessor,
134 136 ellipsisrawprocessor,
135 137 )
136 138
137 139
138 140 def getoffset(q):
139 141 return int(q >> 16)
140 142
141 143
142 144 def gettype(q):
143 145 return int(q & 0xFFFF)
144 146
145 147
146 148 def offset_type(offset, type):
147 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
148 150 raise ValueError(b'unknown revlog index flags')
149 151 return int(int(offset) << 16 | type)
150 152
151 153
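# Round-trip sketch (illustrative only) for the helpers above: the offset
# lives in the high bits and the 16-bit flag field in the low bits.
def _demo_offset_type():
    packed = offset_type(4096, 0)
    assert getoffset(packed) == 4096
    assert gettype(packed) == 0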
152 154 def _verify_revision(rl, skipflags, state, node):
153 155 """Verify the integrity of the given revlog ``node`` while providing a hook
154 156 point for extensions to influence the operation."""
155 157 if skipflags:
156 158 state[b'skipread'].add(node)
157 159 else:
158 160 # Side-effect: read content and verify hash.
159 161 rl.revision(node)
160 162
161 163
162 164 @attr.s(slots=True, frozen=True)
163 165 class _revisioninfo(object):
164 166 """Information about a revision that allows building its fulltext
165 167 node: expected hash of the revision
166 168 p1, p2: parent revs of the revision
167 169 btext: built text cache consisting of a one-element list
168 170 cachedelta: (baserev, uncompressed_delta) or None
169 171 flags: flags associated to the revision storage
170 172
171 173 One of btext[0] or cachedelta must be set.
172 174 """
173 175
174 176 node = attr.ib()
175 177 p1 = attr.ib()
176 178 p2 = attr.ib()
177 179 btext = attr.ib()
178 180 textlen = attr.ib()
179 181 cachedelta = attr.ib()
180 182 flags = attr.ib()
181 183
182 184
183 185 @interfaceutil.implementer(repository.irevisiondelta)
184 186 @attr.s(slots=True)
185 187 class revlogrevisiondelta(object):
186 188 node = attr.ib()
187 189 p1node = attr.ib()
188 190 p2node = attr.ib()
189 191 basenode = attr.ib()
190 192 flags = attr.ib()
191 193 baserevisionsize = attr.ib()
192 194 revision = attr.ib()
193 195 delta = attr.ib()
194 196 linknode = attr.ib(default=None)
195 197
196 198
197 199 @interfaceutil.implementer(repository.iverifyproblem)
198 200 @attr.s(frozen=True)
199 201 class revlogproblem(object):
200 202 warning = attr.ib(default=None)
201 203 error = attr.ib(default=None)
202 204 node = attr.ib(default=None)
203 205
204 206
205 207 # index v0:
206 208 # 4 bytes: offset
207 209 # 4 bytes: compressed length
208 210 # 4 bytes: base rev
209 211 # 4 bytes: link rev
210 212 # 20 bytes: parent 1 nodeid
211 213 # 20 bytes: parent 2 nodeid
212 214 # 20 bytes: nodeid
213 215 indexformatv0 = struct.Struct(b">4l20s20s20s")
214 216 indexformatv0_pack = indexformatv0.pack
215 217 indexformatv0_unpack = indexformatv0.unpack
216 218
217 219
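# Size sketch (illustrative only) for the v0 entry layout documented above:
# four big-endian 32-bit ints plus three 20-byte nodeids is 76 bytes. The
# values are placeholders, not real revlog data.
def _demo_indexformatv0():
    raw = indexformatv0_pack(
        0,  # offset
        42,  # compressed length
        0,  # base rev
        0,  # link rev
        b'\x00' * 20,  # parent 1 nodeid
        b'\x00' * 20,  # parent 2 nodeid
        b'\xff' * 20,  # nodeid
    )
    assert len(raw) == indexformatv0.size == 76
    assert indexformatv0_unpack(raw)[1] == 42  # compressed length round-trips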
218 220 class revlogoldindex(list):
219 221 @property
220 222 def nodemap(self):
221 223 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
222 224 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
223 225 return self._nodemap
224 226
225 227 @util.propertycache
226 228 def _nodemap(self):
227 229 nodemap = nodemaputil.NodeMap({nullid: nullrev})
228 230 for r in range(0, len(self)):
229 231 n = self[r][7]
230 232 nodemap[n] = r
231 233 return nodemap
232 234
233 235 def has_node(self, node):
234 236 """return True if the node exist in the index"""
235 237 return node in self._nodemap
236 238
237 239 def rev(self, node):
238 240 """return a revision for a node
239 241
240 242 If the node is unknown, raise a RevlogError"""
241 243 return self._nodemap[node]
242 244
243 245 def get_rev(self, node):
244 246 """return a revision for a node
245 247
246 248 If the node is unknown, return None"""
247 249 return self._nodemap.get(node)
248 250
249 251 def append(self, tup):
250 252 self._nodemap[tup[7]] = len(self)
251 253 super(revlogoldindex, self).append(tup)
252 254
253 255 def __delitem__(self, i):
254 256 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
255 257 raise ValueError(b"deleting slices only supports a:-1 with step 1")
256 258 for r in pycompat.xrange(i.start, len(self)):
257 259 del self._nodemap[self[r][7]]
258 260 super(revlogoldindex, self).__delitem__(i)
259 261
260 262 def clearcaches(self):
261 263 self.__dict__.pop('_nodemap', None)
262 264
263 265 def __getitem__(self, i):
264 266 if i == -1:
265 267 return (0, 0, 0, -1, -1, -1, -1, nullid)
266 268 return list.__getitem__(self, i)
267 269
268 270
269 271 class revlogoldio(object):
270 272 def __init__(self):
271 273 self.size = indexformatv0.size
272 274
273 275 def parseindex(self, data, inline):
274 276 s = self.size
275 277 index = []
276 278 nodemap = nodemaputil.NodeMap({nullid: nullrev})
277 279 n = off = 0
278 280 l = len(data)
279 281 while off + s <= l:
280 282 cur = data[off : off + s]
281 283 off += s
282 284 e = indexformatv0_unpack(cur)
283 285 # transform to revlogv1 format
284 286 e2 = (
285 287 offset_type(e[0], 0),
286 288 e[1],
287 289 -1,
288 290 e[2],
289 291 e[3],
290 292 nodemap.get(e[4], nullrev),
291 293 nodemap.get(e[5], nullrev),
292 294 e[6],
293 295 )
294 296 index.append(e2)
295 297 nodemap[e[6]] = n
296 298 n += 1
297 299
298 300 index = revlogoldindex(index)
299 301 return index, None
300 302
301 303 def packentry(self, entry, node, version, rev):
302 304 if gettype(entry[0]):
303 305 raise error.RevlogError(
304 306 _(b'index entry flags need revlog version 1')
305 307 )
306 308 e2 = (
307 309 getoffset(entry[0]),
308 310 entry[1],
309 311 entry[3],
310 312 entry[4],
311 313 node(entry[5]),
312 314 node(entry[6]),
313 315 entry[7],
314 316 )
315 317 return indexformatv0_pack(*e2)
316 318
317 319
318 320 # index ng:
319 321 # 6 bytes: offset
320 322 # 2 bytes: flags
321 323 # 4 bytes: compressed length
322 324 # 4 bytes: uncompressed length
323 325 # 4 bytes: base rev
324 326 # 4 bytes: link rev
325 327 # 4 bytes: parent 1 rev
326 328 # 4 bytes: parent 2 rev
327 329 # 32 bytes: nodeid
328 330 indexformatng = struct.Struct(b">Qiiiiii20s12x")
329 331 indexformatng_pack = indexformatng.pack
330 332 versionformat = struct.Struct(b">I")
331 333 versionformat_pack = versionformat.pack
332 334 versionformat_unpack = versionformat.unpack
333 335
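# Size sketch (illustrative only) for the ng entry layout above: a 64-bit
# offset/flags field, six 32-bit ints, a 20-byte nodeid and 12 bytes of
# padding is 64 bytes. The values are placeholders, not real revlog data.
def _demo_indexformatng():
    entry = (
        offset_type(0, 0),  # offset and flags
        42,  # compressed length
        100,  # uncompressed length
        0,  # base rev
        7,  # link rev
        5,  # parent 1 rev
        -1,  # parent 2 rev (nullrev)
        b'\x00' * 20,  # nodeid (stored as 32 bytes on disk)
    )
    assert len(indexformatng_pack(*entry)) == indexformatng.size == 64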
334 336 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
335 337 # signed integer)
336 338 _maxentrysize = 0x7FFFFFFF
337 339
338 340
339 341 class revlogio(object):
340 342 def __init__(self):
341 343 self.size = indexformatng.size
342 344
343 345 def parseindex(self, data, inline):
344 346 # call the C implementation to parse the index data
345 347 index, cache = parsers.parse_index2(data, inline)
346 348 return index, cache
347 349
348 350 def packentry(self, entry, node, version, rev):
349 351 p = indexformatng_pack(*entry)
350 352 if rev == 0:
351 353 p = versionformat_pack(version) + p[4:]
352 354 return p
353 355
354 356
355 357 NodemapRevlogIO = None
356 358
357 359 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
358 360
359 361 class NodemapRevlogIO(revlogio):
360 362 """A debug oriented IO class that return a PersistentNodeMapIndexObject
361 363
362 364 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
363 365 """
364 366
365 367 def parseindex(self, data, inline):
366 368 index, cache = parsers.parse_index_devel_nodemap(data, inline)
367 369 return index, cache
368 370
369 371
370 372 class rustrevlogio(revlogio):
371 373 def parseindex(self, data, inline):
372 374 index, cache = super(rustrevlogio, self).parseindex(data, inline)
373 375 return rustrevlog.MixedIndex(index), cache
374 376
375 377
376 378 class revlog(object):
377 379 """
378 380 the underlying revision storage object
379 381
380 382 A revlog consists of two parts, an index and the revision data.
381 383
382 384 The index is a file with a fixed record size containing
383 385 information on each revision, including its nodeid (hash), the
384 386 nodeids of its parents, the position and offset of its data within
385 387 the data file, and the revision it's based on. Finally, each entry
386 388 contains a linkrev entry that can serve as a pointer to external
387 389 data.
388 390
389 391 The revision data itself is a linear collection of data chunks.
390 392 Each chunk represents a revision and is usually represented as a
391 393 delta against the previous chunk. To bound lookup time, runs of
392 394 deltas are limited to about 2 times the length of the original
393 395 version data. This makes retrieval of a version proportional to
394 396 its size, or O(1) relative to the number of revisions.
395 397
396 398 Both pieces of the revlog are written to in an append-only
397 399 fashion, which means we never need to rewrite a file to insert or
398 400 remove data, and can use some simple techniques to avoid the need
399 401 for locking while reading.
400 402
401 403 If checkambig, indexfile is opened with checkambig=True at
402 404 writing, to avoid file stat ambiguity.
403 405
404 406 If mmaplargeindex is True, and an mmapindexthreshold is set, the
405 407 index will be mmapped rather than read if it is larger than the
406 408 configured threshold.
407 409
408 410 If censorable is True, the revlog can have censored revisions.
409 411
410 412 If `upperboundcomp` is not None, this is the expected maximal gain from
411 413 compression for the data content.
412 414 """
413 415
414 416 _flagserrorclass = error.RevlogError
415 417
416 418 def __init__(
417 419 self,
418 420 opener,
419 421 indexfile,
420 422 datafile=None,
421 423 checkambig=False,
422 424 mmaplargeindex=False,
423 425 censorable=False,
424 426 upperboundcomp=None,
425 427 persistentnodemap=False,
426 428 ):
427 429 """
428 430 create a revlog object
429 431
430 432 opener is a function that abstracts the file opening operation
431 433 and can be used to implement COW semantics or the like.
432 434
433 435 """
434 436 self.upperboundcomp = upperboundcomp
435 437 self.indexfile = indexfile
436 438 self.datafile = datafile or (indexfile[:-2] + b".d")
437 439 self.nodemap_file = None
438 440 if persistentnodemap:
439 441 if indexfile.endswith(b'.a'):
440 442 pending_path = indexfile[:-4] + b".n.a"
441 443 if opener.exists(pending_path):
442 444 self.nodemap_file = pending_path
443 445 else:
444 446 self.nodemap_file = indexfile[:-4] + b".n"
445 447 else:
446 448 self.nodemap_file = indexfile[:-2] + b".n"
447 449
448 450 self.opener = opener
449 451 # When True, indexfile is opened with checkambig=True at writing, to
450 452 # avoid file stat ambiguity.
451 453 self._checkambig = checkambig
452 454 self._mmaplargeindex = mmaplargeindex
453 455 self._censorable = censorable
454 456 # 3-tuple of (node, rev, text) for a raw revision.
455 457 self._revisioncache = None
456 458 # Maps rev to chain base rev.
457 459 self._chainbasecache = util.lrucachedict(100)
458 460 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
459 461 self._chunkcache = (0, b'')
460 462 # How much data to read and cache into the raw revlog data cache.
461 463 self._chunkcachesize = 65536
462 464 self._maxchainlen = None
463 465 self._deltabothparents = True
464 466 self.index = None
465 467 self._nodemap_docket = None
466 468 # Mapping of partial identifiers to full nodes.
467 469 self._pcache = {}
468 470 # Mapping of revision integer to full node.
469 471 self._compengine = b'zlib'
470 472 self._compengineopts = {}
471 473 self._maxdeltachainspan = -1
472 474 self._withsparseread = False
473 475 self._sparserevlog = False
474 476 self._srdensitythreshold = 0.50
475 477 self._srmingapsize = 262144
476 478
477 479 # Make copy of flag processors so each revlog instance can support
478 480 # custom flags.
479 481 self._flagprocessors = dict(flagutil.flagprocessors)
480 482
481 483 # 2-tuple of file handles being used for active writing.
482 484 self._writinghandles = None
483 485
484 486 self._loadindex()
485 487
486 488 def _loadindex(self):
487 489 mmapindexthreshold = None
488 490 opts = self.opener.options
489 491
490 492 if b'revlogv2' in opts:
491 493 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
492 494 elif b'revlogv1' in opts:
493 495 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
494 496 if b'generaldelta' in opts:
495 497 newversionflags |= FLAG_GENERALDELTA
496 498 elif b'revlogv0' in self.opener.options:
497 499 newversionflags = REVLOGV0
498 500 else:
499 501 newversionflags = REVLOG_DEFAULT_VERSION
500 502
501 503 if b'chunkcachesize' in opts:
502 504 self._chunkcachesize = opts[b'chunkcachesize']
503 505 if b'maxchainlen' in opts:
504 506 self._maxchainlen = opts[b'maxchainlen']
505 507 if b'deltabothparents' in opts:
506 508 self._deltabothparents = opts[b'deltabothparents']
507 509 self._lazydelta = bool(opts.get(b'lazydelta', True))
508 510 self._lazydeltabase = False
509 511 if self._lazydelta:
510 512 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
511 513 if b'compengine' in opts:
512 514 self._compengine = opts[b'compengine']
513 515 if b'zlib.level' in opts:
514 516 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
515 517 if b'zstd.level' in opts:
516 518 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
517 519 if b'maxdeltachainspan' in opts:
518 520 self._maxdeltachainspan = opts[b'maxdeltachainspan']
519 521 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
520 522 mmapindexthreshold = opts[b'mmapindexthreshold']
521 523 self.hassidedata = bool(opts.get(b'side-data', False))
522 524 if self.hassidedata:
523 525 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
524 526 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
525 527 withsparseread = bool(opts.get(b'with-sparse-read', False))
526 528 # sparse-revlog forces sparse-read
527 529 self._withsparseread = self._sparserevlog or withsparseread
528 530 if b'sparse-read-density-threshold' in opts:
529 531 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
530 532 if b'sparse-read-min-gap-size' in opts:
531 533 self._srmingapsize = opts[b'sparse-read-min-gap-size']
532 534 if opts.get(b'enableellipsis'):
533 535 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
534 536
535 537 # revlog v0 doesn't have flag processors
536 538 for flag, processor in pycompat.iteritems(
537 539 opts.get(b'flagprocessors', {})
538 540 ):
539 541 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
540 542
541 543 if self._chunkcachesize <= 0:
542 544 raise error.RevlogError(
543 545 _(b'revlog chunk cache size %r is not greater than 0')
544 546 % self._chunkcachesize
545 547 )
546 548 elif self._chunkcachesize & (self._chunkcachesize - 1):
547 549 raise error.RevlogError(
548 550 _(b'revlog chunk cache size %r is not a power of 2')
549 551 % self._chunkcachesize
550 552 )
551 553
552 554 indexdata = b''
553 555 self._initempty = True
554 556 try:
555 557 with self._indexfp() as f:
556 558 if (
557 559 mmapindexthreshold is not None
558 560 and self.opener.fstat(f).st_size >= mmapindexthreshold
559 561 ):
560 562 # TODO: should .close() to release resources without
561 563 # relying on Python GC
562 564 indexdata = util.buffer(util.mmapread(f))
563 565 else:
564 566 indexdata = f.read()
565 567 if len(indexdata) > 0:
566 568 versionflags = versionformat_unpack(indexdata[:4])[0]
567 569 self._initempty = False
568 570 else:
569 571 versionflags = newversionflags
570 572 except IOError as inst:
571 573 if inst.errno != errno.ENOENT:
572 574 raise
573 575
574 576 versionflags = newversionflags
575 577
576 578 self.version = versionflags
577 579
578 580 flags = versionflags & ~0xFFFF
579 581 fmt = versionflags & 0xFFFF
580 582
581 583 if fmt == REVLOGV0:
582 584 if flags:
583 585 raise error.RevlogError(
584 586 _(b'unknown flags (%#04x) in version %d revlog %s')
585 587 % (flags >> 16, fmt, self.indexfile)
586 588 )
587 589
588 590 self._inline = False
589 591 self._generaldelta = False
590 592
591 593 elif fmt == REVLOGV1:
592 594 if flags & ~REVLOGV1_FLAGS:
593 595 raise error.RevlogError(
594 596 _(b'unknown flags (%#04x) in version %d revlog %s')
595 597 % (flags >> 16, fmt, self.indexfile)
596 598 )
597 599
598 600 self._inline = versionflags & FLAG_INLINE_DATA
599 601 self._generaldelta = versionflags & FLAG_GENERALDELTA
600 602
601 603 elif fmt == REVLOGV2:
602 604 if flags & ~REVLOGV2_FLAGS:
603 605 raise error.RevlogError(
604 606 _(b'unknown flags (%#04x) in version %d revlog %s')
605 607 % (flags >> 16, fmt, self.indexfile)
606 608 )
607 609
608 610 self._inline = versionflags & FLAG_INLINE_DATA
609 611 # generaldelta implied by version 2 revlogs.
610 612 self._generaldelta = True
611 613
612 614 else:
613 615 raise error.RevlogError(
614 616 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
615 617 )
616 618 # sparse-revlog can't be on without general-delta (issue6056)
617 619 if not self._generaldelta:
618 620 self._sparserevlog = False
619 621
620 622 self._storedeltachains = True
621 623
622 624 devel_nodemap = (
623 625 self.nodemap_file
624 626 and opts.get(b'devel-force-nodemap', False)
625 627 and NodemapRevlogIO is not None
626 628 )
627 629
628 630 use_rust_index = False
629 631 if rustrevlog is not None:
630 632 if self.nodemap_file is not None:
631 633 use_rust_index = True
632 634 else:
633 635 use_rust_index = self.opener.options.get(b'rust.index')
634 636
635 637 self._io = revlogio()
636 638 if self.version == REVLOGV0:
637 639 self._io = revlogoldio()
638 640 elif devel_nodemap:
639 641 self._io = NodemapRevlogIO()
640 642 elif use_rust_index:
641 643 self._io = rustrevlogio()
642 644 try:
643 645 d = self._io.parseindex(indexdata, self._inline)
644 646 index, _chunkcache = d
645 647 use_nodemap = (
646 648 not self._inline
647 649 and self.nodemap_file is not None
648 650 and util.safehasattr(index, 'update_nodemap_data')
649 651 )
650 652 if use_nodemap:
651 653 nodemap_data = nodemaputil.persisted_data(self)
652 654 if nodemap_data is not None:
653 655 docket = nodemap_data[0]
654 656 if (
655 657 len(d[0]) > docket.tip_rev
656 658 and d[0][docket.tip_rev][7] == docket.tip_node
657 659 ):
658 660 # no changelog tampering
659 661 self._nodemap_docket = docket
660 662 index.update_nodemap_data(*nodemap_data)
661 663 except (ValueError, IndexError):
662 664 raise error.RevlogError(
663 665 _(b"index %s is corrupted") % self.indexfile
664 666 )
665 667 self.index, self._chunkcache = d
666 668 if not self._chunkcache:
667 669 self._chunkclear()
668 670 # revnum -> (chain-length, sum-delta-length)
669 671 self._chaininfocache = {}
670 672 # revlog header -> revlog compressor
671 673 self._decompressors = {}
672 674
673 675 @util.propertycache
674 676 def _compressor(self):
675 677 engine = util.compengines[self._compengine]
676 678 return engine.revlogcompressor(self._compengineopts)
677 679
678 680 def _indexfp(self, mode=b'r'):
679 681 """file object for the revlog's index file"""
680 682 args = {'mode': mode}
681 683 if mode != b'r':
682 684 args['checkambig'] = self._checkambig
683 685 if mode == b'w':
684 686 args['atomictemp'] = True
685 687 return self.opener(self.indexfile, **args)
686 688
687 689 def _datafp(self, mode=b'r'):
688 690 """file object for the revlog's data file"""
689 691 return self.opener(self.datafile, mode=mode)
690 692
691 693 @contextlib.contextmanager
692 694 def _datareadfp(self, existingfp=None):
693 695 """file object suitable to read data"""
694 696 # Use explicit file handle, if given.
695 697 if existingfp is not None:
696 698 yield existingfp
697 699
698 700 # Use a file handle being actively used for writes, if available.
699 701 # There is some danger to doing this because reads will seek the
700 702 # file. However, _writeentry() performs a SEEK_END before all writes,
701 703 # so we should be safe.
702 704 elif self._writinghandles:
703 705 if self._inline:
704 706 yield self._writinghandles[0]
705 707 else:
706 708 yield self._writinghandles[1]
707 709
708 710 # Otherwise open a new file handle.
709 711 else:
710 712 if self._inline:
711 713 func = self._indexfp
712 714 else:
713 715 func = self._datafp
714 716 with func() as fp:
715 717 yield fp
716 718
717 719 def tiprev(self):
718 720 return len(self.index) - 1
719 721
720 722 def tip(self):
721 723 return self.node(self.tiprev())
722 724
723 725 def __contains__(self, rev):
724 726 return 0 <= rev < len(self)
725 727
726 728 def __len__(self):
727 729 return len(self.index)
728 730
729 731 def __iter__(self):
730 732 return iter(pycompat.xrange(len(self)))
731 733
732 734 def revs(self, start=0, stop=None):
733 735 """iterate over all rev in this revlog (from start to stop)"""
734 736 return storageutil.iterrevs(len(self), start=start, stop=stop)
735 737
736 738 @property
737 739 def nodemap(self):
738 740 msg = (
739 741 b"revlog.nodemap is deprecated, "
740 742 b"use revlog.index.[has_node|rev|get_rev]"
741 743 )
742 744 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
743 745 return self.index.nodemap
744 746
745 747 @property
746 748 def _nodecache(self):
747 749 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
748 750 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
749 751 return self.index.nodemap
750 752
751 753 def hasnode(self, node):
752 754 try:
753 755 self.rev(node)
754 756 return True
755 757 except KeyError:
756 758 return False
757 759
758 760 def candelta(self, baserev, rev):
759 761 """whether two revisions (baserev, rev) can be delta-ed or not"""
760 762 # Disable delta if either rev requires a content-changing flag
761 763 # processor (ex. LFS). This is because such a flag processor can alter
762 764 # the rawtext content that the delta will be based on, and two clients
763 765 # could have the same revlog node with different flags (i.e. different
764 766 # rawtext contents) and the delta could be incompatible.
765 767 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
766 768 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
767 769 ):
768 770 return False
769 771 return True
770 772
771 773 def update_caches(self, transaction):
772 774 if self.nodemap_file is not None:
773 775 if transaction is None:
774 776 nodemaputil.update_persistent_nodemap(self)
775 777 else:
776 778 nodemaputil.setup_persistent_nodemap(transaction, self)
777 779
778 780 def clearcaches(self):
779 781 self._revisioncache = None
780 782 self._chainbasecache.clear()
781 783 self._chunkcache = (0, b'')
782 784 self._pcache = {}
783 785 self._nodemap_docket = None
784 786 self.index.clearcaches()
785 787 # The Python code is responsible for validating the docket, so we
786 788 # end up having to refresh it here.
787 789 use_nodemap = (
788 790 not self._inline
789 791 and self.nodemap_file is not None
790 792 and util.safehasattr(self.index, 'update_nodemap_data')
791 793 )
792 794 if use_nodemap:
793 795 nodemap_data = nodemaputil.persisted_data(self)
794 796 if nodemap_data is not None:
795 797 self._nodemap_docket = nodemap_data[0]
796 798 self.index.update_nodemap_data(*nodemap_data)
797 799
798 800 def rev(self, node):
799 801 try:
800 802 return self.index.rev(node)
801 803 except TypeError:
802 804 raise
803 805 except error.RevlogError:
804 806 # parsers.c radix tree lookup failed
805 807 if node == wdirid or node in wdirfilenodeids:
806 808 raise error.WdirUnsupported
807 809 raise error.LookupError(node, self.indexfile, _(b'no node'))
808 810
809 811 # Accessors for index entries.
810 812
811 813 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
812 814 # are flags.
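# Illustrative unpacking (a sketch, not part of the original source; the
# names below are local shorthands):
#   entry0 = self.index[rev][0]
#   offset = entry0 >> 16      # upper 48 bits: byte position of the chunk
#   flags = entry0 & 0xFFFF    # lower 16 bits: per-revision flag bits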
813 815 def start(self, rev):
814 816 return int(self.index[rev][0] >> 16)
815 817
816 818 def flags(self, rev):
817 819 return self.index[rev][0] & 0xFFFF
818 820
819 821 def length(self, rev):
820 822 return self.index[rev][1]
821 823
822 824 def rawsize(self, rev):
823 825 """return the length of the uncompressed text for a given revision"""
824 826 l = self.index[rev][2]
825 827 if l >= 0:
826 828 return l
827 829
828 830 t = self.rawdata(rev)
829 831 return len(t)
830 832
831 833 def size(self, rev):
832 834 """length of non-raw text (processed by a "read" flag processor)"""
833 835 # fast path: if no "read" flag processor could change the content,
834 836 # size is rawsize. note: ELLIPSIS is known to not change the content.
835 837 flags = self.flags(rev)
836 838 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
837 839 return self.rawsize(rev)
838 840
839 841 return len(self.revision(rev, raw=False))
840 842
841 843 def chainbase(self, rev):
842 844 base = self._chainbasecache.get(rev)
843 845 if base is not None:
844 846 return base
845 847
846 848 index = self.index
847 849 iterrev = rev
848 850 base = index[iterrev][3]
849 851 while base != iterrev:
850 852 iterrev = base
851 853 base = index[iterrev][3]
852 854
853 855 self._chainbasecache[rev] = base
854 856 return base
855 857
856 858 def linkrev(self, rev):
857 859 return self.index[rev][4]
858 860
859 861 def parentrevs(self, rev):
860 862 try:
861 863 entry = self.index[rev]
862 864 except IndexError:
863 865 if rev == wdirrev:
864 866 raise error.WdirUnsupported
865 867 raise
866 868
867 869 return entry[5], entry[6]
868 870
869 871 # fast parentrevs(rev) where rev isn't filtered
870 872 _uncheckedparentrevs = parentrevs
871 873
872 874 def node(self, rev):
873 875 try:
874 876 return self.index[rev][7]
875 877 except IndexError:
876 878 if rev == wdirrev:
877 879 raise error.WdirUnsupported
878 880 raise
879 881
880 882 # Derived from index values.
881 883
882 884 def end(self, rev):
883 885 return self.start(rev) + self.length(rev)
884 886
885 887 def parents(self, node):
886 888 i = self.index
887 889 d = i[self.rev(node)]
888 890 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
889 891
890 892 def chainlen(self, rev):
891 893 return self._chaininfo(rev)[0]
892 894
893 895 def _chaininfo(self, rev):
894 896 chaininfocache = self._chaininfocache
895 897 if rev in chaininfocache:
896 898 return chaininfocache[rev]
897 899 index = self.index
898 900 generaldelta = self._generaldelta
899 901 iterrev = rev
900 902 e = index[iterrev]
901 903 clen = 0
902 904 compresseddeltalen = 0
903 905 while iterrev != e[3]:
904 906 clen += 1
905 907 compresseddeltalen += e[1]
906 908 if generaldelta:
907 909 iterrev = e[3]
908 910 else:
909 911 iterrev -= 1
910 912 if iterrev in chaininfocache:
911 913 t = chaininfocache[iterrev]
912 914 clen += t[0]
913 915 compresseddeltalen += t[1]
914 916 break
915 917 e = index[iterrev]
916 918 else:
917 919 # Add text length of base since decompressing that also takes
918 920 # work. For cache hits the length is already included.
919 921 compresseddeltalen += e[1]
920 922 r = (clen, compresseddeltalen)
921 923 chaininfocache[rev] = r
922 924 return r
923 925
924 926 def _deltachain(self, rev, stoprev=None):
925 927 """Obtain the delta chain for a revision.
926 928
927 929 ``stoprev`` specifies a revision to stop at. If not specified, we
928 930 stop at the base of the chain.
929 931
930 932 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
931 933 revs in ascending order and ``stopped`` is a bool indicating whether
932 934 ``stoprev`` was hit.
933 935 """
934 936 # Try C implementation.
935 937 try:
936 938 return self.index.deltachain(rev, stoprev, self._generaldelta)
937 939 except AttributeError:
938 940 pass
939 941
940 942 chain = []
941 943
942 944 # Alias to prevent attribute lookup in tight loop.
943 945 index = self.index
944 946 generaldelta = self._generaldelta
945 947
946 948 iterrev = rev
947 949 e = index[iterrev]
948 950 while iterrev != e[3] and iterrev != stoprev:
949 951 chain.append(iterrev)
950 952 if generaldelta:
951 953 iterrev = e[3]
952 954 else:
953 955 iterrev -= 1
954 956 e = index[iterrev]
955 957
956 958 if iterrev == stoprev:
957 959 stopped = True
958 960 else:
959 961 chain.append(iterrev)
960 962 stopped = False
961 963
962 964 chain.reverse()
963 965 return chain, stopped
964 966
965 967 def ancestors(self, revs, stoprev=0, inclusive=False):
966 968 """Generate the ancestors of 'revs' in reverse revision order.
967 969 Does not generate revs lower than stoprev.
968 970
969 971 See the documentation for ancestor.lazyancestors for more details."""
970 972
971 973 # first, make sure start revisions aren't filtered
972 974 revs = list(revs)
973 975 checkrev = self.node
974 976 for r in revs:
975 977 checkrev(r)
976 978 # and we're sure ancestors aren't filtered as well
977 979
978 980 if rustancestor is not None:
979 981 lazyancestors = rustancestor.LazyAncestors
980 982 arg = self.index
981 983 else:
982 984 lazyancestors = ancestor.lazyancestors
983 985 arg = self._uncheckedparentrevs
984 986 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
985 987
986 988 def descendants(self, revs):
987 989 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
988 990
989 991 def findcommonmissing(self, common=None, heads=None):
990 992 """Return a tuple of the ancestors of common and the ancestors of heads
991 993 that are not ancestors of common. In revset terminology, we return the
992 994 tuple:
993 995
994 996 ::common, (::heads) - (::common)
995 997
996 998 The list is sorted by revision number, meaning it is
997 999 topologically sorted.
998 1000
999 1001 'heads' and 'common' are both lists of node IDs. If heads is
1000 1002 not supplied, uses all of the revlog's heads. If common is not
1001 1003 supplied, uses nullid."""
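# Example (hypothetical linear history 0 <- 1 <- 2): with
# common=[node(0)] and heads=[node(2)], the second element of the result
# is [node(1), node(2)] -- the missing changesets in topological order.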
1002 1004 if common is None:
1003 1005 common = [nullid]
1004 1006 if heads is None:
1005 1007 heads = self.heads()
1006 1008
1007 1009 common = [self.rev(n) for n in common]
1008 1010 heads = [self.rev(n) for n in heads]
1009 1011
1010 1012 # we want the ancestors, but inclusive
1011 1013 class lazyset(object):
1012 1014 def __init__(self, lazyvalues):
1013 1015 self.addedvalues = set()
1014 1016 self.lazyvalues = lazyvalues
1015 1017
1016 1018 def __contains__(self, value):
1017 1019 return value in self.addedvalues or value in self.lazyvalues
1018 1020
1019 1021 def __iter__(self):
1020 1022 added = self.addedvalues
1021 1023 for r in added:
1022 1024 yield r
1023 1025 for r in self.lazyvalues:
1024 1026 if r not in added:
1025 1027 yield r
1026 1028
1027 1029 def add(self, value):
1028 1030 self.addedvalues.add(value)
1029 1031
1030 1032 def update(self, values):
1031 1033 self.addedvalues.update(values)
1032 1034
1033 1035 has = lazyset(self.ancestors(common))
1034 1036 has.add(nullrev)
1035 1037 has.update(common)
1036 1038
1037 1039 # take all ancestors from heads that aren't in has
1038 1040 missing = set()
1039 1041 visit = collections.deque(r for r in heads if r not in has)
1040 1042 while visit:
1041 1043 r = visit.popleft()
1042 1044 if r in missing:
1043 1045 continue
1044 1046 else:
1045 1047 missing.add(r)
1046 1048 for p in self.parentrevs(r):
1047 1049 if p not in has:
1048 1050 visit.append(p)
1049 1051 missing = list(missing)
1050 1052 missing.sort()
1051 1053 return has, [self.node(miss) for miss in missing]
1052 1054
1053 1055 def incrementalmissingrevs(self, common=None):
1054 1056 """Return an object that can be used to incrementally compute the
1055 1057 revision numbers of the ancestors of arbitrary sets that are not
1056 1058 ancestors of common. This is an ancestor.incrementalmissingancestors
1057 1059 object.
1058 1060
1059 1061 'common' is a list of revision numbers. If common is not supplied, uses
1060 1062 nullrev.
1061 1063 """
1062 1064 if common is None:
1063 1065 common = [nullrev]
1064 1066
1065 1067 if rustancestor is not None:
1066 1068 return rustancestor.MissingAncestors(self.index, common)
1067 1069 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1068 1070
1069 1071 def findmissingrevs(self, common=None, heads=None):
1070 1072 """Return the revision numbers of the ancestors of heads that
1071 1073 are not ancestors of common.
1072 1074
1073 1075 More specifically, return a list of revision numbers corresponding to
1074 1076 nodes N such that every N satisfies the following constraints:
1075 1077
1076 1078 1. N is an ancestor of some node in 'heads'
1077 1079 2. N is not an ancestor of any node in 'common'
1078 1080
1079 1081 The list is sorted by revision number, meaning it is
1080 1082 topologically sorted.
1081 1083
1082 1084 'heads' and 'common' are both lists of revision numbers. If heads is
1083 1085 not supplied, uses all of the revlog's heads. If common is not
1084 1086 supplied, uses nullid."""
1085 1087 if common is None:
1086 1088 common = [nullrev]
1087 1089 if heads is None:
1088 1090 heads = self.headrevs()
1089 1091
1090 1092 inc = self.incrementalmissingrevs(common=common)
1091 1093 return inc.missingancestors(heads)
1092 1094
1093 1095 def findmissing(self, common=None, heads=None):
1094 1096 """Return the ancestors of heads that are not ancestors of common.
1095 1097
1096 1098 More specifically, return a list of nodes N such that every N
1097 1099 satisfies the following constraints:
1098 1100
1099 1101 1. N is an ancestor of some node in 'heads'
1100 1102 2. N is not an ancestor of any node in 'common'
1101 1103
1102 1104 The list is sorted by revision number, meaning it is
1103 1105 topologically sorted.
1104 1106
1105 1107 'heads' and 'common' are both lists of node IDs. If heads is
1106 1108 not supplied, uses all of the revlog's heads. If common is not
1107 1109 supplied, uses nullid."""
1108 1110 if common is None:
1109 1111 common = [nullid]
1110 1112 if heads is None:
1111 1113 heads = self.heads()
1112 1114
1113 1115 common = [self.rev(n) for n in common]
1114 1116 heads = [self.rev(n) for n in heads]
1115 1117
1116 1118 inc = self.incrementalmissingrevs(common=common)
1117 1119 return [self.node(r) for r in inc.missingancestors(heads)]
1118 1120
1119 1121 def nodesbetween(self, roots=None, heads=None):
1120 1122 """Return a topological path from 'roots' to 'heads'.
1121 1123
1122 1124 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1123 1125 topologically sorted list of all nodes N that satisfy both of
1124 1126 these constraints:
1125 1127
1126 1128 1. N is a descendant of some node in 'roots'
1127 1129 2. N is an ancestor of some node in 'heads'
1128 1130
1129 1131 Every node is considered to be both a descendant and an ancestor
1130 1132 of itself, so every reachable node in 'roots' and 'heads' will be
1131 1133 included in 'nodes'.
1132 1134
1133 1135 'outroots' is the list of reachable nodes in 'roots', i.e., the
1134 1136 subset of 'roots' that is returned in 'nodes'. Likewise,
1135 1137 'outheads' is the subset of 'heads' that is also in 'nodes'.
1136 1138
1137 1139 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1138 1140 unspecified, uses nullid as the only root. If 'heads' is
1139 1141 unspecified, uses list of all of the revlog's heads."""
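# Example (hypothetical linear history 0 <- 1 <- 2 <- 3):
#   nodesbetween(roots=[node(1)], heads=[node(2)])
# returns ([node(1), node(2)], [node(1)], [node(2)]).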
1140 1142 nonodes = ([], [], [])
1141 1143 if roots is not None:
1142 1144 roots = list(roots)
1143 1145 if not roots:
1144 1146 return nonodes
1145 1147 lowestrev = min([self.rev(n) for n in roots])
1146 1148 else:
1147 1149 roots = [nullid] # Everybody's a descendant of nullid
1148 1150 lowestrev = nullrev
1149 1151 if (lowestrev == nullrev) and (heads is None):
1150 1152 # We want _all_ the nodes!
1151 1153 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1152 1154 if heads is None:
1153 1155 # All nodes are ancestors, so the latest ancestor is the last
1154 1156 # node.
1155 1157 highestrev = len(self) - 1
1156 1158 # Set ancestors to None to signal that every node is an ancestor.
1157 1159 ancestors = None
1158 1160 # Set heads to an empty dictionary for later discovery of heads
1159 1161 heads = {}
1160 1162 else:
1161 1163 heads = list(heads)
1162 1164 if not heads:
1163 1165 return nonodes
1164 1166 ancestors = set()
1165 1167 # Turn heads into a dictionary so we can remove 'fake' heads.
1166 1168 # Also, later we will be using it to filter out the heads we can't
1167 1169 # find from roots.
1168 1170 heads = dict.fromkeys(heads, False)
1169 1171 # Start at the top and keep marking parents until we're done.
1170 1172 nodestotag = set(heads)
1171 1173 # Remember where the top was so we can use it as a limit later.
1172 1174 highestrev = max([self.rev(n) for n in nodestotag])
1173 1175 while nodestotag:
1174 1176 # grab a node to tag
1175 1177 n = nodestotag.pop()
1176 1178 # Never tag nullid
1177 1179 if n == nullid:
1178 1180 continue
1179 1181 # A node's revision number represents its place in a
1180 1182 # topologically sorted list of nodes.
1181 1183 r = self.rev(n)
1182 1184 if r >= lowestrev:
1183 1185 if n not in ancestors:
1184 1186 # If we are possibly a descendant of one of the roots
1185 1187 # and we haven't already been marked as an ancestor
1186 1188 ancestors.add(n) # Mark as ancestor
1187 1189 # Add non-nullid parents to list of nodes to tag.
1188 1190 nodestotag.update(
1189 1191 [p for p in self.parents(n) if p != nullid]
1190 1192 )
1191 1193 elif n in heads: # We've seen it before, is it a fake head?
1192 1194 # So it is, real heads should not be the ancestors of
1193 1195 # any other heads.
1194 1196 heads.pop(n)
1195 1197 if not ancestors:
1196 1198 return nonodes
1197 1199 # Now that we have our set of ancestors, we want to remove any
1198 1200 # roots that are not ancestors.
1199 1201
1200 1202 # If one of the roots was nullid, everything is included anyway.
1201 1203 if lowestrev > nullrev:
1202 1204 # But, since we weren't, let's recompute the lowest rev to not
1203 1205 # include roots that aren't ancestors.
1204 1206
1205 1207 # Filter out roots that aren't ancestors of heads
1206 1208 roots = [root for root in roots if root in ancestors]
1207 1209 # Recompute the lowest revision
1208 1210 if roots:
1209 1211 lowestrev = min([self.rev(root) for root in roots])
1210 1212 else:
1211 1213 # No more roots? Return empty list
1212 1214 return nonodes
1213 1215 else:
1214 1216 # We are descending from nullid, and don't need to care about
1215 1217 # any other roots.
1216 1218 lowestrev = nullrev
1217 1219 roots = [nullid]
1218 1220 # Transform our roots list into a set.
1219 1221 descendants = set(roots)
1220 1222 # Also, keep the original roots so we can filter out roots that aren't
1221 1223 # 'real' roots (i.e. are descended from other roots).
1222 1224 roots = descendants.copy()
1223 1225 # Our topologically sorted list of output nodes.
1224 1226 orderedout = []
1225 1227 # Don't start at nullid since we don't want nullid in our output list,
1226 1228 # and if nullid shows up in descendants, empty parents will look like
1227 1229 # they're descendants.
1228 1230 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1229 1231 n = self.node(r)
1230 1232 isdescendant = False
1231 1233 if lowestrev == nullrev: # Everybody is a descendant of nullid
1232 1234 isdescendant = True
1233 1235 elif n in descendants:
1234 1236 # n is already a descendant
1235 1237 isdescendant = True
1236 1238 # This check only needs to be done here because all the roots
1237 1239 # will start being marked as descendants before the loop.
1238 1240 if n in roots:
1239 1241 # If n was a root, check if it's a 'real' root.
1240 1242 p = tuple(self.parents(n))
1241 1243 # If any of its parents are descendants, it's not a root.
1242 1244 if (p[0] in descendants) or (p[1] in descendants):
1243 1245 roots.remove(n)
1244 1246 else:
1245 1247 p = tuple(self.parents(n))
1246 1248 # A node is a descendant if either of its parents is a
1247 1249 # descendant. (We seeded the descendants set with the roots
1248 1250 # up there, remember?)
1249 1251 if (p[0] in descendants) or (p[1] in descendants):
1250 1252 descendants.add(n)
1251 1253 isdescendant = True
1252 1254 if isdescendant and ((ancestors is None) or (n in ancestors)):
1253 1255 # Only include nodes that are both descendants and ancestors.
1254 1256 orderedout.append(n)
1255 1257 if (ancestors is not None) and (n in heads):
1256 1258 # We're trying to figure out which heads are reachable
1257 1259 # from roots.
1258 1260 # Mark this head as having been reached
1259 1261 heads[n] = True
1260 1262 elif ancestors is None:
1261 1263 # Otherwise, we're trying to discover the heads.
1262 1264 # Assume this is a head because if it isn't, the next step
1263 1265 # will eventually remove it.
1264 1266 heads[n] = True
1265 1267 # But, obviously its parents aren't.
1266 1268 for p in self.parents(n):
1267 1269 heads.pop(p, None)
1268 1270 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1269 1271 roots = list(roots)
1270 1272 assert orderedout
1271 1273 assert roots
1272 1274 assert heads
1273 1275 return (orderedout, roots, heads)
1274 1276
1275 1277 def headrevs(self, revs=None):
1276 1278 if revs is None:
1277 1279 try:
1278 1280 return self.index.headrevs()
1279 1281 except AttributeError:
1280 1282 return self._headrevs()
1281 1283 if rustdagop is not None:
1282 1284 return rustdagop.headrevs(self.index, revs)
1283 1285 return dagop.headrevs(revs, self._uncheckedparentrevs)
1284 1286
1285 1287 def computephases(self, roots):
1286 1288 return self.index.computephasesmapsets(roots)
1287 1289
1288 1290 def _headrevs(self):
1289 1291 count = len(self)
1290 1292 if not count:
1291 1293 return [nullrev]
1292 1294 # we won't iter over filtered revs so nobody is a head at start
1293 1295 ishead = [0] * (count + 1)
1294 1296 index = self.index
1295 1297 for r in self:
1296 1298 ishead[r] = 1 # I may be a head
1297 1299 e = index[r]
1298 1300 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1299 1301 return [r for r, val in enumerate(ishead) if val]
1300 1302
1301 1303 def heads(self, start=None, stop=None):
1302 1304 """return the list of all nodes that have no children
1303 1305
1304 1306 if start is specified, only heads that are descendants of
1305 1307 start will be returned
1306 1308 if stop is specified, it will consider all the revs from stop
1307 1309 as if they had no children
1308 1310 """
1309 1311 if start is None and stop is None:
1310 1312 if not len(self):
1311 1313 return [nullid]
1312 1314 return [self.node(r) for r in self.headrevs()]
1313 1315
1314 1316 if start is None:
1315 1317 start = nullrev
1316 1318 else:
1317 1319 start = self.rev(start)
1318 1320
1319 1321 stoprevs = {self.rev(n) for n in stop or []}
1320 1322
1321 1323 revs = dagop.headrevssubset(
1322 1324 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1323 1325 )
1324 1326
1325 1327 return [self.node(rev) for rev in revs]
1326 1328
1327 1329 def children(self, node):
1328 1330 """find the children of a given node"""
1329 1331 c = []
1330 1332 p = self.rev(node)
1331 1333 for r in self.revs(start=p + 1):
1332 1334 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1333 1335 if prevs:
1334 1336 for pr in prevs:
1335 1337 if pr == p:
1336 1338 c.append(self.node(r))
1337 1339 elif p == nullrev:
1338 1340 c.append(self.node(r))
1339 1341 return c
1340 1342
1341 1343 def commonancestorsheads(self, a, b):
1342 1344 """calculate all the heads of the common ancestors of nodes a and b"""
1343 1345 a, b = self.rev(a), self.rev(b)
1344 1346 ancs = self._commonancestorsheads(a, b)
1345 1347 return pycompat.maplist(self.node, ancs)
1346 1348
1347 1349 def _commonancestorsheads(self, *revs):
1348 1350 """calculate all the heads of the common ancestors of revs"""
1349 1351 try:
1350 1352 ancs = self.index.commonancestorsheads(*revs)
1351 1353 except (AttributeError, OverflowError): # C implementation failed
1352 1354 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1353 1355 return ancs
1354 1356
1355 1357 def isancestor(self, a, b):
1356 1358 """return True if node a is an ancestor of node b
1357 1359
1358 1360 A revision is considered an ancestor of itself."""
1359 1361 a, b = self.rev(a), self.rev(b)
1360 1362 return self.isancestorrev(a, b)
1361 1363
1362 1364 def isancestorrev(self, a, b):
1363 1365 """return True if revision a is an ancestor of revision b
1364 1366
1365 1367 A revision is considered an ancestor of itself.
1366 1368
1367 1369 The implementation of this is trivial but the use of
1368 1370 reachableroots is not."""
1369 1371 if a == nullrev:
1370 1372 return True
1371 1373 elif a == b:
1372 1374 return True
1373 1375 elif a > b:
1374 1376 return False
1375 1377 return bool(self.reachableroots(a, [b], [a], includepath=False))
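# Reading the call above (a sketch): reachableroots(a, [b], [a]) returns
# the subset of roots=[a] reachable from heads=[b] without descending
# below minroot=a, so the result is non-empty exactly when a is an
# ancestor of b.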
1376 1378
1377 1379 def reachableroots(self, minroot, heads, roots, includepath=False):
1378 1380 """return (heads(::(<roots> and <roots>::<heads>)))
1379 1381
1380 1382 If includepath is True, return (<roots>::<heads>)."""
1381 1383 try:
1382 1384 return self.index.reachableroots2(
1383 1385 minroot, heads, roots, includepath
1384 1386 )
1385 1387 except AttributeError:
1386 1388 return dagop._reachablerootspure(
1387 1389 self.parentrevs, minroot, roots, heads, includepath
1388 1390 )
1389 1391
1390 1392 def ancestor(self, a, b):
1391 1393 """calculate the "best" common ancestor of nodes a and b"""
1392 1394
1393 1395 a, b = self.rev(a), self.rev(b)
1394 1396 try:
1395 1397 ancs = self.index.ancestors(a, b)
1396 1398 except (AttributeError, OverflowError):
1397 1399 ancs = ancestor.ancestors(self.parentrevs, a, b)
1398 1400 if ancs:
1399 1401 # choose a consistent winner when there's a tie
1400 1402 return min(map(self.node, ancs))
1401 1403 return nullid
1402 1404
1403 1405 def _match(self, id):
1404 1406 if isinstance(id, int):
1405 1407 # rev
1406 1408 return self.node(id)
1407 1409 if len(id) == 20:
1408 1410 # possibly a binary node
1409 1411 # odds of a binary node being all hex in ASCII are 1 in 10**25
1410 1412 try:
1411 1413 node = id
1412 1414 self.rev(node) # quick search the index
1413 1415 return node
1414 1416 except error.LookupError:
1415 1417 pass # may be partial hex id
1416 1418 try:
1417 1419 # str(rev)
1418 1420 rev = int(id)
1419 1421 if b"%d" % rev != id:
1420 1422 raise ValueError
1421 1423 if rev < 0:
1422 1424 rev = len(self) + rev
1423 1425 if rev < 0 or rev >= len(self):
1424 1426 raise ValueError
1425 1427 return self.node(rev)
1426 1428 except (ValueError, OverflowError):
1427 1429 pass
1428 1430 if len(id) == 40:
1429 1431 try:
1430 1432 # a full hex nodeid?
1431 1433 node = bin(id)
1432 1434 self.rev(node)
1433 1435 return node
1434 1436 except (TypeError, error.LookupError):
1435 1437 pass
1436 1438
1437 1439 def _partialmatch(self, id):
1438 1440 # we don't care about wdirfilenodeids as they should always be full hashes
1439 1441 maybewdir = wdirhex.startswith(id)
1440 1442 try:
1441 1443 partial = self.index.partialmatch(id)
1442 1444 if partial and self.hasnode(partial):
1443 1445 if maybewdir:
1444 1446 # single 'ff...' match in radix tree, ambiguous with wdir
1445 1447 raise error.RevlogError
1446 1448 return partial
1447 1449 if maybewdir:
1448 1450 # no 'ff...' match in radix tree, wdir identified
1449 1451 raise error.WdirUnsupported
1450 1452 return None
1451 1453 except error.RevlogError:
1452 1454 # parsers.c radix tree lookup gave multiple matches
1453 1455 # fast path: for unfiltered changelog, radix tree is accurate
1454 1456 if not getattr(self, 'filteredrevs', None):
1455 1457 raise error.AmbiguousPrefixLookupError(
1456 1458 id, self.indexfile, _(b'ambiguous identifier')
1457 1459 )
1458 1460 # fall through to slow path that filters hidden revisions
1459 1461 except (AttributeError, ValueError):
1460 1462 # we are pure python, or key was too short to search radix tree
1461 1463 pass
1462 1464
1463 1465 if id in self._pcache:
1464 1466 return self._pcache[id]
1465 1467
1466 1468 if len(id) <= 40:
1467 1469 try:
1468 1470 # hex(node)[:...]
1469 1471 l = len(id) // 2 # grab an even number of digits
1470 1472 prefix = bin(id[: l * 2])
1471 1473 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1472 1474 nl = [
1473 1475 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1474 1476 ]
1475 1477 if nullhex.startswith(id):
1476 1478 nl.append(nullid)
1477 1479 if len(nl) > 0:
1478 1480 if len(nl) == 1 and not maybewdir:
1479 1481 self._pcache[id] = nl[0]
1480 1482 return nl[0]
1481 1483 raise error.AmbiguousPrefixLookupError(
1482 1484 id, self.indexfile, _(b'ambiguous identifier')
1483 1485 )
1484 1486 if maybewdir:
1485 1487 raise error.WdirUnsupported
1486 1488 return None
1487 1489 except TypeError:
1488 1490 pass
1489 1491
1490 1492 def lookup(self, id):
1491 1493 """locate a node based on:
1492 1494 - revision number or str(revision number)
1493 1495 - nodeid or subset of hex nodeid
1494 1496 """
1495 1497 n = self._match(id)
1496 1498 if n is not None:
1497 1499 return n
1498 1500 n = self._partialmatch(id)
1499 1501 if n:
1500 1502 return n
1501 1503
1502 1504 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1503 1505
1504 1506 def shortest(self, node, minlength=1):
1505 1507 """Find the shortest unambiguous prefix that matches node."""
1506 1508
1507 1509 def isvalid(prefix):
1508 1510 try:
1509 1511 matchednode = self._partialmatch(prefix)
1510 1512 except error.AmbiguousPrefixLookupError:
1511 1513 return False
1512 1514 except error.WdirUnsupported:
1513 1515 # single 'ff...' match
1514 1516 return True
1515 1517 if matchednode is None:
1516 1518 raise error.LookupError(node, self.indexfile, _(b'no node'))
1517 1519 return True
1518 1520
1519 1521 def maybewdir(prefix):
1520 1522 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1521 1523
1522 1524 hexnode = hex(node)
1523 1525
1524 1526 def disambiguate(hexnode, minlength):
1525 1527 """Disambiguate against wdirid."""
1526 1528 for length in range(minlength, len(hexnode) + 1):
1527 1529 prefix = hexnode[:length]
1528 1530 if not maybewdir(prefix):
1529 1531 return prefix
1530 1532
1531 1533 if not getattr(self, 'filteredrevs', None):
1532 1534 try:
1533 1535 length = max(self.index.shortest(node), minlength)
1534 1536 return disambiguate(hexnode, length)
1535 1537 except error.RevlogError:
1536 1538 if node != wdirid:
1537 1539 raise error.LookupError(node, self.indexfile, _(b'no node'))
1538 1540 except AttributeError:
1539 1541 # Fall through to pure code
1540 1542 pass
1541 1543
1542 1544 if node == wdirid:
1543 1545 for length in range(minlength, len(hexnode) + 1):
1544 1546 prefix = hexnode[:length]
1545 1547 if isvalid(prefix):
1546 1548 return prefix
1547 1549
1548 1550 for length in range(minlength, len(hexnode) + 1):
1549 1551 prefix = hexnode[:length]
1550 1552 if isvalid(prefix):
1551 1553 return disambiguate(hexnode, length)
1552 1554
1553 1555 def cmp(self, node, text):
1554 1556 """compare text with a given file revision
1555 1557
1556 1558 returns True if text is different than what is stored.
1557 1559 """
1558 1560 p1, p2 = self.parents(node)
1559 1561 return storageutil.hashrevisionsha1(text, p1, p2) != node
1560 1562
1561 1563 def _cachesegment(self, offset, data):
1562 1564 """Add a segment to the revlog cache.
1563 1565
1564 1566 Accepts an absolute offset and the data that is at that location.
1565 1567 """
1566 1568 o, d = self._chunkcache
1567 1569 # try to add to existing cache
1568 1570 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1569 1571 self._chunkcache = o, d + data
1570 1572 else:
1571 1573 self._chunkcache = offset, data
1572 1574
1573 1575 def _readsegment(self, offset, length, df=None):
1574 1576 """Load a segment of raw data from the revlog.
1575 1577
1576 1578 Accepts an absolute offset, length to read, and an optional existing
1577 1579 file handle to read from.
1578 1580
1579 1581 If an existing file handle is passed, it will be seeked and the
1580 1582 original seek position will NOT be restored.
1581 1583
1582 1584 Returns a str or buffer of raw byte data.
1583 1585
1584 1586 Raises if the requested number of bytes could not be read.
1585 1587 """
1586 1588 # Cache data both forward and backward around the requested
1587 1589 # data, in a fixed size window. This helps speed up operations
1588 1590 # involving reading the revlog backwards.
1589 1591 cachesize = self._chunkcachesize
1590 1592 realoffset = offset & ~(cachesize - 1)
1591 1593 reallength = (
1592 1594 (offset + length + cachesize) & ~(cachesize - 1)
1593 1595 ) - realoffset
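# Worked example (hypothetical numbers): with cachesize = 65536,
# offset = 70000 and length = 100, realoffset = 65536 and
# reallength = 65536, i.e. one aligned window [65536, 131072) that
# covers the requested [70000, 70100).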
1594 1596 with self._datareadfp(df) as df:
1595 1597 df.seek(realoffset)
1596 1598 d = df.read(reallength)
1597 1599
1598 1600 self._cachesegment(realoffset, d)
1599 1601 if offset != realoffset or reallength != length:
1600 1602 startoffset = offset - realoffset
1601 1603 if len(d) - startoffset < length:
1602 1604 raise error.RevlogError(
1603 1605 _(
1604 1606 b'partial read of revlog %s; expected %d bytes from '
1605 1607 b'offset %d, got %d'
1606 1608 )
1607 1609 % (
1608 1610 self.indexfile if self._inline else self.datafile,
1609 1611 length,
1610 1612 realoffset,
1611 1613 len(d) - startoffset,
1612 1614 )
1613 1615 )
1614 1616
1615 1617 return util.buffer(d, startoffset, length)
1616 1618
1617 1619 if len(d) < length:
1618 1620 raise error.RevlogError(
1619 1621 _(
1620 1622 b'partial read of revlog %s; expected %d bytes from offset '
1621 1623 b'%d, got %d'
1622 1624 )
1623 1625 % (
1624 1626 self.indexfile if self._inline else self.datafile,
1625 1627 length,
1626 1628 offset,
1627 1629 len(d),
1628 1630 )
1629 1631 )
1630 1632
1631 1633 return d
1632 1634
1633 1635 def _getsegment(self, offset, length, df=None):
1634 1636 """Obtain a segment of raw data from the revlog.
1635 1637
1636 1638 Accepts an absolute offset, length of bytes to obtain, and an
1637 1639 optional file handle to the already-opened revlog. If the file
1638 1640 handle is used, its original seek position will not be preserved.
1639 1641
1640 1642 Requests for data may be returned from a cache.
1641 1643
1642 1644 Returns a str or a buffer instance of raw byte data.
1643 1645 """
1644 1646 o, d = self._chunkcache
1645 1647 l = len(d)
1646 1648
1647 1649 # is it in the cache?
1648 1650 cachestart = offset - o
1649 1651 cacheend = cachestart + length
1650 1652 if cachestart >= 0 and cacheend <= l:
1651 1653 if cachestart == 0 and cacheend == l:
1652 1654 return d # avoid a copy
1653 1655 return util.buffer(d, cachestart, cacheend - cachestart)
1654 1656
1655 1657 return self._readsegment(offset, length, df=df)
1656 1658
1657 1659 def _getsegmentforrevs(self, startrev, endrev, df=None):
1658 1660 """Obtain a segment of raw data corresponding to a range of revisions.
1659 1661
1660 1662 Accepts the start and end revisions and an optional already-open
1661 1663 file handle to be used for reading. If the file handle is used, its
1662 1664 seek position will not be preserved.
1663 1665
1664 1666 Requests for data may be satisfied by a cache.
1665 1667
1666 1668 Returns a 2-tuple of (offset, data) for the requested range of
1667 1669 revisions. Offset is the integer offset from the beginning of the
1668 1670 revlog and data is a str or buffer of the raw byte data.
1669 1671
1670 1672 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1671 1673 to determine where each revision's data begins and ends.
1672 1674 """
1673 1675 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1674 1676 # (functions are expensive).
1675 1677 index = self.index
1676 1678 istart = index[startrev]
1677 1679 start = int(istart[0] >> 16)
1678 1680 if startrev == endrev:
1679 1681 end = start + istart[1]
1680 1682 else:
1681 1683 iend = index[endrev]
1682 1684 end = int(iend[0] >> 16) + iend[1]
1683 1685
1684 1686 if self._inline:
1685 1687 start += (startrev + 1) * self._io.size
1686 1688 end += (endrev + 1) * self._io.size
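# (sketch) In an inline revlog the index entries and data chunks are
# interleaved in a single file, so the data of revision r is pushed
# forward by (r + 1) index-entry sizes; the adjustment above accounts
# for that.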
1687 1689 length = end - start
1688 1690
1689 1691 return start, self._getsegment(start, length, df=df)
1690 1692
1691 1693 def _chunk(self, rev, df=None):
1692 1694 """Obtain a single decompressed chunk for a revision.
1693 1695
1694 1696 Accepts an integer revision and an optional already-open file handle
1695 1697 to be used for reading. If used, the seek position of the file will not
1696 1698 be preserved.
1697 1699
1698 1700 Returns a str holding uncompressed data for the requested revision.
1699 1701 """
1700 1702 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1701 1703
1702 1704 def _chunks(self, revs, df=None, targetsize=None):
1703 1705 """Obtain decompressed chunks for the specified revisions.
1704 1706
1705 1707 Accepts an iterable of numeric revisions that are assumed to be in
1706 1708 ascending order. Also accepts an optional already-open file handle
1707 1709 to be used for reading. If used, the seek position of the file will
1708 1710 not be preserved.
1709 1711
1710 1712 This function is similar to calling ``self._chunk()`` multiple times,
1711 1713 but is faster.
1712 1714
1713 1715 Returns a list with decompressed data for each requested revision.
1714 1716 """
1715 1717 if not revs:
1716 1718 return []
1717 1719 start = self.start
1718 1720 length = self.length
1719 1721 inline = self._inline
1720 1722 iosize = self._io.size
1721 1723 buffer = util.buffer
1722 1724
1723 1725 l = []
1724 1726 ladd = l.append
1725 1727
1726 1728 if not self._withsparseread:
1727 1729 slicedchunks = (revs,)
1728 1730 else:
1729 1731 slicedchunks = deltautil.slicechunk(
1730 1732 self, revs, targetsize=targetsize
1731 1733 )
1732 1734
1733 1735 for revschunk in slicedchunks:
1734 1736 firstrev = revschunk[0]
1735 1737 # Skip trailing revisions with empty diff
1736 1738 for lastrev in revschunk[::-1]:
1737 1739 if length(lastrev) != 0:
1738 1740 break
1739 1741
1740 1742 try:
1741 1743 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1742 1744 except OverflowError:
1743 1745 # issue4215 - we can't cache a run of chunks greater than
1744 1746 # 2G on Windows
1745 1747 return [self._chunk(rev, df=df) for rev in revschunk]
1746 1748
1747 1749 decomp = self.decompress
1748 1750 for rev in revschunk:
1749 1751 chunkstart = start(rev)
1750 1752 if inline:
1751 1753 chunkstart += (rev + 1) * iosize
1752 1754 chunklength = length(rev)
1753 1755 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1754 1756
1755 1757 return l
1756 1758
1757 1759 def _chunkclear(self):
1758 1760 """Clear the raw chunk cache."""
1759 1761 self._chunkcache = (0, b'')
1760 1762
1761 1763 def deltaparent(self, rev):
1762 1764 """return deltaparent of the given revision"""
1763 1765 base = self.index[rev][3]
1764 1766 if base == rev:
1765 1767 return nullrev
1766 1768 elif self._generaldelta:
1767 1769 return base
1768 1770 else:
1769 1771 return rev - 1
1770 1772
1771 1773 def issnapshot(self, rev):
1772 1774 """tells whether rev is a snapshot
1773 1775 """
1774 1776 if not self._sparserevlog:
1775 1777 return self.deltaparent(rev) == nullrev
1776 1778 elif util.safehasattr(self.index, 'issnapshot'):
1777 1779 # directly assign the method to cache the testing and access
1778 1780 self.issnapshot = self.index.issnapshot
1779 1781 return self.issnapshot(rev)
1780 1782 if rev == nullrev:
1781 1783 return True
1782 1784 entry = self.index[rev]
1783 1785 base = entry[3]
1784 1786 if base == rev:
1785 1787 return True
1786 1788 if base == nullrev:
1787 1789 return True
1788 1790 p1 = entry[5]
1789 1791 p2 = entry[6]
1790 1792 if base == p1 or base == p2:
1791 1793 return False
1792 1794 return self.issnapshot(base)
1793 1795
1794 1796 def snapshotdepth(self, rev):
1795 1797 """number of snapshot in the chain before this one"""
1796 1798 if not self.issnapshot(rev):
1797 1799 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1798 1800 return len(self._deltachain(rev)[0]) - 1
1799 1801
1800 1802 def revdiff(self, rev1, rev2):
1801 1803 """return or calculate a delta between two revisions
1802 1804
1803 1805 The delta calculated is in binary form and is intended to be written to
1804 1806 revlog data directly. So this function needs raw revision data.
1805 1807 """
1806 1808 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1807 1809 return bytes(self._chunk(rev2))
1808 1810
1809 1811 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1810 1812
1811 1813 def _processflags(self, text, flags, operation, raw=False):
1812 1814 """deprecated entry point to access flag processors"""
1813 1815 msg = b'_processflag(...) use the specialized variant'
1814 1816 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1815 1817 if raw:
1816 1818 return text, flagutil.processflagsraw(self, text, flags)
1817 1819 elif operation == b'read':
1818 1820 return flagutil.processflagsread(self, text, flags)
1819 1821 else: # write operation
1820 1822 return flagutil.processflagswrite(self, text, flags)
1821 1823
1822 1824 def revision(self, nodeorrev, _df=None, raw=False):
1823 1825 """return an uncompressed revision of a given node or revision
1824 1826 number.
1825 1827
1826 1828 _df - an existing file handle to read from. (internal-only)
1827 1829 raw - an optional argument specifying if the revision data is to be
1828 1830 treated as raw data when applying flag transforms. 'raw' should be set
1829 1831 to True when generating changegroups or in debug commands.
1830 1832 """
1831 1833 if raw:
1832 1834 msg = (
1833 1835 b'revlog.revision(..., raw=True) is deprecated, '
1834 1836 b'use revlog.rawdata(...)'
1835 1837 )
1836 1838 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1837 1839 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1838 1840
1839 1841 def sidedata(self, nodeorrev, _df=None):
1840 1842 """a map of extra data related to the changeset but not part of the hash
1841 1843
1842 1844 This function currently returns a dictionary. However, a more advanced
1843 1845 mapping object will likely be used in the future for more
1844 1846 efficient/lazy code.
1845 1847 """
1846 1848 return self._revisiondata(nodeorrev, _df)[1]
1847 1849
1848 1850 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1849 1851 # deal with <nodeorrev> argument type
1850 1852 if isinstance(nodeorrev, int):
1851 1853 rev = nodeorrev
1852 1854 node = self.node(rev)
1853 1855 else:
1854 1856 node = nodeorrev
1855 1857 rev = None
1856 1858
1857 1859 # fast path the special `nullid` rev
1858 1860 if node == nullid:
1859 1861 return b"", {}
1860 1862
1861 1863 # ``rawtext`` is the text as stored inside the revlog. Might be the
1862 1864 # revision or might need to be processed to retrieve the revision.
1863 1865 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1864 1866
1865 1867 if raw and validated:
1866 1868 # if we don't want to process the raw text and that raw
1867 1869 # text is cached, we can exit early.
1868 1870 return rawtext, {}
1869 1871 if rev is None:
1870 1872 rev = self.rev(node)
1871 1873 # the revlog's flag for this revision
1872 1874 # (usually alter its state or content)
1873 1875 flags = self.flags(rev)
1874 1876
1875 1877 if validated and flags == REVIDX_DEFAULT_FLAGS:
1876 1878 # no extra flags set, no flag processor runs, text = rawtext
1877 1879 return rawtext, {}
1878 1880
1879 1881 sidedata = {}
1880 1882 if raw:
1881 1883 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1882 1884 text = rawtext
1883 1885 else:
1884 1886 try:
1885 1887 r = flagutil.processflagsread(self, rawtext, flags)
1886 1888 except error.SidedataHashError as exc:
1887 1889 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1888 1890 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1889 1891 raise error.RevlogError(msg)
1890 1892 text, validatehash, sidedata = r
1891 1893 if validatehash:
1892 1894 self.checkhash(text, node, rev=rev)
1893 1895 if not validated:
1894 1896 self._revisioncache = (node, rev, rawtext)
1895 1897
1896 1898 return text, sidedata
1897 1899
1898 1900 def _rawtext(self, node, rev, _df=None):
1899 1901 """return the possibly unvalidated rawtext for a revision
1900 1902
1901 1903 returns (rev, rawtext, validated)
1902 1904 """
1903 1905
1904 1906 # revision in the cache (could be useful to apply delta)
1905 1907 cachedrev = None
1906 1908 # An intermediate text to apply deltas to
1907 1909 basetext = None
1908 1910
1909 1911 # Check if we have the entry in cache
1910 1912 # The cache entry looks like (node, rev, rawtext)
1911 1913 if self._revisioncache:
1912 1914 if self._revisioncache[0] == node:
1913 1915 return (rev, self._revisioncache[2], True)
1914 1916 cachedrev = self._revisioncache[1]
1915 1917
1916 1918 if rev is None:
1917 1919 rev = self.rev(node)
1918 1920
1919 1921 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1920 1922 if stopped:
1921 1923 basetext = self._revisioncache[2]
1922 1924
1923 1925 # drop cache to save memory, the caller is expected to
1924 1926 # update self._revisioncache after validating the text
1925 1927 self._revisioncache = None
1926 1928
1927 1929 targetsize = None
1928 1930 rawsize = self.index[rev][2]
1929 1931 if 0 <= rawsize:
1930 1932 targetsize = 4 * rawsize
1931 1933
1932 1934 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1933 1935 if basetext is None:
1934 1936 basetext = bytes(bins[0])
1935 1937 bins = bins[1:]
1936 1938
1937 1939 rawtext = mdiff.patches(basetext, bins)
1938 1940 del basetext # let us have a chance to free memory early
1939 1941 return (rev, rawtext, False)
1940 1942
1941 1943 def rawdata(self, nodeorrev, _df=None):
1942 1944 """return an uncompressed raw data of a given node or revision number.
1943 1945
1944 1946 _df - an existing file handle to read from. (internal-only)
1945 1947 """
1946 1948 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1947 1949
1948 1950 def hash(self, text, p1, p2):
1949 1951 """Compute a node hash.
1950 1952
1951 1953 Available as a function so that subclasses can replace the hash
1952 1954 as needed.
1953 1955 """
1954 1956 return storageutil.hashrevisionsha1(text, p1, p2)
1955 1957
1956 1958 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1957 1959 """Check node hash integrity.
1958 1960
1959 1961 Available as a function so that subclasses can extend hash mismatch
1960 1962 behaviors as needed.
1961 1963 """
1962 1964 try:
1963 1965 if p1 is None and p2 is None:
1964 1966 p1, p2 = self.parents(node)
1965 1967 if node != self.hash(text, p1, p2):
1966 1968 # Clear the revision cache on hash failure. The revision cache
1967 1969 # only stores the raw revision and clearing the cache does have
1968 1970 # the side-effect that we won't have a cache hit when the raw
1969 1971 # revision data is accessed. But this case should be rare and
1970 1972 # it is extra work to teach the cache about the hash
1971 1973 # verification state.
1972 1974 if self._revisioncache and self._revisioncache[0] == node:
1973 1975 self._revisioncache = None
1974 1976
1975 1977 revornode = rev
1976 1978 if revornode is None:
1977 1979 revornode = templatefilters.short(hex(node))
1978 1980 raise error.RevlogError(
1979 1981 _(b"integrity check failed on %s:%s")
1980 1982 % (self.indexfile, pycompat.bytestr(revornode))
1981 1983 )
1982 1984 except error.RevlogError:
1983 1985 if self._censorable and storageutil.iscensoredtext(text):
1984 1986 raise error.CensoredNodeError(self.indexfile, node, text)
1985 1987 raise
1986 1988
1987 1989 def _enforceinlinesize(self, tr, fp=None):
1988 1990 """Check if the revlog is too big for inline and convert if so.
1989 1991
1990 1992 This should be called after revisions are added to the revlog. If the
1991 1993 revlog has grown too large to be an inline revlog, it will convert it
1992 1994 to use multiple index and data files.
1993 1995 """
1994 1996 tiprev = len(self) - 1
1995 1997 if (
1996 1998 not self._inline
1997 1999 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1998 2000 ):
1999 2001 return
2000 2002
2001 2003 trinfo = tr.find(self.indexfile)
2002 2004 if trinfo is None:
2003 2005 raise error.RevlogError(
2004 2006 _(b"%s not found in the transaction") % self.indexfile
2005 2007 )
2006 2008
2007 2009 trindex = trinfo[2]
2008 2010 if trindex is not None:
2009 2011 dataoff = self.start(trindex)
2010 2012 else:
2011 2013 # revlog was stripped at start of transaction, use all leftover data
2012 2014 trindex = len(self) - 1
2013 2015 dataoff = self.end(tiprev)
2014 2016
2015 2017 tr.add(self.datafile, dataoff)
2016 2018
2017 2019 if fp:
2018 2020 fp.flush()
2019 2021 fp.close()
2020 2022 # We can't use the cached file handle after close(). So prevent
2021 2023 # its usage.
2022 2024 self._writinghandles = None
2023 2025
2024 2026 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2025 2027 for r in self:
2026 2028 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2027 2029
2028 2030 with self._indexfp(b'w') as fp:
2029 2031 self.version &= ~FLAG_INLINE_DATA
2030 2032 self._inline = False
2031 2033 io = self._io
2032 2034 for i in self:
2033 2035 e = io.packentry(self.index[i], self.node, self.version, i)
2034 2036 fp.write(e)
2035 2037
2036 2038 # the temp file replaces the real index when we exit the context
2037 2039 # manager
2038 2040
2039 2041 tr.replace(self.indexfile, trindex * self._io.size)
2040 2042 nodemaputil.setup_persistent_nodemap(tr, self)
2041 2043 self._chunkclear()
2042 2044
2043 2045 def _nodeduplicatecallback(self, transaction, node):
2044 2046 """called when trying to add a node already stored.
2045 2047 """
2046 2048
2047 2049 def addrevision(
2048 2050 self,
2049 2051 text,
2050 2052 transaction,
2051 2053 link,
2052 2054 p1,
2053 2055 p2,
2054 2056 cachedelta=None,
2055 2057 node=None,
2056 2058 flags=REVIDX_DEFAULT_FLAGS,
2057 2059 deltacomputer=None,
2058 2060 sidedata=None,
2059 2061 ):
2060 2062 """add a revision to the log
2061 2063
2062 2064 text - the revision data to add
2063 2065 transaction - the transaction object used for rollback
2064 2066 link - the linkrev data to add
2065 2067 p1, p2 - the parent nodeids of the revision
2066 2068 cachedelta - an optional precomputed delta
2067 2069 node - nodeid of revision; typically node is not specified, and it is
2068 2070 computed by default as hash(text, p1, p2); however, subclasses might
2069 2071 use a different hashing method (and override checkhash() in such a case)
2070 2072 flags - the known flags to set on the revision
2071 2073 deltacomputer - an optional deltacomputer instance shared between
2072 2074 multiple calls
2073 2075 """
2074 2076 if link == nullrev:
2075 2077 raise error.RevlogError(
2076 2078 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2077 2079 )
2078 2080
2079 2081 if sidedata is None:
2080 2082 sidedata = {}
2081 2083 flags = flags & ~REVIDX_SIDEDATA
2082 2084 elif not self.hassidedata:
2083 2085 raise error.ProgrammingError(
2084 2086 _(b"trying to add sidedata to a revlog who don't support them")
2085 2087 )
2086 2088 else:
2087 2089 flags |= REVIDX_SIDEDATA
2088 2090
2089 2091 if flags:
2090 2092 node = node or self.hash(text, p1, p2)
2091 2093
2092 2094 rawtext, validatehash = flagutil.processflagswrite(
2093 2095 self, text, flags, sidedata=sidedata
2094 2096 )
2095 2097
2096 2098 # If the flag processor modifies the revision data, ignore any provided
2097 2099 # cachedelta.
2098 2100 if rawtext != text:
2099 2101 cachedelta = None
2100 2102
2101 2103 if len(rawtext) > _maxentrysize:
2102 2104 raise error.RevlogError(
2103 2105 _(
2104 2106 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2105 2107 )
2106 2108 % (self.indexfile, len(rawtext))
2107 2109 )
2108 2110
2109 2111 node = node or self.hash(rawtext, p1, p2)
2110 2112 if self.index.has_node(node):
2111 2113 return node
2112 2114
2113 2115 if validatehash:
2114 2116 self.checkhash(rawtext, node, p1=p1, p2=p2)
2115 2117
2116 2118 return self.addrawrevision(
2117 2119 rawtext,
2118 2120 transaction,
2119 2121 link,
2120 2122 p1,
2121 2123 p2,
2122 2124 node,
2123 2125 flags,
2124 2126 cachedelta=cachedelta,
2125 2127 deltacomputer=deltacomputer,
2126 2128 )
2127 2129
2128 2130 def addrawrevision(
2129 2131 self,
2130 2132 rawtext,
2131 2133 transaction,
2132 2134 link,
2133 2135 p1,
2134 2136 p2,
2135 2137 node,
2136 2138 flags,
2137 2139 cachedelta=None,
2138 2140 deltacomputer=None,
2139 2141 ):
2140 2142 """add a raw revision with known flags, node and parents
2141 2143 useful when reusing a revision not stored in this revlog (ex: received
2142 2144 over the wire, or read from an external bundle).
2143 2145 """
2144 2146 dfh = None
2145 2147 if not self._inline:
2146 2148 dfh = self._datafp(b"a+")
2147 2149 ifh = self._indexfp(b"a+")
2148 2150 try:
2149 2151 return self._addrevision(
2150 2152 node,
2151 2153 rawtext,
2152 2154 transaction,
2153 2155 link,
2154 2156 p1,
2155 2157 p2,
2156 2158 flags,
2157 2159 cachedelta,
2158 2160 ifh,
2159 2161 dfh,
2160 2162 deltacomputer=deltacomputer,
2161 2163 )
2162 2164 finally:
2163 2165 if dfh:
2164 2166 dfh.close()
2165 2167 ifh.close()
2166 2168
2167 2169 def compress(self, data):
2168 2170 """Generate a possibly-compressed representation of data."""
2169 2171 if not data:
2170 2172 return b'', data
2171 2173
2172 2174 compressed = self._compressor.compress(data)
2173 2175
2174 2176 if compressed:
2175 2177 # The revlog compressor added the header in the returned data.
2176 2178 return b'', compressed
2177 2179
2178 2180 if data[0:1] == b'\0':
2179 2181 return b'', data
2180 2182 return b'u', data
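# Possible return shapes (a sketch): (b'', compressed-with-header) when
# the engine shrinks the data, (b'', data) when the raw text already
# starts with b'\0' (stored verbatim), and (b'u', data) for other
# incompressible chunks.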
2181 2183
2182 2184 def decompress(self, data):
2183 2185 """Decompress a revlog chunk.
2184 2186
2185 2187 The chunk is expected to begin with a header identifying the
2186 2188 format type so it can be routed to an appropriate decompressor.
2187 2189 """
2188 2190 if not data:
2189 2191 return data
2190 2192
2191 2193 # Revlogs are read much more frequently than they are written and many
2192 2194 # chunks only take microseconds to decompress, so performance is
2193 2195 # important here.
2194 2196 #
2195 2197 # We can make a few assumptions about revlogs:
2196 2198 #
2197 2199 # 1) the majority of chunks will be compressed (as opposed to inline
2198 2200 # raw data).
2199 2201 # 2) decompressing *any* data will likely be at least 10x slower than
2200 2202 # returning raw inline data.
2201 2203 # 3) we want to prioritize common and officially supported compression
2202 2204 # engines
2203 2205 #
2204 2206 # It follows that we want to optimize for "decompress compressed data
2205 2207 # when encoded with common and officially supported compression engines"
2206 2208 # case over "raw data" and "data encoded by less common or non-official
2207 2209 # compression engines." That is why we have the inline lookup first
2208 2210 # followed by the compengines lookup.
2209 2211 #
2210 2212 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 2213 # compressed chunks. And this matters for changelog and manifest reads.
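# Header byte routing implemented below (a sketch): b'x' -> zlib,
# b'\0' -> chunk stored verbatim (the NUL is part of the data),
# b'u' -> uncompressed chunk with a one-byte header to strip, anything
# else -> the compression engine registered for that header (zstd, for
# instance).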
2212 2214 t = data[0:1]
2213 2215
2214 2216 if t == b'x':
2215 2217 try:
2216 2218 return _zlibdecompress(data)
2217 2219 except zlib.error as e:
2218 2220 raise error.RevlogError(
2219 2221 _(b'revlog decompress error: %s')
2220 2222 % stringutil.forcebytestr(e)
2221 2223 )
2222 2224 # '\0' is more common than 'u' so it goes first.
2223 2225 elif t == b'\0':
2224 2226 return data
2225 2227 elif t == b'u':
2226 2228 return util.buffer(data, 1)
2227 2229
2228 2230 try:
2229 2231 compressor = self._decompressors[t]
2230 2232 except KeyError:
2231 2233 try:
2232 2234 engine = util.compengines.forrevlogheader(t)
2233 2235 compressor = engine.revlogcompressor(self._compengineopts)
2234 2236 self._decompressors[t] = compressor
2235 2237 except KeyError:
2236 2238 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237 2239
2238 2240 return compressor.decompress(data)
2239 2241
2240 2242 def _addrevision(
2241 2243 self,
2242 2244 node,
2243 2245 rawtext,
2244 2246 transaction,
2245 2247 link,
2246 2248 p1,
2247 2249 p2,
2248 2250 flags,
2249 2251 cachedelta,
2250 2252 ifh,
2251 2253 dfh,
2252 2254 alwayscache=False,
2253 2255 deltacomputer=None,
2254 2256 ):
2255 2257 """internal function to add revisions to the log
2256 2258
2257 2259 see addrevision for argument descriptions.
2258 2260
2259 2261 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2260 2262
2261 2263 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2262 2264 be used.
2263 2265
2264 2266 invariants:
2265 2267 - rawtext is optional (can be None); if not set, cachedelta must be set.
2266 2268 if both are set, they must correspond to each other.
2267 2269 """
2268 2270 if node == nullid:
2269 2271 raise error.RevlogError(
2270 2272 _(b"%s: attempt to add null revision") % self.indexfile
2271 2273 )
2272 2274 if node == wdirid or node in wdirfilenodeids:
2273 2275 raise error.RevlogError(
2274 2276 _(b"%s: attempt to add wdir revision") % self.indexfile
2275 2277 )
2276 2278
2277 2279 if self._inline:
2278 2280 fh = ifh
2279 2281 else:
2280 2282 fh = dfh
2281 2283
2282 2284 btext = [rawtext]
2283 2285
2284 2286 curr = len(self)
2285 2287 prev = curr - 1
2286 2288 offset = self.end(prev)
2287 2289 p1r, p2r = self.rev(p1), self.rev(p2)
2288 2290
2289 2291 # full versions are inserted when the needed deltas
2290 2292 # become comparable to the uncompressed text
2291 2293 if rawtext is None:
2292 2294 # we need the rawtext size before it is changed by flag processors,
2293 2295 # which is the non-raw size. use revlog explicitly to avoid filelog's
2294 2296 # extra logic that might remove metadata size.
2295 2297 textlen = mdiff.patchedsize(
2296 2298 revlog.size(self, cachedelta[0]), cachedelta[1]
2297 2299 )
2298 2300 else:
2299 2301 textlen = len(rawtext)
2300 2302
2301 2303 if deltacomputer is None:
2302 2304 deltacomputer = deltautil.deltacomputer(self)
2303 2305
2304 2306 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2305 2307
2306 2308 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2307 2309
2308 2310 e = (
2309 2311 offset_type(offset, flags),
2310 2312 deltainfo.deltalen,
2311 2313 textlen,
2312 2314 deltainfo.base,
2313 2315 link,
2314 2316 p1r,
2315 2317 p2r,
2316 2318 node,
2317 2319 )
2318 2320 self.index.append(e)
2319 2321
2320 2322 entry = self._io.packentry(e, self.node, self.version, curr)
2321 2323 self._writeentry(
2322 2324 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2323 2325 )
2324 2326
2325 2327 rawtext = btext[0]
2326 2328
2327 2329 if alwayscache and rawtext is None:
2328 2330 rawtext = deltacomputer.buildtext(revinfo, fh)
2329 2331
2330 2332 if type(rawtext) == bytes: # only accept immutable objects
2331 2333 self._revisioncache = (node, curr, rawtext)
2332 2334 self._chainbasecache[curr] = deltainfo.chainbase
2333 2335 return node
2334 2336
2335 2337 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2336 2338 # Files opened in a+ mode have inconsistent behavior on various
2337 2339 # platforms. Windows requires that a file positioning call be made
2338 2340 # when the file handle transitions between reads and writes. See
2339 2341 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2340 2342 # platforms, Python or the platform itself can be buggy. Some versions
2341 2343 # of Solaris have been observed to not append at the end of the file
2342 2344 # if the file was seeked to before the end. See issue4943 for more.
2343 2345 #
2344 2346 # We work around this issue by inserting a seek() before writing.
2345 2347 # Note: This is likely not necessary on Python 3. However, because
2346 2348 # the file handle is reused for reads and may have been seeked by them,
2347 2349 # we need to be careful before changing this.
2348 2350 ifh.seek(0, os.SEEK_END)
2349 2351 if dfh:
2350 2352 dfh.seek(0, os.SEEK_END)
2351 2353
2352 2354 curr = len(self) - 1
2353 2355 if not self._inline:
2354 2356 transaction.add(self.datafile, offset)
2355 2357 transaction.add(self.indexfile, curr * len(entry))
2356 2358 if data[0]:
2357 2359 dfh.write(data[0])
2358 2360 dfh.write(data[1])
2359 2361 ifh.write(entry)
2360 2362 else:
2361 2363 offset += curr * self._io.size
2362 2364 transaction.add(self.indexfile, offset, curr)
2363 2365 ifh.write(entry)
2364 2366 ifh.write(data[0])
2365 2367 ifh.write(data[1])
2366 2368 self._enforceinlinesize(transaction, ifh)
2367 2369 nodemaputil.setup_persistent_nodemap(transaction, self)
2368 2370
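The workaround is self-contained enough to demonstrate outside Mercurial. A small sketch, assuming a throwaway file name, showing the read-then-reposition pattern the comment above describes:

    import os

    with open('example.dat', 'wb') as fp:
        fp.write(b'existing')

    with open('example.dat', 'a+b') as fh:
        fh.seek(0)
        fh.read()                # the handle has now been used for reading
        fh.seek(0, os.SEEK_END)  # defensive reposition before the write
        fh.write(b'|appended')

    with open('example.dat', 'rb') as fp:
        assert fp.read() == b'existing|appended'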
2369 2371 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2370 2372 """
2371 2373 add a delta group
2372 2374
2373 2375 given a set of deltas, add them to the revision log. the
2374 2376 first delta is against its parent, which should be in our
2375 2377 log, the rest are against the previous delta.
2376 2378
2377 2379 If ``addrevisioncb`` is defined, it will be called with arguments of
2378 2380 this revlog and the node that was added.
2379 2381 """
2380 2382
2381 2383 if self._writinghandles:
2382 2384 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2383 2385
2384 2386 nodes = []
2385 2387
2386 2388 r = len(self)
2387 2389 end = 0
2388 2390 if r:
2389 2391 end = self.end(r - 1)
2390 2392 ifh = self._indexfp(b"a+")
2391 2393 isize = r * self._io.size
2392 2394 if self._inline:
2393 2395 transaction.add(self.indexfile, end + isize, r)
2394 2396 dfh = None
2395 2397 else:
2396 2398 transaction.add(self.indexfile, isize, r)
2397 2399 transaction.add(self.datafile, end)
2398 2400 dfh = self._datafp(b"a+")
2399 2401
2400 2402 def flush():
2401 2403 if dfh:
2402 2404 dfh.flush()
2403 2405 ifh.flush()
2404 2406
2405 2407 self._writinghandles = (ifh, dfh)
2406 2408
2407 2409 try:
2408 2410 deltacomputer = deltautil.deltacomputer(self)
2409 2411 # loop through our set of deltas
2410 2412 for data in deltas:
2411 2413 node, p1, p2, linknode, deltabase, delta, flags = data
2412 2414 link = linkmapper(linknode)
2413 2415 flags = flags or REVIDX_DEFAULT_FLAGS
2414 2416
2415 2417 nodes.append(node)
2416 2418
2417 2419 if self.index.has_node(node):
2418 2420 self._nodeduplicatecallback(transaction, node)
2419 2421 # this can happen if two branches make the same change
2420 2422 continue
2421 2423
2422 2424 for p in (p1, p2):
2423 2425 if not self.index.has_node(p):
2424 2426 raise error.LookupError(
2425 2427 p, self.indexfile, _(b'unknown parent')
2426 2428 )
2427 2429
2428 2430 if not self.index.has_node(deltabase):
2429 2431 raise error.LookupError(
2430 2432 deltabase, self.indexfile, _(b'unknown delta base')
2431 2433 )
2432 2434
2433 2435 baserev = self.rev(deltabase)
2434 2436
2435 2437 if baserev != nullrev and self.iscensored(baserev):
2436 2438 # if base is censored, delta must be full replacement in a
2437 2439 # single patch operation
2438 2440 hlen = struct.calcsize(b">lll")
2439 2441 oldlen = self.rawsize(baserev)
2440 2442 newlen = len(delta) - hlen
2441 2443 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2442 2444 raise error.CensoredBaseError(
2443 2445 self.indexfile, self.node(baserev)
2444 2446 )
2445 2447
2446 2448 if not flags and self._peek_iscensored(baserev, delta, flush):
2447 2449 flags |= REVIDX_ISCENSORED
2448 2450
2449 2451 # We assume consumers of addrevisioncb will want to retrieve
2450 2452 # the added revision, which will require a call to
2451 2453 # revision(). revision() will fast path if there is a cache
2452 2454 # hit. So, we tell _addrevision() to always cache in this case.
2453 2455 # We're only using addgroup() in the context of changegroup
2454 2456 # generation so the revision data can always be handled as raw
2455 2457 # by the flagprocessor.
2456 2458 self._addrevision(
2457 2459 node,
2458 2460 None,
2459 2461 transaction,
2460 2462 link,
2461 2463 p1,
2462 2464 p2,
2463 2465 flags,
2464 2466 (baserev, delta),
2465 2467 ifh,
2466 2468 dfh,
2467 2469 alwayscache=bool(addrevisioncb),
2468 2470 deltacomputer=deltacomputer,
2469 2471 )
2470 2472
2471 2473 if addrevisioncb:
2472 2474 addrevisioncb(self, node)
2473 2475
2474 2476 if not dfh and not self._inline:
2475 2477 # addrevision switched from inline to conventional
2476 2478 # reopen the index
2477 2479 ifh.close()
2478 2480 dfh = self._datafp(b"a+")
2479 2481 ifh = self._indexfp(b"a+")
2480 2482 self._writinghandles = (ifh, dfh)
2481 2483 finally:
2482 2484 self._writinghandles = None
2483 2485
2484 2486 if dfh:
2485 2487 dfh.close()
2486 2488 ifh.close()
2487 2489
2488 2490 return nodes
2489 2491
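The records consumed by the loop above are 7-tuples, unpacked as ``node, p1, p2, linknode, deltabase, delta, flags``. A hedged sketch of a caller-side generator; the node values are placeholders, not real hashes:

    def example_deltas():
        # Shape of each record addgroup() consumes:
        # (node, p1, p2, linknode, deltabase, delta, flags)
        node = b'\x11' * 20        # placeholder 20-byte node id
        p1 = b'\x22' * 20          # first parent (must exist in the log)
        p2 = b'\x00' * 20          # null second parent
        linknode = b'\x33' * 20    # changelog node this revision links to
        deltabase = p1             # base the binary patch applies to
        delta = b'<binary patch>'  # placeholder patch bytes
        yield (node, p1, p2, linknode, deltabase, delta, 0)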
2490 2492 def iscensored(self, rev):
2491 2493 """Check if a file revision is censored."""
2492 2494 if not self._censorable:
2493 2495 return False
2494 2496
2495 2497 return self.flags(rev) & REVIDX_ISCENSORED
2496 2498
2497 2499 def _peek_iscensored(self, baserev, delta, flush):
2498 2500 """Quickly check if a delta produces a censored revision."""
2499 2501 if not self._censorable:
2500 2502 return False
2501 2503
2502 2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2503 2505
2504 2506 def getstrippoint(self, minlink):
2505 2507 """find the minimum rev that must be stripped to strip the linkrev
2506 2508
2507 2509 Returns a tuple containing the minimum rev and a set of all revs that
2508 2510 have linkrevs that will be broken by this strip.
2509 2511 """
2510 2512 return storageutil.resolvestripinfo(
2511 2513 minlink,
2512 2514 len(self) - 1,
2513 2515 self.headrevs(),
2514 2516 self.linkrev,
2515 2517 self.parentrevs,
2516 2518 )
2517 2519
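Callers typically consult ``getstrippoint()`` before committing to a strip. A short usage sketch, where ``rl``, ``minlink``, and ``tr`` are assumed to be an open revlog, a changelog rev, and a transaction:

    rev, broken = rl.getstrippoint(minlink)
    if rev < len(rl):
        # `broken` holds revs whose linkrevs the strip would invalidate;
        # their data must be saved and re-added after truncation.
        rl.strip(minlink, tr)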
2518 2520 def strip(self, minlink, transaction):
2519 2521 """truncate the revlog on the first revision with a linkrev >= minlink
2520 2522
2521 2523 This function is called when we're stripping revision minlink and
2522 2524 its descendants from the repository.
2523 2525
2524 2526 We have to remove all revisions with linkrev >= minlink, because
2525 2527 the equivalent changelog revisions will be renumbered after the
2526 2528 strip.
2527 2529
2528 2530 So we truncate the revlog on the first of these revisions, and
2529 2531 trust that the caller has saved the revisions that shouldn't be
2530 2532 removed and that it'll re-add them after this truncation.
2531 2533 """
2532 2534 if len(self) == 0:
2533 2535 return
2534 2536
2535 2537 rev, _ = self.getstrippoint(minlink)
2536 2538 if rev == len(self):
2537 2539 return
2538 2540
2539 2541 # first truncate the files on disk
2540 2542 end = self.start(rev)
2541 2543 if not self._inline:
2542 2544 transaction.add(self.datafile, end)
2543 2545 end = rev * self._io.size
2544 2546 else:
2545 2547 end += rev * self._io.size
2546 2548
2547 2549 transaction.add(self.indexfile, end)
2548 2550
2549 2551 # then reset internal state in memory to forget those revisions
2550 2552 self._revisioncache = None
2551 2553 self._chaininfocache = {}
2552 2554 self._chunkclear()
2553 2555
2554 2556 del self.index[rev:-1]
2555 2557
2556 2558 def checksize(self):
2557 2559 """Check size of index and data files
2558 2560
2559 2561 return a (dd, di) tuple.
2560 2562 - dd: extra bytes for the "data" file
2561 2563 - di: extra bytes for the "index" file
2562 2564
2563 2565 A healthy revlog will return (0, 0).
2564 2566 """
2565 2567 expected = 0
2566 2568 if len(self):
2567 2569 expected = max(0, self.end(len(self) - 1))
2568 2570
2569 2571 try:
2570 2572 with self._datafp() as f:
2571 2573 f.seek(0, io.SEEK_END)
2572 2574 actual = f.tell()
2573 2575 dd = actual - expected
2574 2576 except IOError as inst:
2575 2577 if inst.errno != errno.ENOENT:
2576 2578 raise
2577 2579 dd = 0
2578 2580
2579 2581 try:
2580 2582 f = self.opener(self.indexfile)
2581 2583 f.seek(0, io.SEEK_END)
2582 2584 actual = f.tell()
2583 2585 f.close()
2584 2586 s = self._io.size
2585 2587 i = max(0, actual // s)
2586 2588 di = actual - (i * s)
2587 2589 if self._inline:
2588 2590 databytes = 0
2589 2591 for r in self:
2590 2592 databytes += max(0, self.length(r))
2591 2593 dd = 0
2592 2594 di = actual - len(self) * s - databytes
2593 2595 except IOError as inst:
2594 2596 if inst.errno != errno.ENOENT:
2595 2597 raise
2596 2598 di = 0
2597 2599
2598 2600 return (dd, di)
2599 2601
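A healthy revlog reports ``(0, 0)``; ``verifyintegrity()`` below turns nonzero components into problems. A minimal consumer sketch, assuming ``rl`` is an open revlog:

    dd, di = rl.checksize()
    if dd:
        print('data file is off by %d bytes' % dd)
    if di:
        print('index file has %d extra bytes' % di)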
2600 2602 def files(self):
2601 2603 res = [self.indexfile]
2602 2604 if not self._inline:
2603 2605 res.append(self.datafile)
2604 2606 return res
2605 2607
2606 2608 def emitrevisions(
2607 2609 self,
2608 2610 nodes,
2609 2611 nodesorder=None,
2610 2612 revisiondata=False,
2611 2613 assumehaveparentrevisions=False,
2612 2614 deltamode=repository.CG_DELTAMODE_STD,
2613 2615 ):
2614 2616 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2615 2617 raise error.ProgrammingError(
2616 2618 b'unhandled value for nodesorder: %s' % nodesorder
2617 2619 )
2618 2620
2619 2621 if nodesorder is None and not self._generaldelta:
2620 2622 nodesorder = b'storage'
2621 2623
2622 2624 if (
2623 2625 not self._storedeltachains
2624 2626 and deltamode != repository.CG_DELTAMODE_PREV
2625 2627 ):
2626 2628 deltamode = repository.CG_DELTAMODE_FULL
2627 2629
2628 2630 return storageutil.emitrevisions(
2629 2631 self,
2630 2632 nodes,
2631 2633 nodesorder,
2632 2634 revlogrevisiondelta,
2633 2635 deltaparentfn=self.deltaparent,
2634 2636 candeltafn=self.candelta,
2635 2637 rawsizefn=self.rawsize,
2636 2638 revdifffn=self.revdiff,
2637 2639 flagsfn=self.flags,
2638 2640 deltamode=deltamode,
2639 2641 revisiondata=revisiondata,
2640 2642 assumehaveparentrevisions=assumehaveparentrevisions,
2641 2643 )
2642 2644
2643 2645 DELTAREUSEALWAYS = b'always'
2644 2646 DELTAREUSESAMEREVS = b'samerevs'
2645 2647 DELTAREUSENEVER = b'never'
2646 2648
2647 2649 DELTAREUSEFULLADD = b'fulladd'
2648 2650
2649 2651 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2650 2652
2651 2653 def clone(
2652 2654 self,
2653 2655 tr,
2654 2656 destrevlog,
2655 2657 addrevisioncb=None,
2656 2658 deltareuse=DELTAREUSESAMEREVS,
2657 2659 forcedeltabothparents=None,
2658 2660 sidedatacompanion=None,
2659 2661 ):
2660 2662 """Copy this revlog to another, possibly with format changes.
2661 2663
2662 2664 The destination revlog will contain the same revisions and nodes.
2663 2665 However, it may not be bit-for-bit identical due to e.g. delta encoding
2664 2666 differences.
2665 2667
2666 2668 The ``deltareuse`` argument controls how deltas from the existing revlog
2667 2669 are preserved in the destination revlog. The argument can have the
2668 2670 following values:
2669 2671
2670 2672 DELTAREUSEALWAYS
2671 2673 Deltas will always be reused (if possible), even if the destination
2672 2674 revlog would not select the same revisions for the delta. This is the
2673 2675 fastest mode of operation.
2674 2676 DELTAREUSESAMEREVS
2675 2677 Deltas will be reused if the destination revlog would pick the same
2676 2678 revisions for the delta. This mode strikes a balance between speed
2677 2679 and optimization.
2678 2680 DELTAREUSENEVER
2679 2681 Deltas will never be reused. This is the slowest mode of execution.
2680 2682 This mode can be used to recompute deltas (e.g. if the diff/delta
2681 2683 algorithm changes).
2682 2684 DELTAREUSEFULLADD
2683 2685 Revisions will be re-added as if they were new content. This is
2684 2686 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2685 2687 e.g. large file detection and handling.
2686 2688
2687 2689 Delta computation can be slow, so the choice of delta reuse policy can
2688 2690 significantly affect run time.
2689 2691
2690 2692 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2691 2693 two extremes. Deltas will be reused if they are appropriate. But if the
2692 2694 delta could choose a better revision, it will do so. This means if you
2693 2695 are converting a non-generaldelta revlog to a generaldelta revlog,
2694 2696 deltas will be recomputed if the delta's parent isn't a parent of the
2695 2697 revision.
2696 2698
2697 2699 In addition to the delta policy, the ``forcedeltabothparents``
2698 2700 argument controls whether to force computing deltas against both parents
2699 2701 for merges. When None, the destination revlog's current setting is kept.
2700 2702
2701 2703 If not None, `sidedatacompanion` is a callable that accepts two
2702 2704 arguments:
2703 2705
2704 2706 (srcrevlog, rev)
2705 2707
2706 2708 and returns a triplet that controls changes to sidedata content from the
2707 2709 old revision to the new clone result:
2708 2710
2709 2711 (dropall, filterout, update)
2710 2712
2711 2713 * if `dropall` is True, all sidedata should be dropped
2712 2714 * `filterout` is a set of sidedata keys that should be dropped
2713 2715 * `update` is a mapping of additional/new key -> value
2714 2716 """
2715 2717 if deltareuse not in self.DELTAREUSEALL:
2716 2718 raise ValueError(
2717 2719 _(b'value for deltareuse invalid: %s') % deltareuse
2718 2720 )
2719 2721
2720 2722 if len(destrevlog):
2721 2723 raise ValueError(_(b'destination revlog is not empty'))
2722 2724
2723 2725 if getattr(self, 'filteredrevs', None):
2724 2726 raise ValueError(_(b'source revlog has filtered revisions'))
2725 2727 if getattr(destrevlog, 'filteredrevs', None):
2726 2728 raise ValueError(_(b'destination revlog has filtered revisions'))
2727 2729
2728 2730 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2729 2731 # if possible.
2730 2732 oldlazydelta = destrevlog._lazydelta
2731 2733 oldlazydeltabase = destrevlog._lazydeltabase
2732 2734 oldamd = destrevlog._deltabothparents
2733 2735
2734 2736 try:
2735 2737 if deltareuse == self.DELTAREUSEALWAYS:
2736 2738 destrevlog._lazydeltabase = True
2737 2739 destrevlog._lazydelta = True
2738 2740 elif deltareuse == self.DELTAREUSESAMEREVS:
2739 2741 destrevlog._lazydeltabase = False
2740 2742 destrevlog._lazydelta = True
2741 2743 elif deltareuse == self.DELTAREUSENEVER:
2742 2744 destrevlog._lazydeltabase = False
2743 2745 destrevlog._lazydelta = False
2744 2746
2745 2747 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2746 2748
2747 2749 self._clone(
2748 2750 tr,
2749 2751 destrevlog,
2750 2752 addrevisioncb,
2751 2753 deltareuse,
2752 2754 forcedeltabothparents,
2753 2755 sidedatacompanion,
2754 2756 )
2755 2757
2756 2758 finally:
2757 2759 destrevlog._lazydelta = oldlazydelta
2758 2760 destrevlog._lazydeltabase = oldlazydeltabase
2759 2761 destrevlog._deltabothparents = oldamd
2760 2762
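The ``sidedatacompanion`` contract documented above reduces to returning ``(dropall, filterout, update)`` per revision. A hedged sketch; the sidedata keys here are purely illustrative:

    def example_companion(srcrevlog, rev):
        dropall = False                    # keep sidedata in general
        filterout = {b'stale-key'}         # illustrative key to drop
        update = {b'fresh-key': b'value'}  # illustrative key to add
        return (dropall, filterout, update)

    # srcrevlog.clone(tr, destrevlog,
    #                 deltareuse=srcrevlog.DELTAREUSEFULLADD,
    #                 sidedatacompanion=example_companion)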
2761 2763 def _clone(
2762 2764 self,
2763 2765 tr,
2764 2766 destrevlog,
2765 2767 addrevisioncb,
2766 2768 deltareuse,
2767 2769 forcedeltabothparents,
2768 2770 sidedatacompanion,
2769 2771 ):
2770 2772 """perform the core duty of `revlog.clone` after parameter processing"""
2771 2773 deltacomputer = deltautil.deltacomputer(destrevlog)
2772 2774 index = self.index
2773 2775 for rev in self:
2774 2776 entry = index[rev]
2775 2777
2776 2778 # Some classes override linkrev to take filtered revs into
2777 2779 # account. Use raw entry from index.
2778 2780 flags = entry[0] & 0xFFFF
2779 2781 linkrev = entry[4]
2780 2782 p1 = index[entry[5]][7]
2781 2783 p2 = index[entry[6]][7]
2782 2784 node = entry[7]
2783 2785
2784 2786 sidedataactions = (False, [], {})
2785 2787 if sidedatacompanion is not None:
2786 2788 sidedataactions = sidedatacompanion(self, rev)
2787 2789
2788 2790 # (Possibly) reuse the delta from the revlog if allowed and
2789 2791 # the revlog chunk is a delta.
2790 2792 cachedelta = None
2791 2793 rawtext = None
2792 2794 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2793 2795 dropall, filterout, update = sidedataactions
2794 2796 text, sidedata = self._revisiondata(rev)
2795 2797 if dropall:
2796 2798 sidedata = {}
2797 2799 for key in filterout:
2798 2800 sidedata.pop(key, None)
2799 2801 sidedata.update(update)
2800 2802 if not sidedata:
2801 2803 sidedata = None
2802 2804 destrevlog.addrevision(
2803 2805 text,
2804 2806 tr,
2805 2807 linkrev,
2806 2808 p1,
2807 2809 p2,
2808 2810 cachedelta=cachedelta,
2809 2811 node=node,
2810 2812 flags=flags,
2811 2813 deltacomputer=deltacomputer,
2812 2814 sidedata=sidedata,
2813 2815 )
2814 2816 else:
2815 2817 if destrevlog._lazydelta:
2816 2818 dp = self.deltaparent(rev)
2817 2819 if dp != nullrev:
2818 2820 cachedelta = (dp, bytes(self._chunk(rev)))
2819 2821
2820 2822 if not cachedelta:
2821 2823 rawtext = self.rawdata(rev)
2822 2824
2823 2825 ifh = destrevlog.opener(
2824 2826 destrevlog.indexfile, b'a+', checkambig=False
2825 2827 )
2826 2828 dfh = None
2827 2829 if not destrevlog._inline:
2828 2830 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2829 2831 try:
2830 2832 destrevlog._addrevision(
2831 2833 node,
2832 2834 rawtext,
2833 2835 tr,
2834 2836 linkrev,
2835 2837 p1,
2836 2838 p2,
2837 2839 flags,
2838 2840 cachedelta,
2839 2841 ifh,
2840 2842 dfh,
2841 2843 deltacomputer=deltacomputer,
2842 2844 )
2843 2845 finally:
2844 2846 if dfh:
2845 2847 dfh.close()
2846 2848 ifh.close()
2847 2849
2848 2850 if addrevisioncb:
2849 2851 addrevisioncb(self, rev, node)
2850 2852
2851 2853 def censorrevision(self, tr, censornode, tombstone=b''):
2852 2854 if (self.version & 0xFFFF) == REVLOGV0:
2853 2855 raise error.RevlogError(
2854 2856 _(b'cannot censor with version %d revlogs') % self.version
2855 2857 )
2856 2858
2857 2859 censorrev = self.rev(censornode)
2858 2860 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2859 2861
2860 2862 if len(tombstone) > self.rawsize(censorrev):
2861 2863 raise error.Abort(
2862 2864 _(b'censor tombstone must be no longer than censored data')
2863 2865 )
2864 2866
2865 2867 # Rewriting the revlog in place is hard. Our strategy for censoring is
2866 2868 # to create a new revlog, copy all revisions to it, then replace the
2867 2869 # revlogs on transaction close.
2868 2870
2869 2871 newindexfile = self.indexfile + b'.tmpcensored'
2870 2872 newdatafile = self.datafile + b'.tmpcensored'
2871 2873
2872 2874 # This is a bit dangerous. We could easily have a mismatch of state.
2873 2875 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2874 2876 newrl.version = self.version
2875 2877 newrl._generaldelta = self._generaldelta
2876 2878 newrl._io = self._io
2877 2879
2878 2880 for rev in self.revs():
2879 2881 node = self.node(rev)
2880 2882 p1, p2 = self.parents(node)
2881 2883
2882 2884 if rev == censorrev:
2883 2885 newrl.addrawrevision(
2884 2886 tombstone,
2885 2887 tr,
2886 2888 self.linkrev(censorrev),
2887 2889 p1,
2888 2890 p2,
2889 2891 censornode,
2890 2892 REVIDX_ISCENSORED,
2891 2893 )
2892 2894
2893 2895 if newrl.deltaparent(rev) != nullrev:
2894 2896 raise error.Abort(
2895 2897 _(
2896 2898 b'censored revision stored as delta; '
2897 2899 b'cannot censor'
2898 2900 ),
2899 2901 hint=_(
2900 2902 b'censoring of revlogs is not '
2901 2903 b'fully implemented; please report '
2902 2904 b'this bug'
2903 2905 ),
2904 2906 )
2905 2907 continue
2906 2908
2907 2909 if self.iscensored(rev):
2908 2910 if self.deltaparent(rev) != nullrev:
2909 2911 raise error.Abort(
2910 2912 _(
2911 2913 b'cannot censor due to censored '
2912 2914 b'revision having delta stored'
2913 2915 )
2914 2916 )
2915 2917 rawtext = self._chunk(rev)
2916 2918 else:
2917 2919 rawtext = self.rawdata(rev)
2918 2920
2919 2921 newrl.addrawrevision(
2920 2922 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2921 2923 )
2922 2924
2923 2925 tr.addbackup(self.indexfile, location=b'store')
2924 2926 if not self._inline:
2925 2927 tr.addbackup(self.datafile, location=b'store')
2926 2928
2927 2929 self.opener.rename(newrl.indexfile, self.indexfile)
2928 2930 if not self._inline:
2929 2931 self.opener.rename(newrl.datafile, self.datafile)
2930 2932
2931 2933 self.clearcaches()
2932 2934 self._loadindex()
2933 2935
2934 2936 def verifyintegrity(self, state):
2935 2937 """Verifies the integrity of the revlog.
2936 2938
2937 2939 Yields ``revlogproblem`` instances describing problems that are
2938 2940 found.
2939 2941 """
2940 2942 dd, di = self.checksize()
2941 2943 if dd:
2942 2944 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2943 2945 if di:
2944 2946 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2945 2947
2946 2948 version = self.version & 0xFFFF
2947 2949
2948 2950 # The verifier tells us what version revlog we should be.
2949 2951 if version != state[b'expectedversion']:
2950 2952 yield revlogproblem(
2951 2953 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2952 2954 % (self.indexfile, version, state[b'expectedversion'])
2953 2955 )
2954 2956
2955 2957 state[b'skipread'] = set()
2956 2958 state[b'safe_renamed'] = set()
2957 2959
2958 2960 for rev in self:
2959 2961 node = self.node(rev)
2960 2962
2961 2963 # Verify contents. 4 cases to care about:
2962 2964 #
2963 2965 # common: the most common case
2964 2966 # rename: with a rename
2965 2967 # meta: file content starts with b'\1\n', the metadata
2966 2968 # header defined in filelog.py, but without a rename
2967 2969 # ext: content stored externally
2968 2970 #
2969 2971 # More formally, their differences are shown below:
2970 2972 #
2971 2973 # | common | rename | meta | ext
2972 2974 # -------------------------------------------------------
2973 2975 # flags() | 0 | 0 | 0 | not 0
2974 2976 # renamed() | False | True | False | ?
2975 2977 # rawtext[0:2]=='\1\n'| False | True | True | ?
2976 2978 #
2977 2979 # "rawtext" means the raw text stored in revlog data, which
2978 2980 # could be retrieved by "rawdata(rev)". "text"
2979 2981 # mentioned below is "revision(rev)".
2980 2982 #
2981 2983 # There are 3 different lengths stored physically:
2982 2984 # 1. L1: rawsize, stored in revlog index
2983 2985 # 2. L2: len(rawtext), stored in revlog data
2984 2986 # 3. L3: len(text), stored in revlog data if flags==0, or
2985 2987 # possibly somewhere else if flags!=0
2986 2988 #
2987 2989 # L1 should be equal to L2. L3 could be different from them.
2988 2990 # "text" may or may not affect commit hash depending on flag
2989 2991 # processors (see flagutil.addflagprocessor).
2990 2992 #
2991 2993 # | common | rename | meta | ext
2992 2994 # -------------------------------------------------
2993 2995 # rawsize() | L1 | L1 | L1 | L1
2994 2996 # size() | L1 | L2-LM | L1(*) | L1 (?)
2995 2997 # len(rawtext) | L2 | L2 | L2 | L2
2996 2998 # len(text) | L2 | L2 | L2 | L3
2997 2999 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2998 3000 #
2999 3001 # LM: length of metadata, depending on rawtext
3000 3002 # (*): not ideal, see comment in filelog.size
3001 3003 # (?): could be "- len(meta)" if the resolved content has
3002 3004 # rename metadata
3003 3005 #
3004 3006 # Checks needed to be done:
3005 3007 # 1. length check: L1 == L2, in all cases.
3006 3008 # 2. hash check: depending on flag processor, we may need to
3007 3009 # use either "text" (external), or "rawtext" (in revlog).
3008 3010
3009 3011 try:
3010 3012 skipflags = state.get(b'skipflags', 0)
3011 3013 if skipflags:
3012 3014 skipflags &= self.flags(rev)
3013 3015
3014 3016 _verify_revision(self, skipflags, state, node)
3015 3017
3016 3018 l1 = self.rawsize(rev)
3017 3019 l2 = len(self.rawdata(node))
3018 3020
3019 3021 if l1 != l2:
3020 3022 yield revlogproblem(
3021 3023 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3022 3024 node=node,
3023 3025 )
3024 3026
3025 3027 except error.CensoredNodeError:
3026 3028 if state[b'erroroncensored']:
3027 3029 yield revlogproblem(
3028 3030 error=_(b'censored file data'), node=node
3029 3031 )
3030 3032 state[b'skipread'].add(node)
3031 3033 except Exception as e:
3032 3034 yield revlogproblem(
3033 3035 error=_(b'unpacking %s: %s')
3034 3036 % (short(node), stringutil.forcebytestr(e)),
3035 3037 node=node,
3036 3038 )
3037 3039 state[b'skipread'].add(node)
3038 3040
3039 3041 def storageinfo(
3040 3042 self,
3041 3043 exclusivefiles=False,
3042 3044 sharedfiles=False,
3043 3045 revisionscount=False,
3044 3046 trackedsize=False,
3045 3047 storedsize=False,
3046 3048 ):
3047 3049 d = {}
3048 3050
3049 3051 if exclusivefiles:
3050 3052 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3051 3053 if not self._inline:
3052 3054 d[b'exclusivefiles'].append((self.opener, self.datafile))
3053 3055
3054 3056 if sharedfiles:
3055 3057 d[b'sharedfiles'] = []
3056 3058
3057 3059 if revisionscount:
3058 3060 d[b'revisionscount'] = len(self)
3059 3061
3060 3062 if trackedsize:
3061 3063 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3062 3064
3063 3065 if storedsize:
3064 3066 d[b'storedsize'] = sum(
3065 3067 self.opener.stat(path).st_size for path in self.files()
3066 3068 )
3067 3069
3068 3070 return d
@@ -1,57 +1,60 b''
1 1 # constants.py - constants used for revlog logic
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 # Copyright 2018 Octobus <contact@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8 """Helper class to compute deltas stored inside revlogs"""
9 9
10 10 from __future__ import absolute_import
11 11
12 12 from ..interfaces import repository
13 13
14 14 # revlog header flags
15 15 REVLOGV0 = 0
16 16 REVLOGV1 = 1
17 17 # Dummy value until file format is finalized.
18 18 # Reminder: change the bounds check in revlog.__init__ when this is changed.
19 19 REVLOGV2 = 0xDEAD
20 20 # Shared across v1 and v2.
21 21 FLAG_INLINE_DATA = 1 << 16
22 22 # Only used by v1, implied by v2.
23 23 FLAG_GENERALDELTA = 1 << 17
24 24 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
25 25 REVLOG_DEFAULT_FORMAT = REVLOGV1
26 26 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
27 27 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
28 28 REVLOGV2_FLAGS = FLAG_INLINE_DATA
29 29
30 30 # revlog index flags
31 31
32 32 # For historical reasons, revlog's internal flags were exposed via the
33 33 # wire protocol and are even exposed in parts of the storage APIs.
34 34
35 35 # revision has censor metadata, must be verified
36 36 REVIDX_ISCENSORED = repository.REVISION_FLAG_CENSORED
37 37 # revision hash does not match data (narrowhg)
38 38 REVIDX_ELLIPSIS = repository.REVISION_FLAG_ELLIPSIS
39 39 # revision data is stored externally
40 40 REVIDX_EXTSTORED = repository.REVISION_FLAG_EXTSTORED
41 41 # revision data contains extra metadata not part of the official digest
42 42 REVIDX_SIDEDATA = repository.REVISION_FLAG_SIDEDATA
43 # revision changes files in a way that could affect copy tracing.
44 REVIDX_HASCOPIESINFO = repository.REVISION_FLAG_HASCOPIESINFO
43 45 REVIDX_DEFAULT_FLAGS = 0
44 46 # stable order in which flags need to be processed and their processors applied
45 47 REVIDX_FLAGS_ORDER = [
46 48 REVIDX_ISCENSORED,
47 49 REVIDX_ELLIPSIS,
48 50 REVIDX_EXTSTORED,
49 51 REVIDX_SIDEDATA,
52 REVIDX_HASCOPIESINFO,
50 53 ]
51 54
52 55 # bitmask for flags that could cause rawdata content change
53 56 REVIDX_RAWTEXT_CHANGING_FLAGS = (
54 57 REVIDX_ISCENSORED | REVIDX_EXTSTORED | REVIDX_SIDEDATA
55 58 )
56 59
57 60 SPARSE_REVLOG_MAX_CHAIN_LENGTH = 1000
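Since these are plain bit flags on the index entry, testing a revision for the new capability is a bitwise check. A short sketch, assuming ``rl`` is an open revlog and that the flag is importable from ``mercurial.revlogutils.constants``:

    from mercurial.revlogutils.constants import REVIDX_HASCOPIESINFO

    for rev in rl:
        if rl.flags(rev) & REVIDX_HASCOPIESINFO:
            # this revision carries data useful for copy tracing
            print('rev %d has copies info' % rev)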
@@ -1,192 +1,195 b''
1 1 # flagutils.py - code to deal with revlog flags and their processors
2 2 #
3 3 # Copyright 2016 Remi Chaintron <remi@fb.com>
4 4 # Copyright 2016-2019 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 from ..i18n import _
12 12
13 13 from .constants import (
14 14 REVIDX_DEFAULT_FLAGS,
15 15 REVIDX_ELLIPSIS,
16 16 REVIDX_EXTSTORED,
17 17 REVIDX_FLAGS_ORDER,
18 REVIDX_HASCOPIESINFO,
18 19 REVIDX_ISCENSORED,
19 20 REVIDX_RAWTEXT_CHANGING_FLAGS,
20 21 REVIDX_SIDEDATA,
21 22 )
22 23
23 24 from .. import error, util
24 25
25 26 # blanked usage of all the names to prevent pyflakes constraints
26 27 # We need these names available in the module for extensions.
27 28 REVIDX_ISCENSORED
28 29 REVIDX_ELLIPSIS
29 30 REVIDX_EXTSTORED
30 31 REVIDX_SIDEDATA
32 REVIDX_HASCOPIESINFO
31 33 REVIDX_DEFAULT_FLAGS
32 34 REVIDX_FLAGS_ORDER
33 35 REVIDX_RAWTEXT_CHANGING_FLAGS
34 36
35 37 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
36 38
37 39 # Store flag processors (cf. 'addflagprocessor()' to register)
38 40 flagprocessors = {
39 41 REVIDX_ISCENSORED: None,
42 REVIDX_HASCOPIESINFO: None,
40 43 }
41 44
42 45
43 46 def addflagprocessor(flag, processor):
44 47 """Register a flag processor on a revision data flag.
45 48
46 49 Invariant:
47 50 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
48 51 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
49 52 - Only one flag processor can be registered on a specific flag.
50 53 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
51 54 following signatures:
52 55 - (read) f(self, rawtext) -> text, bool, sidedata
53 56 - (write) f(self, text, sidedata) -> rawtext, bool
54 57 - (raw) f(self, rawtext) -> bool
55 58 "text" is presented to the user. "rawtext" is stored in revlog data, not
56 59 directly visible to the user.
57 60 The boolean returned by these transforms is used to determine whether
58 61 the returned text can be used for hash integrity checking. For example,
59 62 if "write" returns False, then "text" is used to generate hash. If
60 63 "write" returns True, that basically means "rawtext" returned by "write"
61 64 should be used to generate hash. Usually, "write" and "read" return
62 65 different booleans. And "raw" returns a same boolean as "write".
63 66
64 67 Note: The 'raw' transform is used for changegroup generation and in some
65 68 debug commands. In this case the transform only indicates whether the
66 69 contents can be used for hash integrity checks.
67 70 """
68 71 insertflagprocessor(flag, processor, flagprocessors)
69 72
70 73
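A hedged sketch of a processor tuple matching the call sites in ``_processflagsfunc`` below; the transforms are no-ops, and ``MYFLAG`` is assumed to be a flag already present in ``REVIDX_FLAGS_ORDER``:

    def _read(self, rawtext):
        # rawtext -> (text, validatehash, sidedata)
        return rawtext, True, {}

    def _write(self, text, sidedata):
        # text -> (rawtext, validatehash)
        return text, True

    def _raw(self, rawtext):
        # rawtext -> validatehash
        return True

    # addflagprocessor(MYFLAG, (_read, _write, _raw))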
71 74 def insertflagprocessor(flag, processor, flagprocessors):
72 75 if not flag & REVIDX_KNOWN_FLAGS:
73 76 msg = _(b"cannot register processor on unknown flag '%#x'.") % flag
74 77 raise error.ProgrammingError(msg)
75 78 if flag not in REVIDX_FLAGS_ORDER:
76 79 msg = _(b"flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % flag
77 80 raise error.ProgrammingError(msg)
78 81 if flag in flagprocessors:
79 82 msg = _(b"cannot register multiple processors on flag '%#x'.") % flag
80 83 raise error.Abort(msg)
81 84 flagprocessors[flag] = processor
82 85
83 86
84 87 def processflagswrite(revlog, text, flags, sidedata):
85 88 """Inspect revision data flags and applies write transformations defined
86 89 by registered flag processors.
87 90
88 91 ``text`` - the revision data to process
89 92 ``flags`` - the revision flags
90 93
91 94 This method processes the flags in the order (or reverse order if
92 95 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
93 96 flag processors registered for present flags. The order of flags defined
94 97 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
95 98
96 99 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
97 100 processed text and ``validatehash`` is a bool indicating whether the
98 101 returned text should be checked for hash integrity.
99 102 """
100 103 return _processflagsfunc(revlog, text, flags, b'write', sidedata=sidedata)[
101 104 :2
102 105 ]
103 106
104 107
105 108 def processflagsread(revlog, text, flags):
106 109 """Inspect revision data flags and applies read transformations defined
107 110 by registered flag processors.
108 111
109 112 ``text`` - the revision data to process
110 113 ``flags`` - the revision flags
113 116
114 117 This method processes the flags in the order (or reverse order if
115 118 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
116 119 flag processors registered for present flags. The order of flags defined
117 120 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
118 121
119 122 Returns a 3-tuple of ``(text, validatehash, sidedata)``: the processed
120 123 text, a bool indicating whether it should be checked for hash integrity,
121 124 and a dict of sidedata extracted by the read transforms.
122 125 """
123 126 return _processflagsfunc(revlog, text, flags, b'read')
124 127
125 128
126 129 def processflagsraw(revlog, text, flags):
127 130 """Inspect revision data flags to check is the content hash should be
128 131 validated.
129 132
130 133 ``text`` - the revision data to process
131 134 ``flags`` - the revision flags
132 135
133 136 This method processes the flags in the order (or reverse order if
134 137 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
135 138 flag processors registered for present flags. The order of flags defined
136 139 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
137 140
138 141 Returns a bool indicating whether the raw text should be checked for
139 142 hash integrity.
141 144 """
142 145 return _processflagsfunc(revlog, text, flags, b'raw')[1]
143 146
144 147
145 148 def _processflagsfunc(revlog, text, flags, operation, sidedata=None):
146 149 """internal function to process flag on a revlog
147 150
148 151 This function is private to this module; code should never need to call it
149 152 directly."""
150 153 # fast path: no flag processors will run
151 154 if flags == 0:
152 155 return text, True, {}
153 156 if operation not in (b'read', b'write', b'raw'):
154 157 raise error.ProgrammingError(_(b"invalid '%s' operation") % operation)
155 158 # Check all flags are known.
156 159 if flags & ~REVIDX_KNOWN_FLAGS:
157 160 raise revlog._flagserrorclass(
158 161 _(b"incompatible revision flag '%#x'")
159 162 % (flags & ~REVIDX_KNOWN_FLAGS)
160 163 )
161 164 validatehash = True
162 165 # Depending on the operation (read or write), the order might be
163 166 # reversed due to non-commutative transforms.
164 167 orderedflags = REVIDX_FLAGS_ORDER
165 168 if operation == b'write':
166 169 orderedflags = reversed(orderedflags)
167 170
168 171 outsidedata = {}
169 172 for flag in orderedflags:
170 173 # If a flagprocessor has been registered for a known flag, apply the
171 174 # related operation transform and update result tuple.
172 175 if flag & flags:
173 176 vhash = True
174 177
175 178 if flag not in revlog._flagprocessors:
176 179 message = _(b"missing processor for flag '%#x'") % flag
177 180 raise revlog._flagserrorclass(message)
178 181
179 182 processor = revlog._flagprocessors[flag]
180 183 if processor is not None:
181 184 readtransform, writetransform, rawtransform = processor
182 185
183 186 if operation == b'raw':
184 187 vhash = rawtransform(revlog, text)
185 188 elif operation == b'read':
186 189 text, vhash, s = readtransform(revlog, text)
187 190 outsidedata.update(s)
188 191 else: # write operation
189 192 text, vhash = writetransform(revlog, text, sidedata)
190 193 validatehash = validatehash and vhash
191 194
192 195 return text, validatehash, outsidedata
@@ -1,691 +1,698 b''
1 1 #testcases lfsremote-on lfsremote-off
2 2 #require serve no-reposimplestore no-chg
3 3
4 4 This test splits `hg serve` with and without using the extension into separate
5 5 tests cases. The tests are broken down as follows, where "LFS"/"No-LFS"
6 6 indicates whether or not there are commits that use an LFS file, and "D"/"E"
7 7 indicates whether or not the extension is loaded. The "X" cases are not tested
8 8 individually, because the lfs requirement causes the process to bail early if
9 9 the extension is disabled.
10 10
11 11 . Server
12 12 .
13 13 . No-LFS LFS
14 14 . +----------------------------+
15 15 . | || D | E | D | E |
16 16 . |---++=======================|
17 17 . C | D || N/A | #1 | X | #4 |
18 18 . l No +---++-----------------------|
19 19 . i LFS | E || #2 | #2 | X | #5 |
20 20 . e +---++-----------------------|
21 21 . n | D || X | X | X | X |
22 22 . t LFS |---++-----------------------|
23 23 . | E || #3 | #3 | X | #6 |
24 24 . |---++-----------------------+
25 25
26 26 make command server magic visible
27 27
28 28 #if windows
29 29 $ PYTHONPATH="$TESTDIR/../contrib;$PYTHONPATH"
30 30 #else
31 31 $ PYTHONPATH="$TESTDIR/../contrib:$PYTHONPATH"
32 32 #endif
33 33 $ export PYTHONPATH
34 34
35 35 $ hg init server
36 36 $ SERVER_REQUIRES="$TESTTMP/server/.hg/requires"
37 37
38 38 $ cat > $TESTTMP/debugprocessors.py <<EOF
39 39 > from mercurial import (
40 40 > cmdutil,
41 41 > commands,
42 42 > pycompat,
43 43 > registrar,
44 44 > )
45 45 > cmdtable = {}
46 46 > command = registrar.command(cmdtable)
47 47 > @command(b'debugprocessors', [], b'FILE')
48 48 > def debugprocessors(ui, repo, file_=None, **opts):
49 49 > opts = pycompat.byteskwargs(opts)
50 50 > opts[b'changelog'] = False
51 51 > opts[b'manifest'] = False
52 52 > opts[b'dir'] = False
53 53 > rl = cmdutil.openrevlog(repo, b'debugprocessors', file_, opts)
54 54 > for flag, proc in rl._flagprocessors.items():
55 55 > ui.status(b"registered processor '%#x'\n" % (flag))
56 56 > EOF
57 57
58 58 Skip the experimental.changegroup3=True config. Failure to agree on this comes
59 59 first, and causes an "abort: no common changegroup version" if the extension is
60 60 only loaded on one side. If that *is* enabled, the subsequent failure is "abort:
61 61 missing processor for flag '0x2000'!" if the extension is only loaded on one side
62 62 (possibly also masked by the Internal Server Error message).
63 63 $ cat >> $HGRCPATH <<EOF
64 64 > [extensions]
65 65 > debugprocessors = $TESTTMP/debugprocessors.py
66 66 > [experimental]
67 67 > lfs.disableusercache = True
68 68 > lfs.worker-enable = False
69 69 > [lfs]
70 70 > threshold=10
71 71 > [web]
72 72 > allow_push=*
73 73 > push_ssl=False
74 74 > EOF
75 75
76 76 $ cp $HGRCPATH $HGRCPATH.orig
77 77
78 78 #if lfsremote-on
79 79 $ hg --config extensions.lfs= -R server \
80 80 > serve -p $HGPORT -d --pid-file=hg.pid --errorlog=$TESTTMP/errors.log
81 81 #else
82 82 $ hg --config extensions.lfs=! -R server \
83 83 > serve -p $HGPORT -d --pid-file=hg.pid --errorlog=$TESTTMP/errors.log
84 84 #endif
85 85
86 86 $ cat hg.pid >> $DAEMON_PIDS
87 87 $ hg clone -q http://localhost:$HGPORT client
88 88 $ grep 'lfs' client/.hg/requires $SERVER_REQUIRES
89 89 [1]
90 90
91 91 This trivial repo will force commandserver to load the extension, but not call
92 92 reposetup() on another repo actually being operated on. This gives coverage
93 93 that wrapper functions are not assuming reposetup() was called.
94 94
95 95 $ hg init $TESTTMP/cmdservelfs
96 96 $ cat >> $TESTTMP/cmdservelfs/.hg/hgrc << EOF
97 97 > [extensions]
98 98 > lfs =
99 99 > EOF
100 100
101 101 --------------------------------------------------------------------------------
102 102 Case #1: client with non-lfs content and the extension disabled; server with
103 103 non-lfs content, and the extension enabled.
104 104
105 105 $ cd client
106 106 $ echo 'non-lfs' > nonlfs.txt
107 107 >>> from __future__ import absolute_import
108 108 >>> from hgclient import check, readchannel, runcommand
109 109 >>> @check
110 110 ... def diff(server):
111 111 ... readchannel(server)
112 112 ... # run an arbitrary command in the repo with the extension loaded
113 113 ... runcommand(server, [b'id', b'-R', b'../cmdservelfs'])
114 114 ... # now run a command in a repo without the extension to ensure that
115 115 ... # files are added safely..
116 116 ... runcommand(server, [b'ci', b'-Aqm', b'non-lfs'])
117 117 ... # .. and that scmutil.prefetchfiles() safely no-ops..
118 118 ... runcommand(server, [b'diff', b'-r', b'.~1'])
119 119 ... # .. and that debugupgraderepo safely no-ops.
120 120 ... runcommand(server, [b'debugupgraderepo', b'-q', b'--run'])
121 121 *** runcommand id -R ../cmdservelfs
122 122 000000000000 tip
123 123 *** runcommand ci -Aqm non-lfs
124 124 *** runcommand diff -r .~1
125 125 diff -r 000000000000 nonlfs.txt
126 126 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
127 127 +++ b/nonlfs.txt Thu Jan 01 00:00:00 1970 +0000
128 128 @@ -0,0 +1,1 @@
129 129 +non-lfs
130 130 *** runcommand debugupgraderepo -q --run
131 131 upgrade will perform the following actions:
132 132
133 133 requirements
134 134 preserved: dotencode, fncache, generaldelta, revlogv1, sparserevlog, store
135 135
136 136
137 137 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
138 138 [1]
139 139
140 140 #if lfsremote-on
141 141
142 142 $ hg push -q
143 143 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
144 144 [1]
145 145
146 146 $ hg clone -q http://localhost:$HGPORT $TESTTMP/client1_clone
147 147 $ grep 'lfs' $TESTTMP/client1_clone/.hg/requires $SERVER_REQUIRES
148 148 [1]
149 149
150 150 $ hg init $TESTTMP/client1_pull
151 151 $ hg -R $TESTTMP/client1_pull pull -q http://localhost:$HGPORT
152 152 $ grep 'lfs' $TESTTMP/client1_pull/.hg/requires $SERVER_REQUIRES
153 153 [1]
154 154
155 155 $ hg identify http://localhost:$HGPORT
156 156 d437e1d24fbd
157 157
158 158 #endif
159 159
160 160 --------------------------------------------------------------------------------
161 161 Case #2: client with non-lfs content and the extension enabled; server with
162 162 non-lfs content, and the extension state controlled by #testcases.
163 163
164 164 $ cat >> $HGRCPATH <<EOF
165 165 > [extensions]
166 166 > lfs =
167 167 > EOF
168 168 $ echo 'non-lfs' > nonlfs2.txt
169 169 $ hg ci -Aqm 'non-lfs file with lfs client'
170 170
171 171 Since no lfs content has been added yet, the push is allowed, even when the
172 172 extension is not enabled remotely.
173 173
174 174 $ hg push -q
175 175 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
176 176 [1]
177 177
178 178 $ hg clone -q http://localhost:$HGPORT $TESTTMP/client2_clone
179 179 $ grep 'lfs' $TESTTMP/client2_clone/.hg/requires $SERVER_REQUIRES
180 180 [1]
181 181
182 182 $ hg init $TESTTMP/client2_pull
183 183 $ hg -R $TESTTMP/client2_pull pull -q http://localhost:$HGPORT
184 184 $ grep 'lfs' $TESTTMP/client2_pull/.hg/requires $SERVER_REQUIRES
185 185 [1]
186 186
187 187 $ hg identify http://localhost:$HGPORT
188 188 1477875038c6
189 189
190 190 --------------------------------------------------------------------------------
191 191 Case #3: client with lfs content and the extension enabled; server with
192 192 non-lfs content, and the extension state controlled by #testcases. The server
193 193 should have an 'lfs' requirement after it picks up its first commit with a blob.
194 194
195 195 $ echo 'this is a big lfs file' > lfs.bin
196 196 $ hg ci -Aqm 'lfs'
197 197 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
198 198 .hg/requires:lfs
199 199
200 200 #if lfsremote-off
201 201 $ hg push -q
202 202 abort: required features are not supported in the destination: lfs
203 203 (enable the lfs extension on the server)
204 204 [255]
205 205 #else
206 206 $ hg push -q
207 207 #endif
208 208 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
209 209 .hg/requires:lfs
210 210 $TESTTMP/server/.hg/requires:lfs (lfsremote-on !)
211 211
212 212 $ hg clone -q http://localhost:$HGPORT $TESTTMP/client3_clone
213 213 $ grep 'lfs' $TESTTMP/client3_clone/.hg/requires $SERVER_REQUIRES || true
214 214 $TESTTMP/client3_clone/.hg/requires:lfs (lfsremote-on !)
215 215 $TESTTMP/server/.hg/requires:lfs (lfsremote-on !)
216 216
217 217 $ hg init $TESTTMP/client3_pull
218 218 $ hg -R $TESTTMP/client3_pull pull -q http://localhost:$HGPORT
219 219 $ grep 'lfs' $TESTTMP/client3_pull/.hg/requires $SERVER_REQUIRES || true
220 220 $TESTTMP/client3_pull/.hg/requires:lfs (lfsremote-on !)
221 221 $TESTTMP/server/.hg/requires:lfs (lfsremote-on !)
222 222
223 223 Test that the commit/changegroup requirement check hook can be run multiple
224 224 times.
225 225
226 226 $ hg clone -qr 0 http://localhost:$HGPORT $TESTTMP/cmdserve_client3
227 227
228 228 $ cd ../cmdserve_client3
229 229
230 230 >>> from __future__ import absolute_import
231 231 >>> from hgclient import check, readchannel, runcommand
232 232 >>> @check
233 233 ... def addrequirement(server):
234 234 ... readchannel(server)
235 235 ... # change the repo in a way that adds the lfs requirement
236 236 ... runcommand(server, [b'pull', b'-qu'])
237 237 ... # Now cause the requirement adding hook to fire again, without going
238 238 ... # through reposetup() again.
239 239 ... with open('file.txt', 'wb') as fp:
240 240 ... fp.write(b'data')
241 241 ... runcommand(server, [b'ci', b'-Aqm', b'non-lfs'])
242 242 *** runcommand pull -qu
243 243 *** runcommand ci -Aqm non-lfs
244 244
245 245 $ cd ../client
246 246
247 247 The difference here is the push failed above when the extension isn't
248 248 enabled on the server.
249 249 $ hg identify http://localhost:$HGPORT
250 250 8374dc4052cb (lfsremote-on !)
251 251 1477875038c6 (lfsremote-off !)
252 252
253 253 Don't bother testing the lfsremote-off cases- the server won't be able
254 254 to launch if there's lfs content and the extension is disabled.
255 255
256 256 #if lfsremote-on
257 257
258 258 --------------------------------------------------------------------------------
259 259 Case #4: client with non-lfs content and the extension disabled; server with
260 260 lfs content, and the extension enabled.
261 261
262 262 $ cat >> $HGRCPATH <<EOF
263 263 > [extensions]
264 264 > lfs = !
265 265 > EOF
266 266
267 267 $ hg init $TESTTMP/client4
268 268 $ cd $TESTTMP/client4
269 269 $ cat >> .hg/hgrc <<EOF
270 270 > [paths]
271 271 > default = http://localhost:$HGPORT
272 272 > EOF
273 273 $ echo 'non-lfs' > nonlfs2.txt
274 274 $ hg ci -Aqm 'non-lfs'
275 275 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
276 276 $TESTTMP/server/.hg/requires:lfs
277 277
278 278 $ hg push -q --force
279 279 warning: repository is unrelated
280 280 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
281 281 $TESTTMP/server/.hg/requires:lfs
282 282
283 283 $ hg clone http://localhost:$HGPORT $TESTTMP/client4_clone
284 284 (remote is using large file support (lfs), but it is explicitly disabled in the local configuration)
285 285 abort: repository requires features unknown to this Mercurial: lfs!
286 286 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
287 287 [255]
288 288 $ grep 'lfs' $TESTTMP/client4_clone/.hg/requires $SERVER_REQUIRES
289 289 grep: $TESTTMP/client4_clone/.hg/requires: $ENOENT$
290 290 $TESTTMP/server/.hg/requires:lfs
291 291 [2]
292 292
293 293 TODO: fail more gracefully.
294 294
295 295 $ hg init $TESTTMP/client4_pull
296 296 $ hg -R $TESTTMP/client4_pull pull http://localhost:$HGPORT
297 297 pulling from http://localhost:$HGPORT/
298 298 requesting all changes
299 299 remote: abort: no common changegroup version
300 300 abort: pull failed on remote
301 301 [255]
302 302 $ grep 'lfs' $TESTTMP/client4_pull/.hg/requires $SERVER_REQUIRES
303 303 $TESTTMP/server/.hg/requires:lfs
304 304
305 305 $ hg identify http://localhost:$HGPORT
306 306 03b080fa9d93
307 307
308 308 --------------------------------------------------------------------------------
309 309 Case #5: client with non-lfs content and the extension enabled; server with
310 310 lfs content, and the extension enabled.
311 311
312 312 $ cat >> $HGRCPATH <<EOF
313 313 > [extensions]
314 314 > lfs =
315 315 > EOF
316 316 $ echo 'non-lfs' > nonlfs3.txt
317 317 $ hg ci -Aqm 'non-lfs file with lfs client'
318 318
319 319 $ hg push -q
320 320 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
321 321 $TESTTMP/server/.hg/requires:lfs
322 322
323 323 $ hg clone -q http://localhost:$HGPORT $TESTTMP/client5_clone
324 324 $ grep 'lfs' $TESTTMP/client5_clone/.hg/requires $SERVER_REQUIRES
325 325 $TESTTMP/client5_clone/.hg/requires:lfs
326 326 $TESTTMP/server/.hg/requires:lfs
327 327
328 328 $ hg init $TESTTMP/client5_pull
329 329 $ hg -R $TESTTMP/client5_pull pull -q http://localhost:$HGPORT
330 330 $ grep 'lfs' $TESTTMP/client5_pull/.hg/requires $SERVER_REQUIRES
331 331 $TESTTMP/client5_pull/.hg/requires:lfs
332 332 $TESTTMP/server/.hg/requires:lfs
333 333
334 334 $ hg identify http://localhost:$HGPORT
335 335 c729025cc5e3
336 336
337 337 $ mv $HGRCPATH $HGRCPATH.tmp
338 338 $ cp $HGRCPATH.orig $HGRCPATH
339 339
340 340 >>> from __future__ import absolute_import
341 341 >>> from hgclient import bprint, check, readchannel, runcommand, stdout
342 342 >>> @check
343 343 ... def checkflags(server):
344 344 ... readchannel(server)
345 345 ... bprint(b'')
346 346 ... bprint(b'# LFS required- both lfs and non-lfs revlogs have 0x2000 flag')
347 347 ... stdout.flush()
348 348 ... runcommand(server, [b'debugprocessors', b'lfs.bin', b'-R',
349 349 ... b'../server'])
350 350 ... runcommand(server, [b'debugprocessors', b'nonlfs2.txt', b'-R',
351 351 ... b'../server'])
352 352 ... runcommand(server, [b'config', b'extensions', b'--cwd',
353 353 ... b'../server'])
354 354 ...
355 355 ... bprint(b"\n# LFS not enabled- revlogs don't have 0x2000 flag")
356 356 ... stdout.flush()
357 357 ... runcommand(server, [b'debugprocessors', b'nonlfs3.txt'])
358 358 ... runcommand(server, [b'config', b'extensions'])
359 359
360 360 # LFS required- both lfs and non-lfs revlogs have 0x2000 flag
361 361 *** runcommand debugprocessors lfs.bin -R ../server
362 362 registered processor '0x8000'
363 registered processor '0x800'
363 364 registered processor '0x2000'
364 365 *** runcommand debugprocessors nonlfs2.txt -R ../server
365 366 registered processor '0x8000'
367 registered processor '0x800'
366 368 registered processor '0x2000'
367 369 *** runcommand config extensions --cwd ../server
368 370 extensions.debugprocessors=$TESTTMP/debugprocessors.py
369 371 extensions.lfs=
370 372
371 373 # LFS not enabled- revlogs don't have 0x2000 flag
372 374 *** runcommand debugprocessors nonlfs3.txt
373 375 registered processor '0x8000'
376 registered processor '0x800'
374 377 *** runcommand config extensions
375 378 extensions.debugprocessors=$TESTTMP/debugprocessors.py
376 379
377 380 $ rm $HGRCPATH
378 381 $ mv $HGRCPATH.tmp $HGRCPATH
379 382
380 383 $ hg clone $TESTTMP/client $TESTTMP/nonlfs -qr 0 --config extensions.lfs=
381 384 $ cat >> $TESTTMP/nonlfs/.hg/hgrc <<EOF
382 385 > [extensions]
383 386 > lfs = !
384 387 > EOF
385 388
386 389 >>> from __future__ import absolute_import, print_function
387 390 >>> from hgclient import bprint, check, readchannel, runcommand, stdout
388 391 >>> @check
389 392 ... def checkflags2(server):
390 393 ... readchannel(server)
391 394 ... bprint(b'')
392 395 ... bprint(b'# LFS enabled- both lfs and non-lfs revlogs have 0x2000 flag')
393 396 ... stdout.flush()
394 397 ... runcommand(server, [b'debugprocessors', b'lfs.bin', b'-R',
395 398 ... b'../server'])
396 399 ... runcommand(server, [b'debugprocessors', b'nonlfs2.txt', b'-R',
397 400 ... b'../server'])
398 401 ... runcommand(server, [b'config', b'extensions', b'--cwd',
399 402 ... b'../server'])
400 403 ...
401 404 ... bprint(b'\n# LFS enabled without requirement- revlogs have 0x2000 flag')
402 405 ... stdout.flush()
403 406 ... runcommand(server, [b'debugprocessors', b'nonlfs3.txt'])
404 407 ... runcommand(server, [b'config', b'extensions'])
405 408 ...
406 409 ... bprint(b"\n# LFS disabled locally- revlogs don't have 0x2000 flag")
407 410 ... stdout.flush()
408 411 ... runcommand(server, [b'debugprocessors', b'nonlfs.txt', b'-R',
409 412 ... b'../nonlfs'])
410 413 ... runcommand(server, [b'config', b'extensions', b'--cwd',
411 414 ... b'../nonlfs'])
412 415
413 416 # LFS enabled- both lfs and non-lfs revlogs have 0x2000 flag
414 417 *** runcommand debugprocessors lfs.bin -R ../server
415 418 registered processor '0x8000'
419 registered processor '0x800'
416 420 registered processor '0x2000'
417 421 *** runcommand debugprocessors nonlfs2.txt -R ../server
418 422 registered processor '0x8000'
423 registered processor '0x800'
419 424 registered processor '0x2000'
420 425 *** runcommand config extensions --cwd ../server
421 426 extensions.debugprocessors=$TESTTMP/debugprocessors.py
422 427 extensions.lfs=
423 428
424 429 # LFS enabled without requirement- revlogs have 0x2000 flag
425 430 *** runcommand debugprocessors nonlfs3.txt
426 431 registered processor '0x8000'
432 registered processor '0x800'
427 433 registered processor '0x2000'
428 434 *** runcommand config extensions
429 435 extensions.debugprocessors=$TESTTMP/debugprocessors.py
430 436 extensions.lfs=
431 437
432 438 # LFS disabled locally- revlogs don't have 0x2000 flag
433 439 *** runcommand debugprocessors nonlfs.txt -R ../nonlfs
434 440 registered processor '0x8000'
441 registered processor '0x800'
435 442 *** runcommand config extensions --cwd ../nonlfs
436 443 extensions.debugprocessors=$TESTTMP/debugprocessors.py
437 444 extensions.lfs=!
438 445
439 446 --------------------------------------------------------------------------------
440 447 Case #6: client with lfs content and the extension enabled; server with
441 448 lfs content, and the extension enabled.
442 449
443 450 $ echo 'this is another lfs file' > lfs2.txt
444 451 $ hg ci -Aqm 'lfs file with lfs client'
445 452
446 453 $ hg --config paths.default= push -v http://localhost:$HGPORT
447 454 pushing to http://localhost:$HGPORT/
448 455 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
449 456 searching for changes
450 457 remote has heads on branch 'default' that are not known locally: 8374dc4052cb
451 458 lfs: uploading a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de (25 bytes)
452 459 lfs: processed: a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de
453 460 lfs: uploaded 1 files (25 bytes)
454 461 1 changesets found
455 462 uncompressed size of bundle content:
456 463 206 (changelog)
457 464 172 (manifests)
458 465 275 lfs2.txt
459 466 remote: adding changesets
460 467 remote: adding manifests
461 468 remote: adding file changes
462 469 remote: added 1 changesets with 1 changes to 1 files
463 470 $ grep 'lfs' .hg/requires $SERVER_REQUIRES
464 471 .hg/requires:lfs
465 472 $TESTTMP/server/.hg/requires:lfs
466 473
467 474 $ hg clone -q http://localhost:$HGPORT $TESTTMP/client6_clone
468 475 $ grep 'lfs' $TESTTMP/client6_clone/.hg/requires $SERVER_REQUIRES
469 476 $TESTTMP/client6_clone/.hg/requires:lfs
470 477 $TESTTMP/server/.hg/requires:lfs
471 478
472 479 $ hg init $TESTTMP/client6_pull
473 480 $ hg -R $TESTTMP/client6_pull pull -u -v http://localhost:$HGPORT
474 481 pulling from http://localhost:$HGPORT/
475 482 requesting all changes
476 483 adding changesets
477 484 adding manifests
478 485 adding file changes
479 486 calling hook pretxnchangegroup.lfs: hgext.lfs.checkrequireslfs
480 487 added 6 changesets with 5 changes to 5 files (+1 heads)
481 488 new changesets d437e1d24fbd:d3b84d50eacb
482 489 resolving manifests
483 490 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
484 491 lfs: downloading a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de (25 bytes)
485 492 lfs: processed: a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de
486 493 lfs: downloaded 1 files (25 bytes)
487 494 getting lfs2.txt
488 495 lfs: found a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de in the local lfs store
489 496 getting nonlfs2.txt
490 497 getting nonlfs3.txt
491 498 3 files updated, 0 files merged, 0 files removed, 0 files unresolved
492 499 updated to "d3b84d50eacb: lfs file with lfs client"
493 500 1 other heads for branch "default"
494 501 (sent 3 HTTP requests and * bytes; received * bytes in responses) (glob)
495 502 $ grep 'lfs' $TESTTMP/client6_pull/.hg/requires $SERVER_REQUIRES
496 503 $TESTTMP/client6_pull/.hg/requires:lfs
497 504 $TESTTMP/server/.hg/requires:lfs
498 505
499 506 $ hg identify http://localhost:$HGPORT
500 507 d3b84d50eacb
501 508
502 509 --------------------------------------------------------------------------------
503 510 Misc: process dies early if a requirement exists and the extension is disabled
504 511
505 512 $ hg --config extensions.lfs=! summary
506 513 abort: repository requires features unknown to this Mercurial: lfs!
507 514 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
508 515 [255]
509 516
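That early abort is Mercurial's requirements gate: opening the repository compares the entries in .hg/requires against the feature set the running process supports, and any unknown entry stops everything before data is touched. A minimal sketch of the idea, not Mercurial's actual code:

  def checkrequirements(requirespath, supported):
      with open(requirespath, 'rb') as fp:
          required = {line.strip() for line in fp if line.strip()}
      missing = required - supported
      if missing:
          raise RuntimeError(
              'repository requires features unknown to this Mercurial: %s'
              % ', '.join(sorted(m.decode() for m in missing)))

  # with lfs in .hg/requires but the extension disabled, 'lfs' is not
  # in the supported set, so the repository refuses to open
  checkrequirements('.hg/requires', {b'revlogv1', b'store', b'fncache'})
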
510 517 $ echo 'this is an lfs file' > $TESTTMP/client6_clone/lfspair1.bin
511 518 $ echo 'this is an lfs file too' > $TESTTMP/client6_clone/lfspair2.bin
512 519 $ hg -R $TESTTMP/client6_clone ci -Aqm 'add lfs pair'
513 520 $ hg -R $TESTTMP/client6_clone push -q
514 521
515 522 $ hg clone -qU http://localhost:$HGPORT $TESTTMP/bulkfetch
516 523
517 524 Cat doesn't prefetch unless data is needed (e.g. '-T {rawdata}' doesn't need it)
518 525
519 526 $ hg --cwd $TESTTMP/bulkfetch cat -vr tip lfspair1.bin -T '{rawdata}\n{path}\n'
520 527 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
521 528 version https://git-lfs.github.com/spec/v1
522 529 oid sha256:cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782
523 530 size 20
524 531 x-is-binary 0
525 532
526 533 lfspair1.bin
527 534
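The {rawdata} keyword above prints the git-lfs pointer exactly as it sits in the filelog, which is why nothing is downloaded. A small sketch of parsing that pointer format; the real parser lives in hgext/lfs/pointer.py, this one is just illustrative:

  def parsepointer(rawdata):
      fields = {}
      for line in rawdata.splitlines():
          key, _, value = line.partition(b' ')
          fields[key] = value
      assert fields[b'version'] == b'https://git-lfs.github.com/spec/v1'
      oid = fields[b'oid'].split(b':', 1)[1]  # b'sha256:<hex>' -> hex digest
      return oid, int(fields[b'size'])

  raw = (b'version https://git-lfs.github.com/spec/v1\n'
         b'oid sha256:cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782\n'
         b'size 20\n'
         b'x-is-binary 0\n')
  oid, size = parsepointer(raw)  # -> (b'cf1b2787...', 20)
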
528 535 $ hg --cwd $TESTTMP/bulkfetch cat -vr tip lfspair1.bin -T json
529 536 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
530 537 [lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
531 538 lfs: downloading cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 (20 bytes)
532 539 lfs: processed: cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782
533 540 lfs: downloaded 1 files (20 bytes)
534 541 lfs: found cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 in the local lfs store
535 542
536 543 {
537 544 "data": "this is an lfs file\n",
538 545 "path": "lfspair1.bin",
539 546 "rawdata": "version https://git-lfs.github.com/spec/v1\noid sha256:cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782\nsize 20\nx-is-binary 0\n"
540 547 }
541 548 ]
542 549
543 550 $ rm -r $TESTTMP/bulkfetch/.hg/store/lfs
544 551
545 552 $ hg --cwd $TESTTMP/bulkfetch cat -vr tip lfspair1.bin -T '{data}\n'
546 553 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
547 554 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
548 555 lfs: downloading cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 (20 bytes)
549 556 lfs: processed: cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782
550 557 lfs: downloaded 1 files (20 bytes)
551 558 lfs: found cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 in the local lfs store
552 559 this is an lfs file
553 560
554 561 $ hg --cwd $TESTTMP/bulkfetch cat -vr tip lfspair2.bin
555 562 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
556 563 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
557 564 lfs: downloading d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e (24 bytes)
558 565 lfs: processed: d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e
559 566 lfs: downloaded 1 files (24 bytes)
560 567 lfs: found d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e in the local lfs store
561 568 this is an lfs file too
562 569
563 570 Export will prefetch all needed files across all needed revisions
564 571
565 572 $ rm -r $TESTTMP/bulkfetch/.hg/store/lfs
566 573 $ hg -R $TESTTMP/bulkfetch -v export -r 0:tip -o all.export
567 574 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
568 575 exporting patches:
569 576 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
570 577 lfs: need to transfer 4 objects (92 bytes)
571 578 lfs: downloading a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de (25 bytes)
572 579 lfs: processed: a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de
573 580 lfs: downloading bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc (23 bytes)
574 581 lfs: processed: bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc
575 582 lfs: downloading cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 (20 bytes)
576 583 lfs: processed: cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782
577 584 lfs: downloading d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e (24 bytes)
578 585 lfs: processed: d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e
579 586 lfs: downloaded 4 files (92 bytes)
580 587 all.export
581 588 lfs: found bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc in the local lfs store
582 589 lfs: found a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de in the local lfs store
583 590 lfs: found cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 in the local lfs store
584 591 lfs: found d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e in the local lfs store
585 592
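Note the single "need to transfer 4 objects (92 bytes)" line: the revisions are walked once, every missing blob is collected, and one batch request fetches them all rather than one round trip per file. A rough sketch of that shape, with hypothetical helper names standing in for the extension's internals:

  def prefetch(repo, revs, localstore, remotestore):
      missing = []
      for rev in revs:
          ctx = repo[rev]
          for path in ctx.files():
              if path not in ctx:
                  continue  # removed in this revision
              pointer = lfspointer(ctx[path])  # hypothetical helper
              if pointer is not None and not localstore.has(pointer.oid()):
                  missing.append(pointer)
      if missing:
          # one batch: "lfs: need to transfer N objects (M bytes)"
          remotestore.readbatch(missing, localstore)
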
586 593 Export with selected files is used with `extdiff --patch`
587 594
588 595 $ rm -r $TESTTMP/bulkfetch/.hg/store/lfs
589 596 $ hg --config extensions.extdiff= \
590 597 > -R $TESTTMP/bulkfetch -v extdiff -r 2:tip --patch $TESTTMP/bulkfetch/lfs.bin
591 598 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
592 599 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
593 600 lfs: downloading bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc (23 bytes)
594 601 lfs: processed: bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc
595 602 lfs: downloaded 1 files (23 bytes)
596 603 */hg-8374dc4052cb.patch (glob)
597 604 lfs: found bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc in the local lfs store
598 605 */hg-9640b57e77b1.patch (glob)
599 606 --- */hg-8374dc4052cb.patch * (glob)
600 607 +++ */hg-9640b57e77b1.patch * (glob)
601 608 @@ -2,12 +2,7 @@
602 609 # User test
603 610 # Date 0 0
604 611 # Thu Jan 01 00:00:00 1970 +0000
605 612 -# Node ID 8374dc4052cbd388e79d9dc4ddb29784097aa354
606 613 -# Parent 1477875038c60152e391238920a16381c627b487
607 614 -lfs
608 615 +# Node ID 9640b57e77b14c3a0144fb4478b6cc13e13ea0d1
609 616 +# Parent d3b84d50eacbd56638e11abce6b8616aaba54420
610 617 +add lfs pair
611 618
612 619 -diff -r 1477875038c6 -r 8374dc4052cb lfs.bin
613 620 ---- /dev/null Thu Jan 01 00:00:00 1970 +0000
614 621 -+++ b/lfs.bin Thu Jan 01 00:00:00 1970 +0000
615 622 -@@ -0,0 +1,1 @@
616 623 -+this is a big lfs file
617 624 cleaning up temp directory
618 625 [1]
619 626
620 627 Diff will prefetch files
621 628
622 629 $ rm -r $TESTTMP/bulkfetch/.hg/store/lfs
623 630 $ hg -R $TESTTMP/bulkfetch -v diff -r 2:tip
624 631 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
625 632 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
626 633 lfs: need to transfer 4 objects (92 bytes)
627 634 lfs: downloading a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de (25 bytes)
628 635 lfs: processed: a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de
629 636 lfs: downloading bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc (23 bytes)
630 637 lfs: processed: bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc
631 638 lfs: downloading cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 (20 bytes)
632 639 lfs: processed: cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782
633 640 lfs: downloading d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e (24 bytes)
634 641 lfs: processed: d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e
635 642 lfs: downloaded 4 files (92 bytes)
636 643 lfs: found bed80f00180ac404b843628ab56a1c1984d6145c391cd1628a7dd7d2598d71fc in the local lfs store
637 644 lfs: found a82f1c5cea0d40e3bb3a849686bb4e6ae47ca27e614de55c1ed0325698ef68de in the local lfs store
638 645 lfs: found cf1b2787b74e66547d931b6ebe28ff63303e803cb2baa14a8f57c4383d875782 in the local lfs store
639 646 lfs: found d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e in the local lfs store
640 647 diff -r 8374dc4052cb -r 9640b57e77b1 lfs.bin
641 648 --- a/lfs.bin Thu Jan 01 00:00:00 1970 +0000
642 649 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
643 650 @@ -1,1 +0,0 @@
644 651 -this is a big lfs file
645 652 diff -r 8374dc4052cb -r 9640b57e77b1 lfs2.txt
646 653 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
647 654 +++ b/lfs2.txt Thu Jan 01 00:00:00 1970 +0000
648 655 @@ -0,0 +1,1 @@
649 656 +this is another lfs file
650 657 diff -r 8374dc4052cb -r 9640b57e77b1 lfspair1.bin
651 658 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
652 659 +++ b/lfspair1.bin Thu Jan 01 00:00:00 1970 +0000
653 660 @@ -0,0 +1,1 @@
654 661 +this is an lfs file
655 662 diff -r 8374dc4052cb -r 9640b57e77b1 lfspair2.bin
656 663 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
657 664 +++ b/lfspair2.bin Thu Jan 01 00:00:00 1970 +0000
658 665 @@ -0,0 +1,1 @@
659 666 +this is an lfs file too
660 667 diff -r 8374dc4052cb -r 9640b57e77b1 nonlfs.txt
661 668 --- a/nonlfs.txt Thu Jan 01 00:00:00 1970 +0000
662 669 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
663 670 @@ -1,1 +0,0 @@
664 671 -non-lfs
665 672 diff -r 8374dc4052cb -r 9640b57e77b1 nonlfs3.txt
666 673 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
667 674 +++ b/nonlfs3.txt Thu Jan 01 00:00:00 1970 +0000
668 675 @@ -0,0 +1,1 @@
669 676 +non-lfs
670 677
671 678 Only the files required by diff are prefetched
672 679
673 680 $ rm -r $TESTTMP/bulkfetch/.hg/store/lfs
674 681 $ hg -R $TESTTMP/bulkfetch -v diff -r 2:tip $TESTTMP/bulkfetch/lfspair2.bin
675 682 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
676 683 lfs: assuming remote store: http://localhost:$HGPORT/.git/info/lfs
677 684 lfs: downloading d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e (24 bytes)
678 685 lfs: processed: d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e
679 686 lfs: downloaded 1 files (24 bytes)
680 687 lfs: found d96eda2c74b56e95cfb5ffb66b6503e198cc6fc4a09dc877de925feebc65786e in the local lfs store
681 688 diff -r 8374dc4052cb -r 9640b57e77b1 lfspair2.bin
682 689 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
683 690 +++ b/lfspair2.bin Thu Jan 01 00:00:00 1970 +0000
684 691 @@ -0,0 +1,1 @@
685 692 +this is an lfs file too
686 693
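This selectivity works because commands announce the (revision, matcher) pairs they are about to read and extensions hook that announcement, so only files the diff pattern matches are even considered. A sketch of wiring such a hook; the scmutil.fileprefetchhooks hook point is real, but the handler body and its exact signature are simplified for this Mercurial era:

  from mercurial import scmutil

  def _prefetchfiles(repo, revmatches):
      wanted = []
      for rev, match in revmatches:
          ctx = repo[rev]
          for path in ctx.walk(match):    # honors the command's file pattern
              wanted.append((rev, path))  # stand-in for pointer collection
      # ... resolve pointers for 'wanted' and fetch them in one batch ...

  scmutil.fileprefetchhooks.add(b'lfs-sketch', _prefetchfiles)
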
687 694 #endif
688 695
689 696 $ "$PYTHON" $TESTDIR/killdaemons.py $DAEMON_PIDS
690 697
691 698 $ cat $TESTTMP/errors.log