revlog: split the `version` attribute into its two components...
marmoute
r47910:f58a13c5 default
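This changeset replaces the packed `version` attribute, whose low 16 bits carry the revlog format version and whose high 16 bits carry feature flags, with two dedicated attributes: `_format_version` and `_format_flags`. In the hunk below, `self.version & 0xFFFF` becomes `self._format_version`, and the flag masking moves to `self._format_flags`. A minimal sketch of the bit layout the old masking idiom relied on (the constant values mirror `mercurial/revlog.py`; the helper itself is illustrative only, not Mercurial code):

    # Illustrative only: the low 16 bits of the packed header select the
    # format version, the high 16 bits carry feature flags.
    REVLOGV1 = 1                 # values as in mercurial/revlog.py
    FLAG_INLINE_DATA = 1 << 16
    FLAG_GENERALDELTA = 1 << 17

    def split_header(packed):
        """Split a packed revlog header into (format_version, format_flags)."""
        return packed & 0xFFFF, packed & 0xFFFF0000

    version, flags = split_header(REVLOGV1 | FLAG_GENERALDELTA)
    assert version == REVLOGV1
    assert flags & FLAG_GENERALDELTA
    # clearing a flag, as the changelog does for generaldelta below:
    flags &= ~FLAG_GENERALDELTA
    assert not flags & FLAG_GENERALDELTA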
mercurial/changelog.py
@@ -1,625 +1,625 @@
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 )
15 15 from .thirdparty import attr
16 16
17 17 from . import (
18 18 encoding,
19 19 error,
20 20 metadata,
21 21 pycompat,
22 22 revlog,
23 23 )
24 24 from .utils import (
25 25 dateutil,
26 26 stringutil,
27 27 )
28 28 from .revlogutils import (
29 29 constants as revlog_constants,
30 30 flagutil,
31 31 )
32 32
33 33 _defaultextra = {b'branch': b'default'}
34 34
35 35
36 36 def _string_escape(text):
37 37 """
38 38 >>> from .pycompat import bytechr as chr
39 39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 41 >>> s
42 42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 43 >>> res = _string_escape(s)
44 44 >>> s == _string_unescape(res)
45 45 True
46 46 """
47 47 # subset of the string_escape codec
48 48 text = (
49 49 text.replace(b'\\', b'\\\\')
50 50 .replace(b'\n', b'\\n')
51 51 .replace(b'\r', b'\\r')
52 52 )
53 53 return text.replace(b'\0', b'\\0')
54 54
55 55
56 56 def _string_unescape(text):
57 57 if b'\\0' in text:
58 58 # fix up \0 without getting into trouble with \\0
59 59 text = text.replace(b'\\\\', b'\\\\\n')
60 60 text = text.replace(b'\\0', b'\0')
61 61 text = text.replace(b'\n', b'')
62 62 return stringutil.unescapestr(text)
63 63
64 64
65 65 def decodeextra(text):
66 66 """
67 67 >>> from .pycompat import bytechr as chr
68 68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 69 ... ).items())
70 70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 72 ... b'baz': chr(92) + chr(0) + b'2'})
73 73 ... ).items())
74 74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 75 """
76 76 extra = _defaultextra.copy()
77 77 for l in text.split(b'\0'):
78 78 if l:
79 79 k, v = _string_unescape(l).split(b':', 1)
80 80 extra[k] = v
81 81 return extra
82 82
83 83
84 84 def encodeextra(d):
85 85 # keys must be sorted to produce a deterministic changelog entry
86 86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 87 return b"\0".join(items)
88 88
89 89
90 90 def stripdesc(desc):
91 91 """strip trailing whitespace and leading and trailing empty lines"""
92 92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93 93
94 94
95 95 class appender(object):
96 96 """the changelog index must be updated last on disk, so we use this class
97 97 to delay writes to it"""
98 98
99 99 def __init__(self, vfs, name, mode, buf):
100 100 self.data = buf
101 101 fp = vfs(name, mode)
102 102 self.fp = fp
103 103 self.offset = fp.tell()
104 104 self.size = vfs.fstat(fp).st_size
105 105 self._end = self.size
106 106
107 107 def end(self):
108 108 return self._end
109 109
110 110 def tell(self):
111 111 return self.offset
112 112
113 113 def flush(self):
114 114 pass
115 115
116 116 @property
117 117 def closed(self):
118 118 return self.fp.closed
119 119
120 120 def close(self):
121 121 self.fp.close()
122 122
123 123 def seek(self, offset, whence=0):
124 124 '''virtual file offset spans real file and data'''
125 125 if whence == 0:
126 126 self.offset = offset
127 127 elif whence == 1:
128 128 self.offset += offset
129 129 elif whence == 2:
130 130 self.offset = self.end() + offset
131 131 if self.offset < self.size:
132 132 self.fp.seek(self.offset)
133 133
134 134 def read(self, count=-1):
135 135 '''only trick here is reads that span real file and data'''
136 136 ret = b""
137 137 if self.offset < self.size:
138 138 s = self.fp.read(count)
139 139 ret = s
140 140 self.offset += len(s)
141 141 if count > 0:
142 142 count -= len(s)
143 143 if count != 0:
144 144 doff = self.offset - self.size
145 145 self.data.insert(0, b"".join(self.data))
146 146 del self.data[1:]
147 147 s = self.data[0][doff : doff + count]
148 148 self.offset += len(s)
149 149 ret += s
150 150 return ret
151 151
152 152 def write(self, s):
153 153 self.data.append(bytes(s))
154 154 self.offset += len(s)
155 155 self._end += len(s)
156 156
157 157 def __enter__(self):
158 158 self.fp.__enter__()
159 159 return self
160 160
161 161 def __exit__(self, *args):
162 162 return self.fp.__exit__(*args)
163 163
164 164
165 165 class _divertopener(object):
166 166 def __init__(self, opener, target):
167 167 self._opener = opener
168 168 self._target = target
169 169
170 170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 171 if name != self._target:
172 172 return self._opener(name, mode, **kwargs)
173 173 return self._opener(name + b".a", mode, **kwargs)
174 174
175 175 def __getattr__(self, attr):
176 176 return getattr(self._opener, attr)
177 177
178 178
179 179 def _delayopener(opener, target, buf):
180 180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181 181
182 182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 183 if name != target:
184 184 return opener(name, mode, **kwargs)
185 185 assert not kwargs
186 186 return appender(opener, name, mode, buf)
187 187
188 188 return _delay
189 189
190 190
191 191 @attr.s
192 192 class _changelogrevision(object):
193 193 # Extensions might modify _defaultextra, so let the constructor below pass
194 194 # it in
195 195 extra = attr.ib()
196 196 manifest = attr.ib()
197 197 user = attr.ib(default=b'')
198 198 date = attr.ib(default=(0, 0))
199 199 files = attr.ib(default=attr.Factory(list))
200 200 filesadded = attr.ib(default=None)
201 201 filesremoved = attr.ib(default=None)
202 202 p1copies = attr.ib(default=None)
203 203 p2copies = attr.ib(default=None)
204 204 description = attr.ib(default=b'')
205 205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206 206
207 207
208 208 class changelogrevision(object):
209 209 """Holds results of a parsed changelog revision.
210 210
211 211 Changelog revisions consist of multiple pieces of data, including
212 212 the manifest node, user, and date. This object exposes a view into
213 213 the parsed object.
214 214 """
215 215
216 216 __slots__ = (
217 217 '_offsets',
218 218 '_text',
219 219 '_sidedata',
220 220 '_cpsd',
221 221 '_changes',
222 222 )
223 223
224 224 def __new__(cls, cl, text, sidedata, cpsd):
225 225 if not text:
226 226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227 227
228 228 self = super(changelogrevision, cls).__new__(cls)
229 229 # We could return here and implement the following as an __init__.
230 230 # But doing it here is equivalent and saves an extra function call.
231 231
232 232 # format used:
233 233 # nodeid\n : manifest node in ascii
234 234 # user\n : user, no \n or \r allowed
235 235 # time tz extra\n : date (time is int or float, timezone is int)
236 236 # : extra is metadata, encoded and separated by '\0'
237 237 # : older versions ignore it
238 238 # files\n\n : files modified by the cset, no \n or \r allowed
239 239 # (.*) : comment (free text, ideally utf-8)
240 240 #
241 241 # changelog v0 doesn't use extra
242 242
243 243 nl1 = text.index(b'\n')
244 244 nl2 = text.index(b'\n', nl1 + 1)
245 245 nl3 = text.index(b'\n', nl2 + 1)
246 246
247 247 # The list of files may be empty, in which case nl3 is the first
248 248 # newline of the double newline that precedes the description.
249 249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 250 doublenl = nl3
251 251 else:
252 252 doublenl = text.index(b'\n\n', nl3 + 1)
253 253
254 254 self._offsets = (nl1, nl2, nl3, doublenl)
255 255 self._text = text
256 256 self._sidedata = sidedata
257 257 self._cpsd = cpsd
258 258 self._changes = None
259 259
260 260 return self
261 261
262 262 @property
263 263 def manifest(self):
264 264 return bin(self._text[0 : self._offsets[0]])
265 265
266 266 @property
267 267 def user(self):
268 268 off = self._offsets
269 269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270 270
271 271 @property
272 272 def _rawdate(self):
273 273 off = self._offsets
274 274 dateextra = self._text[off[1] + 1 : off[2]]
275 275 return dateextra.split(b' ', 2)[0:2]
276 276
277 277 @property
278 278 def _rawextra(self):
279 279 off = self._offsets
280 280 dateextra = self._text[off[1] + 1 : off[2]]
281 281 fields = dateextra.split(b' ', 2)
282 282 if len(fields) != 3:
283 283 return None
284 284
285 285 return fields[2]
286 286
287 287 @property
288 288 def date(self):
289 289 raw = self._rawdate
290 290 time = float(raw[0])
291 291 # Various tools did silly things with the timezone.
292 292 try:
293 293 timezone = int(raw[1])
294 294 except ValueError:
295 295 timezone = 0
296 296
297 297 return time, timezone
298 298
299 299 @property
300 300 def extra(self):
301 301 raw = self._rawextra
302 302 if raw is None:
303 303 return _defaultextra
304 304
305 305 return decodeextra(raw)
306 306
307 307 @property
308 308 def changes(self):
309 309 if self._changes is not None:
310 310 return self._changes
311 311 if self._cpsd:
312 312 changes = metadata.decode_files_sidedata(self._sidedata)
313 313 else:
314 314 changes = metadata.ChangingFiles(
315 315 touched=self.files or (),
316 316 added=self.filesadded or (),
317 317 removed=self.filesremoved or (),
318 318 p1_copies=self.p1copies or {},
319 319 p2_copies=self.p2copies or {},
320 320 )
321 321 self._changes = changes
322 322 return changes
323 323
324 324 @property
325 325 def files(self):
326 326 if self._cpsd:
327 327 return sorted(self.changes.touched)
328 328 off = self._offsets
329 329 if off[2] == off[3]:
330 330 return []
331 331
332 332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333 333
334 334 @property
335 335 def filesadded(self):
336 336 if self._cpsd:
337 337 return self.changes.added
338 338 else:
339 339 rawindices = self.extra.get(b'filesadded')
340 340 if rawindices is None:
341 341 return None
342 342 return metadata.decodefileindices(self.files, rawindices)
343 343
344 344 @property
345 345 def filesremoved(self):
346 346 if self._cpsd:
347 347 return self.changes.removed
348 348 else:
349 349 rawindices = self.extra.get(b'filesremoved')
350 350 if rawindices is None:
351 351 return None
352 352 return metadata.decodefileindices(self.files, rawindices)
353 353
354 354 @property
355 355 def p1copies(self):
356 356 if self._cpsd:
357 357 return self.changes.copied_from_p1
358 358 else:
359 359 rawcopies = self.extra.get(b'p1copies')
360 360 if rawcopies is None:
361 361 return None
362 362 return metadata.decodecopies(self.files, rawcopies)
363 363
364 364 @property
365 365 def p2copies(self):
366 366 if self._cpsd:
367 367 return self.changes.copied_from_p2
368 368 else:
369 369 rawcopies = self.extra.get(b'p2copies')
370 370 if rawcopies is None:
371 371 return None
372 372 return metadata.decodecopies(self.files, rawcopies)
373 373
374 374 @property
375 375 def description(self):
376 376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377 377
378 378 @property
379 379 def branchinfo(self):
380 380 extra = self.extra
381 381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382 382
383 383
384 384 class changelog(revlog.revlog):
385 385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 386 """Load a changelog revlog using an opener.
387 387
388 388 If ``trypending`` is true, we attempt to load the index from a
389 389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 390 The ``00changelog.i.a`` file contains index (and possibly inline
391 391 revision) data for a transaction that hasn't been finalized yet.
392 392 It exists in a separate file to facilitate readers (such as
393 393 hook processes) accessing data before a transaction is finalized.
394 394
395 395 ``concurrencychecker`` will be passed to the revlog init function, see
396 396 the documentation there.
397 397 """
398 398 if trypending and opener.exists(b'00changelog.i.a'):
399 399 indexfile = b'00changelog.i.a'
400 400 else:
401 401 indexfile = b'00changelog.i'
402 402
403 403 datafile = b'00changelog.d'
404 404 revlog.revlog.__init__(
405 405 self,
406 406 opener,
407 407 target=(revlog_constants.KIND_CHANGELOG, None),
408 408 indexfile=indexfile,
409 409 datafile=datafile,
410 410 checkambig=True,
411 411 mmaplargeindex=True,
412 412 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
413 413 concurrencychecker=concurrencychecker,
414 414 )
415 415
416 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
416 if self._initempty and (self._format_version == revlog.REVLOGV1):
417 417 # changelogs don't benefit from generaldelta.
418 418
419 self.version &= ~revlog.FLAG_GENERALDELTA
419 self._format_flags &= ~revlog.FLAG_GENERALDELTA
420 420 self._generaldelta = False
421 421
422 422 # Delta chains for changelogs tend to be very small because entries
423 423 # tend to be small and don't delta well with each other. So disable
424 424 # delta chains.
425 425 self._storedeltachains = False
426 426
427 427 self._realopener = opener
428 428 self._delayed = False
429 429 self._delaybuf = None
430 430 self._divert = False
431 431 self._filteredrevs = frozenset()
432 432 self._filteredrevs_hashcache = {}
433 433 self._copiesstorage = opener.options.get(b'copies-storage')
434 434
435 435 @property
436 436 def filteredrevs(self):
437 437 return self._filteredrevs
438 438
439 439 @filteredrevs.setter
440 440 def filteredrevs(self, val):
441 441 # Ensure all updates go through this function
442 442 assert isinstance(val, frozenset)
443 443 self._filteredrevs = val
444 444 self._filteredrevs_hashcache = {}
445 445
446 446 def delayupdate(self, tr):
447 447 """delay visibility of index updates to other readers"""
448 448
449 449 if not self._delayed:
450 450 if len(self) == 0:
451 451 self._divert = True
452 452 if self._realopener.exists(self.indexfile + b'.a'):
453 453 self._realopener.unlink(self.indexfile + b'.a')
454 454 self.opener = _divertopener(self._realopener, self.indexfile)
455 455 else:
456 456 self._delaybuf = []
457 457 self.opener = _delayopener(
458 458 self._realopener, self.indexfile, self._delaybuf
459 459 )
460 460 self._delayed = True
461 461 tr.addpending(b'cl-%i' % id(self), self._writepending)
462 462 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
463 463
464 464 def _finalize(self, tr):
465 465 """finalize index updates"""
466 466 self._delayed = False
467 467 self.opener = self._realopener
468 468 # move redirected index data back into place
469 469 if self._divert:
470 470 assert not self._delaybuf
471 471 tmpname = self.indexfile + b".a"
472 472 nfile = self.opener.open(tmpname)
473 473 nfile.close()
474 474 self.opener.rename(tmpname, self.indexfile, checkambig=True)
475 475 elif self._delaybuf:
476 476 fp = self.opener(self.indexfile, b'a', checkambig=True)
477 477 fp.write(b"".join(self._delaybuf))
478 478 fp.close()
479 479 self._delaybuf = None
480 480 self._divert = False
481 481 # split when we're done
482 482 self._enforceinlinesize(tr)
483 483
484 484 def _writepending(self, tr):
485 485 """create a file containing the unfinalized state for
486 486 pretxnchangegroup"""
487 487 if self._delaybuf:
488 488 # make a temporary copy of the index
489 489 fp1 = self._realopener(self.indexfile)
490 490 pendingfilename = self.indexfile + b".a"
491 491 # register as a temp file to ensure cleanup on failure
492 492 tr.registertmp(pendingfilename)
493 493 # write existing data
494 494 fp2 = self._realopener(pendingfilename, b"w")
495 495 fp2.write(fp1.read())
496 496 # add pending data
497 497 fp2.write(b"".join(self._delaybuf))
498 498 fp2.close()
499 499 # switch modes so finalize can simply rename
500 500 self._delaybuf = None
501 501 self._divert = True
502 502 self.opener = _divertopener(self._realopener, self.indexfile)
503 503
504 504 if self._divert:
505 505 return True
506 506
507 507 return False
508 508
509 509 def _enforceinlinesize(self, tr, fp=None):
510 510 if not self._delayed:
511 511 revlog.revlog._enforceinlinesize(self, tr, fp)
512 512
513 513 def read(self, nodeorrev):
514 514 """Obtain data from a parsed changelog revision.
515 515
516 516 Returns a 6-tuple of:
517 517
518 518 - manifest node in binary
519 519 - author/user as a localstr
520 520 - date as a 2-tuple of (time, timezone)
521 521 - list of files
522 522 - commit message as a localstr
523 523 - dict of extra metadata
524 524
525 525 Unless you need to access all fields, consider calling
526 526 ``changelogrevision`` instead, as it is faster for partial object
527 527 access.
528 528 """
529 529 d, s = self._revisiondata(nodeorrev)
530 530 c = changelogrevision(
531 531 self, d, s, self._copiesstorage == b'changeset-sidedata'
532 532 )
533 533 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
534 534
535 535 def changelogrevision(self, nodeorrev):
536 536 """Obtain a ``changelogrevision`` for a node or revision."""
537 537 text, sidedata = self._revisiondata(nodeorrev)
538 538 return changelogrevision(
539 539 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
540 540 )
541 541
542 542 def readfiles(self, nodeorrev):
543 543 """
544 544 short version of read that only returns the files modified by the cset
545 545 """
546 546 text = self.revision(nodeorrev)
547 547 if not text:
548 548 return []
549 549 last = text.index(b"\n\n")
550 550 l = text[:last].split(b'\n')
551 551 return l[3:]
552 552
553 553 def add(
554 554 self,
555 555 manifest,
556 556 files,
557 557 desc,
558 558 transaction,
559 559 p1,
560 560 p2,
561 561 user,
562 562 date=None,
563 563 extra=None,
564 564 ):
565 565 # Convert to UTF-8 encoded bytestrings as the very first
566 566 # thing: calling any method on a localstr object will turn it
567 567 # into a str object and the cached UTF-8 string is thus lost.
568 568 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
569 569
570 570 user = user.strip()
571 571 # An empty username or a username with a "\n" will make the
572 572 # revision text contain two "\n\n" sequences -> corrupt
573 573 # repository since read cannot unpack the revision.
574 574 if not user:
575 575 raise error.StorageError(_(b"empty username"))
576 576 if b"\n" in user:
577 577 raise error.StorageError(
578 578 _(b"username %r contains a newline") % pycompat.bytestr(user)
579 579 )
580 580
581 581 desc = stripdesc(desc)
582 582
583 583 if date:
584 584 parseddate = b"%d %d" % dateutil.parsedate(date)
585 585 else:
586 586 parseddate = b"%d %d" % dateutil.makedate()
587 587 if extra:
588 588 branch = extra.get(b"branch")
589 589 if branch in (b"default", b""):
590 590 del extra[b"branch"]
591 591 elif branch in (b".", b"null", b"tip"):
592 592 raise error.StorageError(
593 593 _(b'the name \'%s\' is reserved') % branch
594 594 )
595 595 sortedfiles = sorted(files.touched)
596 596 flags = 0
597 597 sidedata = None
598 598 if self._copiesstorage == b'changeset-sidedata':
599 599 if files.has_copies_info:
600 600 flags |= flagutil.REVIDX_HASCOPIESINFO
601 601 sidedata = metadata.encode_files_sidedata(files)
602 602
603 603 if extra:
604 604 extra = encodeextra(extra)
605 605 parseddate = b"%s %s" % (parseddate, extra)
606 606 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
607 607 text = b"\n".join(l)
608 608 rev = self.addrevision(
609 609 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
610 610 )
611 611 return self.node(rev)
612 612
613 613 def branchinfo(self, rev):
614 614 """return the branch name and open/close state of a revision
615 615
616 616 This function exists because creating a changectx object
617 617 just to access this is costly."""
618 618 return self.changelogrevision(rev).branchinfo
619 619
620 620 def _nodeduplicatecallback(self, transaction, rev):
621 621 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
622 622 #
623 623 # We track them in a list to preserve their order from the source bundle
624 624 duplicates = transaction.changes.setdefault(b'revduplicates', [])
625 625 duplicates.append(rev)
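For reference, `changelogrevision.__new__` above parses, and `changelog.add` produces, entries in the layout described by the format comment: manifest node, user, date line, file list, then a blank line and the description. A toy round-trip outside Mercurial, assuming a well-formed entry (no escaping, no sidedata; `build_entry`/`parse_entry` are hypothetical helpers, not Mercurial APIs):

    def build_entry(manifest_hex, user, date, files, desc):
        # mirrors the list built in changelog.add()
        lines = [manifest_hex, user, date] + sorted(files) + [b"", desc]
        return b"\n".join(lines)

    def parse_entry(text):
        # the real parser in changelogrevision.__new__ works on offsets
        header, desc = text.split(b"\n\n", 1)
        fields = header.split(b"\n")
        # manifest node, user, "time tz [extra]", then the file list
        return fields[0], fields[1], fields[2], fields[3:], desc

    entry = build_entry(b"ab" * 20, b"alice", b"0 0", [b"b", b"a"], b"msg")
    assert parse_entry(entry) == (b"ab" * 20, b"alice", b"0 0", [b"a", b"b"], b"msg")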
mercurial/debugcommands.py
@@ -1,4828 +1,4828 @@
1 1 # debugcommands.py - command processing for debug* commands
2 2 #
3 3 # Copyright 2005-2016 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import codecs
11 11 import collections
12 12 import contextlib
13 13 import difflib
14 14 import errno
15 15 import glob
16 16 import operator
17 17 import os
18 18 import platform
19 19 import random
20 20 import re
21 21 import socket
22 22 import ssl
23 23 import stat
24 24 import string
25 25 import subprocess
26 26 import sys
27 27 import time
28 28
29 29 from .i18n import _
30 30 from .node import (
31 31 bin,
32 32 hex,
33 33 nullrev,
34 34 short,
35 35 )
36 36 from .pycompat import (
37 37 getattr,
38 38 open,
39 39 )
40 40 from . import (
41 41 bundle2,
42 42 bundlerepo,
43 43 changegroup,
44 44 cmdutil,
45 45 color,
46 46 context,
47 47 copies,
48 48 dagparser,
49 49 encoding,
50 50 error,
51 51 exchange,
52 52 extensions,
53 53 filemerge,
54 54 filesetlang,
55 55 formatter,
56 56 hg,
57 57 httppeer,
58 58 localrepo,
59 59 lock as lockmod,
60 60 logcmdutil,
61 61 mergestate as mergestatemod,
62 62 metadata,
63 63 obsolete,
64 64 obsutil,
65 65 pathutil,
66 66 phases,
67 67 policy,
68 68 pvec,
69 69 pycompat,
70 70 registrar,
71 71 repair,
72 72 repoview,
73 73 revlog,
74 74 revset,
75 75 revsetlang,
76 76 scmutil,
77 77 setdiscovery,
78 78 simplemerge,
79 79 sshpeer,
80 80 sslutil,
81 81 streamclone,
82 82 strip,
83 83 tags as tagsmod,
84 84 templater,
85 85 treediscovery,
86 86 upgrade,
87 87 url as urlmod,
88 88 util,
89 89 vfs as vfsmod,
90 90 wireprotoframing,
91 91 wireprotoserver,
92 92 wireprotov2peer,
93 93 )
94 94 from .utils import (
95 95 cborutil,
96 96 compression,
97 97 dateutil,
98 98 procutil,
99 99 stringutil,
100 100 urlutil,
101 101 )
102 102
103 103 from .revlogutils import (
104 104 deltas as deltautil,
105 105 nodemap,
106 106 sidedata,
107 107 )
108 108
109 109 release = lockmod.release
110 110
111 111 table = {}
112 112 table.update(strip.command._table)
113 113 command = registrar.command(table)
114 114
115 115
116 116 @command(b'debugancestor', [], _(b'[INDEX] REV1 REV2'), optionalrepo=True)
117 117 def debugancestor(ui, repo, *args):
118 118 """find the ancestor revision of two revisions in a given index"""
119 119 if len(args) == 3:
120 120 index, rev1, rev2 = args
121 121 r = revlog.revlog(vfsmod.vfs(encoding.getcwd(), audit=False), index)
122 122 lookup = r.lookup
123 123 elif len(args) == 2:
124 124 if not repo:
125 125 raise error.Abort(
126 126 _(b'there is no Mercurial repository here (.hg not found)')
127 127 )
128 128 rev1, rev2 = args
129 129 r = repo.changelog
130 130 lookup = repo.lookup
131 131 else:
132 132 raise error.Abort(_(b'either two or three arguments required'))
133 133 a = r.ancestor(lookup(rev1), lookup(rev2))
134 134 ui.write(b'%d:%s\n' % (r.rev(a), hex(a)))
135 135
136 136
137 137 @command(b'debugantivirusrunning', [])
138 138 def debugantivirusrunning(ui, repo):
139 139 """attempt to trigger an antivirus scanner to see if one is active"""
140 140 with repo.cachevfs.open('eicar-test-file.com', b'wb') as f:
141 141 f.write(
142 142 util.b85decode(
143 143 # This is a base85-armored version of the EICAR test file. See
144 144 # https://en.wikipedia.org/wiki/EICAR_test_file for details.
145 145 b'ST#=}P$fV?P+K%yP+C|uG$>GBDK|qyDK~v2MM*<JQY}+dK~6+LQba95P'
146 146 b'E<)&Nm5l)EmTEQR4qnHOhq9iNGnJx'
147 147 )
148 148 )
149 149 # Give an AV engine time to scan the file.
150 150 time.sleep(2)
151 151 util.unlink(repo.cachevfs.join('eicar-test-file.com'))
152 152
153 153
154 154 @command(b'debugapplystreamclonebundle', [], b'FILE')
155 155 def debugapplystreamclonebundle(ui, repo, fname):
156 156 """apply a stream clone bundle file"""
157 157 f = hg.openpath(ui, fname)
158 158 gen = exchange.readbundle(ui, f, fname)
159 159 gen.apply(repo)
160 160
161 161
162 162 @command(
163 163 b'debugbuilddag',
164 164 [
165 165 (
166 166 b'm',
167 167 b'mergeable-file',
168 168 None,
169 169 _(b'add single file mergeable changes'),
170 170 ),
171 171 (
172 172 b'o',
173 173 b'overwritten-file',
174 174 None,
175 175 _(b'add single file all revs overwrite'),
176 176 ),
177 177 (b'n', b'new-file', None, _(b'add new file at each rev')),
178 178 ],
179 179 _(b'[OPTION]... [TEXT]'),
180 180 )
181 181 def debugbuilddag(
182 182 ui,
183 183 repo,
184 184 text=None,
185 185 mergeable_file=False,
186 186 overwritten_file=False,
187 187 new_file=False,
188 188 ):
189 189 """builds a repo with a given DAG from scratch in the current empty repo
190 190
191 191 The description of the DAG is read from stdin if not given on the
192 192 command line.
193 193
194 194 Elements:
195 195
196 196 - "+n" is a linear run of n nodes based on the current default parent
197 197 - "." is a single node based on the current default parent
198 198 - "$" resets the default parent to null (implied at the start);
199 199 otherwise the default parent is always the last node created
200 200 - "<p" sets the default parent to the backref p
201 201 - "*p" is a fork at parent p, which is a backref
202 202 - "*p1/p2" is a merge of parents p1 and p2, which are backrefs
203 203 - "/p2" is a merge of the preceding node and p2
204 204 - ":tag" defines a local tag for the preceding node
205 205 - "@branch" sets the named branch for subsequent nodes
206 206 - "#...\\n" is a comment up to the end of the line
207 207
208 208 Whitespace between the above elements is ignored.
209 209
210 210 A backref is either
211 211
212 212 - a number n, which references the node curr-n, where curr is the current
213 213 node, or
214 214 - the name of a local tag you placed earlier using ":tag", or
215 215 - empty to denote the default parent.
216 216
217 217 All string-valued elements are either strictly alphanumeric, or must
218 218 be enclosed in double quotes ("..."), with "\\" as escape character.
219 219 """
220 220
221 221 if text is None:
222 222 ui.status(_(b"reading DAG from stdin\n"))
223 223 text = ui.fin.read()
224 224
225 225 cl = repo.changelog
226 226 if len(cl) > 0:
227 227 raise error.Abort(_(b'repository is not empty'))
228 228
229 229 # determine number of revs in DAG
230 230 total = 0
231 231 for type, data in dagparser.parsedag(text):
232 232 if type == b'n':
233 233 total += 1
234 234
235 235 if mergeable_file:
236 236 linesperrev = 2
237 237 # make a file with k lines per rev
238 238 initialmergedlines = [
239 239 b'%d' % i for i in pycompat.xrange(0, total * linesperrev)
240 240 ]
241 241 initialmergedlines.append(b"")
242 242
243 243 tags = []
244 244 progress = ui.makeprogress(
245 245 _(b'building'), unit=_(b'revisions'), total=total
246 246 )
247 247 with progress, repo.wlock(), repo.lock(), repo.transaction(b"builddag"):
248 248 at = -1
249 249 atbranch = b'default'
250 250 nodeids = []
251 251 id = 0
252 252 progress.update(id)
253 253 for type, data in dagparser.parsedag(text):
254 254 if type == b'n':
255 255 ui.note((b'node %s\n' % pycompat.bytestr(data)))
256 256 id, ps = data
257 257
258 258 files = []
259 259 filecontent = {}
260 260
261 261 p2 = None
262 262 if mergeable_file:
263 263 fn = b"mf"
264 264 p1 = repo[ps[0]]
265 265 if len(ps) > 1:
266 266 p2 = repo[ps[1]]
267 267 pa = p1.ancestor(p2)
268 268 base, local, other = [
269 269 x[fn].data() for x in (pa, p1, p2)
270 270 ]
271 271 m3 = simplemerge.Merge3Text(base, local, other)
272 272 ml = [l.strip() for l in m3.merge_lines()]
273 273 ml.append(b"")
274 274 elif at > 0:
275 275 ml = p1[fn].data().split(b"\n")
276 276 else:
277 277 ml = initialmergedlines
278 278 ml[id * linesperrev] += b" r%i" % id
279 279 mergedtext = b"\n".join(ml)
280 280 files.append(fn)
281 281 filecontent[fn] = mergedtext
282 282
283 283 if overwritten_file:
284 284 fn = b"of"
285 285 files.append(fn)
286 286 filecontent[fn] = b"r%i\n" % id
287 287
288 288 if new_file:
289 289 fn = b"nf%i" % id
290 290 files.append(fn)
291 291 filecontent[fn] = b"r%i\n" % id
292 292 if len(ps) > 1:
293 293 if not p2:
294 294 p2 = repo[ps[1]]
295 295 for fn in p2:
296 296 if fn.startswith(b"nf"):
297 297 files.append(fn)
298 298 filecontent[fn] = p2[fn].data()
299 299
300 300 def fctxfn(repo, cx, path):
301 301 if path in filecontent:
302 302 return context.memfilectx(
303 303 repo, cx, path, filecontent[path]
304 304 )
305 305 return None
306 306
307 307 if len(ps) == 0 or ps[0] < 0:
308 308 pars = [None, None]
309 309 elif len(ps) == 1:
310 310 pars = [nodeids[ps[0]], None]
311 311 else:
312 312 pars = [nodeids[p] for p in ps]
313 313 cx = context.memctx(
314 314 repo,
315 315 pars,
316 316 b"r%i" % id,
317 317 files,
318 318 fctxfn,
319 319 date=(id, 0),
320 320 user=b"debugbuilddag",
321 321 extra={b'branch': atbranch},
322 322 )
323 323 nodeid = repo.commitctx(cx)
324 324 nodeids.append(nodeid)
325 325 at = id
326 326 elif type == b'l':
327 327 id, name = data
328 328 ui.note((b'tag %s\n' % name))
329 329 tags.append(b"%s %s\n" % (hex(repo.changelog.node(id)), name))
330 330 elif type == b'a':
331 331 ui.note((b'branch %s\n' % data))
332 332 atbranch = data
333 333 progress.update(id)
334 334
335 335 if tags:
336 336 repo.vfs.write(b"localtags", b"".join(tags))
337 337
338 338
339 339 def _debugchangegroup(ui, gen, all=None, indent=0, **opts):
340 340 indent_string = b' ' * indent
341 341 if all:
342 342 ui.writenoi18n(
343 343 b"%sformat: id, p1, p2, cset, delta base, len(delta)\n"
344 344 % indent_string
345 345 )
346 346
347 347 def showchunks(named):
348 348 ui.write(b"\n%s%s\n" % (indent_string, named))
349 349 for deltadata in gen.deltaiter():
350 350 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
351 351 ui.write(
352 352 b"%s%s %s %s %s %s %d\n"
353 353 % (
354 354 indent_string,
355 355 hex(node),
356 356 hex(p1),
357 357 hex(p2),
358 358 hex(cs),
359 359 hex(deltabase),
360 360 len(delta),
361 361 )
362 362 )
363 363
364 364 gen.changelogheader()
365 365 showchunks(b"changelog")
366 366 gen.manifestheader()
367 367 showchunks(b"manifest")
368 368 for chunkdata in iter(gen.filelogheader, {}):
369 369 fname = chunkdata[b'filename']
370 370 showchunks(fname)
371 371 else:
372 372 if isinstance(gen, bundle2.unbundle20):
373 373 raise error.Abort(_(b'use debugbundle2 for this file'))
374 374 gen.changelogheader()
375 375 for deltadata in gen.deltaiter():
376 376 node, p1, p2, cs, deltabase, delta, flags, sidedata = deltadata
377 377 ui.write(b"%s%s\n" % (indent_string, hex(node)))
378 378
379 379
380 380 def _debugobsmarkers(ui, part, indent=0, **opts):
381 381 """display version and markers contained in 'data'"""
382 382 opts = pycompat.byteskwargs(opts)
383 383 data = part.read()
384 384 indent_string = b' ' * indent
385 385 try:
386 386 version, markers = obsolete._readmarkers(data)
387 387 except error.UnknownVersion as exc:
388 388 msg = b"%sunsupported version: %s (%d bytes)\n"
389 389 msg %= indent_string, exc.version, len(data)
390 390 ui.write(msg)
391 391 else:
392 392 msg = b"%sversion: %d (%d bytes)\n"
393 393 msg %= indent_string, version, len(data)
394 394 ui.write(msg)
395 395 fm = ui.formatter(b'debugobsolete', opts)
396 396 for rawmarker in sorted(markers):
397 397 m = obsutil.marker(None, rawmarker)
398 398 fm.startitem()
399 399 fm.plain(indent_string)
400 400 cmdutil.showmarker(fm, m)
401 401 fm.end()
402 402
403 403
404 404 def _debugphaseheads(ui, data, indent=0):
405 405 """display version and markers contained in 'data'"""
406 406 indent_string = b' ' * indent
407 407 headsbyphase = phases.binarydecode(data)
408 408 for phase in phases.allphases:
409 409 for head in headsbyphase[phase]:
410 410 ui.write(indent_string)
411 411 ui.write(b'%s %s\n' % (hex(head), phases.phasenames[phase]))
412 412
413 413
414 414 def _quasirepr(thing):
415 415 if isinstance(thing, (dict, util.sortdict, collections.OrderedDict)):
416 416 return b'{%s}' % (
417 417 b', '.join(b'%s: %s' % (k, thing[k]) for k in sorted(thing))
418 418 )
419 419 return pycompat.bytestr(repr(thing))
420 420
421 421
422 422 def _debugbundle2(ui, gen, all=None, **opts):
423 423 """lists the contents of a bundle2"""
424 424 if not isinstance(gen, bundle2.unbundle20):
425 425 raise error.Abort(_(b'not a bundle2 file'))
426 426 ui.write((b'Stream params: %s\n' % _quasirepr(gen.params)))
427 427 parttypes = opts.get('part_type', [])
428 428 for part in gen.iterparts():
429 429 if parttypes and part.type not in parttypes:
430 430 continue
431 431 msg = b'%s -- %s (mandatory: %r)\n'
432 432 ui.write((msg % (part.type, _quasirepr(part.params), part.mandatory)))
433 433 if part.type == b'changegroup':
434 434 version = part.params.get(b'version', b'01')
435 435 cg = changegroup.getunbundler(version, part, b'UN')
436 436 if not ui.quiet:
437 437 _debugchangegroup(ui, cg, all=all, indent=4, **opts)
438 438 if part.type == b'obsmarkers':
439 439 if not ui.quiet:
440 440 _debugobsmarkers(ui, part, indent=4, **opts)
441 441 if part.type == b'phase-heads':
442 442 if not ui.quiet:
443 443 _debugphaseheads(ui, part, indent=4)
444 444
445 445
446 446 @command(
447 447 b'debugbundle',
448 448 [
449 449 (b'a', b'all', None, _(b'show all details')),
450 450 (b'', b'part-type', [], _(b'show only the named part type')),
451 451 (b'', b'spec', None, _(b'print the bundlespec of the bundle')),
452 452 ],
453 453 _(b'FILE'),
454 454 norepo=True,
455 455 )
456 456 def debugbundle(ui, bundlepath, all=None, spec=None, **opts):
457 457 """lists the contents of a bundle"""
458 458 with hg.openpath(ui, bundlepath) as f:
459 459 if spec:
460 460 spec = exchange.getbundlespec(ui, f)
461 461 ui.write(b'%s\n' % spec)
462 462 return
463 463
464 464 gen = exchange.readbundle(ui, f, bundlepath)
465 465 if isinstance(gen, bundle2.unbundle20):
466 466 return _debugbundle2(ui, gen, all=all, **opts)
467 467 _debugchangegroup(ui, gen, all=all, **opts)
468 468
469 469
470 470 @command(b'debugcapabilities', [], _(b'PATH'), norepo=True)
471 471 def debugcapabilities(ui, path, **opts):
472 472 """lists the capabilities of a remote peer"""
473 473 opts = pycompat.byteskwargs(opts)
474 474 peer = hg.peer(ui, opts, path)
475 475 try:
476 476 caps = peer.capabilities()
477 477 ui.writenoi18n(b'Main capabilities:\n')
478 478 for c in sorted(caps):
479 479 ui.write(b' %s\n' % c)
480 480 b2caps = bundle2.bundle2caps(peer)
481 481 if b2caps:
482 482 ui.writenoi18n(b'Bundle2 capabilities:\n')
483 483 for key, values in sorted(pycompat.iteritems(b2caps)):
484 484 ui.write(b' %s\n' % key)
485 485 for v in values:
486 486 ui.write(b' %s\n' % v)
487 487 finally:
488 488 peer.close()
489 489
490 490
491 491 @command(
492 492 b'debugchangedfiles',
493 493 [
494 494 (
495 495 b'',
496 496 b'compute',
497 497 False,
498 498 b"compute information instead of reading it from storage",
499 499 ),
500 500 ],
501 501 b'REV',
502 502 )
503 503 def debugchangedfiles(ui, repo, rev, **opts):
504 504 """list the stored files changes for a revision"""
505 505 ctx = scmutil.revsingle(repo, rev, None)
506 506 files = None
507 507
508 508 if opts['compute']:
509 509 files = metadata.compute_all_files_changes(ctx)
510 510 else:
511 511 sd = repo.changelog.sidedata(ctx.rev())
512 512 files_block = sd.get(sidedata.SD_FILES)
513 513 if files_block is not None:
514 514 files = metadata.decode_files_sidedata(sd)
515 515 if files is not None:
516 516 for f in sorted(files.touched):
517 517 if f in files.added:
518 518 action = b"added"
519 519 elif f in files.removed:
520 520 action = b"removed"
521 521 elif f in files.merged:
522 522 action = b"merged"
523 523 elif f in files.salvaged:
524 524 action = b"salvaged"
525 525 else:
526 526 action = b"touched"
527 527
528 528 copy_parent = b""
529 529 copy_source = b""
530 530 if f in files.copied_from_p1:
531 531 copy_parent = b"p1"
532 532 copy_source = files.copied_from_p1[f]
533 533 elif f in files.copied_from_p2:
534 534 copy_parent = b"p2"
535 535 copy_source = files.copied_from_p2[f]
536 536
537 537 data = (action, copy_parent, f, copy_source)
538 538 template = b"%-8s %2s: %s, %s;\n"
539 539 ui.write(template % data)
540 540
541 541
542 542 @command(b'debugcheckstate', [], b'')
543 543 def debugcheckstate(ui, repo):
544 544 """validate the correctness of the current dirstate"""
545 545 parent1, parent2 = repo.dirstate.parents()
546 546 m1 = repo[parent1].manifest()
547 547 m2 = repo[parent2].manifest()
548 548 errors = 0
549 549 for f in repo.dirstate:
550 550 state = repo.dirstate[f]
551 551 if state in b"nr" and f not in m1:
552 552 ui.warn(_(b"%s in state %s, but not in manifest1\n") % (f, state))
553 553 errors += 1
554 554 if state in b"a" and f in m1:
555 555 ui.warn(_(b"%s in state %s, but also in manifest1\n") % (f, state))
556 556 errors += 1
557 557 if state in b"m" and f not in m1 and f not in m2:
558 558 ui.warn(
559 559 _(b"%s in state %s, but not in either manifest\n") % (f, state)
560 560 )
561 561 errors += 1
562 562 for f in m1:
563 563 state = repo.dirstate[f]
564 564 if state not in b"nrm":
565 565 ui.warn(_(b"%s in manifest1, but listed as state %s") % (f, state))
566 566 errors += 1
567 567 if errors:
568 568 errstr = _(b".hg/dirstate inconsistent with current parent's manifest")
569 569 raise error.Abort(errstr)
570 570
571 571
572 572 @command(
573 573 b'debugcolor',
574 574 [(b'', b'style', None, _(b'show all configured styles'))],
575 575 b'hg debugcolor',
576 576 )
577 577 def debugcolor(ui, repo, **opts):
578 578 """show available color, effects or style"""
579 579 ui.writenoi18n(b'color mode: %s\n' % stringutil.pprint(ui._colormode))
580 580 if opts.get('style'):
581 581 return _debugdisplaystyle(ui)
582 582 else:
583 583 return _debugdisplaycolor(ui)
584 584
585 585
586 586 def _debugdisplaycolor(ui):
587 587 ui = ui.copy()
588 588 ui._styles.clear()
589 589 for effect in color._activeeffects(ui).keys():
590 590 ui._styles[effect] = effect
591 591 if ui._terminfoparams:
592 592 for k, v in ui.configitems(b'color'):
593 593 if k.startswith(b'color.'):
594 594 ui._styles[k] = k[6:]
595 595 elif k.startswith(b'terminfo.'):
596 596 ui._styles[k] = k[9:]
597 597 ui.write(_(b'available colors:\n'))
598 598 # sort labels with a '_' after the others to group '_background' entries.
599 599 items = sorted(ui._styles.items(), key=lambda i: (b'_' in i[0], i[0], i[1]))
600 600 for colorname, label in items:
601 601 ui.write(b'%s\n' % colorname, label=label)
602 602
603 603
604 604 def _debugdisplaystyle(ui):
605 605 ui.write(_(b'available style:\n'))
606 606 if not ui._styles:
607 607 return
608 608 width = max(len(s) for s in ui._styles)
609 609 for label, effects in sorted(ui._styles.items()):
610 610 ui.write(b'%s' % label, label=label)
611 611 if effects:
612 612 # 50
613 613 ui.write(b': ')
614 614 ui.write(b' ' * (max(0, width - len(label))))
615 615 ui.write(b', '.join(ui.label(e, e) for e in effects.split()))
616 616 ui.write(b'\n')
617 617
618 618
619 619 @command(b'debugcreatestreamclonebundle', [], b'FILE')
620 620 def debugcreatestreamclonebundle(ui, repo, fname):
621 621 """create a stream clone bundle file
622 622
623 623 Stream bundles are special bundles that are essentially archives of
624 624 revlog files. They are commonly used for cloning very quickly.
625 625 """
626 626 # TODO we may want to turn this into an abort when this functionality
627 627 # is moved into `hg bundle`.
628 628 if phases.hassecret(repo):
629 629 ui.warn(
630 630 _(
631 631 b'(warning: stream clone bundle will contain secret '
632 632 b'revisions)\n'
633 633 )
634 634 )
635 635
636 636 requirements, gen = streamclone.generatebundlev1(repo)
637 637 changegroup.writechunks(ui, gen, fname)
638 638
639 639 ui.write(_(b'bundle requirements: %s\n') % b', '.join(sorted(requirements)))
640 640
641 641
642 642 @command(
643 643 b'debugdag',
644 644 [
645 645 (b't', b'tags', None, _(b'use tags as labels')),
646 646 (b'b', b'branches', None, _(b'annotate with branch names')),
647 647 (b'', b'dots', None, _(b'use dots for runs')),
648 648 (b's', b'spaces', None, _(b'separate elements by spaces')),
649 649 ],
650 650 _(b'[OPTION]... [FILE [REV]...]'),
651 651 optionalrepo=True,
652 652 )
653 653 def debugdag(ui, repo, file_=None, *revs, **opts):
654 654 """format the changelog or an index DAG as a concise textual description
655 655
656 656 If you pass a revlog index, the revlog's DAG is emitted. If you list
657 657 revision numbers, they get labeled in the output as rN.
658 658
659 659 Otherwise, the changelog DAG of the current repo is emitted.
660 660 """
661 661 spaces = opts.get('spaces')
662 662 dots = opts.get('dots')
663 663 if file_:
664 664 rlog = revlog.revlog(vfsmod.vfs(encoding.getcwd(), audit=False), file_)
665 665 revs = {int(r) for r in revs}
666 666
667 667 def events():
668 668 for r in rlog:
669 669 yield b'n', (r, list(p for p in rlog.parentrevs(r) if p != -1))
670 670 if r in revs:
671 671 yield b'l', (r, b"r%i" % r)
672 672
673 673 elif repo:
674 674 cl = repo.changelog
675 675 tags = opts.get('tags')
676 676 branches = opts.get('branches')
677 677 if tags:
678 678 labels = {}
679 679 for l, n in repo.tags().items():
680 680 labels.setdefault(cl.rev(n), []).append(l)
681 681
682 682 def events():
683 683 b = b"default"
684 684 for r in cl:
685 685 if branches:
686 686 newb = cl.read(cl.node(r))[5][b'branch']
687 687 if newb != b:
688 688 yield b'a', newb
689 689 b = newb
690 690 yield b'n', (r, list(p for p in cl.parentrevs(r) if p != -1))
691 691 if tags:
692 692 ls = labels.get(r)
693 693 if ls:
694 694 for l in ls:
695 695 yield b'l', (r, l)
696 696
697 697 else:
698 698 raise error.Abort(_(b'need repo for changelog dag'))
699 699
700 700 for line in dagparser.dagtextlines(
701 701 events(),
702 702 addspaces=spaces,
703 703 wraplabels=True,
704 704 wrapannotations=True,
705 705 wrapnonlinear=dots,
706 706 usedots=dots,
707 707 maxlinewidth=70,
708 708 ):
709 709 ui.write(line)
710 710 ui.write(b"\n")
711 711
712 712
713 713 @command(b'debugdata', cmdutil.debugrevlogopts, _(b'-c|-m|FILE REV'))
714 714 def debugdata(ui, repo, file_, rev=None, **opts):
715 715 """dump the contents of a data file revision"""
716 716 opts = pycompat.byteskwargs(opts)
717 717 if opts.get(b'changelog') or opts.get(b'manifest') or opts.get(b'dir'):
718 718 if rev is not None:
719 719 raise error.CommandError(b'debugdata', _(b'invalid arguments'))
720 720 file_, rev = None, file_
721 721 elif rev is None:
722 722 raise error.CommandError(b'debugdata', _(b'invalid arguments'))
723 723 r = cmdutil.openstorage(repo, b'debugdata', file_, opts)
724 724 try:
725 725 ui.write(r.rawdata(r.lookup(rev)))
726 726 except KeyError:
727 727 raise error.Abort(_(b'invalid revision identifier %s') % rev)
728 728
729 729
730 730 @command(
731 731 b'debugdate',
732 732 [(b'e', b'extended', None, _(b'try extended date formats'))],
733 733 _(b'[-e] DATE [RANGE]'),
734 734 norepo=True,
735 735 optionalrepo=True,
736 736 )
737 737 def debugdate(ui, date, range=None, **opts):
738 738 """parse and display a date"""
739 739 if opts["extended"]:
740 740 d = dateutil.parsedate(date, dateutil.extendeddateformats)
741 741 else:
742 742 d = dateutil.parsedate(date)
743 743 ui.writenoi18n(b"internal: %d %d\n" % d)
744 744 ui.writenoi18n(b"standard: %s\n" % dateutil.datestr(d))
745 745 if range:
746 746 m = dateutil.matchdate(range)
747 747 ui.writenoi18n(b"match: %s\n" % m(d[0]))
748 748
749 749
750 750 @command(
751 751 b'debugdeltachain',
752 752 cmdutil.debugrevlogopts + cmdutil.formatteropts,
753 753 _(b'-c|-m|FILE'),
754 754 optionalrepo=True,
755 755 )
756 756 def debugdeltachain(ui, repo, file_=None, **opts):
757 757 """dump information about delta chains in a revlog
758 758
759 759 Output can be templatized. Available template keywords are:
760 760
761 761 :``rev``: revision number
762 762 :``chainid``: delta chain identifier (numbered by unique base)
763 763 :``chainlen``: delta chain length to this revision
764 764 :``prevrev``: previous revision in delta chain
765 765 :``deltatype``: role of delta / how it was computed
766 766 :``compsize``: compressed size of revision
767 767 :``uncompsize``: uncompressed size of revision
768 768 :``chainsize``: total size of compressed revisions in chain
769 769 :``chainratio``: total chain size divided by uncompressed revision size
770 770 (new delta chains typically start at ratio 2.00)
771 771 :``lindist``: linear distance from base revision in delta chain to end
772 772 of this revision
773 773 :``extradist``: total size of revisions not part of this delta chain from
774 774 base of delta chain to end of this revision; a measurement
775 775 of how much extra data we need to read/seek across to read
776 776 the delta chain for this revision
777 777 :``extraratio``: extradist divided by chainsize; another representation of
778 778 how much unrelated data is needed to load this delta chain
779 779
780 780 If the repository is configured to use sparse reads, additional keywords
781 781 are available:
782 782
783 783 :``readsize``: total size of data read from the disk for a revision
784 784 (sum of the sizes of all the blocks)
785 785 :``largestblock``: size of the largest block of data read from the disk
786 786 :``readdensity``: density of useful bytes in the data read from the disk
787 787 :``srchunks``: in how many data hunks the whole revision would be read
788 788
789 789 The sparse read can be enabled with experimental.sparse-read = True
790 790 """
791 791 opts = pycompat.byteskwargs(opts)
792 792 r = cmdutil.openrevlog(repo, b'debugdeltachain', file_, opts)
793 793 index = r.index
794 794 start = r.start
795 795 length = r.length
796 796 generaldelta = r._generaldelta
797 797 withsparseread = getattr(r, '_withsparseread', False)
798 798
799 799 def revinfo(rev):
800 800 e = index[rev]
801 801 compsize = e[1]
802 802 uncompsize = e[2]
803 803 chainsize = 0
804 804
805 805 if generaldelta:
806 806 if e[3] == e[5]:
807 807 deltatype = b'p1'
808 808 elif e[3] == e[6]:
809 809 deltatype = b'p2'
810 810 elif e[3] == rev - 1:
811 811 deltatype = b'prev'
812 812 elif e[3] == rev:
813 813 deltatype = b'base'
814 814 else:
815 815 deltatype = b'other'
816 816 else:
817 817 if e[3] == rev:
818 818 deltatype = b'base'
819 819 else:
820 820 deltatype = b'prev'
821 821
822 822 chain = r._deltachain(rev)[0]
823 823 for iterrev in chain:
824 824 e = index[iterrev]
825 825 chainsize += e[1]
826 826
827 827 return compsize, uncompsize, deltatype, chain, chainsize
828 828
829 829 fm = ui.formatter(b'debugdeltachain', opts)
830 830
831 831 fm.plain(
832 832 b' rev chain# chainlen prev delta '
833 833 b'size rawsize chainsize ratio lindist extradist '
834 834 b'extraratio'
835 835 )
836 836 if withsparseread:
837 837 fm.plain(b' readsize largestblk rddensity srchunks')
838 838 fm.plain(b'\n')
839 839
840 840 chainbases = {}
841 841 for rev in r:
842 842 comp, uncomp, deltatype, chain, chainsize = revinfo(rev)
843 843 chainbase = chain[0]
844 844 chainid = chainbases.setdefault(chainbase, len(chainbases) + 1)
845 845 basestart = start(chainbase)
846 846 revstart = start(rev)
847 847 lineardist = revstart + comp - basestart
848 848 extradist = lineardist - chainsize
849 849 try:
850 850 prevrev = chain[-2]
851 851 except IndexError:
852 852 prevrev = -1
853 853
854 854 if uncomp != 0:
855 855 chainratio = float(chainsize) / float(uncomp)
856 856 else:
857 857 chainratio = chainsize
858 858
859 859 if chainsize != 0:
860 860 extraratio = float(extradist) / float(chainsize)
861 861 else:
862 862 extraratio = extradist
863 863
864 864 fm.startitem()
865 865 fm.write(
866 866 b'rev chainid chainlen prevrev deltatype compsize '
867 867 b'uncompsize chainsize chainratio lindist extradist '
868 868 b'extraratio',
869 869 b'%7d %7d %8d %8d %7s %10d %10d %10d %9.5f %9d %9d %10.5f',
870 870 rev,
871 871 chainid,
872 872 len(chain),
873 873 prevrev,
874 874 deltatype,
875 875 comp,
876 876 uncomp,
877 877 chainsize,
878 878 chainratio,
879 879 lineardist,
880 880 extradist,
881 881 extraratio,
882 882 rev=rev,
883 883 chainid=chainid,
884 884 chainlen=len(chain),
885 885 prevrev=prevrev,
886 886 deltatype=deltatype,
887 887 compsize=comp,
888 888 uncompsize=uncomp,
889 889 chainsize=chainsize,
890 890 chainratio=chainratio,
891 891 lindist=lineardist,
892 892 extradist=extradist,
893 893 extraratio=extraratio,
894 894 )
895 895 if withsparseread:
896 896 readsize = 0
897 897 largestblock = 0
898 898 srchunks = 0
899 899
900 900 for revschunk in deltautil.slicechunk(r, chain):
901 901 srchunks += 1
902 902 blkend = start(revschunk[-1]) + length(revschunk[-1])
903 903 blksize = blkend - start(revschunk[0])
904 904
905 905 readsize += blksize
906 906 if largestblock < blksize:
907 907 largestblock = blksize
908 908
909 909 if readsize:
910 910 readdensity = float(chainsize) / float(readsize)
911 911 else:
912 912 readdensity = 1
913 913
914 914 fm.write(
915 915 b'readsize largestblock readdensity srchunks',
916 916 b' %10d %10d %9.5f %8d',
917 917 readsize,
918 918 largestblock,
919 919 readdensity,
920 920 srchunks,
921 921 readsize=readsize,
922 922 largestblock=largestblock,
923 923 readdensity=readdensity,
924 924 srchunks=srchunks,
925 925 )
926 926
927 927 fm.plain(b'\n')
928 928
929 929 fm.end()
930 930
931 931
932 932 @command(
933 933 b'debugdirstate|debugstate',
934 934 [
935 935 (
936 936 b'',
937 937 b'nodates',
938 938 None,
939 939 _(b'do not display the saved mtime (DEPRECATED)'),
940 940 ),
941 941 (b'', b'dates', True, _(b'display the saved mtime')),
942 942 (b'', b'datesort', None, _(b'sort by saved mtime')),
943 943 ],
944 944 _(b'[OPTION]...'),
945 945 )
946 946 def debugstate(ui, repo, **opts):
947 947 """show the contents of the current dirstate"""
948 948
949 949 nodates = not opts['dates']
950 950 if opts.get('nodates') is not None:
951 951 nodates = True
952 952 datesort = opts.get('datesort')
953 953
954 954 if datesort:
955 955 keyfunc = lambda x: (x[1][3], x[0]) # sort by mtime, then by filename
956 956 else:
957 957 keyfunc = None # sort by filename
958 958 for file_, ent in sorted(pycompat.iteritems(repo.dirstate), key=keyfunc):
959 959 if ent[3] == -1:
960 960 timestr = b'unset '
961 961 elif nodates:
962 962 timestr = b'set '
963 963 else:
964 964 timestr = time.strftime(
965 965 "%Y-%m-%d %H:%M:%S ", time.localtime(ent[3])
966 966 )
967 967 timestr = encoding.strtolocal(timestr)
968 968 if ent[1] & 0o20000:
969 969 mode = b'lnk'
970 970 else:
971 971 mode = b'%3o' % (ent[1] & 0o777 & ~util.umask)
972 972 ui.write(b"%c %s %10d %s%s\n" % (ent[0], mode, ent[2], timestr, file_))
973 973 for f in repo.dirstate.copies():
974 974 ui.write(_(b"copy: %s -> %s\n") % (repo.dirstate.copied(f), f))
975 975
976 976
977 977 @command(
978 978 b'debugdiscovery',
979 979 [
980 980 (b'', b'old', None, _(b'use old-style discovery')),
981 981 (
982 982 b'',
983 983 b'nonheads',
984 984 None,
985 985 _(b'use old-style discovery with non-heads included'),
986 986 ),
987 987 (b'', b'rev', [], b'restrict discovery to this set of revs'),
988 988 (b'', b'seed', b'12323', b'specify the random seed used for discovery'),
989 989 (
990 990 b'',
991 991 b'local-as-revs',
992 992 b"",
993 993 b"treat local as having these revisions only",
994 994 ),
995 995 (
996 996 b'',
997 997 b'remote-as-revs',
998 998 b"",
999 999 b'use local as remote, with only these revisions',
1000 1000 ),
1001 1001 ]
1002 1002 + cmdutil.remoteopts
1003 1003 + cmdutil.formatteropts,
1004 1004 _(b'[--rev REV] [OTHER]'),
1005 1005 )
1006 1006 def debugdiscovery(ui, repo, remoteurl=b"default", **opts):
1007 1007 """runs the changeset discovery protocol in isolation
1008 1008
1009 1009 The local peer can be "replaced" by a subset of the local repository by
1010 1010 using the `--local-as-revs` flag. In the same way, the usual `remote`
1011 1011 peer can be "replaced" by a subset of the local repository using the
1012 1012 `--remote-as-revs` flag. This is useful for efficiently debugging
1013 1013 pathological discovery situations.
1014 1014
1015 1015 The following developer-oriented configs are relevant for people playing with this command:
1016 1016
1017 1017 * devel.discovery.exchange-heads=True
1018 1018
1019 1019 If False, the discovery will not start with
1020 1020 remote head fetching and local head querying.
1021 1021
1022 1022 * devel.discovery.grow-sample=True
1023 1023
1024 1024 If False, the sample size used in set discovery will not be increased
1025 1025 through the process.
1026 1026
1027 1027 * devel.discovery.grow-sample.dynamic=True
1028 1028
1029 1029 When discovery.grow-sample.dynamic is True (the default), the sample size
1030 1030 is adapted to the shape of the undecided set: it is set to the max of
1031 1031 <target-size>, len(roots(undecided)) and len(heads(undecided)).
1032 1032
1033 1033 * devel.discovery.grow-sample.rate=1.05
1034 1034
1035 1035 The rate at which the sample grows.
1036 1036
1037 1037 * devel.discovery.randomize=True
1038 1038
1039 1039 If False, random samplings during discovery are deterministic. It is
1040 1040 meant for integration tests.
1041 1041
1042 1042 * devel.discovery.sample-size=200
1043 1043
1044 1044 Control the initial size of the discovery sample
1045 1045
1046 1046 * devel.discovery.sample-size.initial=100
1047 1047
1048 1048 Control the sample size used for the initial discovery round
1049 1049 """
1050 1050 opts = pycompat.byteskwargs(opts)
1051 1051 unfi = repo.unfiltered()
1052 1052
1053 1053 # setup potential extra filtering
1054 1054 local_revs = opts[b"local_as_revs"]
1055 1055 remote_revs = opts[b"remote_as_revs"]
1056 1056
1057 1057 # make sure tests are repeatable
1058 1058 random.seed(int(opts[b'seed']))
1059 1059
1060 1060 if not remote_revs:
1061 1061
1062 1062 remoteurl, branches = urlutil.get_unique_pull_path(
1063 1063 b'debugdiscovery', repo, ui, remoteurl
1064 1064 )
1065 1065 remote = hg.peer(repo, opts, remoteurl)
1066 1066 ui.status(_(b'comparing with %s\n') % urlutil.hidepassword(remoteurl))
1067 1067 else:
1068 1068 branches = (None, [])
1069 1069 remote_filtered_revs = scmutil.revrange(
1070 1070 unfi, [b"not (::(%s))" % remote_revs]
1071 1071 )
1072 1072 remote_filtered_revs = frozenset(remote_filtered_revs)
1073 1073
1074 1074 def remote_func(x):
1075 1075 return remote_filtered_revs
1076 1076
1077 1077 repoview.filtertable[b'debug-discovery-remote-filter'] = remote_func
1078 1078
1079 1079 remote = repo.peer()
1080 1080 remote._repo = remote._repo.filtered(b'debug-discovery-remote-filter')
1081 1081
1082 1082 if local_revs:
1083 1083 local_filtered_revs = scmutil.revrange(
1084 1084 unfi, [b"not (::(%s))" % local_revs]
1085 1085 )
1086 1086 local_filtered_revs = frozenset(local_filtered_revs)
1087 1087
1088 1088 def local_func(x):
1089 1089 return local_filtered_revs
1090 1090
1091 1091 repoview.filtertable[b'debug-discovery-local-filter'] = local_func
1092 1092 repo = repo.filtered(b'debug-discovery-local-filter')
1093 1093
1094 1094 data = {}
1095 1095 if opts.get(b'old'):
1096 1096
1097 1097 def doit(pushedrevs, remoteheads, remote=remote):
1098 1098 if not util.safehasattr(remote, b'branches'):
1099 1099 # enable in-client legacy support
1100 1100 remote = localrepo.locallegacypeer(remote.local())
1101 1101 common, _in, hds = treediscovery.findcommonincoming(
1102 1102 repo, remote, force=True, audit=data
1103 1103 )
1104 1104 common = set(common)
1105 1105 if not opts.get(b'nonheads'):
1106 1106 ui.writenoi18n(
1107 1107 b"unpruned common: %s\n"
1108 1108 % b" ".join(sorted(short(n) for n in common))
1109 1109 )
1110 1110
1111 1111 clnode = repo.changelog.node
1112 1112 common = repo.revs(b'heads(::%ln)', common)
1113 1113 common = {clnode(r) for r in common}
1114 1114 return common, hds
1115 1115
1116 1116 else:
1117 1117
1118 1118 def doit(pushedrevs, remoteheads, remote=remote):
1119 1119 nodes = None
1120 1120 if pushedrevs:
1121 1121 revs = scmutil.revrange(repo, pushedrevs)
1122 1122 nodes = [repo[r].node() for r in revs]
1123 1123 common, any, hds = setdiscovery.findcommonheads(
1124 1124 ui, repo, remote, ancestorsof=nodes, audit=data
1125 1125 )
1126 1126 return common, hds
1127 1127
1128 1128 remoterevs, _checkout = hg.addbranchrevs(repo, remote, branches, revs=None)
1129 1129 localrevs = opts[b'rev']
1130 1130
1131 1131 fm = ui.formatter(b'debugdiscovery', opts)
1132 1132 if fm.strict_format:
1133 1133
1134 1134 @contextlib.contextmanager
1135 1135 def may_capture_output():
1136 1136 ui.pushbuffer()
1137 1137 yield
1138 1138 data[b'output'] = ui.popbuffer()
1139 1139
1140 1140 else:
1141 1141 may_capture_output = util.nullcontextmanager
1142 1142 with may_capture_output():
1143 1143 with util.timedcm('debug-discovery') as t:
1144 1144 common, hds = doit(localrevs, remoterevs)
1145 1145
1146 1146 # compute all statistics
1147 1147 heads_common = set(common)
1148 1148 heads_remote = set(hds)
1149 1149 heads_local = set(repo.heads())
1150 1150 # note: there cannot be a local or remote head that is in common and not
1151 1151 # itself a head of common.
1152 1152 heads_common_local = heads_common & heads_local
1153 1153 heads_common_remote = heads_common & heads_remote
1154 1154 heads_common_both = heads_common & heads_remote & heads_local
1155 1155
1156 1156 all = repo.revs(b'all()')
1157 1157 common = repo.revs(b'::%ln', common)
1158 1158 roots_common = repo.revs(b'roots(::%ld)', common)
1159 1159 missing = repo.revs(b'not ::%ld', common)
1160 1160 heads_missing = repo.revs(b'heads(%ld)', missing)
1161 1161 roots_missing = repo.revs(b'roots(%ld)', missing)
1162 1162 assert len(common) + len(missing) == len(all)
1163 1163
1164 1164 initial_undecided = repo.revs(
1165 1165 b'not (::%ln or %ln::)', heads_common_remote, heads_common_local
1166 1166 )
1167 1167 heads_initial_undecided = repo.revs(b'heads(%ld)', initial_undecided)
1168 1168 roots_initial_undecided = repo.revs(b'roots(%ld)', initial_undecided)
1169 1169 common_initial_undecided = initial_undecided & common
1170 1170 missing_initial_undecided = initial_undecided & missing
1171 1171
1172 1172 data[b'elapsed'] = t.elapsed
1173 1173 data[b'nb-common-heads'] = len(heads_common)
1174 1174 data[b'nb-common-heads-local'] = len(heads_common_local)
1175 1175 data[b'nb-common-heads-remote'] = len(heads_common_remote)
1176 1176 data[b'nb-common-heads-both'] = len(heads_common_both)
1177 1177 data[b'nb-common-roots'] = len(roots_common)
1178 1178 data[b'nb-head-local'] = len(heads_local)
1179 1179 data[b'nb-head-local-missing'] = len(heads_local) - len(heads_common_local)
1180 1180 data[b'nb-head-remote'] = len(heads_remote)
1181 1181 data[b'nb-head-remote-unknown'] = len(heads_remote) - len(
1182 1182 heads_common_remote
1183 1183 )
1184 1184 data[b'nb-revs'] = len(all)
1185 1185 data[b'nb-revs-common'] = len(common)
1186 1186 data[b'nb-revs-missing'] = len(missing)
1187 1187 data[b'nb-missing-heads'] = len(heads_missing)
1188 1188 data[b'nb-missing-roots'] = len(roots_missing)
1189 1189 data[b'nb-ini_und'] = len(initial_undecided)
1190 1190 data[b'nb-ini_und-heads'] = len(heads_initial_undecided)
1191 1191 data[b'nb-ini_und-roots'] = len(roots_initial_undecided)
1192 1192 data[b'nb-ini_und-common'] = len(common_initial_undecided)
1193 1193 data[b'nb-ini_und-missing'] = len(missing_initial_undecided)
1194 1194
1195 1195 fm.startitem()
1196 1196 fm.data(**pycompat.strkwargs(data))
1197 1197 # display discovery summary
1198 1198 fm.plain(b"elapsed time: %(elapsed)f seconds\n" % data)
1199 1199 fm.plain(b"round-trips: %(total-roundtrips)9d\n" % data)
1200 1200 fm.plain(b"heads summary:\n")
1201 1201 fm.plain(b" total common heads: %(nb-common-heads)9d\n" % data)
1202 1202 fm.plain(b" also local heads: %(nb-common-heads-local)9d\n" % data)
1203 1203 fm.plain(b" also remote heads: %(nb-common-heads-remote)9d\n" % data)
1204 1204 fm.plain(b" both: %(nb-common-heads-both)9d\n" % data)
1205 1205 fm.plain(b" local heads: %(nb-head-local)9d\n" % data)
1206 1206 fm.plain(b" common: %(nb-common-heads-local)9d\n" % data)
1207 1207 fm.plain(b" missing: %(nb-head-local-missing)9d\n" % data)
1208 1208 fm.plain(b" remote heads: %(nb-head-remote)9d\n" % data)
1209 1209 fm.plain(b" common: %(nb-common-heads-remote)9d\n" % data)
1210 1210 fm.plain(b" unknown: %(nb-head-remote-unknown)9d\n" % data)
1211 1211 fm.plain(b"local changesets: %(nb-revs)9d\n" % data)
1212 1212 fm.plain(b" common: %(nb-revs-common)9d\n" % data)
1213 1213 fm.plain(b" heads: %(nb-common-heads)9d\n" % data)
1214 1214 fm.plain(b" roots: %(nb-common-roots)9d\n" % data)
1215 1215 fm.plain(b" missing: %(nb-revs-missing)9d\n" % data)
1216 1216 fm.plain(b" heads: %(nb-missing-heads)9d\n" % data)
1217 1217 fm.plain(b" roots: %(nb-missing-roots)9d\n" % data)
1218 1218 fm.plain(b" first undecided set: %(nb-ini_und)9d\n" % data)
1219 1219 fm.plain(b" heads: %(nb-ini_und-heads)9d\n" % data)
1220 1220 fm.plain(b" roots: %(nb-ini_und-roots)9d\n" % data)
1221 1221 fm.plain(b" common: %(nb-ini_und-common)9d\n" % data)
1222 1222 fm.plain(b" missing: %(nb-ini_und-missing)9d\n" % data)
1223 1223
1224 1224 if ui.verbose:
1225 1225 fm.plain(
1226 1226 b"common heads: %s\n"
1227 1227 % b" ".join(sorted(short(n) for n in heads_common))
1228 1228 )
1229 1229 fm.end()
1230 1230
1231 1231
1232 1232 _chunksize = 4 << 10
1233 1233
1234 1234
1235 1235 @command(
1236 1236 b'debugdownload',
1237 1237 [
1238 1238 (b'o', b'output', b'', _(b'path')),
1239 1239 ],
1240 1240 optionalrepo=True,
1241 1241 )
1242 1242 def debugdownload(ui, repo, url, output=None, **opts):
1243 1243 """download a resource using Mercurial logic and config"""
1244 1244 fh = urlmod.open(ui, url, output)
1245 1245
1246 1246 dest = ui
1247 1247 if output:
1248 1248 dest = open(output, b"wb", _chunksize)
1249 1249 try:
1250 1250 data = fh.read(_chunksize)
1251 1251 while data:
1252 1252 dest.write(data)
1253 1253 data = fh.read(_chunksize)
1254 1254 finally:
1255 1255 if output:
1256 1256 dest.close()
1257 1257
1258 1258
1259 1259 @command(b'debugextensions', cmdutil.formatteropts, [], optionalrepo=True)
1260 1260 def debugextensions(ui, repo, **opts):
1261 1261 '''show information about active extensions'''
1262 1262 opts = pycompat.byteskwargs(opts)
1263 1263 exts = extensions.extensions(ui)
1264 1264 hgver = util.version()
1265 1265 fm = ui.formatter(b'debugextensions', opts)
1266 1266 for extname, extmod in sorted(exts, key=operator.itemgetter(0)):
1267 1267 isinternal = extensions.ismoduleinternal(extmod)
1268 1268 extsource = None
1269 1269
1270 1270 if util.safehasattr(extmod, '__file__'):
1271 1271 extsource = pycompat.fsencode(extmod.__file__)
1272 1272 elif getattr(sys, 'oxidized', False):
1273 1273 extsource = pycompat.sysexecutable
1274 1274 if isinternal:
1275 1275 exttestedwith = [] # never expose magic string to users
1276 1276 else:
1277 1277 exttestedwith = getattr(extmod, 'testedwith', b'').split()
1278 1278 extbuglink = getattr(extmod, 'buglink', None)
1279 1279
1280 1280 fm.startitem()
1281 1281
1282 1282 if ui.quiet or ui.verbose:
1283 1283 fm.write(b'name', b'%s\n', extname)
1284 1284 else:
1285 1285 fm.write(b'name', b'%s', extname)
1286 1286 if isinternal or hgver in exttestedwith:
1287 1287 fm.plain(b'\n')
1288 1288 elif not exttestedwith:
1289 1289 fm.plain(_(b' (untested!)\n'))
1290 1290 else:
1291 1291 lasttestedversion = exttestedwith[-1]
1292 1292 fm.plain(b' (%s!)\n' % lasttestedversion)
1293 1293
1294 1294 fm.condwrite(
1295 1295 ui.verbose and extsource,
1296 1296 b'source',
1297 1297 _(b' location: %s\n'),
1298 1298 extsource or b"",
1299 1299 )
1300 1300
1301 1301 if ui.verbose:
1302 1302 fm.plain(_(b' bundled: %s\n') % [b'no', b'yes'][isinternal])
1303 1303 fm.data(bundled=isinternal)
1304 1304
1305 1305 fm.condwrite(
1306 1306 ui.verbose and exttestedwith,
1307 1307 b'testedwith',
1308 1308 _(b' tested with: %s\n'),
1309 1309 fm.formatlist(exttestedwith, name=b'ver'),
1310 1310 )
1311 1311
1312 1312 fm.condwrite(
1313 1313 ui.verbose and extbuglink,
1314 1314 b'buglink',
1315 1315 _(b' bug reporting: %s\n'),
1316 1316 extbuglink or b"",
1317 1317 )
1318 1318
1319 1319 fm.end()
1320 1320
1321 1321
1322 1322 @command(
1323 1323 b'debugfileset',
1324 1324 [
1325 1325 (
1326 1326 b'r',
1327 1327 b'rev',
1328 1328 b'',
1329 1329 _(b'apply the filespec on this revision'),
1330 1330 _(b'REV'),
1331 1331 ),
1332 1332 (
1333 1333 b'',
1334 1334 b'all-files',
1335 1335 False,
1336 1336 _(b'test files from all revisions and working directory'),
1337 1337 ),
1338 1338 (
1339 1339 b's',
1340 1340 b'show-matcher',
1341 1341 None,
1342 1342 _(b'print internal representation of matcher'),
1343 1343 ),
1344 1344 (
1345 1345 b'p',
1346 1346 b'show-stage',
1347 1347 [],
1348 1348 _(b'print parsed tree at the given stage'),
1349 1349 _(b'NAME'),
1350 1350 ),
1351 1351 ],
1352 1352 _(b'[-r REV] [--all-files] [OPTION]... FILESPEC'),
1353 1353 )
1354 1354 def debugfileset(ui, repo, expr, **opts):
1355 1355 '''parse and apply a fileset specification'''
1356 1356 from . import fileset
1357 1357
1358 1358 fileset.symbols # force import of fileset so we have predicates to optimize
1359 1359 opts = pycompat.byteskwargs(opts)
1360 1360 ctx = scmutil.revsingle(repo, opts.get(b'rev'), None)
1361 1361
1362 1362 stages = [
1363 1363 (b'parsed', pycompat.identity),
1364 1364 (b'analyzed', filesetlang.analyze),
1365 1365 (b'optimized', filesetlang.optimize),
1366 1366 ]
1367 1367 stagenames = {n for n, f in stages}
1368 1368
1369 1369 showalways = set()
1370 1370 if ui.verbose and not opts[b'show_stage']:
1371 1371 # show parsed tree by --verbose (deprecated)
1372 1372 showalways.add(b'parsed')
1373 1373 if opts[b'show_stage'] == [b'all']:
1374 1374 showalways.update(stagenames)
1375 1375 else:
1376 1376 for n in opts[b'show_stage']:
1377 1377 if n not in stagenames:
1378 1378 raise error.Abort(_(b'invalid stage name: %s') % n)
1379 1379 showalways.update(opts[b'show_stage'])
1380 1380
1381 1381 tree = filesetlang.parse(expr)
1382 1382 for n, f in stages:
1383 1383 tree = f(tree)
1384 1384 if n in showalways:
1385 1385 if opts[b'show_stage'] or n != b'parsed':
1386 1386 ui.write(b"* %s:\n" % n)
1387 1387 ui.write(filesetlang.prettyformat(tree), b"\n")
1388 1388
1389 1389 files = set()
1390 1390 if opts[b'all_files']:
1391 1391 for r in repo:
1392 1392 c = repo[r]
1393 1393 files.update(c.files())
1394 1394 files.update(c.substate)
1395 1395 if opts[b'all_files'] or ctx.rev() is None:
1396 1396 wctx = repo[None]
1397 1397 files.update(
1398 1398 repo.dirstate.walk(
1399 1399 scmutil.matchall(repo),
1400 1400 subrepos=list(wctx.substate),
1401 1401 unknown=True,
1402 1402 ignored=True,
1403 1403 )
1404 1404 )
1405 1405 files.update(wctx.substate)
1406 1406 else:
1407 1407 files.update(ctx.files())
1408 1408 files.update(ctx.substate)
1409 1409
1410 1410 m = ctx.matchfileset(repo.getcwd(), expr)
1411 1411 if opts[b'show_matcher'] or (opts[b'show_matcher'] is None and ui.verbose):
1412 1412 ui.writenoi18n(b'* matcher:\n', stringutil.prettyrepr(m), b'\n')
1413 1413 for f in sorted(files):
1414 1414 if not m(f):
1415 1415 continue
1416 1416 ui.write(b"%s\n" % f)
1417 1417
1418 1418
1419 1419 @command(b'debugformat', [] + cmdutil.formatteropts)
1420 1420 def debugformat(ui, repo, **opts):
1421 1421 """display format information about the current repository
1422 1422
1423 1423 Use --verbose to get extra information about the current config value
1424 1424 and the Mercurial default.
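
In plain mode the output is a small table; a hypothetical excerpt might
look like (variant names and values are illustrative)::

  format-variant     repo
  fncache:            yes
  dotencode:          yes
"""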
1425 1425 opts = pycompat.byteskwargs(opts)
1426 1426 maxvariantlength = max(len(fv.name) for fv in upgrade.allformatvariant)
1427 1427 maxvariantlength = max(len(b'format-variant'), maxvariantlength)
1428 1428
1429 1429 def makeformatname(name):
1430 1430 return b'%s:' + (b' ' * (maxvariantlength - len(name)))
1431 1431
1432 1432 fm = ui.formatter(b'debugformat', opts)
1433 1433 if fm.isplain():
1434 1434
1435 1435 def formatvalue(value):
1436 1436 if util.safehasattr(value, b'startswith'):
1437 1437 return value
1438 1438 if value:
1439 1439 return b'yes'
1440 1440 else:
1441 1441 return b'no'
1442 1442
1443 1443 else:
1444 1444 formatvalue = pycompat.identity
1445 1445
1446 1446 fm.plain(b'format-variant')
1447 1447 fm.plain(b' ' * (maxvariantlength - len(b'format-variant')))
1448 1448 fm.plain(b' repo')
1449 1449 if ui.verbose:
1450 1450 fm.plain(b' config default')
1451 1451 fm.plain(b'\n')
1452 1452 for fv in upgrade.allformatvariant:
1453 1453 fm.startitem()
1454 1454 repovalue = fv.fromrepo(repo)
1455 1455 configvalue = fv.fromconfig(repo)
1456 1456
1457 1457 if repovalue != configvalue:
1458 1458 namelabel = b'formatvariant.name.mismatchconfig'
1459 1459 repolabel = b'formatvariant.repo.mismatchconfig'
1460 1460 elif repovalue != fv.default:
1461 1461 namelabel = b'formatvariant.name.mismatchdefault'
1462 1462 repolabel = b'formatvariant.repo.mismatchdefault'
1463 1463 else:
1464 1464 namelabel = b'formatvariant.name.uptodate'
1465 1465 repolabel = b'formatvariant.repo.uptodate'
1466 1466
1467 1467 fm.write(b'name', makeformatname(fv.name), fv.name, label=namelabel)
1468 1468 fm.write(b'repo', b' %3s', formatvalue(repovalue), label=repolabel)
1469 1469 if fv.default != configvalue:
1470 1470 configlabel = b'formatvariant.config.special'
1471 1471 else:
1472 1472 configlabel = b'formatvariant.config.default'
1473 1473 fm.condwrite(
1474 1474 ui.verbose,
1475 1475 b'config',
1476 1476 b' %6s',
1477 1477 formatvalue(configvalue),
1478 1478 label=configlabel,
1479 1479 )
1480 1480 fm.condwrite(
1481 1481 ui.verbose,
1482 1482 b'default',
1483 1483 b' %7s',
1484 1484 formatvalue(fv.default),
1485 1485 label=b'formatvariant.default',
1486 1486 )
1487 1487 fm.plain(b'\n')
1488 1488 fm.end()
1489 1489
1490 1490
1491 1491 @command(b'debugfsinfo', [], _(b'[PATH]'), norepo=True)
1492 1492 def debugfsinfo(ui, path=b"."):
1493 1493 """show information detected about current filesystem"""
1494 1494 ui.writenoi18n(b'path: %s\n' % path)
1495 1495 ui.writenoi18n(
1496 1496 b'mounted on: %s\n' % (util.getfsmountpoint(path) or b'(unknown)')
1497 1497 )
1498 1498 ui.writenoi18n(b'exec: %s\n' % (util.checkexec(path) and b'yes' or b'no'))
1499 1499 ui.writenoi18n(b'fstype: %s\n' % (util.getfstype(path) or b'(unknown)'))
1500 1500 ui.writenoi18n(
1501 1501 b'symlink: %s\n' % (util.checklink(path) and b'yes' or b'no')
1502 1502 )
1503 1503 ui.writenoi18n(
1504 1504 b'hardlink: %s\n' % (util.checknlink(path) and b'yes' or b'no')
1505 1505 )
1506 1506 casesensitive = b'(unknown)'
1507 1507 try:
1508 1508 with pycompat.namedtempfile(prefix=b'.debugfsinfo', dir=path) as f:
1509 1509 casesensitive = util.fscasesensitive(f.name) and b'yes' or b'no'
1510 1510 except OSError:
1511 1511 pass
1512 1512 ui.writenoi18n(b'case-sensitive: %s\n' % casesensitive)
1513 1513
1514 1514
1515 1515 @command(
1516 1516 b'debuggetbundle',
1517 1517 [
1518 1518 (b'H', b'head', [], _(b'id of head node'), _(b'ID')),
1519 1519 (b'C', b'common', [], _(b'id of common node'), _(b'ID')),
1520 1520 (
1521 1521 b't',
1522 1522 b'type',
1523 1523 b'bzip2',
1524 1524 _(b'bundle compression type to use'),
1525 1525 _(b'TYPE'),
1526 1526 ),
1527 1527 ],
1528 1528 _(b'REPO FILE [-H|-C ID]...'),
1529 1529 norepo=True,
1530 1530 )
1531 1531 def debuggetbundle(ui, repopath, bundlepath, head=None, common=None, **opts):
1532 1532 """retrieves a bundle from a repo
1533 1533
1534 1534 Every ID must be a full-length hex node id string. Saves the bundle to the
1535 1535 given file.
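
For example (URL and node id are placeholders)::

  hg debuggetbundle http://example.com/repo bundle.hg -H <full-hex-node>

The ``--type`` option accepts ``none``, ``bzip2`` (the default), ``gzip``
or ``bundle2``.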
1536 1536 """
1537 1537 opts = pycompat.byteskwargs(opts)
1538 1538 repo = hg.peer(ui, opts, repopath)
1539 1539 if not repo.capable(b'getbundle'):
1540 1540 raise error.Abort(b"getbundle() not supported by target repository")
1541 1541 args = {}
1542 1542 if common:
1543 1543 args['common'] = [bin(s) for s in common]
1544 1544 if head:
1545 1545 args['heads'] = [bin(s) for s in head]
1546 1546 # TODO: get desired bundlecaps from command line.
1547 1547 args['bundlecaps'] = None
1548 1548 bundle = repo.getbundle(b'debug', **args)
1549 1549
1550 1550 bundletype = opts.get(b'type', b'bzip2').lower()
1551 1551 btypes = {
1552 1552 b'none': b'HG10UN',
1553 1553 b'bzip2': b'HG10BZ',
1554 1554 b'gzip': b'HG10GZ',
1555 1555 b'bundle2': b'HG20',
1556 1556 }
1557 1557 bundletype = btypes.get(bundletype)
1558 1558 if bundletype not in bundle2.bundletypes:
1559 1559 raise error.Abort(_(b'unknown bundle type specified with --type'))
1560 1560 bundle2.writebundle(ui, bundle, bundlepath, bundletype)
1561 1561
1562 1562
1563 1563 @command(b'debugignore', [], b'[FILE]')
1564 1564 def debugignore(ui, repo, *files, **opts):
1565 1565 """display the combined ignore pattern and information about ignored files
1566 1566
1567 1567 With no argument display the combined ignore pattern.
1568 1568
1569 1569 Given space-separated file names, show whether each file is ignored and,
1570 1570 if so, the ignore rule (file and line number) that matched it.
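
For example (file name and rule are illustrative)::

  $ hg debugignore foo.pyc
  foo.pyc is ignored
  (ignore rule in .hgignore, line 2: '*.pyc')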
1571 1571 """
1572 1572 ignore = repo.dirstate._ignore
1573 1573 if not files:
1574 1574 # Show all the patterns
1575 1575 ui.write(b"%s\n" % pycompat.byterepr(ignore))
1576 1576 else:
1577 1577 m = scmutil.match(repo[None], pats=files)
1578 1578 uipathfn = scmutil.getuipathfn(repo, legacyrelativevalue=True)
1579 1579 for f in m.files():
1580 1580 nf = util.normpath(f)
1581 1581 ignored = None
1582 1582 ignoredata = None
1583 1583 if nf != b'.':
1584 1584 if ignore(nf):
1585 1585 ignored = nf
1586 1586 ignoredata = repo.dirstate._ignorefileandline(nf)
1587 1587 else:
1588 1588 for p in pathutil.finddirs(nf):
1589 1589 if ignore(p):
1590 1590 ignored = p
1591 1591 ignoredata = repo.dirstate._ignorefileandline(p)
1592 1592 break
1593 1593 if ignored:
1594 1594 if ignored == nf:
1595 1595 ui.write(_(b"%s is ignored\n") % uipathfn(f))
1596 1596 else:
1597 1597 ui.write(
1598 1598 _(
1599 1599 b"%s is ignored because of "
1600 1600 b"containing directory %s\n"
1601 1601 )
1602 1602 % (uipathfn(f), ignored)
1603 1603 )
1604 1604 ignorefile, lineno, line = ignoredata
1605 1605 ui.write(
1606 1606 _(b"(ignore rule in %s, line %d: '%s')\n")
1607 1607 % (ignorefile, lineno, line)
1608 1608 )
1609 1609 else:
1610 1610 ui.write(_(b"%s is not ignored\n") % uipathfn(f))
1611 1611
1612 1612
1613 1613 @command(
1614 1614 b'debugindex',
1615 1615 cmdutil.debugrevlogopts + cmdutil.formatteropts,
1616 1616 _(b'-c|-m|FILE'),
1617 1617 )
1618 1618 def debugindex(ui, repo, file_=None, **opts):
1619 1619 """dump index data for a storage primitive"""
1620 1620 opts = pycompat.byteskwargs(opts)
1621 1621 store = cmdutil.openstorage(repo, b'debugindex', file_, opts)
1622 1622
1623 1623 if ui.debugflag:
1624 1624 shortfn = hex
1625 1625 else:
1626 1626 shortfn = short
1627 1627
1628 1628 idlen = 12
1629 1629 for i in store:
1630 1630 idlen = len(shortfn(store.node(i)))
1631 1631 break
1632 1632
1633 1633 fm = ui.formatter(b'debugindex', opts)
1634 1634 fm.plain(
1635 1635 b' rev linkrev %s %s p2\n'
1636 1636 % (b'nodeid'.ljust(idlen), b'p1'.ljust(idlen))
1637 1637 )
1638 1638
1639 1639 for rev in store:
1640 1640 node = store.node(rev)
1641 1641 parents = store.parents(node)
1642 1642
1643 1643 fm.startitem()
1644 1644 fm.write(b'rev', b'%6d ', rev)
1645 1645 fm.write(b'linkrev', b'%7d ', store.linkrev(rev))
1646 1646 fm.write(b'node', b'%s ', shortfn(node))
1647 1647 fm.write(b'p1', b'%s ', shortfn(parents[0]))
1648 1648 fm.write(b'p2', b'%s', shortfn(parents[1]))
1649 1649 fm.plain(b'\n')
1650 1650
1651 1651 fm.end()
1652 1652
1653 1653
1654 1654 @command(
1655 1655 b'debugindexdot',
1656 1656 cmdutil.debugrevlogopts,
1657 1657 _(b'-c|-m|FILE'),
1658 1658 optionalrepo=True,
1659 1659 )
1660 1660 def debugindexdot(ui, repo, file_=None, **opts):
1661 1661 """dump an index DAG as a graphviz dot file"""
1662 1662 opts = pycompat.byteskwargs(opts)
1663 1663 r = cmdutil.openstorage(repo, b'debugindexdot', file_, opts)
1664 1664 ui.writenoi18n(b"digraph G {\n")
1665 1665 for i in r:
1666 1666 node = r.node(i)
1667 1667 pp = r.parents(node)
1668 1668 ui.write(b"\t%d -> %d\n" % (r.rev(pp[0]), i))
1669 1669 if pp[1] != repo.nullid:
1670 1670 ui.write(b"\t%d -> %d\n" % (r.rev(pp[1]), i))
1671 1671 ui.write(b"}\n")
1672 1672
1673 1673
1674 1674 @command(b'debugindexstats', [])
1675 1675 def debugindexstats(ui, repo):
1676 1676 """show stats related to the changelog index"""
1677 1677 repo.changelog.shortest(repo.nullid, 1)
1678 1678 index = repo.changelog.index
1679 1679 if not util.safehasattr(index, b'stats'):
1680 1680 raise error.Abort(_(b'debugindexstats only works with native code'))
1681 1681 for k, v in sorted(index.stats().items()):
1682 1682 ui.write(b'%s: %d\n' % (k, v))
1683 1683
1684 1684
1685 1685 @command(b'debuginstall', [] + cmdutil.formatteropts, b'', norepo=True)
1686 1686 def debuginstall(ui, **opts):
1687 1687 """test Mercurial installation
1688 1688
1689 1689 Returns 0 on success.
1690 1690 """
1691 1691 opts = pycompat.byteskwargs(opts)
1692 1692
1693 1693 problems = 0
1694 1694
1695 1695 fm = ui.formatter(b'debuginstall', opts)
1696 1696 fm.startitem()
1697 1697
1698 1698 # encoding might be unknown or wrong. don't translate these messages.
1699 1699 fm.write(b'encoding', b"checking encoding (%s)...\n", encoding.encoding)
1700 1700 err = None
1701 1701 try:
1702 1702 codecs.lookup(pycompat.sysstr(encoding.encoding))
1703 1703 except LookupError as inst:
1704 1704 err = stringutil.forcebytestr(inst)
1705 1705 problems += 1
1706 1706 fm.condwrite(
1707 1707 err,
1708 1708 b'encodingerror',
1709 1709 b" %s\n (check that your locale is properly set)\n",
1710 1710 err,
1711 1711 )
1712 1712
1713 1713 # Python
1714 1714 pythonlib = None
1715 1715 if util.safehasattr(os, '__file__'):
1716 1716 pythonlib = os.path.dirname(pycompat.fsencode(os.__file__))
1717 1717 elif getattr(sys, 'oxidized', False):
1718 1718 pythonlib = pycompat.sysexecutable
1719 1719
1720 1720 fm.write(
1721 1721 b'pythonexe',
1722 1722 _(b"checking Python executable (%s)\n"),
1723 1723 pycompat.sysexecutable or _(b"unknown"),
1724 1724 )
1725 1725 fm.write(
1726 1726 b'pythonimplementation',
1727 1727 _(b"checking Python implementation (%s)\n"),
1728 1728 pycompat.sysbytes(platform.python_implementation()),
1729 1729 )
1730 1730 fm.write(
1731 1731 b'pythonver',
1732 1732 _(b"checking Python version (%s)\n"),
1733 1733 (b"%d.%d.%d" % sys.version_info[:3]),
1734 1734 )
1735 1735 fm.write(
1736 1736 b'pythonlib',
1737 1737 _(b"checking Python lib (%s)...\n"),
1738 1738 pythonlib or _(b"unknown"),
1739 1739 )
1740 1740
1741 1741 try:
1742 1742 from . import rustext # pytype: disable=import-error
1743 1743
1744 1744 rustext.__doc__ # trigger lazy import
1745 1745 except ImportError:
1746 1746 rustext = None
1747 1747
1748 1748 security = set(sslutil.supportedprotocols)
1749 1749 if sslutil.hassni:
1750 1750 security.add(b'sni')
1751 1751
1752 1752 fm.write(
1753 1753 b'pythonsecurity',
1754 1754 _(b"checking Python security support (%s)\n"),
1755 1755 fm.formatlist(sorted(security), name=b'protocol', fmt=b'%s', sep=b','),
1756 1756 )
1757 1757
1758 1758 # These are warnings, not errors. So don't increment problem count. This
1759 1759 # may change in the future.
1760 1760 if b'tls1.2' not in security:
1761 1761 fm.plain(
1762 1762 _(
1763 1763 b' TLS 1.2 not supported by Python install; '
1764 1764 b'network connections lack modern security\n'
1765 1765 )
1766 1766 )
1767 1767 if b'sni' not in security:
1768 1768 fm.plain(
1769 1769 _(
1770 1770 b' SNI not supported by Python install; may have '
1771 1771 b'connectivity issues with some servers\n'
1772 1772 )
1773 1773 )
1774 1774
1775 1775 fm.plain(
1776 1776 _(
1777 1777 b"checking Rust extensions (%s)\n"
1778 1778 % (b'missing' if rustext is None else b'installed')
1779 1779 ),
1780 1780 )
1781 1781
1782 1782 # TODO print CA cert info
1783 1783
1784 1784 # hg version
1785 1785 hgver = util.version()
1786 1786 fm.write(
1787 1787 b'hgver', _(b"checking Mercurial version (%s)\n"), hgver.split(b'+')[0]
1788 1788 )
1789 1789 fm.write(
1790 1790 b'hgverextra',
1791 1791 _(b"checking Mercurial custom build (%s)\n"),
1792 1792 b'+'.join(hgver.split(b'+')[1:]),
1793 1793 )
1794 1794
1795 1795 # compiled modules
1796 1796 hgmodules = None
1797 1797 if util.safehasattr(sys.modules[__name__], '__file__'):
1798 1798 hgmodules = os.path.dirname(pycompat.fsencode(__file__))
1799 1799 elif getattr(sys, 'oxidized', False):
1800 1800 hgmodules = pycompat.sysexecutable
1801 1801
1802 1802 fm.write(
1803 1803 b'hgmodulepolicy', _(b"checking module policy (%s)\n"), policy.policy
1804 1804 )
1805 1805 fm.write(
1806 1806 b'hgmodules',
1807 1807 _(b"checking installed modules (%s)...\n"),
1808 1808 hgmodules or _(b"unknown"),
1809 1809 )
1810 1810
1811 1811 rustandc = policy.policy in (b'rust+c', b'rust+c-allow')
1812 1812 rustext = rustandc # for now, that's the only case
1813 1813 cext = policy.policy in (b'c', b'allow') or rustandc
1814 1814 nopure = cext or rustext
1815 1815 if nopure:
1816 1816 err = None
1817 1817 try:
1818 1818 if cext:
1819 1819 from .cext import ( # pytype: disable=import-error
1820 1820 base85,
1821 1821 bdiff,
1822 1822 mpatch,
1823 1823 osutil,
1824 1824 )
1825 1825
1826 1826 # quiet pyflakes
1827 1827 dir(bdiff), dir(mpatch), dir(base85), dir(osutil)
1828 1828 if rustext:
1829 1829 from .rustext import ( # pytype: disable=import-error
1830 1830 ancestor,
1831 1831 dirstate,
1832 1832 )
1833 1833
1834 1834 dir(ancestor), dir(dirstate) # quiet pyflakes
1835 1835 except Exception as inst:
1836 1836 err = stringutil.forcebytestr(inst)
1837 1837 problems += 1
1838 1838 fm.condwrite(err, b'extensionserror', b" %s\n", err)
1839 1839
1840 1840 compengines = util.compengines._engines.values()
1841 1841 fm.write(
1842 1842 b'compengines',
1843 1843 _(b'checking registered compression engines (%s)\n'),
1844 1844 fm.formatlist(
1845 1845 sorted(e.name() for e in compengines),
1846 1846 name=b'compengine',
1847 1847 fmt=b'%s',
1848 1848 sep=b', ',
1849 1849 ),
1850 1850 )
1851 1851 fm.write(
1852 1852 b'compenginesavail',
1853 1853 _(b'checking available compression engines (%s)\n'),
1854 1854 fm.formatlist(
1855 1855 sorted(e.name() for e in compengines if e.available()),
1856 1856 name=b'compengine',
1857 1857 fmt=b'%s',
1858 1858 sep=b', ',
1859 1859 ),
1860 1860 )
1861 1861 wirecompengines = compression.compengines.supportedwireengines(
1862 1862 compression.SERVERROLE
1863 1863 )
1864 1864 fm.write(
1865 1865 b'compenginesserver',
1866 1866 _(
1867 1867 b'checking available compression engines '
1868 1868 b'for wire protocol (%s)\n'
1869 1869 ),
1870 1870 fm.formatlist(
1871 1871 [e.name() for e in wirecompengines if e.wireprotosupport()],
1872 1872 name=b'compengine',
1873 1873 fmt=b'%s',
1874 1874 sep=b', ',
1875 1875 ),
1876 1876 )
1877 1877 re2 = b'missing'
1878 1878 if util._re2:
1879 1879 re2 = b'available'
1880 1880 fm.plain(_(b'checking "re2" regexp engine (%s)\n') % re2)
1881 1881 fm.data(re2=bool(util._re2))
1882 1882
1883 1883 # templates
1884 1884 p = templater.templatedir()
1885 1885 fm.write(b'templatedirs', b'checking templates (%s)...\n', p or b'')
1886 1886 fm.condwrite(not p, b'', _(b" no template directories found\n"))
1887 1887 if p:
1888 1888 (m, fp) = templater.try_open_template(b"map-cmdline.default")
1889 1889 if m:
1890 1890 # template found, check if it is working
1891 1891 err = None
1892 1892 try:
1893 1893 templater.templater.frommapfile(m)
1894 1894 except Exception as inst:
1895 1895 err = stringutil.forcebytestr(inst)
1896 1896 p = None
1897 1897 fm.condwrite(err, b'defaulttemplateerror', b" %s\n", err)
1898 1898 else:
1899 1899 p = None
1900 1900 fm.condwrite(
1901 1901 p, b'defaulttemplate', _(b"checking default template (%s)\n"), m
1902 1902 )
1903 1903 fm.condwrite(
1904 1904 not m,
1905 1905 b'defaulttemplatenotfound',
1906 1906 _(b" template '%s' not found\n"),
1907 1907 b"default",
1908 1908 )
1909 1909 if not p:
1910 1910 problems += 1
1911 1911 fm.condwrite(
1912 1912 not p, b'', _(b" (templates seem to have been installed incorrectly)\n")
1913 1913 )
1914 1914
1915 1915 # editor
1916 1916 editor = ui.geteditor()
1917 1917 editor = util.expandpath(editor)
1918 1918 editorbin = procutil.shellsplit(editor)[0]
1919 1919 fm.write(b'editor', _(b"checking commit editor... (%s)\n"), editorbin)
1920 1920 cmdpath = procutil.findexe(editorbin)
1921 1921 fm.condwrite(
1922 1922 not cmdpath and editor == b'vi',
1923 1923 b'vinotfound',
1924 1924 _(
1925 1925 b" No commit editor set and can't find %s in PATH\n"
1926 1926 b" (specify a commit editor in your configuration"
1927 1927 b" file)\n"
1928 1928 ),
1929 1929 not cmdpath and editor == b'vi' and editorbin,
1930 1930 )
1931 1931 fm.condwrite(
1932 1932 not cmdpath and editor != b'vi',
1933 1933 b'editornotfound',
1934 1934 _(
1935 1935 b" Can't find editor '%s' in PATH\n"
1936 1936 b" (specify a commit editor in your configuration"
1937 1937 b" file)\n"
1938 1938 ),
1939 1939 not cmdpath and editorbin,
1940 1940 )
1941 1941 if not cmdpath and editor != b'vi':
1942 1942 problems += 1
1943 1943
1944 1944 # check username
1945 1945 username = None
1946 1946 err = None
1947 1947 try:
1948 1948 username = ui.username()
1949 1949 except error.Abort as e:
1950 1950 err = e.message
1951 1951 problems += 1
1952 1952
1953 1953 fm.condwrite(
1954 1954 username, b'username', _(b"checking username (%s)\n"), username
1955 1955 )
1956 1956 fm.condwrite(
1957 1957 err,
1958 1958 b'usernameerror',
1959 1959 _(
1960 1960 b"checking username...\n %s\n"
1961 1961 b" (specify a username in your configuration file)\n"
1962 1962 ),
1963 1963 err,
1964 1964 )
1965 1965
1966 1966 for name, mod in extensions.extensions():
1967 1967 handler = getattr(mod, 'debuginstall', None)
1968 1968 if handler is not None:
1969 1969 problems += handler(ui, fm)
1970 1970
1971 1971 fm.condwrite(not problems, b'', _(b"no problems detected\n"))
1972 1972 if not problems:
1973 1973 fm.data(problems=problems)
1974 1974 fm.condwrite(
1975 1975 problems,
1976 1976 b'problems',
1977 1977 _(b"%d problems detected, please check your install!\n"),
1978 1978 problems,
1979 1979 )
1980 1980 fm.end()
1981 1981
1982 1982 return problems
1983 1983
1984 1984
1985 1985 @command(b'debugknown', [], _(b'REPO ID...'), norepo=True)
1986 1986 def debugknown(ui, repopath, *ids, **opts):
1987 1987 """test whether node ids are known to a repo
1988 1988
1989 1989 Every ID must be a full-length hex node id string. Returns a list of 0s
1990 1990 and 1s indicating unknown/known.
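
For instance, querying two ids where only the first is known to the
remote would print ``10`` (illustrative output).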
1991 1991 """
1992 1992 opts = pycompat.byteskwargs(opts)
1993 1993 repo = hg.peer(ui, opts, repopath)
1994 1994 if not repo.capable(b'known'):
1995 1995 raise error.Abort(b"known() not supported by target repository")
1996 1996 flags = repo.known([bin(s) for s in ids])
1997 1997 ui.write(b"%s\n" % (b"".join([f and b"1" or b"0" for f in flags])))
1998 1998
1999 1999
2000 2000 @command(b'debuglabelcomplete', [], _(b'LABEL...'))
2001 2001 def debuglabelcomplete(ui, repo, *args):
2002 2002 '''backwards compatibility with old bash completion scripts (DEPRECATED)'''
2003 2003 debugnamecomplete(ui, repo, *args)
2004 2004
2005 2005
2006 2006 @command(
2007 2007 b'debuglocks',
2008 2008 [
2009 2009 (b'L', b'force-free-lock', None, _(b'free the store lock (DANGEROUS)')),
2010 2010 (
2011 2011 b'W',
2012 2012 b'force-free-wlock',
2013 2013 None,
2014 2014 _(b'free the working state lock (DANGEROUS)'),
2015 2015 ),
2016 2016 (b's', b'set-lock', None, _(b'set the store lock until stopped')),
2017 2017 (
2018 2018 b'S',
2019 2019 b'set-wlock',
2020 2020 None,
2021 2021 _(b'set the working state lock until stopped'),
2022 2022 ),
2023 2023 ],
2024 2024 _(b'[OPTION]...'),
2025 2025 )
2026 2026 def debuglocks(ui, repo, **opts):
2027 2027 """show or modify state of locks
2028 2028
2029 2029 By default, this command will show which locks are held. This
2030 2030 includes the user and process holding the lock, the amount of time
2031 2031 the lock has been held, and the machine name where the process is
2032 2032 running if it's not local.
2033 2033
2034 2034 Locks protect the integrity of Mercurial's data, so they should be
2035 2035 treated with care. System crashes or other interruptions may cause
2036 2036 locks to not be properly released, though Mercurial will usually
2037 2037 detect and remove such stale locks automatically.
2038 2038
2039 2039 However, detecting stale locks may not always be possible (for
2040 2040 instance, on a shared filesystem). Removing locks may also be
2041 2041 blocked by filesystem permissions.
2042 2042
2043 2043 Setting a lock will prevent other commands from changing the data.
2044 2044 The command will wait until an interruption (SIGINT, SIGTERM, ...) occurs.
2045 2045 The set locks are removed when the command exits.
2046 2046
2047 2047 Returns 0 if no locks are held.
2048 2048
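The report lines look like, for instance (user, pid and host are
illustrative)::

  lock:  user alice, process 54321, host buildbot (93s)
  wlock: free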
2049 2049 """
2050 2050
2051 2051 if opts.get('force_free_lock'):
2052 2052 repo.svfs.unlink(b'lock')
2053 2053 if opts.get('force_free_wlock'):
2054 2054 repo.vfs.unlink(b'wlock')
2055 2055 if opts.get('force_free_lock') or opts.get('force_free_wlock'):
2056 2056 return 0
2057 2057
2058 2058 locks = []
2059 2059 try:
2060 2060 if opts.get('set_wlock'):
2061 2061 try:
2062 2062 locks.append(repo.wlock(False))
2063 2063 except error.LockHeld:
2064 2064 raise error.Abort(_(b'wlock is already held'))
2065 2065 if opts.get('set_lock'):
2066 2066 try:
2067 2067 locks.append(repo.lock(False))
2068 2068 except error.LockHeld:
2069 2069 raise error.Abort(_(b'lock is already held'))
2070 2070 if len(locks):
2071 2071 ui.promptchoice(_(b"ready to release the lock (y)? $$ &Yes"))
2072 2072 return 0
2073 2073 finally:
2074 2074 release(*locks)
2075 2075
2076 2076 now = time.time()
2077 2077 held = 0
2078 2078
2079 2079 def report(vfs, name, method):
2080 2080 # this causes stale locks to get reaped for more accurate reporting
2081 2081 try:
2082 2082 l = method(False)
2083 2083 except error.LockHeld:
2084 2084 l = None
2085 2085
2086 2086 if l:
2087 2087 l.release()
2088 2088 else:
2089 2089 try:
2090 2090 st = vfs.lstat(name)
2091 2091 age = now - st[stat.ST_MTIME]
2092 2092 user = util.username(st.st_uid)
2093 2093 locker = vfs.readlock(name)
2094 2094 if b":" in locker:
2095 2095 host, pid = locker.split(b':')
2096 2096 if host == socket.gethostname():
2097 2097 locker = b'user %s, process %s' % (user or b'None', pid)
2098 2098 else:
2099 2099 locker = b'user %s, process %s, host %s' % (
2100 2100 user or b'None',
2101 2101 pid,
2102 2102 host,
2103 2103 )
2104 2104 ui.writenoi18n(b"%-6s %s (%ds)\n" % (name + b":", locker, age))
2105 2105 return 1
2106 2106 except OSError as e:
2107 2107 if e.errno != errno.ENOENT:
2108 2108 raise
2109 2109
2110 2110 ui.writenoi18n(b"%-6s free\n" % (name + b":"))
2111 2111 return 0
2112 2112
2113 2113 held += report(repo.svfs, b"lock", repo.lock)
2114 2114 held += report(repo.vfs, b"wlock", repo.wlock)
2115 2115
2116 2116 return held
2117 2117
2118 2118
2119 2119 @command(
2120 2120 b'debugmanifestfulltextcache',
2121 2121 [
2122 2122 (b'', b'clear', False, _(b'clear the cache')),
2123 2123 (
2124 2124 b'a',
2125 2125 b'add',
2126 2126 [],
2127 2127 _(b'add the given manifest nodes to the cache'),
2128 2128 _(b'NODE'),
2129 2129 ),
2130 2130 ],
2131 2131 b'',
2132 2132 )
2133 2133 def debugmanifestfulltextcache(ui, repo, add=(), **opts):
2134 2134 """show, clear or amend the contents of the manifest fulltext cache"""
2135 2135
2136 2136 def getcache():
2137 2137 r = repo.manifestlog.getstorage(b'')
2138 2138 try:
2139 2139 return r._fulltextcache
2140 2140 except AttributeError:
2141 2141 msg = _(
2142 2142 b"Current revlog implementation doesn't appear to have a "
2143 2143 b"manifest fulltext cache\n"
2144 2144 )
2145 2145 raise error.Abort(msg)
2146 2146
2147 2147 if opts.get('clear'):
2148 2148 with repo.wlock():
2149 2149 cache = getcache()
2150 2150 cache.clear(clear_persisted_data=True)
2151 2151 return
2152 2152
2153 2153 if add:
2154 2154 with repo.wlock():
2155 2155 m = repo.manifestlog
2156 2156 store = m.getstorage(b'')
2157 2157 for n in add:
2158 2158 try:
2159 2159 manifest = m[store.lookup(n)]
2160 2160 except error.LookupError as e:
2161 2161 raise error.Abort(
2162 2162 bytes(e), hint=b"Check your manifest node id"
2163 2163 )
2164 2164 manifest.read() # stores revision in cache too
2165 2165 return
2166 2166
2167 2167 cache = getcache()
2168 2168 if not len(cache):
2169 2169 ui.write(_(b'cache empty\n'))
2170 2170 else:
2171 2171 ui.write(
2172 2172 _(
2173 2173 b'cache contains %d manifest entries, in order of most to '
2174 2174 b'least recent:\n'
2175 2175 )
2176 2176 % (len(cache),)
2177 2177 )
2178 2178 totalsize = 0
2179 2179 for nodeid in cache:
2180 2180 # Use cache.peek to not update the LRU order
2181 2181 data = cache.peek(nodeid)
2182 2182 size = len(data)
2183 2183 totalsize += size + 24 # 20 bytes nodeid, 4 bytes size
2184 2184 ui.write(
2185 2185 _(b'id: %s, size %s\n') % (hex(nodeid), util.bytecount(size))
2186 2186 )
2187 2187 ondisk = cache._opener.stat(b'manifestfulltextcache').st_size
2188 2188 ui.write(
2189 2189 _(b'total cache data size %s, on-disk %s\n')
2190 2190 % (util.bytecount(totalsize), util.bytecount(ondisk))
2191 2191 )
2192 2192
2193 2193
2194 2194 @command(b'debugmergestate', [] + cmdutil.templateopts, b'')
2195 2195 def debugmergestate(ui, repo, *args, **opts):
2196 2196 """print merge state
2197 2197
2198 2198 Use --verbose to print out information about whether v1 or v2 merge state
2199 2199 was chosen."""
2200 2200
2201 2201 if ui.verbose:
2202 2202 ms = mergestatemod.mergestate(repo)
2203 2203
2204 2204 # sort so that reasonable information is on top
2205 2205 v1records = ms._readrecordsv1()
2206 2206 v2records = ms._readrecordsv2()
2207 2207
2208 2208 if not v1records and not v2records:
2209 2209 pass
2210 2210 elif not v2records:
2211 2211 ui.writenoi18n(b'no version 2 merge state\n')
2212 2212 elif ms._v1v2match(v1records, v2records):
2213 2213 ui.writenoi18n(b'v1 and v2 states match: using v2\n')
2214 2214 else:
2215 2215 ui.writenoi18n(b'v1 and v2 states mismatch: using v1\n')
2216 2216
2217 2217 opts = pycompat.byteskwargs(opts)
2218 2218 if not opts[b'template']:
2219 2219 opts[b'template'] = (
2220 2220 b'{if(commits, "", "no merge state found\n")}'
2221 2221 b'{commits % "{name}{if(label, " ({label})")}: {node}\n"}'
2222 2222 b'{files % "file: {path} (state \\"{state}\\")\n'
2223 2223 b'{if(local_path, "'
2224 2224 b' local path: {local_path} (hash {local_key}, flags \\"{local_flags}\\")\n'
2225 2225 b' ancestor path: {ancestor_path} (node {ancestor_node})\n'
2226 2226 b' other path: {other_path} (node {other_node})\n'
2227 2227 b'")}'
2228 2228 b'{if(rename_side, "'
2229 2229 b' rename side: {rename_side}\n'
2230 2230 b' renamed path: {renamed_path}\n'
2231 2231 b'")}'
2232 2232 b'{extras % " extra: {key} = {value}\n"}'
2233 2233 b'"}'
2234 2234 b'{extras % "extra: {file} ({key} = {value})\n"}'
2235 2235 )
2236 2236
2237 2237 ms = mergestatemod.mergestate.read(repo)
2238 2238
2239 2239 fm = ui.formatter(b'debugmergestate', opts)
2240 2240 fm.startitem()
2241 2241
2242 2242 fm_commits = fm.nested(b'commits')
2243 2243 if ms.active():
2244 2244 for name, node, label_index in (
2245 2245 (b'local', ms.local, 0),
2246 2246 (b'other', ms.other, 1),
2247 2247 ):
2248 2248 fm_commits.startitem()
2249 2249 fm_commits.data(name=name)
2250 2250 fm_commits.data(node=hex(node))
2251 2251 if ms._labels and len(ms._labels) > label_index:
2252 2252 fm_commits.data(label=ms._labels[label_index])
2253 2253 fm_commits.end()
2254 2254
2255 2255 fm_files = fm.nested(b'files')
2256 2256 if ms.active():
2257 2257 for f in ms:
2258 2258 fm_files.startitem()
2259 2259 fm_files.data(path=f)
2260 2260 state = ms._state[f]
2261 2261 fm_files.data(state=state[0])
2262 2262 if state[0] in (
2263 2263 mergestatemod.MERGE_RECORD_UNRESOLVED,
2264 2264 mergestatemod.MERGE_RECORD_RESOLVED,
2265 2265 ):
2266 2266 fm_files.data(local_key=state[1])
2267 2267 fm_files.data(local_path=state[2])
2268 2268 fm_files.data(ancestor_path=state[3])
2269 2269 fm_files.data(ancestor_node=state[4])
2270 2270 fm_files.data(other_path=state[5])
2271 2271 fm_files.data(other_node=state[6])
2272 2272 fm_files.data(local_flags=state[7])
2273 2273 elif state[0] in (
2274 2274 mergestatemod.MERGE_RECORD_UNRESOLVED_PATH,
2275 2275 mergestatemod.MERGE_RECORD_RESOLVED_PATH,
2276 2276 ):
2277 2277 fm_files.data(renamed_path=state[1])
2278 2278 fm_files.data(rename_side=state[2])
2279 2279 fm_extras = fm_files.nested(b'extras')
2280 2280 for k, v in sorted(ms.extras(f).items()):
2281 2281 fm_extras.startitem()
2282 2282 fm_extras.data(key=k)
2283 2283 fm_extras.data(value=v)
2284 2284 fm_extras.end()
2285 2285
2286 2286 fm_files.end()
2287 2287
2288 2288 fm_extras = fm.nested(b'extras')
2289 2289 for f, d in sorted(pycompat.iteritems(ms.allextras())):
2290 2290 if f in ms:
2291 2291 # If file is in mergestate, we have already processed its extras
2292 2292 continue
2293 2293 for k, v in pycompat.iteritems(d):
2294 2294 fm_extras.startitem()
2295 2295 fm_extras.data(file=f)
2296 2296 fm_extras.data(key=k)
2297 2297 fm_extras.data(value=v)
2298 2298 fm_extras.end()
2299 2299
2300 2300 fm.end()
2301 2301
2302 2302
2303 2303 @command(b'debugnamecomplete', [], _(b'NAME...'))
2304 2304 def debugnamecomplete(ui, repo, *args):
2305 2305 '''complete "names" - tags, open branch names, bookmark names'''
2306 2306
2307 2307 names = set()
2308 2308 # since we previously only listed open branches, we will handle that
2309 2309 # specially (after this for loop)
2310 2310 for name, ns in pycompat.iteritems(repo.names):
2311 2311 if name != b'branches':
2312 2312 names.update(ns.listnames(repo))
2313 2313 names.update(
2314 2314 tag
2315 2315 for (tag, heads, tip, closed) in repo.branchmap().iterbranches()
2316 2316 if not closed
2317 2317 )
2318 2318 completions = set()
2319 2319 if not args:
2320 2320 args = [b'']
2321 2321 for a in args:
2322 2322 completions.update(n for n in names if n.startswith(a))
2323 2323 ui.write(b'\n'.join(sorted(completions)))
2324 2324 ui.write(b'\n')
2325 2325
2326 2326
2327 2327 @command(
2328 2328 b'debugnodemap',
2329 2329 [
2330 2330 (
2331 2331 b'',
2332 2332 b'dump-new',
2333 2333 False,
2334 2334 _(b'write a (new) persistent binary nodemap on stdout'),
2335 2335 ),
2336 2336 (b'', b'dump-disk', False, _(b'dump on-disk data on stdout')),
2337 2337 (
2338 2338 b'',
2339 2339 b'check',
2340 2340 False,
2341 2341 _(b'check that the on-disk data are correct.'),
2342 2342 ),
2343 2343 (
2344 2344 b'',
2345 2345 b'metadata',
2346 2346 False,
2347 2347 _(b'display the on-disk metadata for the nodemap'),
2348 2348 ),
2349 2349 ],
2350 2350 )
2351 2351 def debugnodemap(ui, repo, **opts):
2352 2352 """write and inspect on disk nodemap"""
2353 2353 if opts['dump_new']:
2354 2354 unfi = repo.unfiltered()
2355 2355 cl = unfi.changelog
2356 2356 if util.safehasattr(cl.index, "nodemap_data_all"):
2357 2357 data = cl.index.nodemap_data_all()
2358 2358 else:
2359 2359 data = nodemap.persistent_data(cl.index)
2360 2360 ui.write(data)
2361 2361 elif opts['dump_disk']:
2362 2362 unfi = repo.unfiltered()
2363 2363 cl = unfi.changelog
2364 2364 nm_data = nodemap.persisted_data(cl)
2365 2365 if nm_data is not None:
2366 2366 docket, data = nm_data
2367 2367 ui.write(data[:])
2368 2368 elif opts['check']:
2369 2369 unfi = repo.unfiltered()
2370 2370 cl = unfi.changelog
2371 2371 nm_data = nodemap.persisted_data(cl)
2372 2372 if nm_data is not None:
2373 2373 docket, data = nm_data
2374 2374 return nodemap.check_data(ui, cl.index, data)
2375 2375 elif opts['metadata']:
2376 2376 unfi = repo.unfiltered()
2377 2377 cl = unfi.changelog
2378 2378 nm_data = nodemap.persisted_data(cl)
2379 2379 if nm_data is not None:
2380 2380 docket, data = nm_data
2381 2381 ui.write((b"uid: %s\n") % docket.uid)
2382 2382 ui.write((b"tip-rev: %d\n") % docket.tip_rev)
2383 2383 ui.write((b"tip-node: %s\n") % hex(docket.tip_node))
2384 2384 ui.write((b"data-length: %d\n") % docket.data_length)
2385 2385 ui.write((b"data-unused: %d\n") % docket.data_unused)
2386 2386 unused_perc = docket.data_unused * 100.0 / docket.data_length
2387 2387 ui.write((b"data-unused: %2.3f%%\n") % unused_perc)
2388 2388
2389 2389
2390 2390 @command(
2391 2391 b'debugobsolete',
2392 2392 [
2393 2393 (b'', b'flags', 0, _(b'markers flag')),
2394 2394 (
2395 2395 b'',
2396 2396 b'record-parents',
2397 2397 False,
2398 2398 _(b'record parent information for the precursor'),
2399 2399 ),
2400 2400 (b'r', b'rev', [], _(b'display markers relevant to REV')),
2401 2401 (
2402 2402 b'',
2403 2403 b'exclusive',
2404 2404 False,
2405 2405 _(b'restrict display to markers only relevant to REV'),
2406 2406 ),
2407 2407 (b'', b'index', False, _(b'display index of the marker')),
2408 2408 (b'', b'delete', [], _(b'delete markers specified by indices')),
2409 2409 ]
2410 2410 + cmdutil.commitopts2
2411 2411 + cmdutil.formatteropts,
2412 2412 _(b'[OBSOLETED [REPLACEMENT ...]]'),
2413 2413 )
2414 2414 def debugobsolete(ui, repo, precursor=None, *successors, **opts):
2415 2415 """create arbitrary obsolete marker
2416 2416
2417 2417 With no arguments, displays the list of obsolescence markers.
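
Creating a marker requires full-length hex node ids; a hypothetical call
recording that changeset A was rewritten into changeset B would be::

  hg debugobsolete <40-hex-node-of-A> <40-hex-node-of-B>
"""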
2418 2418
2419 2419 opts = pycompat.byteskwargs(opts)
2420 2420
2421 2421 def parsenodeid(s):
2422 2422 try:
2423 2423 # We do not use revsingle/revrange functions here to accept
2424 2424 # arbitrary node identifiers, possibly not present in the
2425 2425 # local repository.
2426 2426 n = bin(s)
2427 2427 if len(n) != repo.nodeconstants.nodelen:
2428 2428 raise TypeError()
2429 2429 return n
2430 2430 except TypeError:
2431 2431 raise error.InputError(
2432 2432 b'changeset references must be full hexadecimal '
2433 2433 b'node identifiers'
2434 2434 )
2435 2435
2436 2436 if opts.get(b'delete'):
2437 2437 indices = []
2438 2438 for v in opts.get(b'delete'):
2439 2439 try:
2440 2440 indices.append(int(v))
2441 2441 except ValueError:
2442 2442 raise error.InputError(
2443 2443 _(b'invalid index value: %r') % v,
2444 2444 hint=_(b'use integers for indices'),
2445 2445 )
2446 2446
2447 2447 if repo.currenttransaction():
2448 2448 raise error.Abort(
2449 2449 _(b'cannot delete obsmarkers in the middle of a transaction.')
2450 2450 )
2451 2451
2452 2452 with repo.lock():
2453 2453 n = repair.deleteobsmarkers(repo.obsstore, indices)
2454 2454 ui.write(_(b'deleted %i obsolescence markers\n') % n)
2455 2455
2456 2456 return
2457 2457
2458 2458 if precursor is not None:
2459 2459 if opts[b'rev']:
2460 2460 raise error.InputError(
2461 2461 b'cannot select revision when creating marker'
2462 2462 )
2463 2463 metadata = {}
2464 2464 metadata[b'user'] = encoding.fromlocal(opts[b'user'] or ui.username())
2465 2465 succs = tuple(parsenodeid(succ) for succ in successors)
2466 2466 l = repo.lock()
2467 2467 try:
2468 2468 tr = repo.transaction(b'debugobsolete')
2469 2469 try:
2470 2470 date = opts.get(b'date')
2471 2471 if date:
2472 2472 date = dateutil.parsedate(date)
2473 2473 else:
2474 2474 date = None
2475 2475 prec = parsenodeid(precursor)
2476 2476 parents = None
2477 2477 if opts[b'record_parents']:
2478 2478 if prec not in repo.unfiltered():
2479 2479 raise error.Abort(
2480 2480 b'cannot use --record-parents on '
2481 2481 b'unknown changesets'
2482 2482 )
2483 2483 parents = repo.unfiltered()[prec].parents()
2484 2484 parents = tuple(p.node() for p in parents)
2485 2485 repo.obsstore.create(
2486 2486 tr,
2487 2487 prec,
2488 2488 succs,
2489 2489 opts[b'flags'],
2490 2490 parents=parents,
2491 2491 date=date,
2492 2492 metadata=metadata,
2493 2493 ui=ui,
2494 2494 )
2495 2495 tr.close()
2496 2496 except ValueError as exc:
2497 2497 raise error.Abort(
2498 2498 _(b'bad obsmarker input: %s') % stringutil.forcebytestr(exc)
2499 2499 )
2500 2500 finally:
2501 2501 tr.release()
2502 2502 finally:
2503 2503 l.release()
2504 2504 else:
2505 2505 if opts[b'rev']:
2506 2506 revs = scmutil.revrange(repo, opts[b'rev'])
2507 2507 nodes = [repo[r].node() for r in revs]
2508 2508 markers = list(
2509 2509 obsutil.getmarkers(
2510 2510 repo, nodes=nodes, exclusive=opts[b'exclusive']
2511 2511 )
2512 2512 )
2513 2513 markers.sort(key=lambda x: x._data)
2514 2514 else:
2515 2515 markers = obsutil.getmarkers(repo)
2516 2516
2517 2517 markerstoiter = markers
2518 2518 isrelevant = lambda m: True
2519 2519 if opts.get(b'rev') and opts.get(b'index'):
2520 2520 markerstoiter = obsutil.getmarkers(repo)
2521 2521 markerset = set(markers)
2522 2522 isrelevant = lambda m: m in markerset
2523 2523
2524 2524 fm = ui.formatter(b'debugobsolete', opts)
2525 2525 for i, m in enumerate(markerstoiter):
2526 2526 if not isrelevant(m):
2527 2527 # marker can be irrelevant when we're iterating over a set
2528 2528 # of markers (markerstoiter) which is bigger than the set
2529 2529 # of markers we want to display (markers)
2530 2530 # this can happen if both --index and --rev options are
2531 2531 # provided and thus we need to iterate over all of the markers
2532 2532 # to get the correct indices, but only display the ones that
2533 2533 # are relevant to --rev value
2534 2534 continue
2535 2535 fm.startitem()
2536 2536 ind = i if opts.get(b'index') else None
2537 2537 cmdutil.showmarker(fm, m, index=ind)
2538 2538 fm.end()
2539 2539
2540 2540
2541 2541 @command(
2542 2542 b'debugp1copies',
2543 2543 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
2544 2544 _(b'[-r REV]'),
2545 2545 )
2546 2546 def debugp1copies(ui, repo, **opts):
2547 2547 """dump copy information compared to p1"""
2548 2548
2549 2549 opts = pycompat.byteskwargs(opts)
2550 2550 ctx = scmutil.revsingle(repo, opts.get(b'rev'), default=None)
2551 2551 for dst, src in ctx.p1copies().items():
2552 2552 ui.write(b'%s -> %s\n' % (src, dst))
2553 2553
2554 2554
2555 2555 @command(
2556 2556 b'debugp2copies',
2557 2557 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
2558 2558 _(b'[-r REV]'),
2559 2559 )
2560 2560 def debugp2copies(ui, repo, **opts):
2561 2561 """dump copy information compared to p2"""
2562 2562
2563 2563 opts = pycompat.byteskwargs(opts)
2564 2564 ctx = scmutil.revsingle(repo, opts.get(b'rev'), default=None)
2565 2565 for dst, src in ctx.p2copies().items():
2566 2566 ui.write(b'%s -> %s\n' % (src, dst))
2567 2567
2568 2568
2569 2569 @command(
2570 2570 b'debugpathcomplete',
2571 2571 [
2572 2572 (b'f', b'full', None, _(b'complete an entire path')),
2573 2573 (b'n', b'normal', None, _(b'show only normal files')),
2574 2574 (b'a', b'added', None, _(b'show only added files')),
2575 2575 (b'r', b'removed', None, _(b'show only removed files')),
2576 2576 ],
2577 2577 _(b'FILESPEC...'),
2578 2578 )
2579 2579 def debugpathcomplete(ui, repo, *specs, **opts):
2580 2580 """complete part or all of a tracked path
2581 2581
2582 2582 This command supports shells that offer path name completion. It
2583 2583 currently completes only files already known to the dirstate.
2584 2584
2585 2585 Completion extends only to the next path segment unless
2586 2586 --full is specified, in which case entire paths are used."""
2587 2587
2588 2588 def complete(path, acceptable):
2589 2589 dirstate = repo.dirstate
2590 2590 spec = os.path.normpath(os.path.join(encoding.getcwd(), path))
2591 2591 rootdir = repo.root + pycompat.ossep
2592 2592 if spec != repo.root and not spec.startswith(rootdir):
2593 2593 return [], []
2594 2594 if os.path.isdir(spec):
2595 2595 spec += b'/'
2596 2596 spec = spec[len(rootdir) :]
2597 2597 fixpaths = pycompat.ossep != b'/'
2598 2598 if fixpaths:
2599 2599 spec = spec.replace(pycompat.ossep, b'/')
2600 2600 speclen = len(spec)
2601 2601 fullpaths = opts['full']
2602 2602 files, dirs = set(), set()
2603 2603 adddir, addfile = dirs.add, files.add
2604 2604 for f, st in pycompat.iteritems(dirstate):
2605 2605 if f.startswith(spec) and st[0] in acceptable:
2606 2606 if fixpaths:
2607 2607 f = f.replace(b'/', pycompat.ossep)
2608 2608 if fullpaths:
2609 2609 addfile(f)
2610 2610 continue
2611 2611 s = f.find(pycompat.ossep, speclen)
2612 2612 if s >= 0:
2613 2613 adddir(f[:s])
2614 2614 else:
2615 2615 addfile(f)
2616 2616 return files, dirs
2617 2617
2618 2618 acceptable = b''
2619 2619 if opts['normal']:
2620 2620 acceptable += b'nm'
2621 2621 if opts['added']:
2622 2622 acceptable += b'a'
2623 2623 if opts['removed']:
2624 2624 acceptable += b'r'
2625 2625 cwd = repo.getcwd()
2626 2626 if not specs:
2627 2627 specs = [b'.']
2628 2628
2629 2629 files, dirs = set(), set()
2630 2630 for spec in specs:
2631 2631 f, d = complete(spec, acceptable or b'nmar')
2632 2632 files.update(f)
2633 2633 dirs.update(d)
2634 2634 files.update(dirs)
2635 2635 ui.write(b'\n'.join(repo.pathto(p, cwd) for p in sorted(files)))
2636 2636 ui.write(b'\n')
2637 2637
2638 2638
2639 2639 @command(
2640 2640 b'debugpathcopies',
2641 2641 cmdutil.walkopts,
2642 2642 b'hg debugpathcopies REV1 REV2 [FILE]',
2643 2643 inferrepo=True,
2644 2644 )
2645 2645 def debugpathcopies(ui, repo, rev1, rev2, *pats, **opts):
2646 2646 """show copies between two revisions"""
2647 2647 ctx1 = scmutil.revsingle(repo, rev1)
2648 2648 ctx2 = scmutil.revsingle(repo, rev2)
2649 2649 m = scmutil.match(ctx1, pats, opts)
2650 2650 for dst, src in sorted(copies.pathcopies(ctx1, ctx2, m).items()):
2651 2651 ui.write(b'%s -> %s\n' % (src, dst))
2652 2652
2653 2653
2654 2654 @command(b'debugpeer', [], _(b'PATH'), norepo=True)
2655 2655 def debugpeer(ui, path):
2656 2656 """establish a connection to a peer repository"""
2657 2657 # Always enable peer request logging. Requires --debug to display
2658 2658 # though.
2659 2659 overrides = {
2660 2660 (b'devel', b'debug.peer-request'): True,
2661 2661 }
2662 2662
2663 2663 with ui.configoverride(overrides):
2664 2664 peer = hg.peer(ui, {}, path)
2665 2665
2666 2666 try:
2667 2667 local = peer.local() is not None
2668 2668 canpush = peer.canpush()
2669 2669
2670 2670 ui.write(_(b'url: %s\n') % peer.url())
2671 2671 ui.write(_(b'local: %s\n') % (_(b'yes') if local else _(b'no')))
2672 2672 ui.write(
2673 2673 _(b'pushable: %s\n') % (_(b'yes') if canpush else _(b'no'))
2674 2674 )
2675 2675 finally:
2676 2676 peer.close()
2677 2677
2678 2678
2679 2679 @command(
2680 2680 b'debugpickmergetool',
2681 2681 [
2682 2682 (b'r', b'rev', b'', _(b'check for files in this revision'), _(b'REV')),
2683 2683 (b'', b'changedelete', None, _(b'emulate merging change and delete')),
2684 2684 ]
2685 2685 + cmdutil.walkopts
2686 2686 + cmdutil.mergetoolopts,
2687 2687 _(b'[PATTERN]...'),
2688 2688 inferrepo=True,
2689 2689 )
2690 2690 def debugpickmergetool(ui, repo, *pats, **opts):
2691 2691 """examine which merge tool is chosen for specified file
2692 2692
2693 2693 As described in :hg:`help merge-tools`, Mercurial examines the
2694 2694 configurations below, in this order, to decide which merge tool is
2695 2695 chosen for the specified file.
2696 2696
2697 2697 1. ``--tool`` option
2698 2698 2. ``HGMERGE`` environment variable
2699 2699 3. configurations in ``merge-patterns`` section
2700 2700 4. configuration of ``ui.merge``
2701 2701 5. configurations in ``merge-tools`` section
2702 2702 6. ``hgmerge`` tool (for historical reasons only)
2703 2703 7. default tool for fallback (``:merge`` or ``:prompt``)
2704 2704
2705 2705 This command writes out the examination result in the style below::
2706 2706
2707 2707 FILE = MERGETOOL
2708 2708
2709 2709 By default, all files known in the first parent context of the
2710 2710 working directory are examined. Use file patterns and/or -I/-X
2711 2711 options to limit target files. -r/--rev is also useful to examine
2712 2712 files in another context without actually updating to it.
2713 2713
2714 2714 With --debug, this command also shows the warning messages emitted
2715 2715 while matching against ``merge-patterns`` and so on. It is recommended
2716 2716 to use this option with explicit file patterns and/or -I/-X options,
2717 2717 because it increases the amount of output per file according to the
2718 2718 configurations in hgrc.
2719 2719
2720 2720 With -v/--verbose, this command first shows the configurations below
2721 2721 (only those that are specified).
2722 2722
2723 2723 - ``--tool`` option
2724 2724 - ``HGMERGE`` environment variable
2725 2725 - configuration of ``ui.merge``
2726 2726
2727 2727 If the merge tool is chosen before matching against
2728 2728 ``merge-patterns``, this command can't show any helpful
2729 2729 information, even with --debug. In such a case, the information
2730 2730 above is useful for understanding why a merge tool was chosen.
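
For example, to see which tool would be picked for the C files known to
the working copy parent (the pattern is illustrative)::

  hg debugpickmergetool 'glob:**.c'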
2731 2731 """
2732 2732 opts = pycompat.byteskwargs(opts)
2733 2733 overrides = {}
2734 2734 if opts[b'tool']:
2735 2735 overrides[(b'ui', b'forcemerge')] = opts[b'tool']
2736 2736 ui.notenoi18n(b'with --tool %r\n' % (pycompat.bytestr(opts[b'tool'])))
2737 2737
2738 2738 with ui.configoverride(overrides, b'debugmergepatterns'):
2739 2739 hgmerge = encoding.environ.get(b"HGMERGE")
2740 2740 if hgmerge is not None:
2741 2741 ui.notenoi18n(b'with HGMERGE=%r\n' % (pycompat.bytestr(hgmerge)))
2742 2742 uimerge = ui.config(b"ui", b"merge")
2743 2743 if uimerge:
2744 2744 ui.notenoi18n(b'with ui.merge=%r\n' % (pycompat.bytestr(uimerge)))
2745 2745
2746 2746 ctx = scmutil.revsingle(repo, opts.get(b'rev'))
2747 2747 m = scmutil.match(ctx, pats, opts)
2748 2748 changedelete = opts[b'changedelete']
2749 2749 for path in ctx.walk(m):
2750 2750 fctx = ctx[path]
2751 2751 try:
2752 2752 if not ui.debugflag:
2753 2753 ui.pushbuffer(error=True)
2754 2754 tool, toolpath = filemerge._picktool(
2755 2755 repo,
2756 2756 ui,
2757 2757 path,
2758 2758 fctx.isbinary(),
2759 2759 b'l' in fctx.flags(),
2760 2760 changedelete,
2761 2761 )
2762 2762 finally:
2763 2763 if not ui.debugflag:
2764 2764 ui.popbuffer()
2765 2765 ui.write(b'%s = %s\n' % (path, tool))
2766 2766
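# A minimal illustrative sketch, not part of Mercurial's API: the
# precedence cascade documented above boils down to "the first
# configured source wins". Every name below is hypothetical.
def _firstconfigured(sources):
    """Return the first (name, value) pair whose value is set.

    >>> _firstconfigured([(b'--tool', b''), (b'HGMERGE', b'vimdiff')])
    (b'HGMERGE', b'vimdiff')
    """
    for name, value in sources:
        if value:
            return name, value
    return b'fallback', b':merge or :prompt'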
2767 2767
2768 2768 @command(b'debugpushkey', [], _(b'REPO NAMESPACE [KEY OLD NEW]'), norepo=True)
2769 2769 def debugpushkey(ui, repopath, namespace, *keyinfo, **opts):
2770 2770 """access the pushkey key/value protocol
2771 2771
2772 2772 With two args, list the keys in the given namespace.
2773 2773
2774 2774 With five args, set a key to new if it currently is set to old.
2775 2775 Reports success or failure.
2776 2776 """
2777 2777
2778 2778 target = hg.peer(ui, {}, repopath)
2779 2779 try:
2780 2780 if keyinfo:
2781 2781 key, old, new = keyinfo
2782 2782 with target.commandexecutor() as e:
2783 2783 r = e.callcommand(
2784 2784 b'pushkey',
2785 2785 {
2786 2786 b'namespace': namespace,
2787 2787 b'key': key,
2788 2788 b'old': old,
2789 2789 b'new': new,
2790 2790 },
2791 2791 ).result()
2792 2792
2793 2793 ui.status(pycompat.bytestr(r) + b'\n')
2794 2794 return not r
2795 2795 else:
2796 2796 for k, v in sorted(pycompat.iteritems(target.listkeys(namespace))):
2797 2797 ui.write(
2798 2798 b"%s\t%s\n"
2799 2799 % (stringutil.escapestr(k), stringutil.escapestr(v))
2800 2800 )
2801 2801 finally:
2802 2802 target.close()
2803 2803
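# Illustrative usage (the URL is hypothetical): with two args the keys
# of a namespace are listed; with five args the key is updated only if
# it still holds the old value, e.g. moving bookmark "foo":
#
#   $ hg debugpushkey https://example.com/repo bookmarks
#   $ hg debugpushkey https://example.com/repo bookmarks foo OLDNODE NEWNODE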
2804 2804
2805 2805 @command(b'debugpvec', [], _(b'A B'))
2806 2806 def debugpvec(ui, repo, a, b=None):
2807 2807 ca = scmutil.revsingle(repo, a)
2808 2808 cb = scmutil.revsingle(repo, b)
2809 2809 pa = pvec.ctxpvec(ca)
2810 2810 pb = pvec.ctxpvec(cb)
2811 2811 if pa == pb:
2812 2812 rel = b"="
2813 2813 elif pa > pb:
2814 2814 rel = b">"
2815 2815 elif pa < pb:
2816 2816 rel = b"<"
2817 2817 elif pa | pb:
2818 2818 rel = b"|"
2819 2819 ui.write(_(b"a: %s\n") % pa)
2820 2820 ui.write(_(b"b: %s\n") % pb)
2821 2821 ui.write(_(b"depth(a): %d depth(b): %d\n") % (pa._depth, pb._depth))
2822 2822 ui.write(
2823 2823 _(b"delta: %d hdist: %d distance: %d relation: %s\n")
2824 2824 % (
2825 2825 abs(pa._depth - pb._depth),
2826 2826 pvec._hamming(pa._vec, pb._vec),
2827 2827 pa.distance(pb),
2828 2828 rel,
2829 2829 )
2830 2830 )
2831 2831
2832 2832
2833 2833 @command(
2834 2834 b'debugrebuilddirstate|debugrebuildstate',
2835 2835 [
2836 2836 (b'r', b'rev', b'', _(b'revision to rebuild to'), _(b'REV')),
2837 2837 (
2838 2838 b'',
2839 2839 b'minimal',
2840 2840 None,
2841 2841 _(
2842 2842 b'only rebuild files that are inconsistent with '
2843 2843 b'the working copy parent'
2844 2844 ),
2845 2845 ),
2846 2846 ],
2847 2847 _(b'[-r REV]'),
2848 2848 )
2849 2849 def debugrebuilddirstate(ui, repo, rev, **opts):
2850 2850 """rebuild the dirstate as it would look like for the given revision
2851 2851
2852 2852 If no revision is specified, the first parent of the working directory will be used.
2853 2853
2854 2854 The dirstate will be set to the files of the given revision.
2855 2855 The actual working directory content or existing dirstate
2856 2856 information such as adds or removes is not considered.
2857 2857
2858 2858 ``minimal`` will only rebuild the dirstate status for files that claim to be
2859 2859 tracked but are not in the parent manifest, or that exist in the parent
2860 2860 manifest but are not in the dirstate. It will not change adds, removes, or
2861 2861 modified files that are in the working copy parent.
2862 2862
2863 2863 One use of this command is to make the next :hg:`status` invocation
2864 2864 check the actual file content.
2865 2865 """
2866 2866 ctx = scmutil.revsingle(repo, rev)
2867 2867 with repo.wlock():
2868 2868 dirstate = repo.dirstate
2869 2869 changedfiles = None
2870 2870 # See command doc for what minimal does.
2871 2871 if opts.get('minimal'):
2872 2872 manifestfiles = set(ctx.manifest().keys())
2873 2873 dirstatefiles = set(dirstate)
2874 2874 manifestonly = manifestfiles - dirstatefiles
2875 2875 dsonly = dirstatefiles - manifestfiles
2876 2876 dsnotadded = {f for f in dsonly if dirstate[f] != b'a'}
2877 2877 changedfiles = manifestonly | dsnotadded
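            # e.g. a file present in ctx's manifest but missing from the
            # dirstate is rebuilt, while a file the dirstate marks as
            # added (b'a') is deliberately left untouched.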
2878 2878
2879 2879 dirstate.rebuild(ctx.node(), ctx.manifest(), changedfiles)
2880 2880
2881 2881
2882 2882 @command(b'debugrebuildfncache', [], b'')
2883 2883 def debugrebuildfncache(ui, repo):
2884 2884 """rebuild the fncache file"""
2885 2885 repair.rebuildfncache(ui, repo)
2886 2886
2887 2887
2888 2888 @command(
2889 2889 b'debugrename',
2890 2890 [(b'r', b'rev', b'', _(b'revision to debug'), _(b'REV'))],
2891 2891 _(b'[-r REV] [FILE]...'),
2892 2892 )
2893 2893 def debugrename(ui, repo, *pats, **opts):
2894 2894 """dump rename information"""
2895 2895
2896 2896 opts = pycompat.byteskwargs(opts)
2897 2897 ctx = scmutil.revsingle(repo, opts.get(b'rev'))
2898 2898 m = scmutil.match(ctx, pats, opts)
2899 2899 for abs in ctx.walk(m):
2900 2900 fctx = ctx[abs]
2901 2901 o = fctx.filelog().renamed(fctx.filenode())
2902 2902 rel = repo.pathto(abs)
2903 2903 if o:
2904 2904 ui.write(_(b"%s renamed from %s:%s\n") % (rel, o[0], hex(o[1])))
2905 2905 else:
2906 2906 ui.write(_(b"%s not renamed\n") % rel)
2907 2907
2908 2908
2909 2909 @command(b'debugrequires|debugrequirements', [], b'')
2910 2910 def debugrequirements(ui, repo):
2911 2911 """print the current repo requirements"""
2912 2912 for r in sorted(repo.requirements):
2913 2913 ui.write(b"%s\n" % r)
2914 2914
2915 2915
2916 2916 @command(
2917 2917 b'debugrevlog',
2918 2918 cmdutil.debugrevlogopts + [(b'd', b'dump', False, _(b'dump index data'))],
2919 2919 _(b'-c|-m|FILE'),
2920 2920 optionalrepo=True,
2921 2921 )
2922 2922 def debugrevlog(ui, repo, file_=None, **opts):
2923 2923 """show data and statistics about a revlog"""
2924 2924 opts = pycompat.byteskwargs(opts)
2925 2925 r = cmdutil.openrevlog(repo, b'debugrevlog', file_, opts)
2926 2926
2927 2927 if opts.get(b"dump"):
2928 2928 numrevs = len(r)
2929 2929 ui.write(
2930 2930 (
2931 2931 b"# rev p1rev p2rev start end deltastart base p1 p2"
2932 2932 b" rawsize totalsize compression heads chainlen\n"
2933 2933 )
2934 2934 )
2935 2935 ts = 0
2936 2936 heads = set()
2937 2937
2938 2938 for rev in pycompat.xrange(numrevs):
2939 2939 dbase = r.deltaparent(rev)
2940 2940 if dbase == -1:
2941 2941 dbase = rev
2942 2942 cbase = r.chainbase(rev)
2943 2943 clen = r.chainlen(rev)
2944 2944 p1, p2 = r.parentrevs(rev)
2945 2945 rs = r.rawsize(rev)
2946 2946 ts = ts + rs
2947 2947 heads -= set(r.parentrevs(rev))
2948 2948 heads.add(rev)
2949 2949 try:
2950 2950 compression = ts / r.end(rev)
2951 2951 except ZeroDivisionError:
2952 2952 compression = 0
2953 2953 ui.write(
2954 2954 b"%5d %5d %5d %5d %5d %10d %4d %4d %4d %7d %9d "
2955 2955 b"%11d %5d %8d\n"
2956 2956 % (
2957 2957 rev,
2958 2958 p1,
2959 2959 p2,
2960 2960 r.start(rev),
2961 2961 r.end(rev),
2962 2962 r.start(dbase),
2963 2963 r.start(cbase),
2964 2964 r.start(p1),
2965 2965 r.start(p2),
2966 2966 rs,
2967 2967 ts,
2968 2968 compression,
2969 2969 len(heads),
2970 2970 clen,
2971 2971 )
2972 2972 )
2973 2973 return 0
2974 2974
2975 - v = r.version
2976 - format = v & 0xFFFF
2975 + format = r._format_version
2976 + v = r._format_flags
2977 2977 flags = []
2978 2978 gdelta = False
2979 2979 if v & revlog.FLAG_INLINE_DATA:
2980 2980 flags.append(b'inline')
2981 2981 if v & revlog.FLAG_GENERALDELTA:
2982 2982 gdelta = True
2983 2983 flags.append(b'generaldelta')
2984 2984 if not flags:
2985 2985 flags = [b'(none)']
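    # For example, a small filelog in a modern repository typically
    # reports "format : 1" with flags "inline, generaldelta"; the exact
    # combination depends on repository configuration and revlog size.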
2986 2986
2987 2987 ### tracks merge vs single parent
2988 2988 nummerges = 0
2989 2989
2990 2990 ### tracks the ways the "delta" are built
2991 2991 # nodelta
2992 2992 numempty = 0
2993 2993 numemptytext = 0
2994 2994 numemptydelta = 0
2995 2995 # full file content
2996 2996 numfull = 0
2997 2997 # intermediate snapshot against a prior snapshot
2998 2998 numsemi = 0
2999 2999 # snapshot count per depth
3000 3000 numsnapdepth = collections.defaultdict(lambda: 0)
3001 3001 # delta against previous revision
3002 3002 numprev = 0
3003 3003 # delta against first or second parent (not prev)
3004 3004 nump1 = 0
3005 3005 nump2 = 0
3006 3006 # delta against neither prev nor parents
3007 3007 numother = 0
3008 3008 # delta against prev that are also first or second parent
3009 3009 # (details of `numprev`)
3010 3010 nump1prev = 0
3011 3011 nump2prev = 0
3012 3012
3013 3013 # data about the delta chain of each rev
3014 3014 chainlengths = []
3015 3015 chainbases = []
3016 3016 chainspans = []
3017 3017
3018 3018 # data about each revision
3019 3019 datasize = [None, 0, 0]
3020 3020 fullsize = [None, 0, 0]
3021 3021 semisize = [None, 0, 0]
3022 3022 # snapshot count per depth
3023 3023 snapsizedepth = collections.defaultdict(lambda: [None, 0, 0])
3024 3024 deltasize = [None, 0, 0]
3025 3025 chunktypecounts = {}
3026 3026 chunktypesizes = {}
3027 3027
3028 3028 def addsize(size, l):
3029 3029 if l[0] is None or size < l[0]:
3030 3030 l[0] = size
3031 3031 if size > l[1]:
3032 3032 l[1] = size
3033 3033 l[2] += size
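        # e.g. starting from l == [None, 0, 0], addsize(5, l) then
        # addsize(3, l) leaves l == [3, 5, 8]: a running
        # (min, max, total) triple for the bucket.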
3034 3034
3035 3035 numrevs = len(r)
3036 3036 for rev in pycompat.xrange(numrevs):
3037 3037 p1, p2 = r.parentrevs(rev)
3038 3038 delta = r.deltaparent(rev)
3039 3039 if format > 0:
3040 3040 addsize(r.rawsize(rev), datasize)
3041 3041 if p2 != nullrev:
3042 3042 nummerges += 1
3043 3043 size = r.length(rev)
3044 3044 if delta == nullrev:
3045 3045 chainlengths.append(0)
3046 3046 chainbases.append(r.start(rev))
3047 3047 chainspans.append(size)
3048 3048 if size == 0:
3049 3049 numempty += 1
3050 3050 numemptytext += 1
3051 3051 else:
3052 3052 numfull += 1
3053 3053 numsnapdepth[0] += 1
3054 3054 addsize(size, fullsize)
3055 3055 addsize(size, snapsizedepth[0])
3056 3056 else:
3057 3057 chainlengths.append(chainlengths[delta] + 1)
3058 3058 baseaddr = chainbases[delta]
3059 3059 revaddr = r.start(rev)
3060 3060 chainbases.append(baseaddr)
3061 3061 chainspans.append((revaddr - baseaddr) + size)
3062 3062 if size == 0:
3063 3063 numempty += 1
3064 3064 numemptydelta += 1
3065 3065 elif r.issnapshot(rev):
3066 3066 addsize(size, semisize)
3067 3067 numsemi += 1
3068 3068 depth = r.snapshotdepth(rev)
3069 3069 numsnapdepth[depth] += 1
3070 3070 addsize(size, snapsizedepth[depth])
3071 3071 else:
3072 3072 addsize(size, deltasize)
3073 3073 if delta == rev - 1:
3074 3074 numprev += 1
3075 3075 if delta == p1:
3076 3076 nump1prev += 1
3077 3077 elif delta == p2:
3078 3078 nump2prev += 1
3079 3079 elif delta == p1:
3080 3080 nump1 += 1
3081 3081 elif delta == p2:
3082 3082 nump2 += 1
3083 3083 elif delta != nullrev:
3084 3084 numother += 1
3085 3085
3086 3086 # Obtain data on the raw chunks in the revlog.
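        # The first byte of a chunk identifies its compression engine;
        # commonly b'u' (stored uncompressed), b'x' (zlib) or b'\x28'
        # (zstd).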
3087 3087 if util.safehasattr(r, b'_getsegmentforrevs'):
3088 3088 segment = r._getsegmentforrevs(rev, rev)[1]
3089 3089 else:
3090 3090 segment = r._revlog._getsegmentforrevs(rev, rev)[1]
3091 3091 if segment:
3092 3092 chunktype = bytes(segment[0:1])
3093 3093 else:
3094 3094 chunktype = b'empty'
3095 3095
3096 3096 if chunktype not in chunktypecounts:
3097 3097 chunktypecounts[chunktype] = 0
3098 3098 chunktypesizes[chunktype] = 0
3099 3099
3100 3100 chunktypecounts[chunktype] += 1
3101 3101 chunktypesizes[chunktype] += size
3102 3102
3103 3103 # Adjust size min value for empty cases
3104 3104 for size in (datasize, fullsize, semisize, deltasize):
3105 3105 if size[0] is None:
3106 3106 size[0] = 0
3107 3107
3108 3108 numdeltas = numrevs - numfull - numempty - numsemi
3109 3109 numoprev = numprev - nump1prev - nump2prev
3110 3110 totalrawsize = datasize[2]
3111 3111 datasize[2] /= numrevs
3112 3112 fulltotal = fullsize[2]
3113 3113 if numfull == 0:
3114 3114 fullsize[2] = 0
3115 3115 else:
3116 3116 fullsize[2] /= numfull
3117 3117 semitotal = semisize[2]
3118 3118 snaptotal = {}
3119 3119 if numsemi > 0:
3120 3120 semisize[2] /= numsemi
3121 3121 for depth in snapsizedepth:
3122 3122 snaptotal[depth] = snapsizedepth[depth][2]
3123 3123 snapsizedepth[depth][2] /= numsnapdepth[depth]
3124 3124
3125 3125 deltatotal = deltasize[2]
3126 3126 if numdeltas > 0:
3127 3127 deltasize[2] /= numdeltas
3128 3128 totalsize = fulltotal + semitotal + deltatotal
3129 3129 avgchainlen = sum(chainlengths) / numrevs
3130 3130 maxchainlen = max(chainlengths)
3131 3131 maxchainspan = max(chainspans)
3132 3132 compratio = 1
3133 3133 if totalsize:
3134 3134 compratio = totalrawsize / totalsize
3135 3135
3136 3136 basedfmtstr = b'%%%dd\n'
3137 3137 basepcfmtstr = b'%%%dd %s(%%5.2f%%%%)\n'
3138 3138
3139 3139 def dfmtstr(max):
3140 3140 return basedfmtstr % len(str(max))
3141 3141
3142 3142 def pcfmtstr(max, padding=0):
3143 3143 return basepcfmtstr % (len(str(max)), b' ' * padding)
3144 3144
3145 3145 def pcfmt(value, total):
3146 3146 if total:
3147 3147 return (value, 100 * float(value) / total)
3148 3148 else:
3149 3149 return value, 100.0
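    # e.g. pcfmt(25, 200) == (25, 12.5), which pcfmtstr() renders along
    # the lines of "25 (12.50%)"; a zero total is reported as 100%.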
3150 3150
3151 3151 ui.writenoi18n(b'format : %d\n' % format)
3152 3152 ui.writenoi18n(b'flags : %s\n' % b', '.join(flags))
3153 3153
3154 3154 ui.write(b'\n')
3155 3155 fmt = pcfmtstr(totalsize)
3156 3156 fmt2 = dfmtstr(totalsize)
3157 3157 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
3158 3158 ui.writenoi18n(b' merges : ' + fmt % pcfmt(nummerges, numrevs))
3159 3159 ui.writenoi18n(
3160 3160 b' normal : ' + fmt % pcfmt(numrevs - nummerges, numrevs)
3161 3161 )
3162 3162 ui.writenoi18n(b'revisions : ' + fmt2 % numrevs)
3163 3163 ui.writenoi18n(b' empty : ' + fmt % pcfmt(numempty, numrevs))
3164 3164 ui.writenoi18n(
3165 3165 b' text : '
3166 3166 + fmt % pcfmt(numemptytext, numemptytext + numemptydelta)
3167 3167 )
3168 3168 ui.writenoi18n(
3169 3169 b' delta : '
3170 3170 + fmt % pcfmt(numemptydelta, numemptytext + numemptydelta)
3171 3171 )
3172 3172 ui.writenoi18n(
3173 3173 b' snapshot : ' + fmt % pcfmt(numfull + numsemi, numrevs)
3174 3174 )
3175 3175 for depth in sorted(numsnapdepth):
3176 3176 ui.write(
3177 3177 (b' lvl-%-3d : ' % depth)
3178 3178 + fmt % pcfmt(numsnapdepth[depth], numrevs)
3179 3179 )
3180 3180 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(numdeltas, numrevs))
3181 3181 ui.writenoi18n(b'revision size : ' + fmt2 % totalsize)
3182 3182 ui.writenoi18n(
3183 3183 b' snapshot : ' + fmt % pcfmt(fulltotal + semitotal, totalsize)
3184 3184 )
3185 3185 for depth in sorted(numsnapdepth):
3186 3186 ui.write(
3187 3187 (b' lvl-%-3d : ' % depth)
3188 3188 + fmt % pcfmt(snaptotal[depth], totalsize)
3189 3189 )
3190 3190 ui.writenoi18n(b' deltas : ' + fmt % pcfmt(deltatotal, totalsize))
3191 3191
3192 3192 def fmtchunktype(chunktype):
3193 3193 if chunktype == b'empty':
3194 3194 return b' %s : ' % chunktype
3195 3195 elif chunktype in pycompat.bytestr(string.ascii_letters):
3196 3196 return b' 0x%s (%s) : ' % (hex(chunktype), chunktype)
3197 3197 else:
3198 3198 return b' 0x%s : ' % hex(chunktype)
3199 3199
3200 3200 ui.write(b'\n')
3201 3201 ui.writenoi18n(b'chunks : ' + fmt2 % numrevs)
3202 3202 for chunktype in sorted(chunktypecounts):
3203 3203 ui.write(fmtchunktype(chunktype))
3204 3204 ui.write(fmt % pcfmt(chunktypecounts[chunktype], numrevs))
3205 3205 ui.writenoi18n(b'chunks size : ' + fmt2 % totalsize)
3206 3206 for chunktype in sorted(chunktypecounts):
3207 3207 ui.write(fmtchunktype(chunktype))
3208 3208 ui.write(fmt % pcfmt(chunktypesizes[chunktype], totalsize))
3209 3209
3210 3210 ui.write(b'\n')
3211 3211 fmt = dfmtstr(max(avgchainlen, maxchainlen, maxchainspan, compratio))
3212 3212 ui.writenoi18n(b'avg chain length : ' + fmt % avgchainlen)
3213 3213 ui.writenoi18n(b'max chain length : ' + fmt % maxchainlen)
3214 3214 ui.writenoi18n(b'max chain reach : ' + fmt % maxchainspan)
3215 3215 ui.writenoi18n(b'compression ratio : ' + fmt % compratio)
3216 3216
3217 3217 if format > 0:
3218 3218 ui.write(b'\n')
3219 3219 ui.writenoi18n(
3220 3220 b'uncompressed data size (min/max/avg) : %d / %d / %d\n'
3221 3221 % tuple(datasize)
3222 3222 )
3223 3223 ui.writenoi18n(
3224 3224 b'full revision size (min/max/avg) : %d / %d / %d\n'
3225 3225 % tuple(fullsize)
3226 3226 )
3227 3227 ui.writenoi18n(
3228 3228 b'inter-snapshot size (min/max/avg) : %d / %d / %d\n'
3229 3229 % tuple(semisize)
3230 3230 )
3231 3231 for depth in sorted(snapsizedepth):
3232 3232 if depth == 0:
3233 3233 continue
3234 3234 ui.writenoi18n(
3235 3235 b' level-%-3d (min/max/avg) : %d / %d / %d\n'
3236 3236 % ((depth,) + tuple(snapsizedepth[depth]))
3237 3237 )
3238 3238 ui.writenoi18n(
3239 3239 b'delta size (min/max/avg) : %d / %d / %d\n'
3240 3240 % tuple(deltasize)
3241 3241 )
3242 3242
3243 3243 if numdeltas > 0:
3244 3244 ui.write(b'\n')
3245 3245 fmt = pcfmtstr(numdeltas)
3246 3246 fmt2 = pcfmtstr(numdeltas, 4)
3247 3247 ui.writenoi18n(
3248 3248 b'deltas against prev : ' + fmt % pcfmt(numprev, numdeltas)
3249 3249 )
3250 3250 if numprev > 0:
3251 3251 ui.writenoi18n(
3252 3252 b' where prev = p1 : ' + fmt2 % pcfmt(nump1prev, numprev)
3253 3253 )
3254 3254 ui.writenoi18n(
3255 3255 b' where prev = p2 : ' + fmt2 % pcfmt(nump2prev, numprev)
3256 3256 )
3257 3257 ui.writenoi18n(
3258 3258 b' other : ' + fmt2 % pcfmt(numoprev, numprev)
3259 3259 )
3260 3260 if gdelta:
3261 3261 ui.writenoi18n(
3262 3262 b'deltas against p1 : ' + fmt % pcfmt(nump1, numdeltas)
3263 3263 )
3264 3264 ui.writenoi18n(
3265 3265 b'deltas against p2 : ' + fmt % pcfmt(nump2, numdeltas)
3266 3266 )
3267 3267 ui.writenoi18n(
3268 3268 b'deltas against other : ' + fmt % pcfmt(numother, numdeltas)
3269 3269 )
3270 3270
3271 3271
3272 3272 @command(
3273 3273 b'debugrevlogindex',
3274 3274 cmdutil.debugrevlogopts
3275 3275 + [(b'f', b'format', 0, _(b'revlog format'), _(b'FORMAT'))],
3276 3276 _(b'[-f FORMAT] -c|-m|FILE'),
3277 3277 optionalrepo=True,
3278 3278 )
3279 3279 def debugrevlogindex(ui, repo, file_=None, **opts):
3280 3280 """dump the contents of a revlog index"""
3281 3281 opts = pycompat.byteskwargs(opts)
3282 3282 r = cmdutil.openrevlog(repo, b'debugrevlogindex', file_, opts)
3283 3283 format = opts.get(b'format', 0)
3284 3284 if format not in (0, 1):
3285 3285 raise error.Abort(_(b"unknown format %d") % format)
3286 3286
3287 3287 if ui.debugflag:
3288 3288 shortfn = hex
3289 3289 else:
3290 3290 shortfn = short
3291 3291
3292 3292 # There might not be anything in r, so have a sane default
3293 3293 idlen = 12
3294 3294 for i in r:
3295 3295 idlen = len(shortfn(r.node(i)))
3296 3296 break
3297 3297
3298 3298 if format == 0:
3299 3299 if ui.verbose:
3300 3300 ui.writenoi18n(
3301 3301 b" rev offset length linkrev %s %s p2\n"
3302 3302 % (b"nodeid".ljust(idlen), b"p1".ljust(idlen))
3303 3303 )
3304 3304 else:
3305 3305 ui.writenoi18n(
3306 3306 b" rev linkrev %s %s p2\n"
3307 3307 % (b"nodeid".ljust(idlen), b"p1".ljust(idlen))
3308 3308 )
3309 3309 elif format == 1:
3310 3310 if ui.verbose:
3311 3311 ui.writenoi18n(
3312 3312 (
3313 3313 b" rev flag offset length size link p1"
3314 3314 b" p2 %s\n"
3315 3315 )
3316 3316 % b"nodeid".rjust(idlen)
3317 3317 )
3318 3318 else:
3319 3319 ui.writenoi18n(
3320 3320 b" rev flag size link p1 p2 %s\n"
3321 3321 % b"nodeid".rjust(idlen)
3322 3322 )
3323 3323
3324 3324 for i in r:
3325 3325 node = r.node(i)
3326 3326 if format == 0:
3327 3327 try:
3328 3328 pp = r.parents(node)
3329 3329 except Exception:
3330 3330 pp = [repo.nullid, repo.nullid]
3331 3331 if ui.verbose:
3332 3332 ui.write(
3333 3333 b"% 6d % 9d % 7d % 7d %s %s %s\n"
3334 3334 % (
3335 3335 i,
3336 3336 r.start(i),
3337 3337 r.length(i),
3338 3338 r.linkrev(i),
3339 3339 shortfn(node),
3340 3340 shortfn(pp[0]),
3341 3341 shortfn(pp[1]),
3342 3342 )
3343 3343 )
3344 3344 else:
3345 3345 ui.write(
3346 3346 b"% 6d % 7d %s %s %s\n"
3347 3347 % (
3348 3348 i,
3349 3349 r.linkrev(i),
3350 3350 shortfn(node),
3351 3351 shortfn(pp[0]),
3352 3352 shortfn(pp[1]),
3353 3353 )
3354 3354 )
3355 3355 elif format == 1:
3356 3356 pr = r.parentrevs(i)
3357 3357 if ui.verbose:
3358 3358 ui.write(
3359 3359 b"% 6d %04x % 8d % 8d % 8d % 6d % 6d % 6d %s\n"
3360 3360 % (
3361 3361 i,
3362 3362 r.flags(i),
3363 3363 r.start(i),
3364 3364 r.length(i),
3365 3365 r.rawsize(i),
3366 3366 r.linkrev(i),
3367 3367 pr[0],
3368 3368 pr[1],
3369 3369 shortfn(node),
3370 3370 )
3371 3371 )
3372 3372 else:
3373 3373 ui.write(
3374 3374 b"% 6d %04x % 8d % 6d % 6d % 6d %s\n"
3375 3375 % (
3376 3376 i,
3377 3377 r.flags(i),
3378 3378 r.rawsize(i),
3379 3379 r.linkrev(i),
3380 3380 pr[0],
3381 3381 pr[1],
3382 3382 shortfn(node),
3383 3383 )
3384 3384 )
3385 3385
3386 3386
3387 3387 @command(
3388 3388 b'debugrevspec',
3389 3389 [
3390 3390 (
3391 3391 b'',
3392 3392 b'optimize',
3393 3393 None,
3394 3394 _(b'print parsed tree after optimizing (DEPRECATED)'),
3395 3395 ),
3396 3396 (
3397 3397 b'',
3398 3398 b'show-revs',
3399 3399 True,
3400 3400 _(b'print list of result revisions (default)'),
3401 3401 ),
3402 3402 (
3403 3403 b's',
3404 3404 b'show-set',
3405 3405 None,
3406 3406 _(b'print internal representation of result set'),
3407 3407 ),
3408 3408 (
3409 3409 b'p',
3410 3410 b'show-stage',
3411 3411 [],
3412 3412 _(b'print parsed tree at the given stage'),
3413 3413 _(b'NAME'),
3414 3414 ),
3415 3415 (b'', b'no-optimized', False, _(b'evaluate tree without optimization')),
3416 3416 (b'', b'verify-optimized', False, _(b'verify optimized result')),
3417 3417 ],
3418 3418 b'REVSPEC',
3419 3419 )
3420 3420 def debugrevspec(ui, repo, expr, **opts):
3421 3421 """parse and apply a revision specification
3422 3422
3423 3423 Use the -p/--show-stage option to print the parsed tree at the given
3424 3424 stages. Use -p all to print the tree at every stage.
3425 3425
3426 3426 Use the --no-show-revs option with -s or -p to print only the set
3427 3427 representation or the parsed tree, respectively.
3428 3428
3429 3429 Use --verify-optimized to compare the optimized result with the unoptimized
3430 3430 one. Returns 1 if the optimized result differs.
3431 3431 """
3432 3432 opts = pycompat.byteskwargs(opts)
3433 3433 aliases = ui.configitems(b'revsetalias')
3434 3434 stages = [
3435 3435 (b'parsed', lambda tree: tree),
3436 3436 (
3437 3437 b'expanded',
3438 3438 lambda tree: revsetlang.expandaliases(tree, aliases, ui.warn),
3439 3439 ),
3440 3440 (b'concatenated', revsetlang.foldconcat),
3441 3441 (b'analyzed', revsetlang.analyze),
3442 3442 (b'optimized', revsetlang.optimize),
3443 3443 ]
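    # For example, with a ``revsetalias`` of ``r1 = p1(tip)``, the
    # expression ``r1 + tip`` is parsed into a tree, has the alias
    # substituted at the ``expanded`` stage, and is then concatenated,
    # analyzed and optimized before a matcher is built from it.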
3444 3444 if opts[b'no_optimized']:
3445 3445 stages = stages[:-1]
3446 3446 if opts[b'verify_optimized'] and opts[b'no_optimized']:
3447 3447 raise error.Abort(
3448 3448 _(b'cannot use --verify-optimized with --no-optimized')
3449 3449 )
3450 3450 stagenames = {n for n, f in stages}
3451 3451
3452 3452 showalways = set()
3453 3453 showchanged = set()
3454 3454 if ui.verbose and not opts[b'show_stage']:
3455 3455 # show parsed tree by --verbose (deprecated)
3456 3456 showalways.add(b'parsed')
3457 3457 showchanged.update([b'expanded', b'concatenated'])
3458 3458 if opts[b'optimize']:
3459 3459 showalways.add(b'optimized')
3460 3460 if opts[b'show_stage'] and opts[b'optimize']:
3461 3461 raise error.Abort(_(b'cannot use --optimize with --show-stage'))
3462 3462 if opts[b'show_stage'] == [b'all']:
3463 3463 showalways.update(stagenames)
3464 3464 else:
3465 3465 for n in opts[b'show_stage']:
3466 3466 if n not in stagenames:
3467 3467 raise error.Abort(_(b'invalid stage name: %s') % n)
3468 3468 showalways.update(opts[b'show_stage'])
3469 3469
3470 3470 treebystage = {}
3471 3471 printedtree = None
3472 3472 tree = revsetlang.parse(expr, lookup=revset.lookupfn(repo))
3473 3473 for n, f in stages:
3474 3474 treebystage[n] = tree = f(tree)
3475 3475 if n in showalways or (n in showchanged and tree != printedtree):
3476 3476 if opts[b'show_stage'] or n != b'parsed':
3477 3477 ui.write(b"* %s:\n" % n)
3478 3478 ui.write(revsetlang.prettyformat(tree), b"\n")
3479 3479 printedtree = tree
3480 3480
3481 3481 if opts[b'verify_optimized']:
3482 3482 arevs = revset.makematcher(treebystage[b'analyzed'])(repo)
3483 3483 brevs = revset.makematcher(treebystage[b'optimized'])(repo)
3484 3484 if opts[b'show_set'] or (opts[b'show_set'] is None and ui.verbose):
3485 3485 ui.writenoi18n(
3486 3486 b"* analyzed set:\n", stringutil.prettyrepr(arevs), b"\n"
3487 3487 )
3488 3488 ui.writenoi18n(
3489 3489 b"* optimized set:\n", stringutil.prettyrepr(brevs), b"\n"
3490 3490 )
3491 3491 arevs = list(arevs)
3492 3492 brevs = list(brevs)
3493 3493 if arevs == brevs:
3494 3494 return 0
3495 3495 ui.writenoi18n(b'--- analyzed\n', label=b'diff.file_a')
3496 3496 ui.writenoi18n(b'+++ optimized\n', label=b'diff.file_b')
3497 3497 sm = difflib.SequenceMatcher(None, arevs, brevs)
3498 3498 for tag, alo, ahi, blo, bhi in sm.get_opcodes():
3499 3499 if tag in ('delete', 'replace'):
3500 3500 for c in arevs[alo:ahi]:
3501 3501 ui.write(b'-%d\n' % c, label=b'diff.deleted')
3502 3502 if tag in ('insert', 'replace'):
3503 3503 for c in brevs[blo:bhi]:
3504 3504 ui.write(b'+%d\n' % c, label=b'diff.inserted')
3505 3505 if tag == 'equal':
3506 3506 for c in arevs[alo:ahi]:
3507 3507 ui.write(b' %d\n' % c)
3508 3508 return 1
3509 3509
3510 3510 func = revset.makematcher(tree)
3511 3511 revs = func(repo)
3512 3512 if opts[b'show_set'] or (opts[b'show_set'] is None and ui.verbose):
3513 3513 ui.writenoi18n(b"* set:\n", stringutil.prettyrepr(revs), b"\n")
3514 3514 if not opts[b'show_revs']:
3515 3515 return
3516 3516 for c in revs:
3517 3517 ui.write(b"%d\n" % c)
3518 3518
3519 3519
3520 3520 @command(
3521 3521 b'debugserve',
3522 3522 [
3523 3523 (
3524 3524 b'',
3525 3525 b'sshstdio',
3526 3526 False,
3527 3527 _(b'run an SSH server bound to process handles'),
3528 3528 ),
3529 3529 (b'', b'logiofd', b'', _(b'file descriptor to log server I/O to')),
3530 3530 (b'', b'logiofile', b'', _(b'file to log server I/O to')),
3531 3531 ],
3532 3532 b'',
3533 3533 )
3534 3534 def debugserve(ui, repo, **opts):
3535 3535 """run a server with advanced settings
3536 3536
3537 3537 This command is similar to :hg:`serve`. It exists partially as a
3538 3538 workaround for the fact that ``hg serve --stdio`` must have specific
3539 3539 arguments for security reasons.
3540 3540 """
3541 3541 opts = pycompat.byteskwargs(opts)
3542 3542
3543 3543 if not opts[b'sshstdio']:
3544 3544 raise error.Abort(_(b'only --sshstdio is currently supported'))
3545 3545
3546 3546 logfh = None
3547 3547
3548 3548 if opts[b'logiofd'] and opts[b'logiofile']:
3549 3549 raise error.Abort(_(b'cannot use both --logiofd and --logiofile'))
3550 3550
3551 3551 if opts[b'logiofd']:
3552 3552 # Ideally we would be line buffered. But line buffering in binary
3553 3553 # mode isn't supported and emits a warning in Python 3.8+. Disabling
3554 3554 # buffering could have performance impacts. But since this isn't
3555 3555 # performance critical code, it should be fine.
3556 3556 try:
3557 3557 logfh = os.fdopen(int(opts[b'logiofd']), 'ab', 0)
3558 3558 except OSError as e:
3559 3559 if e.errno != errno.ESPIPE:
3560 3560 raise
3561 3561 # can't seek a pipe, so `ab` mode fails on py3
3562 3562 logfh = os.fdopen(int(opts[b'logiofd']), 'wb', 0)
3563 3563 elif opts[b'logiofile']:
3564 3564 logfh = open(opts[b'logiofile'], b'ab', 0)
3565 3565
3566 3566 s = wireprotoserver.sshserver(ui, repo, logfh=logfh)
3567 3567 s.serve_forever()
3568 3568
3569 3569
3570 3570 @command(b'debugsetparents', [], _(b'REV1 [REV2]'))
3571 3571 def debugsetparents(ui, repo, rev1, rev2=None):
3572 3572 """manually set the parents of the current working directory (DANGEROUS)
3573 3573
3574 3574 This command is not what you are looking for and should not be used. Using
3575 3575 this command will most certainly result in slight corruption of the file
3576 3576 level histories within your repository. DO NOT USE THIS COMMAND.
3577 3577
3578 3578 The command updates the p1 and p2 fields in the dirstate without touching
3579 3579 anything else. This is useful for writing repository conversion tools, but
3580 3580 it should be used with extreme care. For example, neither the working
3581 3581 directory nor the dirstate is updated, so file status may be incorrect
3582 3582 after running this command. Only use it if you are one of the few people who
3583 3583 deeply understand both conversion tools and file level histories. If you are
3584 3584 reading this help, you are not one of those people (most of them sailed west
3585 3585 from Mithlond anyway).
3586 3586
3587 3587 So, one last time: DO NOT USE THIS COMMAND.
3588 3588
3589 3589 Returns 0 on success.
3590 3590 """
3591 3591
3592 3592 node1 = scmutil.revsingle(repo, rev1).node()
3593 3593 node2 = scmutil.revsingle(repo, rev2, b'null').node()
3594 3594
3595 3595 with repo.wlock():
3596 3596 repo.setparents(node1, node2)
3597 3597
3598 3598
3599 3599 @command(b'debugsidedata', cmdutil.debugrevlogopts, _(b'-c|-m|FILE REV'))
3600 3600 def debugsidedata(ui, repo, file_, rev=None, **opts):
3601 3601 """dump the side data for a cl/manifest/file revision
3602 3602
3603 3603 Use --verbose to dump the sidedata content."""
3604 3604 opts = pycompat.byteskwargs(opts)
3605 3605 if opts.get(b'changelog') or opts.get(b'manifest') or opts.get(b'dir'):
3606 3606 if rev is not None:
3607 3607 raise error.CommandError(b'debugsidedata', _(b'invalid arguments'))
3608 3608 file_, rev = None, file_
3609 3609 elif rev is None:
3610 3610 raise error.CommandError(b'debugsidedata', _(b'invalid arguments'))
3611 3611 r = cmdutil.openstorage(repo, b'debugsidedata', file_, opts)
3612 3612 r = getattr(r, '_revlog', r)
3613 3613 try:
3614 3614 sidedata = r.sidedata(r.lookup(rev))
3615 3615 except KeyError:
3616 3616 raise error.Abort(_(b'invalid revision identifier %s') % rev)
3617 3617 if sidedata:
3618 3618 sidedata = list(sidedata.items())
3619 3619 sidedata.sort()
3620 3620 ui.writenoi18n(b'%d sidedata entries\n' % len(sidedata))
3621 3621 for key, value in sidedata:
3622 3622 ui.writenoi18n(b' entry-%04o size %d\n' % (key, len(value)))
3623 3623 if ui.verbose:
3624 3624 ui.writenoi18n(b' %s\n' % stringutil.pprint(value))
3625 3625
3626 3626
3627 3627 @command(b'debugssl', [], b'[SOURCE]', optionalrepo=True)
3628 3628 def debugssl(ui, repo, source=None, **opts):
3629 3629 """test a secure connection to a server
3630 3630
3631 3631 This builds the certificate chain for the server on Windows, installing the
3632 3632 missing intermediates and trusted root via Windows Update if necessary. It
3633 3633 does nothing on other platforms.
3634 3634
3635 3635 If SOURCE is omitted, the 'default' path will be used. If a URL is given,
3636 3636 that server is used. See :hg:`help urls` for more information.
3637 3637
3638 3638 If the update succeeds, retry the original operation. Otherwise, the cause
3639 3639 of the SSL error is likely another issue.
3640 3640 """
3641 3641 if not pycompat.iswindows:
3642 3642 raise error.Abort(
3643 3643 _(b'certificate chain building is only possible on Windows')
3644 3644 )
3645 3645
3646 3646 if not source:
3647 3647 if not repo:
3648 3648 raise error.Abort(
3649 3649 _(
3650 3650 b"there is no Mercurial repository here, and no "
3651 3651 b"server specified"
3652 3652 )
3653 3653 )
3654 3654 source = b"default"
3655 3655
3656 3656 source, branches = urlutil.get_unique_pull_path(
3657 3657 b'debugssl', repo, ui, source
3658 3658 )
3659 3659 url = urlutil.url(source)
3660 3660
3661 3661 defaultport = {b'https': 443, b'ssh': 22}
3662 3662 if url.scheme in defaultport:
3663 3663 try:
3664 3664 addr = (url.host, int(url.port or defaultport[url.scheme]))
3665 3665 except ValueError:
3666 3666 raise error.Abort(_(b"malformed port number in URL"))
3667 3667 else:
3668 3668 raise error.Abort(_(b"only https and ssh connections are supported"))
3669 3669
3670 3670 from . import win32
3671 3671
3672 3672 s = ssl.wrap_socket(
3673 3673 socket.socket(),
3674 3674 ssl_version=ssl.PROTOCOL_TLS,
3675 3675 cert_reqs=ssl.CERT_NONE,
3676 3676 ca_certs=None,
3677 3677 )
3678 3678
3679 3679 try:
3680 3680 s.connect(addr)
3681 3681 cert = s.getpeercert(True)
3682 3682
3683 3683 ui.status(_(b'checking the certificate chain for %s\n') % url.host)
3684 3684
3685 3685 complete = win32.checkcertificatechain(cert, build=False)
3686 3686
3687 3687 if not complete:
3688 3688 ui.status(_(b'certificate chain is incomplete, updating... '))
3689 3689
3690 3690 if not win32.checkcertificatechain(cert):
3691 3691 ui.status(_(b'failed.\n'))
3692 3692 else:
3693 3693 ui.status(_(b'done.\n'))
3694 3694 else:
3695 3695 ui.status(_(b'full certificate chain is available\n'))
3696 3696 finally:
3697 3697 s.close()
3698 3698
3699 3699
3700 3700 @command(
3701 3701 b"debugbackupbundle",
3702 3702 [
3703 3703 (
3704 3704 b"",
3705 3705 b"recover",
3706 3706 b"",
3707 3707 b"brings the specified changeset back into the repository",
3708 3708 )
3709 3709 ]
3710 3710 + cmdutil.logopts,
3711 3711 _(b"hg debugbackupbundle [--recover HASH]"),
3712 3712 )
3713 3713 def debugbackupbundle(ui, repo, *pats, **opts):
3714 3714 """lists the changesets available in backup bundles
3715 3715
3716 3716 Without any arguments, this command prints a list of the changesets in each
3717 3717 backup bundle.
3718 3718
3719 3719 --recover takes a changeset hash and unbundles the first bundle that
3720 3720 contains that hash, which puts that changeset back in your repository.
3721 3721
3722 3722 --verbose will print the entire commit message and the bundle path for that
3723 3723 backup.
3724 3724 """
3725 3725 backups = list(
3726 3726 filter(
3727 3727 os.path.isfile, glob.glob(repo.vfs.join(b"strip-backup") + b"/*.hg")
3728 3728 )
3729 3729 )
3730 3730 backups.sort(key=lambda x: os.path.getmtime(x), reverse=True)
3731 3731
3732 3732 opts = pycompat.byteskwargs(opts)
3733 3733 opts[b"bundle"] = b""
3734 3734 opts[b"force"] = None
3735 3735 limit = logcmdutil.getlimit(opts)
3736 3736
3737 3737 def display(other, chlist, displayer):
3738 3738 if opts.get(b"newest_first"):
3739 3739 chlist.reverse()
3740 3740 count = 0
3741 3741 for n in chlist:
3742 3742 if limit is not None and count >= limit:
3743 3743 break
3744 3744 parents = [
3745 3745 True for p in other.changelog.parents(n) if p != repo.nullid
3746 3746 ]
3747 3747 if opts.get(b"no_merges") and len(parents) == 2:
3748 3748 continue
3749 3749 count += 1
3750 3750 displayer.show(other[n])
3751 3751
3752 3752 recovernode = opts.get(b"recover")
3753 3753 if recovernode:
3754 3754 if scmutil.isrevsymbol(repo, recovernode):
3755 3755 ui.warn(_(b"%s already exists in the repo\n") % recovernode)
3756 3756 return
3757 3757 elif backups:
3758 3758 msg = _(
3759 3759 b"Recover changesets using: hg debugbackupbundle --recover "
3760 3760 b"<changeset hash>\n\nAvailable backup changesets:"
3761 3761 )
3762 3762 ui.status(msg, label=b"status.removed")
3763 3763 else:
3764 3764 ui.status(_(b"no backup changesets found\n"))
3765 3765 return
3766 3766
3767 3767 for backup in backups:
3768 3768 # Much of this is copied from the hg incoming logic
3769 3769 source = os.path.relpath(backup, encoding.getcwd())
3770 3770 source, branches = urlutil.get_unique_pull_path(
3771 3771 b'debugbackupbundle',
3772 3772 repo,
3773 3773 ui,
3774 3774 source,
3775 3775 default_branches=opts.get(b'branch'),
3776 3776 )
3777 3777 try:
3778 3778 other = hg.peer(repo, opts, source)
3779 3779 except error.LookupError as ex:
3780 3780 msg = _(b"\nwarning: unable to open bundle %s") % source
3781 3781 hint = _(b"\n(missing parent rev %s)\n") % short(ex.name)
3782 3782 ui.warn(msg, hint=hint)
3783 3783 continue
3784 3784 revs, checkout = hg.addbranchrevs(
3785 3785 repo, other, branches, opts.get(b"rev")
3786 3786 )
3787 3787
3788 3788 if revs:
3789 3789 revs = [other.lookup(rev) for rev in revs]
3790 3790
3791 3791 quiet = ui.quiet
3792 3792 try:
3793 3793 ui.quiet = True
3794 3794 other, chlist, cleanupfn = bundlerepo.getremotechanges(
3795 3795 ui, repo, other, revs, opts[b"bundle"], opts[b"force"]
3796 3796 )
3797 3797 except error.LookupError:
3798 3798 continue
3799 3799 finally:
3800 3800 ui.quiet = quiet
3801 3801
3802 3802 try:
3803 3803 if not chlist:
3804 3804 continue
3805 3805 if recovernode:
3806 3806 with repo.lock(), repo.transaction(b"unbundle") as tr:
3807 3807 if scmutil.isrevsymbol(other, recovernode):
3808 3808 ui.status(_(b"Unbundling %s\n") % (recovernode))
3809 3809 f = hg.openpath(ui, source)
3810 3810 gen = exchange.readbundle(ui, f, source)
3811 3811 if isinstance(gen, bundle2.unbundle20):
3812 3812 bundle2.applybundle(
3813 3813 repo,
3814 3814 gen,
3815 3815 tr,
3816 3816 source=b"unbundle",
3817 3817 url=b"bundle:" + source,
3818 3818 )
3819 3819 else:
3820 3820 gen.apply(repo, b"unbundle", b"bundle:" + source)
3821 3821 break
3822 3822 else:
3823 3823 backupdate = encoding.strtolocal(
3824 3824 time.strftime(
3825 3825 "%a %H:%M, %Y-%m-%d",
3826 3826 time.localtime(os.path.getmtime(source)),
3827 3827 )
3828 3828 )
3829 3829 ui.status(b"\n%s\n" % (backupdate.ljust(50)))
3830 3830 if ui.verbose:
3831 3831 ui.status(b"%s%s\n" % (b"bundle:".ljust(13), source))
3832 3832 else:
3833 3833 opts[
3834 3834 b"template"
3835 3835 ] = b"{label('status.modified', node|short)} {desc|firstline}\n"
3836 3836 displayer = logcmdutil.changesetdisplayer(
3837 3837 ui, other, opts, False
3838 3838 )
3839 3839 display(other, chlist, displayer)
3840 3840 displayer.close()
3841 3841 finally:
3842 3842 cleanupfn()
3843 3843
3844 3844
3845 3845 @command(
3846 3846 b'debugsub',
3847 3847 [(b'r', b'rev', b'', _(b'revision to check'), _(b'REV'))],
3848 3848 _(b'[-r REV] [REV]'),
3849 3849 )
3850 3850 def debugsub(ui, repo, rev=None):
3851 3851 ctx = scmutil.revsingle(repo, rev, None)
3852 3852 for k, v in sorted(ctx.substate.items()):
3853 3853 ui.writenoi18n(b'path %s\n' % k)
3854 3854 ui.writenoi18n(b' source %s\n' % v[0])
3855 3855 ui.writenoi18n(b' revision %s\n' % v[1])
3856 3856
3857 3857
3858 3858 @command(b'debugshell', optionalrepo=True)
3859 3859 def debugshell(ui, repo):
3860 3860 """run an interactive Python interpreter
3861 3861
3862 3862 The local namespace is provided with a reference to the ui and
3863 3863 the repo instance (if available).
3864 3864 """
3865 3865 import code
3866 3866
3867 3867 imported_objects = {
3868 3868 'ui': ui,
3869 3869 'repo': repo,
3870 3870 }
3871 3871
3872 3872 code.interact(local=imported_objects)
3873 3873
3874 3874
3875 3875 @command(
3876 3876 b'debugsuccessorssets',
3877 3877 [(b'', b'closest', False, _(b'return closest successors sets only'))],
3878 3878 _(b'[REV]'),
3879 3879 )
3880 3880 def debugsuccessorssets(ui, repo, *revs, **opts):
3881 3881 """show set of successors for revision
3882 3882
3883 3883 A successors set of changeset A is a consistent group of revisions that
3884 3884 succeed A. It contains only non-obsolete changesets unless the
3885 3885 ``--closest`` option is given.
3886 3886
3887 3887 In most cases a changeset A has a single successors set containing a single
3888 3888 successor (changeset A replaced by A').
3889 3889
3890 3890 A changeset that is made obsolete with no successors is called "pruned".
3891 3891 Such changesets have no successors sets at all.
3892 3892
3893 3893 A changeset that has been "split" will have a successors set containing
3894 3894 more than one successor.
3895 3895
3896 3896 A changeset that has been rewritten in multiple different ways is called
3897 3897 "divergent". Such changesets have multiple successor sets (each of which
3898 3898 may also be split, i.e. have multiple successors).
3899 3899
3900 3900 Results are displayed as follows::
3901 3901
3902 3902 <rev1>
3903 3903 <successors-1A>
3904 3904 <rev2>
3905 3905 <successors-2A>
3906 3906 <successors-2B1> <successors-2B2> <successors-2B3>
3907 3907
3908 3908 Here rev2 has two possible (i.e. divergent) successors sets. The first
3909 3909 holds one element, whereas the second holds three (i.e. the changeset has
3910 3910 been split).
3911 3911 """
3912 3912 # passed to successorssets caching computation from one call to another
3913 3913 cache = {}
3914 3914 ctx2str = bytes
3915 3915 node2str = short
3916 3916 for rev in scmutil.revrange(repo, revs):
3917 3917 ctx = repo[rev]
3918 3918 ui.write(b'%s\n' % ctx2str(ctx))
3919 3919 for succsset in obsutil.successorssets(
3920 3920 repo, ctx.node(), closest=opts['closest'], cache=cache
3921 3921 ):
3922 3922 if succsset:
3923 3923 ui.write(b' ')
3924 3924 ui.write(node2str(succsset[0]))
3925 3925 for node in succsset[1:]:
3926 3926 ui.write(b' ')
3927 3927 ui.write(node2str(node))
3928 3928 ui.write(b'\n')
3929 3929
3930 3930
3931 3931 @command(b'debugtagscache', [])
3932 3932 def debugtagscache(ui, repo):
3933 3933 """display the contents of .hg/cache/hgtagsfnodes1"""
3934 3934 cache = tagsmod.hgtagsfnodescache(repo.unfiltered())
3935 3935 flog = repo.file(b'.hgtags')
3936 3936 for r in repo:
3937 3937 node = repo[r].node()
3938 3938 tagsnode = cache.getfnode(node, computemissing=False)
3939 3939 if tagsnode:
3940 3940 tagsnodedisplay = hex(tagsnode)
3941 3941 if not flog.hasnode(tagsnode):
3942 3942 tagsnodedisplay += b' (unknown node)'
3943 3943 elif tagsnode is None:
3944 3944 tagsnodedisplay = b'missing'
3945 3945 else:
3946 3946 tagsnodedisplay = b'invalid'
3947 3947
3948 3948 ui.write(b'%d %s %s\n' % (r, hex(node), tagsnodedisplay))
3949 3949
3950 3950
3951 3951 @command(
3952 3952 b'debugtemplate',
3953 3953 [
3954 3954 (b'r', b'rev', [], _(b'apply template on changesets'), _(b'REV')),
3955 3955 (b'D', b'define', [], _(b'define template keyword'), _(b'KEY=VALUE')),
3956 3956 ],
3957 3957 _(b'[-r REV]... [-D KEY=VALUE]... TEMPLATE'),
3958 3958 optionalrepo=True,
3959 3959 )
3960 3960 def debugtemplate(ui, repo, tmpl, **opts):
3961 3961 """parse and apply a template
3962 3962
3963 3963 If -r/--rev is given, the template is processed as a log template and
3964 3964 applied to the given changesets. Otherwise, it is processed as a generic
3965 3965 template.
3966 3966
3967 3967 Use --verbose to print the parsed tree.
3968 3968 """
3969 3969 revs = None
3970 3970 if opts['rev']:
3971 3971 if repo is None:
3972 3972 raise error.RepoError(
3973 3973 _(b'there is no Mercurial repository here (.hg not found)')
3974 3974 )
3975 3975 revs = scmutil.revrange(repo, opts['rev'])
3976 3976
3977 3977 props = {}
3978 3978 for d in opts['define']:
3979 3979 try:
3980 3980 k, v = (e.strip() for e in d.split(b'=', 1))
3981 3981 if not k or k == b'ui':
3982 3982 raise ValueError
3983 3983 props[k] = v
3984 3984 except ValueError:
3985 3985 raise error.Abort(_(b'malformed keyword definition: %s') % d)
3986 3986
3987 3987 if ui.verbose:
3988 3988 aliases = ui.configitems(b'templatealias')
3989 3989 tree = templater.parse(tmpl)
3990 3990 ui.note(templater.prettyformat(tree), b'\n')
3991 3991 newtree = templater.expandaliases(tree, aliases)
3992 3992 if newtree != tree:
3993 3993 ui.notenoi18n(
3994 3994 b"* expanded:\n", templater.prettyformat(newtree), b'\n'
3995 3995 )
3996 3996
3997 3997 if revs is None:
3998 3998 tres = formatter.templateresources(ui, repo)
3999 3999 t = formatter.maketemplater(ui, tmpl, resources=tres)
4000 4000 if ui.verbose:
4001 4001 kwds, funcs = t.symbolsuseddefault()
4002 4002 ui.writenoi18n(b"* keywords: %s\n" % b', '.join(sorted(kwds)))
4003 4003 ui.writenoi18n(b"* functions: %s\n" % b', '.join(sorted(funcs)))
4004 4004 ui.write(t.renderdefault(props))
4005 4005 else:
4006 4006 displayer = logcmdutil.maketemplater(ui, repo, tmpl)
4007 4007 if ui.verbose:
4008 4008 kwds, funcs = displayer.t.symbolsuseddefault()
4009 4009 ui.writenoi18n(b"* keywords: %s\n" % b', '.join(sorted(kwds)))
4010 4010 ui.writenoi18n(b"* functions: %s\n" % b', '.join(sorted(funcs)))
4011 4011 for r in revs:
4012 4012 displayer.show(repo[r], **pycompat.strkwargs(props))
4013 4013 displayer.close()
4014 4014
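# Illustrative usage (the ``owner`` keyword is hypothetical):
#
#   $ hg debugtemplate -r . -D owner=alice '{node|short} {owner}\n'
#
# renders the template against the working directory parent with the
# extra template keyword ``owner`` bound to ``alice``.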
4015 4015
4016 4016 @command(
4017 4017 b'debuguigetpass',
4018 4018 [
4019 4019 (b'p', b'prompt', b'', _(b'prompt text'), _(b'TEXT')),
4020 4020 ],
4021 4021 _(b'[-p TEXT]'),
4022 4022 norepo=True,
4023 4023 )
4024 4024 def debuguigetpass(ui, prompt=b''):
4025 4025 """show prompt to type password"""
4026 4026 r = ui.getpass(prompt)
4027 4027 if r is None:
4028 4028 r = b"<default response>"
4029 4029 ui.writenoi18n(b'response: %s\n' % r)
4030 4030
4031 4031
4032 4032 @command(
4033 4033 b'debuguiprompt',
4034 4034 [
4035 4035 (b'p', b'prompt', b'', _(b'prompt text'), _(b'TEXT')),
4036 4036 ],
4037 4037 _(b'[-p TEXT]'),
4038 4038 norepo=True,
4039 4039 )
4040 4040 def debuguiprompt(ui, prompt=b''):
4041 4041 """show plain prompt"""
4042 4042 r = ui.prompt(prompt)
4043 4043 ui.writenoi18n(b'response: %s\n' % r)
4044 4044
4045 4045
4046 4046 @command(b'debugupdatecaches', [])
4047 4047 def debugupdatecaches(ui, repo, *pats, **opts):
4048 4048 """warm all known caches in the repository"""
4049 4049 with repo.wlock(), repo.lock():
4050 4050 repo.updatecaches(full=True)
4051 4051
4052 4052
4053 4053 @command(
4054 4054 b'debugupgraderepo',
4055 4055 [
4056 4056 (
4057 4057 b'o',
4058 4058 b'optimize',
4059 4059 [],
4060 4060 _(b'extra optimization to perform'),
4061 4061 _(b'NAME'),
4062 4062 ),
4063 4063 (b'', b'run', False, _(b'performs an upgrade')),
4064 4064 (b'', b'backup', True, _(b'keep the old repository content around')),
4065 4065 (b'', b'changelog', None, _(b'select the changelog for upgrade')),
4066 4066 (b'', b'manifest', None, _(b'select the manifest for upgrade')),
4067 4067 (b'', b'filelogs', None, _(b'select all filelogs for upgrade')),
4068 4068 ],
4069 4069 )
4070 4070 def debugupgraderepo(ui, repo, run=False, optimize=None, backup=True, **opts):
4071 4071 """upgrade a repository to use different features
4072 4072
4073 4073 If no arguments are specified, the repository is evaluated for upgrade
4074 4074 and a list of problems and potential optimizations is printed.
4075 4075
4076 4076 With ``--run``, a repository upgrade is performed. Behavior of the upgrade
4077 4077 can be influenced via additional arguments. More details will be provided
4078 4078 by the command output when run without ``--run``.
4079 4079
4080 4080 During the upgrade, the repository will be locked and no writes will be
4081 4081 allowed.
4082 4082
4083 4083 At the end of the upgrade, the repository may not be readable while new
4084 4084 repository data is swapped in. This window will be as long as it takes to
4085 4085 rename some directories inside the ``.hg`` directory. On most machines, this
4086 4086 should complete almost instantaneously and the chances of a consumer being
4087 4087 unable to access the repository should be low.
4088 4088
4089 4089 By default, all revlogs will be upgraded. You can restrict this using flags
4090 4090 such as `--manifest`:
4091 4091
4092 4092 * `--manifest`: only optimize the manifest
4093 4093 * `--no-manifest`: optimize all revlogs except the manifest
4094 4094 * `--changelog`: optimize the changelog only
4095 4095 * `--no-changelog --no-manifest`: optimize filelogs only
4096 4096 * `--filelogs`: optimize the filelogs only
4097 4097 * `--no-changelog --no-manifest --no-filelogs`: skip all revlog optimizations
4098 4098 """
4099 4099 return upgrade.upgraderepo(
4100 4100 ui, repo, run=run, optimize=set(optimize), backup=backup, **opts
4101 4101 )
4102 4102
4103 4103
4104 4104 @command(
4105 4105 b'debugwalk', cmdutil.walkopts, _(b'[OPTION]... [FILE]...'), inferrepo=True
4106 4106 )
4107 4107 def debugwalk(ui, repo, *pats, **opts):
4108 4108 """show how files match on given patterns"""
4109 4109 opts = pycompat.byteskwargs(opts)
4110 4110 m = scmutil.match(repo[None], pats, opts)
4111 4111 if ui.verbose:
4112 4112 ui.writenoi18n(b'* matcher:\n', stringutil.prettyrepr(m), b'\n')
4113 4113 items = list(repo[None].walk(m))
4114 4114 if not items:
4115 4115 return
4116 4116 f = lambda fn: fn
4117 4117 if ui.configbool(b'ui', b'slash') and pycompat.ossep != b'/':
4118 4118 f = lambda fn: util.normpath(fn)
4119 4119 fmt = b'f %%-%ds %%-%ds %%s' % (
4120 4120 max([len(abs) for abs in items]),
4121 4121 max([len(repo.pathto(abs)) for abs in items]),
4122 4122 )
4123 4123 for abs in items:
4124 4124 line = fmt % (
4125 4125 abs,
4126 4126 f(repo.pathto(abs)),
4127 4127 m.exact(abs) and b'exact' or b'',
4128 4128 )
4129 4129 ui.write(b"%s\n" % line.rstrip())
4130 4130
4131 4131
4132 4132 @command(b'debugwhyunstable', [], _(b'REV'))
4133 4133 def debugwhyunstable(ui, repo, rev):
4134 4134 """explain instabilities of a changeset"""
4135 4135 for entry in obsutil.whyunstable(repo, scmutil.revsingle(repo, rev)):
4136 4136 dnodes = b''
4137 4137 if entry.get(b'divergentnodes'):
4138 4138 dnodes = (
4139 4139 b' '.join(
4140 4140 b'%s (%s)' % (ctx.hex(), ctx.phasestr())
4141 4141 for ctx in entry[b'divergentnodes']
4142 4142 )
4143 4143 + b' '
4144 4144 )
4145 4145 ui.write(
4146 4146 b'%s: %s%s %s\n'
4147 4147 % (entry[b'instability'], dnodes, entry[b'reason'], entry[b'node'])
4148 4148 )
4149 4149
4150 4150
4151 4151 @command(
4152 4152 b'debugwireargs',
4153 4153 [
4154 4154 (b'', b'three', b'', b'three'),
4155 4155 (b'', b'four', b'', b'four'),
4156 4156 (b'', b'five', b'', b'five'),
4157 4157 ]
4158 4158 + cmdutil.remoteopts,
4159 4159 _(b'REPO [OPTIONS]... [ONE [TWO]]'),
4160 4160 norepo=True,
4161 4161 )
4162 4162 def debugwireargs(ui, repopath, *vals, **opts):
4163 4163 opts = pycompat.byteskwargs(opts)
4164 4164 repo = hg.peer(ui, opts, repopath)
4165 4165 try:
4166 4166 for opt in cmdutil.remoteopts:
4167 4167 del opts[opt[1]]
4168 4168 args = {}
4169 4169 for k, v in pycompat.iteritems(opts):
4170 4170 if v:
4171 4171 args[k] = v
4172 4172 args = pycompat.strkwargs(args)
4173 4173 # run twice to check that we don't mess up the stream for the next command
4174 4174 res1 = repo.debugwireargs(*vals, **args)
4175 4175 res2 = repo.debugwireargs(*vals, **args)
4176 4176 ui.write(b"%s\n" % res1)
4177 4177 if res1 != res2:
4178 4178 ui.warn(b"%s\n" % res2)
4179 4179 finally:
4180 4180 repo.close()
4181 4181
4182 4182
4183 4183 def _parsewirelangblocks(fh):
4184 4184 activeaction = None
4185 4185 blocklines = []
4186 4186 lastindent = 0
4187 4187
4188 4188 for line in fh:
4189 4189 line = line.rstrip()
4190 4190 if not line:
4191 4191 continue
4192 4192
4193 4193 if line.startswith(b'#'):
4194 4194 continue
4195 4195
4196 4196 if not line.startswith(b' '):
4197 4197 # New block. Flush previous one.
4198 4198 if activeaction:
4199 4199 yield activeaction, blocklines
4200 4200
4201 4201 activeaction = line
4202 4202 blocklines = []
4203 4203 lastindent = 0
4204 4204 continue
4205 4205
4206 4206 # Else we start with an indent.
4207 4207
4208 4208 if not activeaction:
4209 4209 raise error.Abort(_(b'indented line outside of block'))
4210 4210
4211 4211 indent = len(line) - len(line.lstrip())
4212 4212
4213 4213 # If this line is indented more than the last line, concatenate it.
4214 4214 if indent > lastindent and blocklines:
4215 4215 blocklines[-1] += line.lstrip()
4216 4216 else:
4217 4217 blocklines.append(line)
4218 4218 lastindent = indent
4219 4219
4220 4220 # Flush last block.
4221 4221 if activeaction:
4222 4222 yield activeaction, blocklines
4223 4223
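# Illustrative sketch of what the parser above yields: given an input
# consisting of ``command listkeys`` at column 0 followed by a line
# reading ``    namespace bookmarks``, it yields a single block
# (b'command listkeys', [b'    namespace bookmarks']) -- the argument
# line keeps its leading indentation for later parsing.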
4224 4224
4225 4225 @command(
4226 4226 b'debugwireproto',
4227 4227 [
4228 4228 (b'', b'localssh', False, _(b'start an SSH server for this repo')),
4229 4229 (b'', b'peer', b'', _(b'construct a specific version of the peer')),
4230 4230 (
4231 4231 b'',
4232 4232 b'noreadstderr',
4233 4233 False,
4234 4234 _(b'do not read from stderr of the remote'),
4235 4235 ),
4236 4236 (
4237 4237 b'',
4238 4238 b'nologhandshake',
4239 4239 False,
4240 4240 _(b'do not log I/O related to the peer handshake'),
4241 4241 ),
4242 4242 ]
4243 4243 + cmdutil.remoteopts,
4244 4244 _(b'[PATH]'),
4245 4245 optionalrepo=True,
4246 4246 )
4247 4247 def debugwireproto(ui, repo, path=None, **opts):
4248 4248 """send wire protocol commands to a server
4249 4249
4250 4250 This command can be used to issue wire protocol commands to remote
4251 4251 peers and to debug the raw data being exchanged.
4252 4252
4253 4253 ``--localssh`` will start an SSH server against the current repository
4254 4254 and connect to that. By default, the connection will perform a handshake
4255 4255 and establish an appropriate peer instance.
4256 4256
4257 4257 ``--peer`` can be used to bypass the handshake protocol and construct a
4258 4258 peer instance using the specified class type. Valid values are ``raw``,
4259 4259 ``http2``, ``ssh1``, and ``ssh2``. ``raw`` instances only allow sending
4260 4260 raw data payloads and don't support higher-level command actions.
4261 4261
4262 4262 ``--noreadstderr`` can be used to disable automatic reading from stderr
4263 4263 of the peer (for SSH connections only). Disabling automatic reading of
4264 4264 stderr is useful for making output more deterministic.
4265 4265
4266 4266 Commands are issued via a mini language supplied on stdin.
4267 4267 The language consists of individual actions to perform. An action is
4268 4268 defined by a block. A block is defined as a line with no leading
4269 4269 space followed by 0 or more lines with leading space. Blocks are
4270 4270 effectively a high-level command with additional metadata.
4271 4271
4272 4272 Lines beginning with ``#`` are ignored.
4273 4273
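    For example, this script runs the ``listkeys`` command and then
    closes the connection (illustrative; it assumes the server supports
    ``listkeys`` and a ``bookmarks`` namespace)::

      command listkeys
          namespace bookmarks
      close
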
4274 4274 The following sections denote available actions.
4275 4275
4276 4276 raw
4277 4277 ---
4278 4278
4279 4279 Send raw data to the server.
4280 4280
4281 4281 The block payload contains the raw data to send as one atomic send
4282 4282 operation. The data may not actually be delivered in a single system
4283 4283 call: it depends on the abilities of the transport being used.
4284 4284
4285 4285 Each line in the block is de-indented and concatenated. Then, that
4286 4286 value is evaluated as a Python b'' literal. This allows the use of
4287 4287 backslash escaping, etc.
4288 4288
4289 4289 raw+
4290 4290 ----
4291 4291
4292 4292 Behaves like ``raw`` except flushes output afterwards.
4293 4293
4294 4294 command <X>
4295 4295 -----------
4296 4296
4297 4297 Send a request to run a named command, whose name follows the ``command``
4298 4298 string.
4299 4299
4300 4300 Arguments to the command are defined as lines in this block. The format of
4301 4301 each line is ``<key> <value>``. e.g.::
4302 4302
4303 4303 command listkeys
4304 4304 namespace bookmarks
4305 4305
4306 4306 If the value begins with ``eval:``, it will be interpreted as a Python
4307 4307 literal expression. Otherwise values are interpreted as Python b'' literals.
4308 4308 This allows sending complex types and encoding special byte sequences via
4309 4309 backslash escaping.
4310 4310
4311 4311 The following arguments have special meaning:
4312 4312
4313 4313 ``PUSHFILE``
4314 4314 When defined, the *push* mechanism of the peer will be used instead
4315 4315 of the static request-response mechanism and the content of the
4316 4316 file specified in the value of this argument will be sent as the
4317 4317 command payload.
4318 4318
4319 4319 This can be used to submit a local bundle file to the remote.
4320 4320
4321 4321 batchbegin
4322 4322 ----------
4323 4323
4324 4324 Instruct the peer to begin a batched send.
4325 4325
4326 4326 All ``command`` blocks are queued for execution until the next
4327 4327 ``batchsubmit`` block.
4328 4328
4329 4329 batchsubmit
4330 4330 -----------
4331 4331
4332 4332 Submit previously queued ``command`` blocks as a batch request.
4333 4333
4334 4334 This action MUST be paired with a ``batchbegin`` action.
4335 4335
4336 4336 httprequest <method> <path>
4337 4337 ---------------------------
4338 4338
4339 4339 (HTTP peer only)
4340 4340
4341 4341 Send an HTTP request to the peer.
4342 4342
4343 4343 The HTTP request line follows the ``httprequest`` action. e.g. ``GET /foo``.
4344 4344
4345 4345 Arguments of the form ``<key>: <value>`` are interpreted as HTTP request
4346 4346 headers to add to the request. e.g. ``Accept: foo``.
4347 4347
4348 4348 The following arguments are special:
4349 4349
4350 4350 ``BODYFILE``
4351 4351 The content of the file defined as the value to this argument will be
4352 4352 transferred verbatim as the HTTP request body.
4353 4353
4354 4354 ``frame <type> <flags> <payload>``
4355 4355 Send a unified protocol frame as part of the request body.
4356 4356
4357 4357 All frames will be collected and sent as the body to the HTTP
4358 4358 request.
4359 4359
4360 4360 close
4361 4361 -----
4362 4362
4363 4363 Close the connection to the server.
4364 4364
4365 4365 flush
4366 4366 -----
4367 4367
4368 4368 Flush data written to the server.
4369 4369
4370 4370 readavailable
4371 4371 -------------
4372 4372
4373 4373 Close the write end of the connection and read all available data from
4374 4374 the server.
4375 4375
4376 4376 If the connection to the server encompasses multiple pipes, we poll both
4377 4377 pipes and read available data.
4378 4378
4379 4379 readline
4380 4380 --------
4381 4381
4382 4382 Read a line of output from the server. If there are multiple output
4383 4383 pipes, reads only the main pipe.
4384 4384
4385 4385 ereadline
4386 4386 ---------
4387 4387
4388 4388 Like ``readline``, but read from the stderr pipe, if available.
4389 4389
4390 4390 read <X>
4391 4391 --------
4392 4392
4393 4393 ``read()`` ``<X>`` bytes from the server's main output pipe.
4394 4394
4395 4395 eread <X>
4396 4396 ---------
4397 4397
4398 4398 ``read()`` ``<X>`` bytes from the server's stderr pipe, if available.
4399 4399
4400 4400 Specifying Unified Frame-Based Protocol Frames
4401 4401 ----------------------------------------------
4402 4402
4403 4403 It is possible to emit *Unified Frame-Based Protocol* frames by using
4404 4404 special syntax.
4405 4405
4406 4406 A frame is composed of a type, flags, and a payload. These can be parsed
4407 4407 from a string of the form:
4408 4408
4409 4409 <request-id> <stream-id> <stream-flags> <type> <flags> <payload>
4410 4410
4411 4411 ``request-id`` and ``stream-id`` are integers defining the request and
4412 4412 stream identifiers.
4413 4413
4414 4414 ``type`` can be an integer value for the frame type or the string name
4415 4415 of the type. The strings are defined in ``wireprotoframing.py``. e.g.
4416 4416 ``command-name``.
4417 4417
4418 4418 ``stream-flags`` and ``flags`` are ``|``-delimited lists of flag
4419 4419 components. Each component (and there can be just one) can be an integer
4420 4420 or a flag name for stream flags or frame flags, respectively. Values are
4421 4421 resolved to integers and then bitwise OR'd together.
4422 4422
4423 4423 ``payload`` represents the raw frame payload. If it begins with
4424 4424 ``cbor:``, the following string is evaluated as Python code and the
4425 4425 resulting object is fed into a CBOR encoder. Otherwise it is interpreted
4426 4426 as a Python byte string literal.
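
For example, the following describes a ``command-request`` frame on
stream 1 for request 1, carrying a CBOR payload that names the ``heads``
command (an illustrative sketch)::

  1 1 stream-begin command-request new cbor:{b'name': b'heads'}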
4427 4427 """
4428 4428 opts = pycompat.byteskwargs(opts)
4429 4429
4430 4430 if opts[b'localssh'] and not repo:
4431 4431 raise error.Abort(_(b'--localssh requires a repository'))
4432 4432
4433 4433 if opts[b'peer'] and opts[b'peer'] not in (
4434 4434 b'raw',
4435 4435 b'http2',
4436 4436 b'ssh1',
4437 4437 b'ssh2',
4438 4438 ):
4439 4439 raise error.Abort(
4440 4440 _(b'invalid value for --peer'),
4441 4441 hint=_(b'valid values are "raw", "ssh1", and "ssh2"'),
4442 4442 )
4443 4443
4444 4444 if path and opts[b'localssh']:
4445 4445 raise error.Abort(_(b'cannot specify --localssh with an explicit path'))
4446 4446
4447 4447 if ui.interactive():
4448 4448 ui.write(_(b'(waiting for commands on stdin)\n'))
4449 4449
4450 4450 blocks = list(_parsewirelangblocks(ui.fin))
4451 4451
4452 4452 proc = None
4453 4453 stdin = None
4454 4454 stdout = None
4455 4455 stderr = None
4456 4456 opener = None
4457 4457
4458 4458 if opts[b'localssh']:
4459 4459 # We start the SSH server in its own process so there is process
4460 4460 # separation. This prevents a whole class of potential bugs around
4461 4461 # shared state from interfering with server operation.
4462 4462 args = procutil.hgcmd() + [
4463 4463 b'-R',
4464 4464 repo.root,
4465 4465 b'debugserve',
4466 4466 b'--sshstdio',
4467 4467 ]
4468 4468 proc = subprocess.Popen(
4469 4469 pycompat.rapply(procutil.tonativestr, args),
4470 4470 stdin=subprocess.PIPE,
4471 4471 stdout=subprocess.PIPE,
4472 4472 stderr=subprocess.PIPE,
4473 4473 bufsize=0,
4474 4474 )
4475 4475
4476 4476 stdin = proc.stdin
4477 4477 stdout = proc.stdout
4478 4478 stderr = proc.stderr
4479 4479
4480 4480 # We turn the pipes into observers so we can log I/O.
4481 4481 if ui.verbose or opts[b'peer'] == b'raw':
4482 4482 stdin = util.makeloggingfileobject(
4483 4483 ui, proc.stdin, b'i', logdata=True
4484 4484 )
4485 4485 stdout = util.makeloggingfileobject(
4486 4486 ui, proc.stdout, b'o', logdata=True
4487 4487 )
4488 4488 stderr = util.makeloggingfileobject(
4489 4489 ui, proc.stderr, b'e', logdata=True
4490 4490 )
4491 4491
4492 4492 # --localssh also implies the peer connection settings.
4493 4493
4494 4494 url = b'ssh://localserver'
4495 4495 autoreadstderr = not opts[b'noreadstderr']
4496 4496
4497 4497 if opts[b'peer'] == b'ssh1':
4498 4498 ui.write(_(b'creating ssh peer for wire protocol version 1\n'))
4499 4499 peer = sshpeer.sshv1peer(
4500 4500 ui,
4501 4501 url,
4502 4502 proc,
4503 4503 stdin,
4504 4504 stdout,
4505 4505 stderr,
4506 4506 None,
4507 4507 autoreadstderr=autoreadstderr,
4508 4508 )
4509 4509 elif opts[b'peer'] == b'ssh2':
4510 4510 ui.write(_(b'creating ssh peer for wire protocol version 2\n'))
4511 4511 peer = sshpeer.sshv2peer(
4512 4512 ui,
4513 4513 url,
4514 4514 proc,
4515 4515 stdin,
4516 4516 stdout,
4517 4517 stderr,
4518 4518 None,
4519 4519 autoreadstderr=autoreadstderr,
4520 4520 )
4521 4521 elif opts[b'peer'] == b'raw':
4522 4522 ui.write(_(b'using raw connection to peer\n'))
4523 4523 peer = None
4524 4524 else:
4525 4525 ui.write(_(b'creating ssh peer from handshake results\n'))
4526 4526 peer = sshpeer.makepeer(
4527 4527 ui,
4528 4528 url,
4529 4529 proc,
4530 4530 stdin,
4531 4531 stdout,
4532 4532 stderr,
4533 4533 autoreadstderr=autoreadstderr,
4534 4534 )
4535 4535
4536 4536 elif path:
4537 4537 # We bypass hg.peer() so we can proxy the sockets.
4538 4538 # TODO consider not doing this because we skip
4539 4539 # ``hg.wirepeersetupfuncs`` and potentially other useful functionality.
4540 4540 u = urlutil.url(path)
4541 4541 if u.scheme != b'http':
4542 4542 raise error.Abort(_(b'only http:// paths are currently supported'))
4543 4543
4544 4544 url, authinfo = u.authinfo()
4545 4545 openerargs = {
4546 4546 'useragent': b'Mercurial debugwireproto',
4547 4547 }
4548 4548
4549 4549 # Turn pipes/sockets into observers so we can log I/O.
4550 4550 if ui.verbose:
4551 4551 openerargs.update(
4552 4552 {
4553 4553 'loggingfh': ui,
4554 4554 'loggingname': b's',
4555 4555 'loggingopts': {
4556 4556 'logdata': True,
4557 4557 'logdataapis': False,
4558 4558 },
4559 4559 }
4560 4560 )
4561 4561
4562 4562 if ui.debugflag:
4563 4563 openerargs['loggingopts']['logdataapis'] = True
4564 4564
4565 4565 # Don't send default headers when in raw mode. This allows us to
4566 4566 # bypass most of the behavior of our URL handling code so we can
4567 4567 # have near complete control over what's sent on the wire.
4568 4568 if opts[b'peer'] == b'raw':
4569 4569 openerargs['sendaccept'] = False
4570 4570
4571 4571 opener = urlmod.opener(ui, authinfo, **openerargs)
4572 4572
4573 4573 if opts[b'peer'] == b'http2':
4574 4574 ui.write(_(b'creating http peer for wire protocol version 2\n'))
4575 4575 # We go through makepeer() because we need an API descriptor for
4576 4576 # the peer instance to be useful.
4577 4577 with ui.configoverride(
4578 4578 {(b'experimental', b'httppeer.advertise-v2'): True}
4579 4579 ):
4580 4580 if opts[b'nologhandshake']:
4581 4581 ui.pushbuffer()
4582 4582
4583 4583 peer = httppeer.makepeer(ui, path, opener=opener)
4584 4584
4585 4585 if opts[b'nologhandshake']:
4586 4586 ui.popbuffer()
4587 4587
4588 4588 if not isinstance(peer, httppeer.httpv2peer):
4589 4589 raise error.Abort(
4590 4590 _(
4591 4591 b'could not instantiate HTTP peer for '
4592 4592 b'wire protocol version 2'
4593 4593 ),
4594 4594 hint=_(
4595 4595 b'the server may not have the feature '
4596 4596 b'enabled or is not allowing this '
4597 4597 b'client version'
4598 4598 ),
4599 4599 )
4600 4600
4601 4601 elif opts[b'peer'] == b'raw':
4602 4602 ui.write(_(b'using raw connection to peer\n'))
4603 4603 peer = None
4604 4604 elif opts[b'peer']:
4605 4605 raise error.Abort(
4606 4606 _(b'--peer %s not supported with HTTP peers') % opts[b'peer']
4607 4607 )
4608 4608 else:
4609 4609 peer = httppeer.makepeer(ui, path, opener=opener)
4610 4610
4611 4611 # We /could/ populate stdin/stdout with sock.makefile()...
4612 4612 else:
4613 4613 raise error.Abort(_(b'unsupported connection configuration'))
4614 4614
4615 4615 batchedcommands = None
4616 4616
4617 4617 # Now perform actions based on the parsed wire language instructions.
4618 4618 for action, lines in blocks:
4619 4619 if action in (b'raw', b'raw+'):
4620 4620 if not stdin:
4621 4621 raise error.Abort(_(b'cannot call raw/raw+ on this peer'))
4622 4622
4623 4623 # Concatenate the data together.
4624 4624 data = b''.join(l.lstrip() for l in lines)
4625 4625 data = stringutil.unescapestr(data)
4626 4626 stdin.write(data)
4627 4627
4628 4628 if action == b'raw+':
4629 4629 stdin.flush()
4630 4630 elif action == b'flush':
4631 4631 if not stdin:
4632 4632 raise error.Abort(_(b'cannot call flush on this peer'))
4633 4633 stdin.flush()
4634 4634 elif action.startswith(b'command'):
4635 4635 if not peer:
4636 4636 raise error.Abort(
4637 4637 _(
4638 4638 b'cannot send commands unless peer instance '
4639 4639 b'is available'
4640 4640 )
4641 4641 )
4642 4642
4643 4643 command = action.split(b' ', 1)[1]
4644 4644
4645 4645 args = {}
4646 4646 for line in lines:
4647 4647 # We need to allow empty values.
4648 4648 fields = line.lstrip().split(b' ', 1)
4649 4649 if len(fields) == 1:
4650 4650 key = fields[0]
4651 4651 value = b''
4652 4652 else:
4653 4653 key, value = fields
4654 4654
4655 4655 if value.startswith(b'eval:'):
4656 4656 value = stringutil.evalpythonliteral(value[5:])
4657 4657 else:
4658 4658 value = stringutil.unescapestr(value)
4659 4659
4660 4660 args[key] = value
4661 4661
4662 4662 if batchedcommands is not None:
4663 4663 batchedcommands.append((command, args))
4664 4664 continue
4665 4665
4666 4666 ui.status(_(b'sending %s command\n') % command)
4667 4667
4668 4668 if b'PUSHFILE' in args:
4669 4669 with open(args[b'PUSHFILE'], 'rb') as fh:
4670 4670 del args[b'PUSHFILE']
4671 4671 res, output = peer._callpush(
4672 4672 command, fh, **pycompat.strkwargs(args)
4673 4673 )
4674 4674 ui.status(_(b'result: %s\n') % stringutil.escapestr(res))
4675 4675 ui.status(
4676 4676 _(b'remote output: %s\n') % stringutil.escapestr(output)
4677 4677 )
4678 4678 else:
4679 4679 with peer.commandexecutor() as e:
4680 4680 res = e.callcommand(command, args).result()
4681 4681
4682 4682 if isinstance(res, wireprotov2peer.commandresponse):
4683 4683 val = res.objects()
4684 4684 ui.status(
4685 4685 _(b'response: %s\n')
4686 4686 % stringutil.pprint(val, bprefix=True, indent=2)
4687 4687 )
4688 4688 else:
4689 4689 ui.status(
4690 4690 _(b'response: %s\n')
4691 4691 % stringutil.pprint(res, bprefix=True, indent=2)
4692 4692 )
4693 4693
4694 4694 elif action == b'batchbegin':
4695 4695 if batchedcommands is not None:
4696 4696 raise error.Abort(_(b'nested batchbegin not allowed'))
4697 4697
4698 4698 batchedcommands = []
4699 4699 elif action == b'batchsubmit':
4700 4700 # There is a batching API we could go through. But it would be
4701 4701 # difficult to normalize requests into function calls. It is easier
4702 4702 # to bypass this layer and normalize to commands + args.
4703 4703 ui.status(
4704 4704 _(b'sending batch with %d sub-commands\n')
4705 4705 % len(batchedcommands)
4706 4706 )
4707 4707 assert peer is not None
4708 4708 for i, chunk in enumerate(peer._submitbatch(batchedcommands)):
4709 4709 ui.status(
4710 4710 _(b'response #%d: %s\n') % (i, stringutil.escapestr(chunk))
4711 4711 )
4712 4712
4713 4713 batchedcommands = None
4714 4714
4715 4715 elif action.startswith(b'httprequest '):
4716 4716 if not opener:
4717 4717 raise error.Abort(
4718 4718 _(b'cannot use httprequest without an HTTP peer')
4719 4719 )
4720 4720
4721 4721 request = action.split(b' ', 2)
4722 4722 if len(request) != 3:
4723 4723 raise error.Abort(
4724 4724 _(
4725 4725 b'invalid httprequest: expected format is '
4726 4726 b'"httprequest <method> <path>"'
4727 4727 )
4728 4728 )
4729 4729
4730 4730 method, httppath = request[1:]
4731 4731 headers = {}
4732 4732 body = None
4733 4733 frames = []
4734 4734 for line in lines:
4735 4735 line = line.lstrip()
4736 4736 m = re.match(b'^([a-zA-Z0-9_-]+): (.*)$', line)
4737 4737 if m:
4738 4738 # Headers need to use native strings.
4739 4739 key = pycompat.strurl(m.group(1))
4740 4740 value = pycompat.strurl(m.group(2))
4741 4741 headers[key] = value
4742 4742 continue
4743 4743
4744 4744 if line.startswith(b'BODYFILE '):
4745 4745 with open(line.split(b' ', 1)[1], 'rb') as fh:
4746 4746 body = fh.read()
4747 4747 elif line.startswith(b'frame '):
4748 4748 frame = wireprotoframing.makeframefromhumanstring(
4749 4749 line[len(b'frame ') :]
4750 4750 )
4751 4751
4752 4752 frames.append(frame)
4753 4753 else:
4754 4754 raise error.Abort(
4755 4755 _(b'unknown argument to httprequest: %s') % line
4756 4756 )
4757 4757
4758 4758 url = path + httppath
4759 4759
4760 4760 if frames:
4761 4761 body = b''.join(bytes(f) for f in frames)
4762 4762
4763 4763 req = urlmod.urlreq.request(pycompat.strurl(url), body, headers)
4764 4764
4765 4765 # urllib.Request insists on using has_data() as a proxy for
4766 4766 # determining the request method. Override that to use our
4767 4767 # explicitly requested method.
4768 4768 req.get_method = lambda: pycompat.sysstr(method)
4769 4769
4770 4770 try:
4771 4771 res = opener.open(req)
4772 4772 body = res.read()
4773 4773 except util.urlerr.urlerror as e:
4774 4774 # read() method must be called, but only exists in Python 2
4775 4775 getattr(e, 'read', lambda: None)()
4776 4776 continue
4777 4777
4778 4778 ct = res.headers.get('Content-Type')
4779 4779 if ct == 'application/mercurial-cbor':
4780 4780 ui.write(
4781 4781 _(b'cbor> %s\n')
4782 4782 % stringutil.pprint(
4783 4783 cborutil.decodeall(body), bprefix=True, indent=2
4784 4784 )
4785 4785 )
4786 4786
4787 4787 elif action == b'close':
4788 4788 assert peer is not None
4789 4789 peer.close()
4790 4790 elif action == b'readavailable':
4791 4791 if not stdout or not stderr:
4792 4792 raise error.Abort(
4793 4793 _(b'readavailable not available on this peer')
4794 4794 )
4795 4795
4796 4796 stdin.close()
4797 4797 stdout.read()
4798 4798 stderr.read()
4799 4799
4800 4800 elif action == b'readline':
4801 4801 if not stdout:
4802 4802 raise error.Abort(_(b'readline not available on this peer'))
4803 4803 stdout.readline()
4804 4804 elif action == b'ereadline':
4805 4805 if not stderr:
4806 4806 raise error.Abort(_(b'ereadline not available on this peer'))
4807 4807 stderr.readline()
4808 4808 elif action.startswith(b'read '):
4809 4809 count = int(action.split(b' ', 1)[1])
4810 4810 if not stdout:
4811 4811 raise error.Abort(_(b'read not available on this peer'))
4812 4812 stdout.read(count)
4813 4813 elif action.startswith(b'eread '):
4814 4814 count = int(action.split(b' ', 1)[1])
4815 4815 if not stderr:
4816 4816 raise error.Abort(_(b'eread not available on this peer'))
4817 4817 stderr.read(count)
4818 4818 else:
4819 4819 raise error.Abort(_(b'unknown action: %s') % action)
4820 4820
4821 4821 if batchedcommands is not None:
4822 4822 raise error.Abort(_(b'unclosed "batchbegin" request'))
4823 4823
4824 4824 if peer:
4825 4825 peer.close()
4826 4826
4827 4827 if proc:
4828 4828 proc.kill()
@@ -1,2030 +1,2023
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_HASCOPIESINFO = 1 << 12
31 31
32 32 REVISION_FLAGS_KNOWN = (
33 33 REVISION_FLAG_CENSORED
34 34 | REVISION_FLAG_ELLIPSIS
35 35 | REVISION_FLAG_EXTSTORED
36 36 | REVISION_FLAG_HASCOPIESINFO
37 37 )
38 38
39 39 CG_DELTAMODE_STD = b'default'
40 40 CG_DELTAMODE_PREV = b'previous'
41 41 CG_DELTAMODE_FULL = b'fulltext'
42 42 CG_DELTAMODE_P1 = b'p1'
43 43
44 44
45 45 class ipeerconnection(interfaceutil.Interface):
46 46 """Represents a "connection" to a repository.
47 47
48 48 This is the base interface for representing a connection to a repository.
49 49 It holds basic properties and methods applicable to all peer types.
50 50
51 51 This is not a complete interface definition and should not be used
52 52 outside of this module.
53 53 """
54 54
55 55 ui = interfaceutil.Attribute("""ui.ui instance""")
56 56
57 57 def url():
58 58 """Returns a URL string representing this peer.
59 59
60 60 Currently, implementations expose the raw URL used to construct the
61 61 instance. It may contain credentials as part of the URL. The
62 62 expectations of the value aren't well-defined and this could lead to
63 63 data leakage.
64 64
65 65 TODO audit/clean consumers and more clearly define the contents of this
66 66 value.
67 67 """
68 68
69 69 def local():
70 70 """Returns a local repository instance.
71 71
72 72 If the peer represents a local repository, returns an object that
73 73 can be used to interface with it. Otherwise returns ``None``.
74 74 """
75 75
76 76 def peer():
77 77 """Returns an object conforming to this interface.
78 78
79 79 Most implementations will ``return self``.
80 80 """
81 81
82 82 def canpush():
83 83 """Returns a boolean indicating if this peer can be pushed to."""
84 84
85 85 def close():
86 86 """Close the connection to this peer.
87 87
88 88 This is called when the peer will no longer be used. Resources
89 89 associated with the peer should be cleaned up.
90 90 """
91 91
92 92
93 93 class ipeercapabilities(interfaceutil.Interface):
94 94 """Peer sub-interface related to capabilities."""
95 95
96 96 def capable(name):
97 97 """Determine support for a named capability.
98 98
99 99 Returns ``False`` if capability not supported.
100 100
101 101 Returns ``True`` if boolean capability is supported. Returns a string
102 102 if capability support is non-boolean.
103 103
104 104 Capability strings may or may not map to wire protocol capabilities.
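
For example (a sketch; the capability values shown are illustrative)::

  peer.capable(b'branchmap')  # -> True, a boolean capability
  peer.capable(b'unbundle')   # -> e.g. b'HG10GZ,HG10BZ,HG10UN'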
105 105 """
106 106
107 107 def requirecap(name, purpose):
108 108 """Require a capability to be present.
109 109
110 110 Raises a ``CapabilityError`` if the capability isn't present.
111 111 """
112 112
113 113
114 114 class ipeercommands(interfaceutil.Interface):
115 115 """Client-side interface for communicating over the wire protocol.
116 116
117 117 This interface is used as a gateway to the Mercurial wire protocol.
118 118 Methods commonly call wire protocol commands of the same name.
119 119 """
120 120
121 121 def branchmap():
122 122 """Obtain heads in named branches.
123 123
124 124 Returns a dict mapping branch name to an iterable of nodes that are
125 125 heads on that branch.
126 126 """
127 127
128 128 def capabilities():
129 129 """Obtain capabilities of the peer.
130 130
131 131 Returns a set of string capabilities.
132 132 """
133 133
134 134 def clonebundles():
135 135 """Obtains the clone bundles manifest for the repo.
136 136
137 137 Returns the manifest as unparsed bytes.
138 138 """
139 139
140 140 def debugwireargs(one, two, three=None, four=None, five=None):
141 141 """Used to facilitate debugging of arguments passed over the wire."""
142 142
143 143 def getbundle(source, **kwargs):
144 144 """Obtain remote repository data as a bundle.
145 145
146 146 This command is how the bulk of repository data is transferred from
147 147 the peer to the local repository.
148 148
149 149 Returns a generator of bundle data.
150 150 """
151 151
152 152 def heads():
153 153 """Determine all known head revisions in the peer.
154 154
155 155 Returns an iterable of binary nodes.
156 156 """
157 157
158 158 def known(nodes):
159 159 """Determine whether multiple nodes are known.
160 160
161 161 Accepts an iterable of nodes whose presence to check for.
162 162
163 163 Returns an iterable of booleans indicating whether the corresponding
164 164 node at that index is known to the peer.
165 165 """
166 166
167 167 def listkeys(namespace):
168 168 """Obtain all keys in a pushkey namespace.
169 169
170 170 Returns an iterable of key names.
171 171 """
172 172
173 173 def lookup(key):
174 174 """Resolve a value to a known revision.
175 175
176 176 Returns a binary node of the resolved revision on success.
177 177 """
178 178
179 179 def pushkey(namespace, key, old, new):
180 180 """Set a value using the ``pushkey`` protocol.
181 181
182 182 Arguments correspond to the pushkey namespace and key to operate on and
183 183 the old and new values for that key.
184 184
185 185 Returns a string with the peer result. The value inside varies by the
186 186 namespace.
187 187 """
188 188
189 189 def stream_out():
190 190 """Obtain streaming clone data.
191 191
192 192 Successful result should be a generator of data chunks.
193 193 """
194 194
195 195 def unbundle(bundle, heads, url):
196 196 """Transfer repository data to the peer.
197 197
198 198 This is how the bulk of data during a push is transferred.
199 199
200 200 Returns the integer number of heads added to the peer.
201 201 """
202 202
203 203
204 204 class ipeerlegacycommands(interfaceutil.Interface):
205 205 """Interface for implementing support for legacy wire protocol commands.
206 206
207 207 Wire protocol commands transition to legacy status when they are no longer
208 208 used by modern clients. To facilitate identifying which commands are
209 209 legacy, the interfaces are split.
210 210 """
211 211
212 212 def between(pairs):
213 213 """Obtain nodes between pairs of nodes.
214 214
215 215 ``pairs`` is an iterable of node pairs.
216 216
217 217 Returns an iterable of iterables of nodes corresponding to each
218 218 requested pair.
219 219 """
220 220
221 221 def branches(nodes):
222 222 """Obtain ancestor changesets of specific nodes back to a branch point.
223 223
224 224 For each requested node, the peer finds the first ancestor node that is
225 225 a DAG root or is a merge.
226 226
227 227 Returns an iterable of iterables with the resolved values for each node.
228 228 """
229 229
230 230 def changegroup(nodes, source):
231 231 """Obtain a changegroup with data for descendants of specified nodes."""
232 232
233 233 def changegroupsubset(bases, heads, source):
234 234 """Obtain a changegroup with data for changesets between ``bases`` and ``heads``."""
235 235
236 236
237 237 class ipeercommandexecutor(interfaceutil.Interface):
238 238 """Represents a mechanism to execute remote commands.
239 239
240 240 This is the primary interface for requesting that wire protocol commands
241 241 be executed. Instances of this interface are active in a context manager
242 242 and have a well-defined lifetime. When the context manager exits, all
243 243 outstanding requests are waited on.
244 244 """
245 245
246 246 def callcommand(name, args):
247 247 """Request that a named command be executed.
248 248
249 249 Receives the command name and a dictionary of command arguments.
250 250
251 251 Returns a ``concurrent.futures.Future`` that will resolve to the
252 252 result of that command request. That exact value is left up to
253 253 the implementation and possibly varies by command.
254 254
255 255 Not all commands can coexist with other commands in an executor
256 256 instance: it depends on the underlying wire protocol transport being
257 257 used and the command itself.
258 258
259 259 Implementations MAY call ``sendcommands()`` automatically if the
260 260 requested command can not coexist with other commands in this executor.
261 261
262 262 Implementations MAY call ``sendcommands()`` automatically when the
263 263 future's ``result()`` is called. So, consumers using multiple
264 264 commands with an executor MUST ensure that ``result()`` is not called
265 265 until all command requests have been issued.
266 266 """
267 267
268 268 def sendcommands():
269 269 """Trigger submission of queued command requests.
270 270
271 271 Not all transports submit commands as soon as they are requested to
272 272 run. When called, this method forces queued command requests to be
273 273 issued. It will no-op if all commands have already been sent.
274 274
275 275 When called, no more new commands may be issued with this executor.
276 276 """
277 277
278 278 def close():
279 279 """Signal that this command request is finished.
280 280
281 281 When called, no more new commands may be issued. All outstanding
282 282 commands that have previously been issued are waited on before
283 283 returning. This not only includes waiting for the futures to resolve,
284 284 but also waiting for all response data to arrive. In other words,
285 285 calling this waits for all on-wire state for issued command requests
286 286 to finish.
287 287
288 288 When used as a context manager, this method is called when exiting the
289 289 context manager.
290 290
291 291 This method may call ``sendcommands()`` if there are buffered commands.
292 292 """
293 293
294 294
295 295 class ipeerrequests(interfaceutil.Interface):
296 296 """Interface for executing commands on a peer."""
297 297
298 298 limitedarguments = interfaceutil.Attribute(
299 299 """True if the peer cannot receive large argument value for commands."""
300 300 )
301 301
302 302 def commandexecutor():
303 303 """A context manager that resolves to an ipeercommandexecutor.
304 304
305 305 The object this resolves to can be used to issue command requests
306 306 to the peer.
307 307
308 308 Callers should call its ``callcommand`` method to issue command
309 309 requests.
310 310
311 311 A new executor should be obtained for each distinct set of commands
312 312 (possibly just a single command) that the consumer wants to execute
313 313 as part of a single operation or round trip. This is because some
314 314 peers are half-duplex and/or don't support persistent connections.
315 315 e.g. in the case of HTTP peers, commands sent to an executor represent
316 316 a single HTTP request. While some peers may support multiple command
317 317 sends over the wire per executor, consumers need to code to the least
318 318 capable peer. So it should be assumed that command executors buffer
319 319 called commands until they are told to send them and that each
320 320 command executor could result in a new connection or wire-level request
321 321 being issued.
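
A minimal usage sketch, assuming ``peer`` is an object conforming to
this interface::

  with peer.commandexecutor() as e:
      bookmarks = e.callcommand(
          b'listkeys', {b'namespace': b'bookmarks'}
      ).result()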
322 322 """
323 323
324 324
325 325 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
326 326 """Unified interface for peer repositories.
327 327
328 328 All peer instances must conform to this interface.
329 329 """
330 330
331 331
332 332 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
333 333 """Unified peer interface for wire protocol version 2 peers."""
334 334
335 335 apidescriptor = interfaceutil.Attribute(
336 336 """Data structure holding description of server API."""
337 337 )
338 338
339 339
340 340 @interfaceutil.implementer(ipeerbase)
341 341 class peer(object):
342 342 """Base class for peer repositories."""
343 343
344 344 limitedarguments = False
345 345
346 346 def capable(self, name):
347 347 caps = self.capabilities()
348 348 if name in caps:
349 349 return True
350 350
351 351 name = b'%s=' % name
352 352 for cap in caps:
353 353 if cap.startswith(name):
354 354 return cap[len(name) :]
355 355
356 356 return False
357 357
358 358 def requirecap(self, name, purpose):
359 359 if self.capable(name):
360 360 return
361 361
362 362 raise error.CapabilityError(
363 363 _(
364 364 b'cannot %s; remote repository does not support the '
365 365 b'\'%s\' capability'
366 366 )
367 367 % (purpose, name)
368 368 )
369 369
370 370
371 371 class iverifyproblem(interfaceutil.Interface):
372 372 """Represents a problem with the integrity of the repository.
373 373
374 374 Instances of this interface are emitted to describe an integrity issue
375 375 with a repository (e.g. corrupt storage, missing data, etc).
376 376
377 377 Instances are essentially messages associated with severity.
378 378 """
379 379
380 380 warning = interfaceutil.Attribute(
381 381 """Message indicating a non-fatal problem."""
382 382 )
383 383
384 384 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
385 385
386 386 node = interfaceutil.Attribute(
387 387 """Revision encountering the problem.
388 388
389 389 ``None`` means the problem doesn't apply to a single revision.
390 390 """
391 391 )
392 392
393 393
394 394 class irevisiondelta(interfaceutil.Interface):
395 395 """Represents a delta between one revision and another.
396 396
397 397 Instances convey enough information to allow a revision to be exchanged
398 398 with another repository.
399 399
400 400 Instances represent the fulltext revision data or a delta against
401 401 another revision. Therefore the ``revision`` and ``delta`` attributes
402 402 are mutually exclusive.
403 403
404 404 Typically used for changegroup generation.
405 405 """
406 406
407 407 node = interfaceutil.Attribute("""20 byte node of this revision.""")
408 408
409 409 p1node = interfaceutil.Attribute(
410 410 """20 byte node of 1st parent of this revision."""
411 411 )
412 412
413 413 p2node = interfaceutil.Attribute(
414 414 """20 byte node of 2nd parent of this revision."""
415 415 )
416 416
417 417 linknode = interfaceutil.Attribute(
418 418 """20 byte node of the changelog revision this node is linked to."""
419 419 )
420 420
421 421 flags = interfaceutil.Attribute(
422 422 """2 bytes of integer flags that apply to this revision.
423 423
424 424 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
425 425 """
426 426 )
427 427
428 428 basenode = interfaceutil.Attribute(
429 429 """20 byte node of the revision this data is a delta against.
430 430
431 431 ``nullid`` indicates that the revision is a full revision and not
432 432 a delta.
433 433 """
434 434 )
435 435
436 436 baserevisionsize = interfaceutil.Attribute(
437 437 """Size of base revision this delta is against.
438 438
439 439 May be ``None`` if ``basenode`` is ``nullid``.
440 440 """
441 441 )
442 442
443 443 revision = interfaceutil.Attribute(
444 444 """Raw fulltext of revision data for this node."""
445 445 )
446 446
447 447 delta = interfaceutil.Attribute(
448 448 """Delta between ``basenode`` and ``node``.
449 449
450 450 Stored in the bdiff delta format.
451 451 """
452 452 )
453 453
454 454 sidedata = interfaceutil.Attribute(
455 455 """Raw sidedata bytes for the given revision."""
456 456 )
457 457
458 458 protocol_flags = interfaceutil.Attribute(
459 459 """Single byte of integer flags that can influence the protocol.
460 460
461 461 This is a bitwise composition of the ``storageutil.CG_FLAG*`` constants.
462 462 """
463 463 )
464 464
465 465
466 466 class ifilerevisionssequence(interfaceutil.Interface):
467 467 """Contains index data for all revisions of a file.
468 468
469 469 Types implementing this behave like lists of tuples. The index
470 470 in the list corresponds to the revision number. The values contain
471 471 index metadata.
472 472
473 473 The *null* revision (revision number -1) is always the last item
474 474 in the index.
475 475 """
476 476
477 477 def __len__():
478 478 """The total number of revisions."""
479 479
480 480 def __getitem__(rev):
481 481 """Returns the object having a specific revision number.
482 482
483 483 Returns an 8-tuple with the following fields:
484 484
485 485 offset+flags
486 486 Contains the offset and flags for the revision. 64-bit unsigned
487 487 integer where the first 6 bytes are the offset and the next 2 bytes
488 488 are flags. The offset can be 0 if it is not used by the store.
489 489 compressed size
490 490 Size of the revision data in the store. It can be 0 if it isn't
491 491 needed by the store.
492 492 uncompressed size
493 493 Fulltext size. It can be 0 if it isn't needed by the store.
494 494 base revision
495 495 Revision number of revision the delta for storage is encoded
496 496 against. -1 indicates not encoded against a base revision.
497 497 link revision
498 498 Revision number of changelog revision this entry is related to.
499 499 p1 revision
500 500 Revision number of 1st parent. -1 if no 1st parent.
501 501 p2 revision
502 502 Revision number of 2nd parent. -1 if no 2nd parent.
503 503 node
504 504 Binary node value for this revision number.
505 505
506 506 Negative values should index off the end of the sequence. ``-1``
507 507 should return the null revision. ``-2`` should return the most
508 508 recent revision.
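
A sketch of unpacking one entry (the names here are descriptive only)::

  (offset_flags, comp_size, uncomp_size, base_rev,
   link_rev, p1_rev, p2_rev, node) = index[rev]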
509 509 """
510 510
511 511 def __contains__(rev):
512 512 """Whether a revision number exists."""
513 513
514 514 def insert(self, i, entry):
515 515 """Add an item to the index at specific revision."""
516 516
517 517
518 518 class ifileindex(interfaceutil.Interface):
519 519 """Storage interface for index data of a single file.
520 520
521 521 File storage data is divided into index metadata and data storage.
522 522 This interface defines the index portion of the interface.
523 523
524 524 The index logically consists of:
525 525
526 526 * A mapping between revision numbers and nodes.
527 527 * DAG data (storing and querying the relationship between nodes).
528 528 * Metadata to facilitate storage.
529 529 """
530 530
531 531 nullid = interfaceutil.Attribute(
532 532 """node for the null revision for use as delta base."""
533 533 )
534 534
535 535 def __len__():
536 536 """Obtain the number of revisions stored for this file."""
537 537
538 538 def __iter__():
539 539 """Iterate over revision numbers for this file."""
540 540
541 541 def hasnode(node):
542 542 """Returns a bool indicating if a node is known to this store.
543 543
544 544 Implementations must only return True for full, binary node values:
545 545 hex nodes, revision numbers, and partial node matches must be
546 546 rejected.
547 547
548 548 The null node is never present.
549 549 """
550 550
551 551 def revs(start=0, stop=None):
552 552 """Iterate over revision numbers for this file, with control."""
553 553
554 554 def parents(node):
555 555 """Returns a 2-tuple of parent nodes for a revision.
556 556
557 557 Values will be ``nullid`` if the parent is empty.
558 558 """
559 559
560 560 def parentrevs(rev):
561 561 """Like parents() but operates on revision numbers."""
562 562
563 563 def rev(node):
564 564 """Obtain the revision number given a node.
565 565
566 566 Raises ``error.LookupError`` if the node is not known.
567 567 """
568 568
569 569 def node(rev):
570 570 """Obtain the node value given a revision number.
571 571
572 572 Raises ``IndexError`` if the revision is not known.
573 573 """
574 574
575 575 def lookup(node):
576 576 """Attempt to resolve a value to a node.
577 577
578 578 Value can be a binary node, hex node, revision number, or a string
579 579 that can be converted to an integer.
580 580
581 581 Raises ``error.LookupError`` if a node could not be resolved.
582 582 """
583 583
584 584 def linkrev(rev):
585 585 """Obtain the changeset revision number a revision is linked to."""
586 586
587 587 def iscensored(rev):
588 588 """Return whether a revision's content has been censored."""
589 589
590 590 def commonancestorsheads(node1, node2):
591 591 """Obtain an iterable of nodes containing heads of common ancestors.
592 592
593 593 See ``ancestor.commonancestorsheads()``.
594 594 """
595 595
596 596 def descendants(revs):
597 597 """Obtain descendant revision numbers for a set of revision numbers.
598 598
599 599 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
600 600 """
601 601
602 602 def heads(start=None, stop=None):
603 603 """Obtain a list of nodes that are DAG heads, with control.
604 604
605 605 The set of revisions examined can be limited by specifying
606 606 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
607 607 iterable of nodes. DAG traversal starts at earlier revision
608 608 ``start`` and iterates forward until any node in ``stop`` is
609 609 encountered.
610 610 """
611 611
612 612 def children(node):
613 613 """Obtain nodes that are children of a node.
614 614
615 615 Returns a list of nodes.
616 616 """
617 617
618 618
619 619 class ifiledata(interfaceutil.Interface):
620 620 """Storage interface for data storage of a specific file.
621 621
622 622 This complements ``ifileindex`` and provides an interface for accessing
623 623 data for a tracked file.
624 624 """
625 625
626 626 def size(rev):
627 627 """Obtain the fulltext size of file data.
628 628
629 629 Any metadata is excluded from size measurements.
630 630 """
631 631
632 632 def revision(node, raw=False):
633 633 """Obtain fulltext data for a node.
634 634
635 635 By default, any storage transformations are applied before the data
636 636 is returned. If ``raw`` is True, non-raw storage transformations
637 637 are not applied.
638 638
639 639 The fulltext data may contain a header containing metadata. Most
640 640 consumers should use ``read()`` to obtain the actual file data.
641 641 """
642 642
643 643 def rawdata(node):
644 644 """Obtain raw data for a node."""
645 645
646 646 def read(node):
647 647 """Resolve file fulltext data.
648 648
649 649 This is similar to ``revision()`` except any metadata in the data
650 650 headers is stripped.
651 651 """
652 652
653 653 def renamed(node):
654 654 """Obtain copy metadata for a node.
655 655
656 656 Returns ``False`` if no copy metadata is stored or a 2-tuple of
657 657 (path, node) from which this revision was copied.
658 658 """
659 659
660 660 def cmp(node, fulltext):
661 661 """Compare fulltext to another revision.
662 662
663 663 Returns True if the fulltext is different from what is stored.
664 664
665 665 This takes copy metadata into account.
666 666
667 667 TODO better document the copy metadata and censoring logic.
668 668 """
669 669
670 670 def emitrevisions(
671 671 nodes,
672 672 nodesorder=None,
673 673 revisiondata=False,
674 674 assumehaveparentrevisions=False,
675 675 deltamode=CG_DELTAMODE_STD,
676 676 ):
677 677 """Produce ``irevisiondelta`` for revisions.
678 678
679 679 Given an iterable of nodes, emits objects conforming to the
680 680 ``irevisiondelta`` interface that describe revisions in storage.
681 681
682 682 This method is a generator.
683 683
684 684 The input nodes may be unordered. Implementations must ensure that a
685 685 node's parents are emitted before the node itself. Transitively, this
686 686 means that a node may only be emitted once all its ancestors in
687 687 ``nodes`` have also been emitted.
688 688
689 689 By default, emits "index" data (the ``node``, ``p1node``, and
690 690 ``p2node`` attributes). If ``revisiondata`` is set, revision data
691 691 will also be present on the emitted objects.
692 692
693 693 With default argument values, implementations can choose to emit
694 694 either fulltext revision data or a delta. When emitting deltas,
695 695 implementations must consider whether the delta's base revision
696 696 fulltext is available to the receiver.
697 697
698 698 The base revision fulltext is guaranteed to be available if any of
699 699 the following are met:
700 700
701 701 * Its fulltext revision was emitted by this method call.
702 702 * A delta for that revision was emitted by this method call.
703 703 * ``assumehaveparentrevisions`` is True and the base revision is a
704 704 parent of the node.
705 705
706 706 ``nodesorder`` can be used to control the order that revisions are
707 707 emitted. By default, revisions can be reordered as long as they are
708 708 in DAG topological order (see above). If the value is ``nodes``,
709 709 the iteration order from ``nodes`` should be used. If the value is
710 710 ``storage``, then the native order from the backing storage layer
711 711 is used. (Not all storage layers will have strong ordering and behavior
712 712 of this mode is storage-dependent.) ``nodes`` ordering can force
713 713 revisions to be emitted before their ancestors, so consumers should
714 714 use it with care.
715 715
716 716 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
717 717 be set and it is the caller's responsibility to resolve it, if needed.
718 718
719 719 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
720 720 all revision data should be emitted as deltas against the revision
721 721 emitted just prior. The initial revision should be a delta against its
722 722 1st parent.
723 723 """
724 724
725 725
726 726 class ifilemutation(interfaceutil.Interface):
727 727 """Storage interface for mutation events of a tracked file."""
728 728
729 729 def add(filedata, meta, transaction, linkrev, p1, p2):
730 730 """Add a new revision to the store.
731 731
732 732 Takes file data, dictionary of metadata, a transaction, linkrev,
733 733 and parent nodes.
734 734
735 735 Returns the node that was added.
736 736
737 737 May no-op if a revision matching the supplied data is already stored.
738 738 """
739 739
740 740 def addrevision(
741 741 revisiondata,
742 742 transaction,
743 743 linkrev,
744 744 p1,
745 745 p2,
746 746 node=None,
747 747 flags=0,
748 748 cachedelta=None,
749 749 ):
750 750 """Add a new revision to the store and return its number.
751 751
752 752 This is similar to ``add()`` except it operates at a lower level.
753 753
754 754 The data passed in already contains a metadata header, if any.
755 755
756 756 ``node`` and ``flags`` can be used to define the expected node and
757 757 the flags to use with storage. ``flags`` is a bitwise value composed
758 758 of the various ``REVISION_FLAG_*`` constants.
759 759
760 760 ``add()`` is usually called when adding files from e.g. the working
761 761 directory. ``addrevision()`` is often called by ``add()`` and for
762 762 scenarios where revision data has already been computed, such as when
763 763 applying raw data from a peer repo.
764 764 """
765 765
766 766 def addgroup(
767 767 deltas,
768 768 linkmapper,
769 769 transaction,
770 770 addrevisioncb=None,
771 771 duplicaterevisioncb=None,
772 772 maybemissingparents=False,
773 773 ):
774 774 """Process a series of deltas for storage.
775 775
776 776 ``deltas`` is an iterable of 7-tuples of
777 777 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
778 778 to add.
779 779
780 780 The ``delta`` field contains ``mpatch`` data to apply to a base
781 781 revision, identified by ``deltabase``. The base node can be
782 782 ``nullid``, in which case the header from the delta can be ignored
783 783 and the delta used as the fulltext.
784 784
785 785 ``alwayscache`` instructs the lower layers to cache the content of the
786 786 newly added revision, even if it needs to be explicitly computed.
787 787 This used to be the default when ``addrevisioncb`` was provided up to
788 788 Mercurial 5.8.
789 789
790 790 ``addrevisioncb`` should be called for each new rev as it is committed.
791 791 ``duplicaterevisioncb`` should be called for all revs with a
792 792 pre-existing node.
793 793
794 794 ``maybemissingparents`` is a bool indicating whether the incoming
795 795 data may reference parents/ancestor revisions that aren't present.
796 796 This flag is set when receiving data into a "shallow" store that
797 797 doesn't hold all history.
798 798
799 799 Returns a list of nodes that were processed. A node will be in the list
800 800 even if it existed in the store previously.
801 801 """
802 802
803 803 def censorrevision(tr, node, tombstone=b''):
804 804 """Remove the content of a single revision.
805 805
806 806 The specified ``node`` will have its content purged from storage.
807 807 Future attempts to access the revision data for this node will
808 808 result in failure.
809 809
810 810 A ``tombstone`` message can optionally be stored. This message may be
811 811 displayed to users when they attempt to access the missing revision
812 812 data.
813 813
814 814 Storage backends may have stored deltas against the previous content
815 815 in this revision. As part of censoring a revision, these storage
816 816 backends are expected to rewrite any internally stored deltas such
817 817 that they no longer reference the deleted content.
818 818 """
819 819
820 820 def getstrippoint(minlink):
821 821 """Find the minimum revision that must be stripped to strip a linkrev.
822 822
823 823 Returns a 2-tuple containing the minimum revision number and a set
824 824 of all revisions numbers that would be broken by this strip.
825 825
826 826 TODO this is highly revlog centric and should be abstracted into
827 827 a higher-level deletion API. ``repair.strip()`` relies on this.
828 828 """
829 829
830 830 def strip(minlink, transaction):
831 831 """Remove storage of items starting at a linkrev.
832 832
833 833 This uses ``getstrippoint()`` to determine the first node to remove.
834 834 Then it effectively truncates storage for all revisions after that.
835 835
836 836 TODO this is highly revlog centric and should be abstracted into a
837 837 higher-level deletion API.
838 838 """
839 839
840 840
841 841 class ifilestorage(ifileindex, ifiledata, ifilemutation):
842 842 """Complete storage interface for a single tracked file."""
843 843
844 844 def files():
845 845 """Obtain paths that are backing storage for this file.
846 846
847 847 TODO this is used heavily by verify code and there should probably
848 848 be a better API for that.
849 849 """
850 850
851 851 def storageinfo(
852 852 exclusivefiles=False,
853 853 sharedfiles=False,
854 854 revisionscount=False,
855 855 trackedsize=False,
856 856 storedsize=False,
857 857 ):
858 858 """Obtain information about storage for this file's data.
859 859
860 860 Returns a dict describing storage for this tracked path. The keys
861 861 in the dict map to arguments of the same name. The arguments are bools
862 862 indicating whether to calculate and obtain that data.
863 863
864 864 exclusivefiles
865 865 Iterable of (vfs, path) describing files that are exclusively
866 866 used to back storage for this tracked path.
867 867
868 868 sharedfiles
869 869 Iterable of (vfs, path) describing files that are used to back
870 870 storage for this tracked path. Those files may also provide storage
871 871 for other stored entities.
872 872
873 873 revisionscount
874 874 Number of revisions available for retrieval.
875 875
876 876 trackedsize
877 877 Total size in bytes of all tracked revisions. This is a sum of the
878 878 length of the fulltext of all revisions.
879 879
880 880 storedsize
881 881 Total size in bytes used to store data for all tracked revisions.
882 882 This is commonly less than ``trackedsize`` due to internal usage
883 883 of deltas rather than fulltext revisions.
884 884
885 885 Not all storage backends may support all queries or have a reasonable
886 886 value to use. In that case, the value should be set to ``None`` and
887 887 callers are expected to handle this special value.
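
A hypothetical call and result (the values shown are illustrative)::

  f.storageinfo(revisionscount=True, storedsize=True)
  -> {'revisionscount': 3, 'storedsize': 512}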
888 888 """
889 889
890 890 def verifyintegrity(state):
891 891 """Verifies the integrity of file storage.
892 892
893 893 ``state`` is a dict holding state of the verifier process. It can be
894 894 used to communicate data between invocations of multiple storage
895 895 primitives.
896 896
897 897 If individual revisions cannot have their revision content resolved,
898 898 the method is expected to set the ``skipread`` key to a set of nodes
899 899 that encountered problems. If set, the method can also add the node(s)
900 900 to ``safe_renamed`` in order to indicate nodes whose rename checks
901 901 can still be performed with currently accessible data.
902 902
903 903 The method yields objects conforming to the ``iverifyproblem``
904 904 interface.
905 905 """
906 906
907 907
908 908 class idirs(interfaceutil.Interface):
909 909 """Interface representing a collection of directories from paths.
910 910
911 911 This interface is essentially a derived data structure representing
912 912 directories from a collection of paths.
913 913 """
914 914
915 915 def addpath(path):
916 916 """Add a path to the collection.
917 917
918 918 All directories in the path will be added to the collection.
919 919 """
920 920
921 921 def delpath(path):
922 922 """Remove a path from the collection.
923 923
924 924 If the removal was the last path in a particular directory, the
925 925 directory is removed from the collection.
926 926 """
927 927
928 928 def __iter__():
929 929 """Iterate over the directories in this collection of paths."""
930 930
931 931 def __contains__(path):
932 932 """Whether a specific directory is in this collection."""
933 933
934 934
935 935 class imanifestdict(interfaceutil.Interface):
936 936 """Interface representing a manifest data structure.
937 937
938 938 A manifest is effectively a dict mapping paths to entries. Each entry
939 939 consists of a binary node and extra flags affecting that entry.
940 940 """
941 941
942 942 def __getitem__(path):
943 943 """Returns the binary node value for a path in the manifest.
944 944
945 945 Raises ``KeyError`` if the path does not exist in the manifest.
946 946
947 947 Equivalent to ``self.find(path)[0]``.
948 948 """
949 949
950 950 def find(path):
951 951 """Returns the entry for a path in the manifest.
952 952
953 953 Returns a 2-tuple of (node, flags).
954 954
955 955 Raises ``KeyError`` if the path does not exist in the manifest.
956 956 """
957 957
958 958 def __len__():
959 959 """Return the number of entries in the manifest."""
960 960
961 961 def __nonzero__():
962 962 """Returns True if the manifest has entries, False otherwise."""
963 963
964 964 __bool__ = __nonzero__
965 965
966 966 def __setitem__(path, node):
967 967 """Define the node value for a path in the manifest.
968 968
969 969 If the path is already in the manifest, its flags will be copied to
970 970 the new entry.
971 971 """
972 972
973 973 def __contains__(path):
974 974 """Whether a path exists in the manifest."""
975 975
976 976 def __delitem__(path):
977 977 """Remove a path from the manifest.
978 978
979 979 Raises ``KeyError`` if the path is not in the manifest.
980 980 """
981 981
982 982 def __iter__():
983 983 """Iterate over paths in the manifest."""
984 984
985 985 def iterkeys():
986 986 """Iterate over paths in the manifest."""
987 987
988 988 def keys():
989 989 """Obtain a list of paths in the manifest."""
990 990
991 991 def filesnotin(other, match=None):
992 992 """Obtain the set of paths in this manifest but not in another.
993 993
994 994 ``match`` is an optional matcher function to be applied to both
995 995 manifests.
996 996
997 997 Returns a set of paths.
998 998 """
999 999
1000 1000 def dirs():
1001 1001 """Returns an object implementing the ``idirs`` interface."""
1002 1002
1003 1003 def hasdir(dir):
1004 1004 """Returns a bool indicating if a directory is in this manifest."""
1005 1005
1006 1006 def walk(match):
1007 1007 """Generator of paths in manifest satisfying a matcher.
1008 1008
1009 1009 If the matcher has explicit files listed and they don't exist in
1010 1010 the manifest, ``match.bad()`` is called for each missing file.
1011 1011 """
1012 1012
1013 1013 def diff(other, match=None, clean=False):
1014 1014 """Find differences between this manifest and another.
1015 1015
1016 1016 This manifest is compared to ``other``.
1017 1017
1018 1018 If ``match`` is provided, the two manifests are filtered against this
1019 1019 matcher and only entries satisfying the matcher are compared.
1020 1020
1021 1021 If ``clean`` is True, unchanged files are included in the returned
1022 1022 object.
1023 1023
1024 1024 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1025 1025 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1026 1026 represents the node and flags for this manifest and ``(node2, flag2)``
1027 1027 are the same for the other manifest.
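
For example, a path whose flag changed from empty to executable might
appear as (nodes elided)::

  {b'foo.sh': ((node1, b''), (node2, b'x'))}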
1028 1028 """
1029 1029
1030 1030 def setflag(path, flag):
1031 1031 """Set the flag value for a given path.
1032 1032
1033 1033 Raises ``KeyError`` if the path is not already in the manifest.
1034 1034 """
1035 1035
1036 1036 def get(path, default=None):
1037 1037 """Obtain the node value for a path or a default value if missing."""
1038 1038
1039 1039 def flags(path):
1040 1040 """Return the flags value for a path (default: empty bytestring)."""
1041 1041
1042 1042 def copy():
1043 1043 """Return a copy of this manifest."""
1044 1044
1045 1045 def items():
1046 1046 """Returns an iterable of (path, node) for items in this manifest."""
1047 1047
1048 1048 def iteritems():
1049 1049 """Identical to items()."""
1050 1050
1051 1051 def iterentries():
1052 1052 """Returns an iterable of (path, node, flags) for this manifest.
1053 1053
1054 1054 Similar to ``iteritems()`` except items are a 3-tuple and include
1055 1055 flags.
1056 1056 """
1057 1057
1058 1058 def text():
1059 1059 """Obtain the raw data representation for this manifest.
1060 1060
1061 1061 Result is used to create a manifest revision.
1062 1062 """
1063 1063
1064 1064 def fastdelta(base, changes):
1065 1065 """Obtain a delta between this manifest and another given changes.
1066 1066
1067 1067 ``base`` is the raw data representation for another manifest.
1068 1068
1069 1069 ``changes`` is an iterable of ``(path, to_delete)``.
1070 1070
1071 1071 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1072 1072 delta between ``base`` and this manifest.
1073 1073
1074 1074 If this manifest implementation can't support ``fastdelta()``,
1075 1075 raise ``mercurial.manifest.FastdeltaUnavailable``.
1076 1076 """
1077 1077
1078 1078
1079 1079 class imanifestrevisionbase(interfaceutil.Interface):
1080 1080 """Base interface representing a single revision of a manifest.
1081 1081
1082 1082 Should not be used as a primary interface: should always be inherited
1083 1083 as part of a larger interface.
1084 1084 """
1085 1085
1086 1086 def copy():
1087 1087 """Obtain a copy of this manifest instance.
1088 1088
1089 1089 Returns an object conforming to the ``imanifestrevisionwritable``
1090 1090 interface. The instance will be associated with the same
1091 1091 ``imanifestlog`` collection as this instance.
1092 1092 """
1093 1093
1094 1094 def read():
1095 1095 """Obtain the parsed manifest data structure.
1096 1096
1097 1097 The returned object conforms to the ``imanifestdict`` interface.
1098 1098 """
1099 1099
1100 1100
1101 1101 class imanifestrevisionstored(imanifestrevisionbase):
1102 1102 """Interface representing a manifest revision committed to storage."""
1103 1103
1104 1104 def node():
1105 1105 """The binary node for this manifest."""
1106 1106
1107 1107 parents = interfaceutil.Attribute(
1108 1108 """List of binary nodes that are parents for this manifest revision."""
1109 1109 )
1110 1110
1111 1111 def readdelta(shallow=False):
1112 1112 """Obtain the manifest data structure representing changes from parent.
1113 1113
1114 1114 This manifest is compared to its 1st parent. A new manifest representing
1115 1115 those differences is constructed.
1116 1116
1117 1117 The returned object conforms to the ``imanifestdict`` interface.
1118 1118 """
1119 1119
1120 1120 def readfast(shallow=False):
1121 1121 """Calls either ``read()`` or ``readdelta()``.
1122 1122
1123 1123 The faster of the two options is called.
1124 1124 """
1125 1125
1126 1126 def find(key):
1127 1127 """Calls ``self.read().find(key)``.
1128 1128
1129 1129 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1130 1130 """
1131 1131
1132 1132
1133 1133 class imanifestrevisionwritable(imanifestrevisionbase):
1134 1134 """Interface representing a manifest revision that can be committed."""
1135 1135
1136 1136 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1137 1137 """Add this revision to storage.
1138 1138
1139 1139 Takes a transaction object, the changeset revision number it will
1140 1140 be associated with, its parent nodes, and lists of added and
1141 1141 removed paths.
1142 1142
1143 1143 If match is provided, storage can choose not to inspect or write out
1144 1144 items that do not match. Storage is still required to be able to provide
1145 1145 the full manifest in the future for any directories written (these
1146 1146 manifests should not be "narrowed on disk").
1147 1147
1148 1148 Returns the binary node of the created revision.
1149 1149 """
1150 1150
1151 1151
1152 1152 class imanifeststorage(interfaceutil.Interface):
1153 1153 """Storage interface for manifest data."""
1154 1154
1155 1155 nodeconstants = interfaceutil.Attribute(
1156 1156 """nodeconstants used by the current repository."""
1157 1157 )
1158 1158
1159 1159 tree = interfaceutil.Attribute(
1160 1160 """The path to the directory this manifest tracks.
1161 1161
1162 1162 The empty bytestring represents the root manifest.
1163 1163 """
1164 1164 )
1165 1165
1166 1166 index = interfaceutil.Attribute(
1167 1167 """An ``ifilerevisionssequence`` instance."""
1168 1168 )
1169 1169
1170 1170 indexfile = interfaceutil.Attribute(
1171 1171 """Path of revlog index file.
1172 1172
1173 1173 TODO this is revlog specific and should not be exposed.
1174 1174 """
1175 1175 )
1176 1176
1177 1177 opener = interfaceutil.Attribute(
1178 1178 """VFS opener to use to access underlying files used for storage.
1179 1179
1180 1180 TODO this is revlog specific and should not be exposed.
1181 1181 """
1182 1182 )
1183 1183
1184 version = interfaceutil.Attribute(
1185 """Revlog version number.
1186
1187 TODO this is revlog specific and should not be exposed.
1188 """
1189 )
1190
1191 1184 _generaldelta = interfaceutil.Attribute(
1192 1185 """Whether generaldelta storage is being used.
1193 1186
1194 1187 TODO this is revlog specific and should not be exposed.
1195 1188 """
1196 1189 )
1197 1190
1198 1191 fulltextcache = interfaceutil.Attribute(
1199 1192 """Dict with cache of fulltexts.
1200 1193
1201 1194 TODO this doesn't feel appropriate for the storage interface.
1202 1195 """
1203 1196 )
1204 1197
1205 1198 def __len__():
1206 1199 """Obtain the number of revisions stored for this manifest."""
1207 1200
1208 1201 def __iter__():
1209 1202 """Iterate over revision numbers for this manifest."""
1210 1203
1211 1204 def rev(node):
1212 1205 """Obtain the revision number given a binary node.
1213 1206
1214 1207 Raises ``error.LookupError`` if the node is not known.
1215 1208 """
1216 1209
1217 1210 def node(rev):
1218 1211 """Obtain the node value given a revision number.
1219 1212
1220 1213 Raises ``error.LookupError`` if the revision is not known.
1221 1214 """
1222 1215
1223 1216 def lookup(value):
1224 1217 """Attempt to resolve a value to a node.
1225 1218
1226 1219 Value can be a binary node, hex node, revision number, or bytes
1227 1220 that can be converted to an integer.
1228 1221 
1229 1222 Raises ``error.LookupError`` if a node could not be resolved.
1230 1223 """
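    # Hedged round-trip sketch (assumes ``store`` conforms to
    # ``imanifeststorage``):
    #
    #   rev = store.rev(node)        # binary node -> revision number
    #   assert store.node(rev) == node
    #   store.lookup(b'%d' % rev)    # also accepts integers-as-bytes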
1231 1224
1232 1225 def parents(node):
1233 1226 """Returns a 2-tuple of parent nodes for a node.
1234 1227
1235 1228 Values will be ``nullid`` if the parent is empty.
1236 1229 """
1237 1230
1238 1231 def parentrevs(rev):
1239 1232 """Like parents() but operates on revision numbers."""
1240 1233
1241 1234 def linkrev(rev):
1242 1235 """Obtain the changeset revision number a revision is linked to."""
1243 1236
1244 1237 def revision(node, _df=None, raw=False):
1245 1238 """Obtain fulltext data for a node."""
1246 1239
1247 1240 def rawdata(node, _df=None):
1248 1241 """Obtain raw data for a node."""
1249 1242
1250 1243 def revdiff(rev1, rev2):
1251 1244 """Obtain a delta between two revision numbers.
1252 1245
1253 1246 The returned data is the result of ``bdiff.bdiff()`` on the raw
1254 1247 revision data.
1255 1248 """
1256 1249
1257 1250 def cmp(node, fulltext):
1258 1251 """Compare fulltext to another revision.
1259 1252
1260 1253 Returns True if the fulltext is different from what is stored.
1261 1254 """
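    # Hedged sketch (hypothetical names): skip a write when the content is
    # already stored, otherwise compute a delta between two revisions:
    #
    #   if not store.cmp(node, fulltext):   # False -> fulltext already stored
    #       return node
    #   delta = store.revdiff(store.rev(p1node), store.rev(node))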
1262 1255
1263 1256 def emitrevisions(
1264 1257 nodes,
1265 1258 nodesorder=None,
1266 1259 revisiondata=False,
1267 1260 assumehaveparentrevisions=False,
1268 1261 ):
1269 1262 """Produce ``irevisiondelta`` describing revisions.
1270 1263
1271 1264 See the documentation for ``ifiledata`` for more.
1272 1265 """
1273 1266
1274 1267 def addgroup(
1275 1268 deltas,
1276 1269 linkmapper,
1277 1270 transaction,
1278 1271 addrevisioncb=None,
1279 1272 duplicaterevisioncb=None,
1280 1273 ):
1281 1274 """Process a series of deltas for storage.
1282 1275
1283 1276 See the documentation in ``ifilemutation`` for more.
1284 1277 """
1285 1278
1286 1279 def rawsize(rev):
1287 1280 """Obtain the size of tracked data.
1288 1281
1289 1282 Is equivalent to ``len(m.rawdata(node))``.
1290 1283
1291 1284 TODO this method is only used by upgrade code and may be removed.
1292 1285 """
1293 1286
1294 1287 def getstrippoint(minlink):
1295 1288 """Find minimum revision that must be stripped to strip a linkrev.
1296 1289
1297 1290 See the documentation in ``ifilemutation`` for more.
1298 1291 """
1299 1292
1300 1293 def strip(minlink, transaction):
1301 1294 """Remove storage of items starting at a linkrev.
1302 1295
1303 1296 See the documentation in ``ifilemutation`` for more.
1304 1297 """
1305 1298
1306 1299 def checksize():
1307 1300 """Obtain the expected sizes of backing files.
1308 1301
1309 1302 TODO this is used by verify and it should not be part of the interface.
1310 1303 """
1311 1304
1312 1305 def files():
1313 1306 """Obtain paths that are backing storage for this manifest.
1314 1307
1315 1308 TODO this is used by verify and there should probably be a better API
1316 1309 for this functionality.
1317 1310 """
1318 1311
1319 1312 def deltaparent(rev):
1320 1313 """Obtain the revision that a revision is delta'd against.
1321 1314
1322 1315 TODO delta encoding is an implementation detail of storage and should
1323 1316 not be exposed to the storage interface.
1324 1317 """
1325 1318
1326 1319 def clone(tr, dest, **kwargs):
1327 1320 """Clone this instance to another."""
1328 1321
1329 1322 def clearcaches(clear_persisted_data=False):
1330 1323 """Clear any caches associated with this instance."""
1331 1324
1332 1325 def dirlog(d):
1333 1326 """Obtain a manifest storage instance for a tree."""
1334 1327
1335 1328 def add(
1336 1329 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1337 1330 ):
1338 1331 """Add a revision to storage.
1339 1332
1340 1333 ``m`` is an object conforming to ``imanifestdict``.
1341 1334
1342 1335 ``link`` is the linkrev revision number.
1343 1336
1344 1337 ``p1`` and ``p2`` are the parent revision numbers.
1345 1338
1346 1339 ``added`` and ``removed`` are iterables of added and removed paths,
1347 1340 respectively.
1348 1341
1349 1342 ``readtree`` is a function that can be used to read the child tree(s)
1350 1343 when recursively writing the full tree structure when using
351 344 treemanifests.
1352 1345
1353 1346 ``match`` is a matcher that can be used to hint to storage that not all
1354 1347 paths must be inspected; this is an optimization and can be safely
1355 1348 ignored. Note that the storage must still be able to reproduce a full
1356 1349 manifest including files that did not match.
1357 1350 """
1358 1351
1359 1352 def storageinfo(
1360 1353 exclusivefiles=False,
1361 1354 sharedfiles=False,
1362 1355 revisionscount=False,
1363 1356 trackedsize=False,
1364 1357 storedsize=False,
1365 1358 ):
1366 1359 """Obtain information about storage for this manifest's data.
1367 1360
1368 1361 See ``ifilestorage.storageinfo()`` for a description of this method.
1369 1362 This one behaves the same way, except for manifest data.
1370 1363 """
1371 1364
1372 1365
1373 1366 class imanifestlog(interfaceutil.Interface):
1374 1367 """Interface representing a collection of manifest snapshots.
1375 1368
1376 1369 Represents the root manifest in a repository.
1377 1370
1378 1371 Also serves as a means to access nested tree manifests and to cache
1379 1372 tree manifests.
1380 1373 """
1381 1374
1382 1375 nodeconstants = interfaceutil.Attribute(
1383 1376 """nodeconstants used by the current repository."""
1384 1377 )
1385 1378
1386 1379 def __getitem__(node):
1387 1380 """Obtain a manifest instance for a given binary node.
1388 1381
1389 1382 Equivalent to calling ``self.get('', node)``.
1390 1383
1391 1384 The returned object conforms to the ``imanifestrevisionstored``
1392 1385 interface.
1393 1386 """
1394 1387
1395 1388 def get(tree, node, verify=True):
1396 1389 """Retrieve the manifest instance for a given directory and binary node.
1397 1390
1398 1391 ``node`` always refers to the node of the root manifest (which will be
1399 1392 the only manifest if flat manifests are being used).
1400 1393
1401 1394 If ``tree`` is the empty string, the root manifest is returned.
1402 1395 Otherwise the manifest for the specified directory will be returned
1403 1396 (requires tree manifests).
1404 1397
1405 1398 If ``verify`` is True, ``LookupError`` is raised if the node is not
1406 1399 known.
1407 1400
1408 1401 The returned object conforms to the ``imanifestrevisionstored``
1409 1402 interface.
1410 1403 """
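    # Hedged usage sketch (``mfl`` is a repository's ``imanifestlog``; the
    # manifest node would come from a changelog entry):
    #
    #   mfl = repo.manifestlog
    #   mctx = mfl[mfnode]                     # same as mfl.get(b'', mfnode)
    #   sub = mfl.get(b'dir/subdir/', mfnode)  # tree manifests only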
1411 1404
1412 1405 def getstorage(tree):
1413 1406 """Retrieve an interface to storage for a particular tree.
1414 1407
1415 1408 If ``tree`` is the empty bytestring, storage for the root manifest will
1416 1409 be returned. Otherwise storage for a tree manifest is returned.
1417 1410
1418 1411 TODO formalize interface for returned object.
1419 1412 """
1420 1413
1421 1414 def clearcaches():
1422 1415 """Clear caches associated with this collection."""
1423 1416
1424 1417 def rev(node):
1425 1418 """Obtain the revision number for a binary node.
1426 1419
1427 1420 Raises ``error.LookupError`` if the node is not known.
1428 1421 """
1429 1422
1430 1423 def update_caches(transaction):
1431 1424 """update whatever cache are relevant for the used storage."""
1432 1425
1433 1426
1434 1427 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1435 1428 """Local repository sub-interface providing access to tracked file storage.
1436 1429
1437 1430 This interface defines how a repository accesses storage for a single
1438 1431 tracked file path.
1439 1432 """
1440 1433
1441 1434 def file(f):
1442 1435 """Obtain a filelog for a tracked path.
1443 1436
1444 1437 The returned type conforms to the ``ifilestorage`` interface.
1445 1438 """
1446 1439
1447 1440
1448 1441 class ilocalrepositorymain(interfaceutil.Interface):
1449 1442 """Main interface for local repositories.
1450 1443
1451 1444 This currently captures the reality of things - not how things should be.
1452 1445 """
1453 1446
1454 1447 nodeconstants = interfaceutil.Attribute(
1455 1448 """Constant nodes matching the hash function used by the repository."""
1456 1449 )
1457 1450 nullid = interfaceutil.Attribute(
1458 1451 """null revision for the hash function used by the repository."""
1459 1452 )
1460 1453
1461 1454 supportedformats = interfaceutil.Attribute(
1462 1455 """Set of requirements that apply to stream clone.
1463 1456
1464 1457 This is actually a class attribute and is shared among all instances.
1465 1458 """
1466 1459 )
1467 1460
1468 1461 supported = interfaceutil.Attribute(
1469 1462 """Set of requirements that this repo is capable of opening."""
1470 1463 )
1471 1464
1472 1465 requirements = interfaceutil.Attribute(
1473 1466 """Set of requirements this repo uses."""
1474 1467 )
1475 1468
1476 1469 features = interfaceutil.Attribute(
1477 1470 """Set of "features" this repository supports.
1478 1471
1479 1472 A "feature" is a loosely-defined term. It can refer to a feature
1480 1473 in the classical sense or can describe an implementation detail
1481 1474 of the repository. For example, a ``readonly`` feature may denote
1482 1475 the repository as read-only. Or a ``revlogfilestore`` feature may
1483 1476 denote that the repository is using revlogs for file storage.
1484 1477
1485 1478 The intent of features is to provide a machine-queryable mechanism
1486 1479 for repo consumers to test for various repository characteristics.
1487 1480
1488 1481 Features are similar to ``requirements``. The main difference is that
1489 1482 requirements are stored on-disk and represent requirements to open the
1490 1483 repository. Features describe run-time capabilities of the repository
1491 1484 and are more granular (and may be derived from requirements).
1492 1485 """
1493 1486 )
1494 1487
1495 1488 filtername = interfaceutil.Attribute(
1496 1489 """Name of the repoview that is active on this repo."""
1497 1490 )
1498 1491
1499 1492 wvfs = interfaceutil.Attribute(
1500 1493 """VFS used to access the working directory."""
1501 1494 )
1502 1495
1503 1496 vfs = interfaceutil.Attribute(
1504 1497 """VFS rooted at the .hg directory.
1505 1498
1506 1499 Used to access repository data not in the store.
1507 1500 """
1508 1501 )
1509 1502
1510 1503 svfs = interfaceutil.Attribute(
1511 1504 """VFS rooted at the store.
1512 1505
1513 1506 Used to access repository data in the store. Typically .hg/store.
1514 1507 But can point elsewhere if the store is shared.
1515 1508 """
1516 1509 )
1517 1510
1518 1511 root = interfaceutil.Attribute(
1519 1512 """Path to the root of the working directory."""
1520 1513 )
1521 1514
1522 1515 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1523 1516
1524 1517 origroot = interfaceutil.Attribute(
1525 1518 """The filesystem path that was used to construct the repo."""
1526 1519 )
1527 1520
1528 1521 auditor = interfaceutil.Attribute(
1529 1522 """A pathauditor for the working directory.
1530 1523
1531 1524 This checks if a path refers to a nested repository.
1532 1525
1533 1526 Operates on the filesystem.
1534 1527 """
1535 1528 )
1536 1529
1537 1530 nofsauditor = interfaceutil.Attribute(
1538 1531 """A pathauditor for the working directory.
1539 1532
1540 1533 This is like ``auditor`` except it doesn't do filesystem checks.
1541 1534 """
1542 1535 )
1543 1536
1544 1537 baseui = interfaceutil.Attribute(
1545 1538 """Original ui instance passed into constructor."""
1546 1539 )
1547 1540
1548 1541 ui = interfaceutil.Attribute("""Main ui instance for this repository.""")
1549 1542
1550 1543 sharedpath = interfaceutil.Attribute(
1551 1544 """Path to the .hg directory of the repo this repo was shared from."""
1552 1545 )
1553 1546
1554 1547 store = interfaceutil.Attribute("""A store instance.""")
1555 1548
1556 1549 spath = interfaceutil.Attribute("""Path to the store.""")
1557 1550
1558 1551 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1559 1552
1560 1553 cachevfs = interfaceutil.Attribute(
1561 1554 """A VFS used to access the cache directory.
1562 1555
1563 1556 Typically .hg/cache.
1564 1557 """
1565 1558 )
1566 1559
1567 1560 wcachevfs = interfaceutil.Attribute(
1568 1561 """A VFS used to access the cache directory dedicated to working copy
1569 1562
1570 1563 Typically .hg/wcache.
1571 1564 """
1572 1565 )
1573 1566
1574 1567 filteredrevcache = interfaceutil.Attribute(
1575 1568 """Holds sets of revisions to be filtered."""
1576 1569 )
1577 1570
1578 1571 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1579 1572
1580 1573 filecopiesmode = interfaceutil.Attribute(
1581 1574 """The way files copies should be dealt with in this repo."""
1582 1575 )
1583 1576
1584 1577 def close():
1585 1578 """Close the handle on this repository."""
1586 1579
1587 1580 def peer():
1588 1581 """Obtain an object conforming to the ``peer`` interface."""
1589 1582
1590 1583 def unfiltered():
1591 1584 """Obtain an unfiltered/raw view of this repo."""
1592 1585
1593 1586 def filtered(name, visibilityexceptions=None):
1594 1587 """Obtain a named view of this repository."""
1595 1588
1596 1589 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1597 1590
1598 1591 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1599 1592
1600 1593 manifestlog = interfaceutil.Attribute(
1601 1594 """An instance conforming to the ``imanifestlog`` interface.
1602 1595
1603 1596 Provides access to manifests for the repository.
1604 1597 """
1605 1598 )
1606 1599
1607 1600 dirstate = interfaceutil.Attribute("""Working directory state.""")
1608 1601
1609 1602 narrowpats = interfaceutil.Attribute(
1610 1603 """Matcher patterns for this repository's narrowspec."""
1611 1604 )
1612 1605
1613 1606 def narrowmatch(match=None, includeexact=False):
1614 1607 """Obtain a matcher for the narrowspec."""
1615 1608
1616 1609 def setnarrowpats(newincludes, newexcludes):
1617 1610 """Define the narrowspec for this repository."""
1618 1611
1619 1612 def __getitem__(changeid):
1620 1613 """Try to resolve a changectx."""
1621 1614
1622 1615 def __contains__(changeid):
1623 1616 """Whether a changeset exists."""
1624 1617
1625 1618 def __nonzero__():
1626 1619 """Always returns True."""
1627 1620 return True
1628 1621
1629 1622 __bool__ = __nonzero__
1630 1623
1631 1624 def __len__():
1632 1625 """Returns the number of changesets in the repo."""
1633 1626
1634 1627 def __iter__():
1635 1628 """Iterate over revisions in the changelog."""
1636 1629
1637 1630 def revs(expr, *args):
1638 1631 """Evaluate a revset.
1639 1632
1640 1633 Emits revisions.
1641 1634 """
1642 1635
1643 1636 def set(expr, *args):
1644 1637 """Evaluate a revset.
1645 1638
1646 1639 Emits changectx instances.
1647 1640 """
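    # Hedged revset sketch (assumes ``repo`` implements this interface):
    #
    #   for rev in repo.revs(b'heads(branch(%s))', b'default'):
    #       ...                              # integer revision numbers
    #   for ctx in repo.set(b'draft() and ancestors(%s)', b'.'):
    #       ...                              # changectx instances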
1648 1641
1649 1642 def anyrevs(specs, user=False, localalias=None):
1650 1643 """Find revisions matching one of the given revsets."""
1651 1644
1652 1645 def url():
1653 1646 """Returns a string representing the location of this repo."""
1654 1647
1655 1648 def hook(name, throw=False, **args):
1656 1649 """Call a hook."""
1657 1650
1658 1651 def tags():
1659 1652 """Return a mapping of tag to node."""
1660 1653
1661 1654 def tagtype(tagname):
1662 1655 """Return the type of a given tag."""
1663 1656
1664 1657 def tagslist():
1665 1658 """Return a list of tags ordered by revision."""
1666 1659
1667 1660 def nodetags(node):
1668 1661 """Return the tags associated with a node."""
1669 1662
1670 1663 def nodebookmarks(node):
1671 1664 """Return the list of bookmarks pointing to the specified node."""
1672 1665
1673 1666 def branchmap():
1674 1667 """Return a mapping of branch to heads in that branch."""
1675 1668
1676 1669 def revbranchcache():
1677 1670 pass
1678 1671
1679 1672 def register_changeset(rev, changelogrevision):
1680 1673 """Extension point for caches for new nodes.
1681 1674
1682 1675 Multiple consumers are expected to need parts of the changelogrevision,
1683 1676 so it is provided as an optimization to avoid duplicate lookups. A simple
1684 1677 cache would be fragile when other revisions are accessed, too."""
1685 1678 pass
1686 1679
1687 1680 def branchtip(branchtip, ignoremissing=False):
1688 1681 """Return the tip node for a given branch."""
1689 1682
1690 1683 def lookup(key):
1691 1684 """Resolve the node for a revision."""
1692 1685
1693 1686 def lookupbranch(key):
1694 1687 """Look up the branch name of the given revision or branch name."""
1695 1688
1696 1689 def known(nodes):
1697 1690 """Determine whether a series of nodes is known.
1698 1691
1699 1692 Returns a list of bools.
1700 1693 """
1701 1694
1702 1695 def local():
1703 1696 """Whether the repository is local."""
1704 1697 return True
1705 1698
1706 1699 def publishing():
1707 1700 """Whether the repository is a publishing repository."""
1708 1701
1709 1702 def cancopy():
1710 1703 pass
1711 1704
1712 1705 def shared():
1713 1706 """The type of shared repository or None."""
1714 1707
1715 1708 def wjoin(f, *insidef):
1716 1709 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1717 1710
1718 1711 def setparents(p1, p2):
1719 1712 """Set the parent nodes of the working directory."""
1720 1713
1721 1714 def filectx(path, changeid=None, fileid=None):
1722 1715 """Obtain a filectx for the given file revision."""
1723 1716
1724 1717 def getcwd():
1725 1718 """Obtain the current working directory from the dirstate."""
1726 1719
1727 1720 def pathto(f, cwd=None):
1728 1721 """Obtain the relative path to a file."""
1729 1722
1730 1723 def adddatafilter(name, fltr):
1731 1724 pass
1732 1725
1733 1726 def wread(filename):
1734 1727 """Read a file from wvfs, using data filters."""
1735 1728
1736 1729 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1737 1730 """Write data to a file in the wvfs, using data filters."""
1738 1731
1739 1732 def wwritedata(filename, data):
1740 1733 """Resolve data for writing to the wvfs, using data filters."""
1741 1734
1742 1735 def currenttransaction():
1743 1736 """Obtain the current transaction instance or None."""
1744 1737
1745 1738 def transaction(desc, report=None):
1746 1739 """Open a new transaction to write to the repository."""
1747 1740
1748 1741 def undofiles():
1749 1742 """Returns a list of (vfs, path) for files to undo transactions."""
1750 1743
1751 1744 def recover():
1752 1745 """Roll back an interrupted transaction."""
1753 1746
1754 1747 def rollback(dryrun=False, force=False):
1755 1748 """Undo the last transaction.
1756 1749
1757 1750 DANGEROUS.
1758 1751 """
1759 1752
1760 1753 def updatecaches(tr=None, full=False):
1761 1754 """Warm repo caches."""
1762 1755
1763 1756 def invalidatecaches():
1764 1757 """Invalidate cached data due to the repository mutating."""
1765 1758
1766 1759 def invalidatevolatilesets():
1767 1760 pass
1768 1761
1769 1762 def invalidatedirstate():
1770 1763 """Invalidate the dirstate."""
1771 1764
1772 1765 def invalidate(clearfilecache=False):
1773 1766 pass
1774 1767
1775 1768 def invalidateall():
1776 1769 pass
1777 1770
1778 1771 def lock(wait=True):
1779 1772 """Lock the repository store and return a lock instance."""
1780 1773
1781 1774 def wlock(wait=True):
1782 1775 """Lock the non-store parts of the repository."""
1783 1776
1784 1777 def currentwlock():
1785 1778 """Return the wlock if it's held or None."""
1786 1779
1787 1780 def checkcommitpatterns(wctx, match, status, fail):
1788 1781 pass
1789 1782
1790 1783 def commit(
1791 1784 text=b'',
1792 1785 user=None,
1793 1786 date=None,
1794 1787 match=None,
1795 1788 force=False,
1796 1789 editor=False,
1797 1790 extra=None,
1798 1791 ):
1799 1792 """Add a new revision to the repository."""
1800 1793
1801 1794 def commitctx(ctx, error=False, origctx=None):
1802 1795 """Commit a commitctx instance to the repository."""
1803 1796
1804 1797 def destroying():
1805 1798 """Inform the repository that nodes are about to be destroyed."""
1806 1799
1807 1800 def destroyed():
1808 1801 """Inform the repository that nodes have been destroyed."""
1809 1802
1810 1803 def status(
1811 1804 node1=b'.',
1812 1805 node2=None,
1813 1806 match=None,
1814 1807 ignored=False,
1815 1808 clean=False,
1816 1809 unknown=False,
1817 1810 listsubrepos=False,
1818 1811 ):
1819 1812 """Convenience method to call repo[x].status()."""
1820 1813
1821 1814 def addpostdsstatus(ps):
1822 1815 pass
1823 1816
1824 1817 def postdsstatus():
1825 1818 pass
1826 1819
1827 1820 def clearpostdsstatus():
1828 1821 pass
1829 1822
1830 1823 def heads(start=None):
1831 1824 """Obtain list of nodes that are DAG heads."""
1832 1825
1833 1826 def branchheads(branch=None, start=None, closed=False):
1834 1827 pass
1835 1828
1836 1829 def branches(nodes):
1837 1830 pass
1838 1831
1839 1832 def between(pairs):
1840 1833 pass
1841 1834
1842 1835 def checkpush(pushop):
1843 1836 pass
1844 1837
1845 1838 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1846 1839
1847 1840 def pushkey(namespace, key, old, new):
1848 1841 pass
1849 1842
1850 1843 def listkeys(namespace):
1851 1844 pass
1852 1845
1853 1846 def debugwireargs(one, two, three=None, four=None, five=None):
1854 1847 pass
1855 1848
1856 1849 def savecommitmessage(text):
1857 1850 pass
1858 1851
1859 1852 def register_sidedata_computer(
1860 1853 kind, category, keys, computer, flags, replace=False
1861 1854 ):
1862 1855 pass
1863 1856
1864 1857 def register_wanted_sidedata(category):
1865 1858 pass
1866 1859
1867 1860
1868 1861 class completelocalrepository(
1869 1862 ilocalrepositorymain, ilocalrepositoryfilestorage
1870 1863 ):
1871 1864 """Complete interface for a local repository."""
1872 1865
1873 1866
1874 1867 class iwireprotocolcommandcacher(interfaceutil.Interface):
1875 1868 """Represents a caching backend for wire protocol commands.
1876 1869
1877 1870 Wire protocol version 2 supports transparent caching of many commands.
1878 1871 To leverage this caching, servers can activate objects that cache
1879 1872 command responses. Objects handle both cache writing and reading.
1880 1873 This interface defines how that response caching mechanism works.
1881 1874
1882 1875 Wire protocol version 2 commands emit a series of objects that are
1883 1876 serialized and sent to the client. The caching layer exists between
1884 1877 the invocation of the command function and the sending of its output
1885 1878 objects to an output layer.
1886 1879
1887 1880 Instances of this interface represent a binding to a cache that
1888 1881 can serve a response (in place of calling a command function) and/or
1889 1882 write responses to a cache for subsequent use.
1890 1883
1891 1884 When a command request arrives, the following happens with regards
1892 1885 to this interface:
1893 1886
1894 1887 1. The server determines whether the command request is cacheable.
1895 1888 2. If it is, an instance of this interface is spawned.
1896 1889 3. The cacher is activated in a context manager (``__enter__`` is called).
1897 1890 4. A cache *key* for that request is derived. This will call the
1898 1891 instance's ``adjustcachekeystate()`` method so the derivation
1899 1892 can be influenced.
1900 1893 5. The cacher is informed of the derived cache key via a call to
1901 1894 ``setcachekey()``.
1902 1895 6. The cacher's ``lookup()`` method is called to test for presence of
1903 1896 the derived key in the cache.
1904 1897 7. If ``lookup()`` returns a hit, that cached result is used in place
1905 1898 of invoking the command function. ``__exit__`` is called and the instance
1906 1899 is discarded.
1907 1900 8. The command function is invoked.
1908 1901 9. ``onobject()`` is called for each object emitted by the command
1909 1902 function.
1910 1903 10. After the final object is seen, ``onfinished()`` is called.
1911 1904 11. ``__exit__`` is called to signal the end of use of the instance.
1912 1905
1913 1906 Cache *key* derivation can be influenced by the instance.
1914 1907
1915 1908 Cache keys are initially derived by a deterministic representation of
1916 1909 the command request. This includes the command name, arguments, protocol
1917 1910 version, etc. This initial key derivation is performed by CBOR-encoding a
1918 1911 data structure and feeding that output into a hasher.
1919 1912
1920 1913 Instances of this interface can influence this initial key derivation
1921 1914 via ``adjustcachekeystate()``.
1922 1915
1923 1916 The instance is informed of the derived cache key via a call to
1924 1917 ``setcachekey()``. The instance must store the key locally so it can
1925 1918 be consulted on subsequent operations that may require it.
1926 1919
1927 1920 When constructed, the instance has access to a callable that can be used
1928 1921 for encoding response objects. This callable receives as its single
1929 1922 argument an object emitted by a command function. It returns an iterable
1930 1923 of bytes chunks representing the encoded object. Unless the cacher is
1931 1924 caching native Python objects in memory or has a way of reconstructing
1932 1925 the original Python objects, implementations typically call this function
1933 1926 to produce bytes from the output objects and then store those bytes in
1934 1927 the cache. When it comes time to re-emit those bytes, they are wrapped
1935 1928 in a ``wireprototypes.encodedresponse`` instance to tell the output
1936 1929 layer that they are pre-encoded.
1937 1930
1938 1931 When receiving the objects emitted by the command function, instances
1939 1932 can choose what to do with those objects. The simplest thing to do is
1940 1933 re-emit the original objects. They will be forwarded to the output
1941 1934 layer and will be processed as if the cacher did not exist.
1942 1935
1943 1936 Implementations could also choose to not emit objects - instead locally
1944 1937 buffering objects or their encoded representation. They could then emit
1945 1938 a single "coalesced" object when ``onfinished()`` is called. In
1946 1939 this way, the implementation would function as a filtering layer of
1947 1940 sorts.
1948 1941
1949 1942 When caching objects, typically the encoded form of the object will
1950 1943 be stored. Keep in mind that if the original object is forwarded to
1951 1944 the output layer, it will need to be encoded there as well. For large
1952 1945 output, this redundant encoding could add overhead. Implementations
1953 1946 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1954 1947 instances to avoid this overhead.
1955 1948 """
1956 1949
1957 1950 def __enter__():
1958 1951 """Marks the instance as active.
1959 1952
1960 1953 Should return self.
1961 1954 """
1962 1955
1963 1956 def __exit__(exctype, excvalue, exctb):
1964 1957 """Called when cacher is no longer used.
1965 1958
1966 1959 This can be used by implementations to perform cleanup actions (e.g.
1967 1960 disconnecting network sockets, aborting a partially cached response).
1968 1961 """
1969 1962
1970 1963 def adjustcachekeystate(state):
1971 1964 """Influences cache key derivation by adjusting state to derive key.
1972 1965
1973 1966 A dict defining the state used to derive the cache key is passed.
1974 1967
1975 1968 Implementations can modify this dict to record additional state that
1976 1969 is wanted to influence key derivation.
1977 1970
1978 1971 Implementations are *highly* encouraged to not modify or delete
1979 1972 existing keys.
1980 1973 """
1981 1974
1982 1975 def setcachekey(key):
1983 1976 """Record the derived cache key for this request.
1984 1977
1985 1978 Instances may mutate the key for internal usage, as desired. e.g.
1986 1979 instances may wish to prepend the repo name, introduce path
1987 1980 components for filesystem or URL addressing, etc. Behavior is up to
1988 1981 the cache.
1989 1982
1990 1983 Returns a bool indicating if the request is cacheable by this
1991 1984 instance.
1992 1985 """
1993 1986
1994 1987 def lookup():
1995 1988 """Attempt to resolve an entry in the cache.
1996 1989
1997 1990 The instance is instructed to look for the cache key that it was
1998 1991 informed about via the call to ``setcachekey()``.
1999 1992
2000 1993 If there's no cache hit or the cacher doesn't wish to use the cached
2001 1994 entry, ``None`` should be returned.
2002 1995
2003 1996 Else, a dict defining the cached result should be returned. The
2004 1997 dict may have the following keys:
2005 1998
2006 1999 objs
2007 2000 An iterable of objects that should be sent to the client. That
2008 2001 iterable of objects is expected to be what the command function
2009 2002 would return if invoked or an equivalent representation thereof.
2010 2003 """
2011 2004
2012 2005 def onobject(obj):
2013 2006 """Called when a new object is emitted from the command function.
2014 2007
2015 2008 Receives as its argument the object that was emitted from the
2016 2009 command function.
2017 2010
2018 2011 This method returns an iterator of objects to forward to the output
2019 2012 layer. The easiest implementation is a generator that just
2020 2013 ``yield obj``.
2021 2014 """
2022 2015
2023 2016 def onfinished():
2024 2017 """Called after all objects have been emitted from the command function.
2025 2018
2026 2019 Implementations should return an iterator of objects to forward to
2027 2020 the output layer.
2028 2021
2029 2022 This method can be a generator.
2030 2023 """
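# Hedged skeleton of a cacher (hypothetical in-memory backend; a real
# implementation would persist encoded chunks keyed by the cache key):
#
#   @interfaceutil.implementer(iwireprotocolcommandcacher)
#   class memorycacher(object):
#       def __init__(self, cache, encodefn):
#           self.cache = cache        # dict: key -> list of encoded chunks
#           self.encodefn = encodefn  # the response-encoding callable
#           self.key = None
#           self.buffered = []
#
#       def __enter__(self):
#           return self
#
#       def __exit__(self, exctype, excvalue, exctb):
#           self.buffered = []
#
#       def adjustcachekeystate(self, state):
#           pass                      # accept the default key derivation
#
#       def setcachekey(self, key):
#           self.key = key
#           return True
#
#       def lookup(self):
#           if self.key not in self.cache:
#               return None
#           return {b'objs': [wireprototypes.encodedresponse(
#               b''.join(self.cache[self.key]))]}
#
#       def onobject(self, obj):
#           self.buffered.extend(self.encodefn(obj))
#           yield obj                 # forward unchanged to the output layer
#
#       def onfinished(self):
#           self.cache[self.key] = self.buffered
#           return []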
@@ -1,2385 +1,2384
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullrev,
20 20 )
21 21 from .pycompat import getattr
22 22 from . import (
23 23 encoding,
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 pathutil,
28 28 policy,
29 29 pycompat,
30 30 revlog,
31 31 util,
32 32 )
33 33 from .interfaces import (
34 34 repository,
35 35 util as interfaceutil,
36 36 )
37 37 from .revlogutils import (
38 38 constants as revlog_constants,
39 39 )
40 40
41 41 parsers = policy.importmod('parsers')
42 42 propertycache = util.propertycache
43 43
44 44 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
45 45 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
46 46
47 47
48 48 def _parse(nodelen, data):
49 49 # This method does a little bit of excessive-looking
50 50 # precondition checking. This is so that the behavior of this
51 51 # class exactly matches its C counterpart to try and help
52 52 # prevent surprise breakage for anyone that develops against
53 53 # the pure version.
54 54 if data and data[-1:] != b'\n':
55 55 raise ValueError(b'Manifest did not end in a newline.')
56 56 prev = None
57 57 for l in data.splitlines():
58 58 if prev is not None and prev > l:
59 59 raise ValueError(b'Manifest lines not in sorted order.')
60 60 prev = l
61 61 f, n = l.split(b'\0')
62 62 nl = len(n)
63 63 flags = n[-1:]
64 64 if flags in _manifestflags:
65 65 n = n[:-1]
66 66 nl -= 1
67 67 else:
68 68 flags = b''
69 69 if nl != 2 * nodelen:
70 70 raise ValueError(b'Invalid manifest line')
71 71
72 72 yield f, bin(n), flags
73 73
74 74
75 75 def _text(it):
76 76 files = []
77 77 lines = []
78 78 for f, n, fl in it:
79 79 files.append(f)
80 80 # if this is changed to support newlines in filenames,
81 81 # be sure to check the templates/ dir again (especially *-raw.tmpl)
82 82 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
83 83
84 84 _checkforbidden(files)
85 85 return b''.join(lines)
86 86
87 87
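# Illustration of the manifest text format that ``_parse`` and ``_text``
# round-trip (hypothetical entry; 40 hex characters for a sha1 node,
# optionally followed by a one-byte flag such as b'x'):
#
#   b'foo/bar.py\x00' + b'a' * 40 + b'x\n'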
88 88 class lazymanifestiter(object):
89 89 def __init__(self, lm):
90 90 self.pos = 0
91 91 self.lm = lm
92 92
93 93 def __iter__(self):
94 94 return self
95 95
96 96 def next(self):
97 97 try:
98 98 data, pos = self.lm._get(self.pos)
99 99 except IndexError:
100 100 raise StopIteration
101 101 if pos == -1:
102 102 self.pos += 1
103 103 return data[0]
104 104 self.pos += 1
105 105 zeropos = data.find(b'\x00', pos)
106 106 return data[pos:zeropos]
107 107
108 108 __next__ = next
109 109
110 110
111 111 class lazymanifestiterentries(object):
112 112 def __init__(self, lm):
113 113 self.lm = lm
114 114 self.pos = 0
115 115
116 116 def __iter__(self):
117 117 return self
118 118
119 119 def next(self):
120 120 try:
121 121 data, pos = self.lm._get(self.pos)
122 122 except IndexError:
123 123 raise StopIteration
124 124 if pos == -1:
125 125 self.pos += 1
126 126 return data
127 127 zeropos = data.find(b'\x00', pos)
128 128 nlpos = data.find(b'\n', pos)
129 129 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
130 130 raise error.StorageError(b'Invalid manifest line')
131 131 flags = data[nlpos - 1 : nlpos]
132 132 if flags in _manifestflags:
133 133 hlen = nlpos - zeropos - 2
134 134 else:
135 135 hlen = nlpos - zeropos - 1
136 136 flags = b''
137 137 if hlen != 2 * self.lm._nodelen:
138 138 raise error.StorageError(b'Invalid manifest line')
139 139 hashval = unhexlify(
140 140 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
141 141 )
142 142 self.pos += 1
143 143 return (data[pos:zeropos], hashval, flags)
144 144
145 145 __next__ = next
146 146
147 147
148 148 def unhexlify(data, extra, pos, length):
149 149 s = bin(data[pos : pos + length])
150 150 if extra:
151 151 s += chr(extra & 0xFF)
152 152 return s
153 153
154 154
155 155 def _cmp(a, b):
156 156 return (a > b) - (a < b)
157 157
158 158
159 159 _manifestflags = {b'', b'l', b't', b'x'}
160 160
161 161
162 162 class _lazymanifest(object):
163 163 """A pure python manifest backed by a byte string. It is supplimented with
164 164 internal lists as it is modified, until it is compacted back to a pure byte
165 165 string.
166 166
167 167 ``data`` is the initial manifest data.
168 168
169 169 ``positions`` is a list of offsets, one per manifest entry. Positive
170 170 values are offsets into ``data``, negative values are offsets into the
171 171 ``extradata`` list. When an entry is removed, its entry is dropped from
172 172 ``positions``. The values are encoded such that when walking the list and
173 173 indexing into ``data`` or ``extradata`` as appropriate, the entries are
174 174 sorted by filename.
175 175
176 176 ``extradata`` is a list of (key, hash, flags) for entries that were added or
177 177 modified since the manifest was created or compacted.
178 178 """
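    # Illustration (hypothetical state): for entries a, b, c where b was
    # modified since the last compaction, ``positions`` could be
    # [0, -1, 34] -- 0 and 34 index into ``data``, while -1 refers to
    # ``extradata[0]``, which holds the replacement (key, hash, flags).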
179 179
180 180 def __init__(
181 181 self,
182 182 nodelen,
183 183 data,
184 184 positions=None,
185 185 extrainfo=None,
186 186 extradata=None,
187 187 hasremovals=False,
188 188 ):
189 189 self._nodelen = nodelen
190 190 if positions is None:
191 191 self.positions = self.findlines(data)
192 192 self.extrainfo = [0] * len(self.positions)
193 193 self.data = data
194 194 self.extradata = []
195 195 self.hasremovals = False
196 196 else:
197 197 self.positions = positions[:]
198 198 self.extrainfo = extrainfo[:]
199 199 self.extradata = extradata[:]
200 200 self.data = data
201 201 self.hasremovals = hasremovals
202 202
203 203 def findlines(self, data):
204 204 if not data:
205 205 return []
206 206 pos = data.find(b"\n")
207 207 if pos == -1 or data[-1:] != b'\n':
208 208 raise ValueError(b"Manifest did not end in a newline.")
209 209 positions = [0]
210 210 prev = data[: data.find(b'\x00')]
211 211 while pos < len(data) - 1 and pos != -1:
212 212 positions.append(pos + 1)
213 213 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
214 214 if nexts < prev:
215 215 raise ValueError(b"Manifest lines not in sorted order.")
216 216 prev = nexts
217 217 pos = data.find(b"\n", pos + 1)
218 218 return positions
219 219
220 220 def _get(self, index):
221 221 # get the position encoded in pos:
222 222 # positive number is an index in 'data'
223 223 # negative number is in extrapieces
224 224 pos = self.positions[index]
225 225 if pos >= 0:
226 226 return self.data, pos
227 227 return self.extradata[-pos - 1], -1
228 228
229 229 def _getkey(self, pos):
230 230 if pos >= 0:
231 231 return self.data[pos : self.data.find(b'\x00', pos + 1)]
232 232 return self.extradata[-pos - 1][0]
233 233
234 234 def bsearch(self, key):
235 235 first = 0
236 236 last = len(self.positions) - 1
237 237
238 238 while first <= last:
239 239 midpoint = (first + last) // 2
240 240 nextpos = self.positions[midpoint]
241 241 candidate = self._getkey(nextpos)
242 242 r = _cmp(key, candidate)
243 243 if r == 0:
244 244 return midpoint
245 245 else:
246 246 if r < 0:
247 247 last = midpoint - 1
248 248 else:
249 249 first = midpoint + 1
250 250 return -1
251 251
252 252 def bsearch2(self, key):
253 253 # same as the above, but will always return the position
254 254 # done for performance reasons
255 255 first = 0
256 256 last = len(self.positions) - 1
257 257
258 258 while first <= last:
259 259 midpoint = (first + last) // 2
260 260 nextpos = self.positions[midpoint]
261 261 candidate = self._getkey(nextpos)
262 262 r = _cmp(key, candidate)
263 263 if r == 0:
264 264 return (midpoint, True)
265 265 else:
266 266 if r < 0:
267 267 last = midpoint - 1
268 268 else:
269 269 first = midpoint + 1
270 270 return (first, False)
271 271
272 272 def __contains__(self, key):
273 273 return self.bsearch(key) != -1
274 274
275 275 def __getitem__(self, key):
276 276 if not isinstance(key, bytes):
277 277 raise TypeError(b"getitem: manifest keys must be a byte string.")
278 278 needle = self.bsearch(key)
279 279 if needle == -1:
280 280 raise KeyError
281 281 data, pos = self._get(needle)
282 282 if pos == -1:
283 283 return (data[1], data[2])
284 284 zeropos = data.find(b'\x00', pos)
285 285 nlpos = data.find(b'\n', zeropos)
286 286 assert 0 <= needle <= len(self.positions)
287 287 assert len(self.extrainfo) == len(self.positions)
288 288 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
289 289 raise error.StorageError(b'Invalid manifest line')
290 290 hlen = nlpos - zeropos - 1
291 291 flags = data[nlpos - 1 : nlpos]
292 292 if flags in _manifestflags:
293 293 hlen -= 1
294 294 else:
295 295 flags = b''
296 296 if hlen != 2 * self._nodelen:
297 297 raise error.StorageError(b'Invalid manifest line')
298 298 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
299 299 return (hashval, flags)
300 300
301 301 def __delitem__(self, key):
302 302 needle, found = self.bsearch2(key)
303 303 if not found:
304 304 raise KeyError
305 305 cur = self.positions[needle]
306 306 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
307 307 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
308 308 if cur >= 0:
309 309 # This does NOT unsort the list as far as the search functions are
310 310 # concerned, as they only examine lines mapped by self.positions.
311 311 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
312 312 self.hasremovals = True
313 313
314 314 def __setitem__(self, key, value):
315 315 if not isinstance(key, bytes):
316 316 raise TypeError(b"setitem: manifest keys must be a byte string.")
317 317 if not isinstance(value, tuple) or len(value) != 2:
318 318 raise TypeError(
319 319 b"Manifest values must be a tuple of (node, flags)."
320 320 )
321 321 hashval = value[0]
322 322 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
323 323 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
324 324 flags = value[1]
325 325 if not isinstance(flags, bytes) or len(flags) > 1:
326 326 raise TypeError(b"flags must be a 0 or 1 byte string, got %r" % flags)
327 327 needle, found = self.bsearch2(key)
328 328 if found:
329 329 # put the item
330 330 pos = self.positions[needle]
331 331 if pos < 0:
332 332 self.extradata[-pos - 1] = (key, hashval, value[1])
333 333 else:
334 334 # just don't bother
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions[needle] = -len(self.extradata)
337 337 else:
338 338 # not found, put it in with extra positions
339 339 self.extradata.append((key, hashval, value[1]))
340 340 self.positions = (
341 341 self.positions[:needle]
342 342 + [-len(self.extradata)]
343 343 + self.positions[needle:]
344 344 )
345 345 self.extrainfo = (
346 346 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
347 347 )
348 348
349 349 def copy(self):
350 350 # XXX call _compact like in C?
351 351 return _lazymanifest(
352 352 self._nodelen,
353 353 self.data,
354 354 self.positions,
355 355 self.extrainfo,
356 356 self.extradata,
357 357 self.hasremovals,
358 358 )
359 359
360 360 def _compact(self):
361 361 # hopefully not called TOO often
362 362 if len(self.extradata) == 0 and not self.hasremovals:
363 363 return
364 364 l = []
365 365 i = 0
366 366 offset = 0
367 367 self.extrainfo = [0] * len(self.positions)
368 368 while i < len(self.positions):
369 369 if self.positions[i] >= 0:
370 370 cur = self.positions[i]
371 371 last_cut = cur
372 372
373 373 # Collect all contiguous entries in the buffer at the current
374 374 # offset, breaking out only for added/modified items held in
375 375 # extradata, or a deleted line prior to the next position.
376 376 while True:
377 377 self.positions[i] = offset
378 378 i += 1
379 379 if i == len(self.positions) or self.positions[i] < 0:
380 380 break
381 381
382 382 # A removed file has no positions[] entry, but does have an
383 383 # overwritten first byte. Break out and find the end of the
384 384 # current good entry/entries if there is a removed file
385 385 # before the next position.
386 386 if (
387 387 self.hasremovals
388 388 and self.data.find(b'\n\x00', cur, self.positions[i])
389 389 != -1
390 390 ):
391 391 break
392 392
393 393 offset += self.positions[i] - cur
394 394 cur = self.positions[i]
395 395 end_cut = self.data.find(b'\n', cur)
396 396 if end_cut != -1:
397 397 end_cut += 1
398 398 offset += end_cut - cur
399 399 l.append(self.data[last_cut:end_cut])
400 400 else:
401 401 while i < len(self.positions) and self.positions[i] < 0:
402 402 cur = self.positions[i]
403 403 t = self.extradata[-cur - 1]
404 404 l.append(self._pack(t))
405 405 self.positions[i] = offset
406 406 # Hashes are either 20 bytes (old sha1s) or 32
407 407 # bytes (new non-sha1).
408 408 hlen = 20
409 409 if len(t[1]) > 25:
410 410 hlen = 32
411 411 if len(t[1]) > hlen:
412 412 self.extrainfo[i] = ord(t[1][hlen + 1])
413 413 offset += len(l[-1])
414 414 i += 1
415 415 self.data = b''.join(l)
416 416 self.hasremovals = False
417 417 self.extradata = []
418 418
419 419 def _pack(self, d):
420 420 n = d[1]
421 421 assert len(n) in (20, 32)
422 422 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
423 423
424 424 def text(self):
425 425 self._compact()
426 426 return self.data
427 427
428 428 def diff(self, m2, clean=False):
429 429 '''Finds changes between the current manifest and m2.'''
430 430 # XXX think whether efficiency matters here
431 431 diff = {}
432 432
433 433 for fn, e1, flags in self.iterentries():
434 434 if fn not in m2:
435 435 diff[fn] = (e1, flags), (None, b'')
436 436 else:
437 437 e2 = m2[fn]
438 438 if (e1, flags) != e2:
439 439 diff[fn] = (e1, flags), e2
440 440 elif clean:
441 441 diff[fn] = None
442 442
443 443 for fn, e2, flags in m2.iterentries():
444 444 if fn not in self:
445 445 diff[fn] = (None, b''), (e2, flags)
446 446
447 447 return diff
448 448
449 449 def iterentries(self):
450 450 return lazymanifestiterentries(self)
451 451
452 452 def iterkeys(self):
453 453 return lazymanifestiter(self)
454 454
455 455 def __iter__(self):
456 456 return lazymanifestiter(self)
457 457
458 458 def __len__(self):
459 459 return len(self.positions)
460 460
461 461 def filtercopy(self, filterfn):
462 462 # XXX should be optimized
463 463 c = _lazymanifest(self._nodelen, b'')
464 464 for f, n, fl in self.iterentries():
465 465 if filterfn(f):
466 466 c[f] = n, fl
467 467 return c
468 468
469 469
470 470 try:
471 471 _lazymanifest = parsers.lazymanifest
472 472 except AttributeError:
473 473 pass
474 474
475 475
476 476 @interfaceutil.implementer(repository.imanifestdict)
477 477 class manifestdict(object):
478 478 def __init__(self, nodelen, data=b''):
479 479 self._nodelen = nodelen
480 480 self._lm = _lazymanifest(nodelen, data)
481 481
482 482 def __getitem__(self, key):
483 483 return self._lm[key][0]
484 484
485 485 def find(self, key):
486 486 return self._lm[key]
487 487
488 488 def __len__(self):
489 489 return len(self._lm)
490 490
491 491 def __nonzero__(self):
492 492 # nonzero is covered by the __len__ function, but implementing it here
493 493 # makes it easier for extensions to override.
494 494 return len(self._lm) != 0
495 495
496 496 __bool__ = __nonzero__
497 497
498 498 def __setitem__(self, key, node):
499 499 self._lm[key] = node, self.flags(key)
500 500
501 501 def __contains__(self, key):
502 502 if key is None:
503 503 return False
504 504 return key in self._lm
505 505
506 506 def __delitem__(self, key):
507 507 del self._lm[key]
508 508
509 509 def __iter__(self):
510 510 return self._lm.__iter__()
511 511
512 512 def iterkeys(self):
513 513 return self._lm.iterkeys()
514 514
515 515 def keys(self):
516 516 return list(self.iterkeys())
517 517
518 518 def filesnotin(self, m2, match=None):
519 519 '''Set of files in this manifest that are not in the other'''
520 520 if match is not None:
521 521 match = matchmod.badmatch(match, lambda path, msg: None)
522 522 sm2 = set(m2.walk(match))
523 523 return {f for f in self.walk(match) if f not in sm2}
524 524 return {f for f in self if f not in m2}
525 525
526 526 @propertycache
527 527 def _dirs(self):
528 528 return pathutil.dirs(self)
529 529
530 530 def dirs(self):
531 531 return self._dirs
532 532
533 533 def hasdir(self, dir):
534 534 return dir in self._dirs
535 535
536 536 def _filesfastpath(self, match):
537 537 """Checks whether we can correctly and quickly iterate over matcher
538 538 files instead of over manifest files."""
539 539 files = match.files()
540 540 return len(files) < 100 and (
541 541 match.isexact()
542 542 or (match.prefix() and all(fn in self for fn in files))
543 543 )
544 544
545 545 def walk(self, match):
546 546 """Generates matching file names.
547 547
548 548 Equivalent to manifest.matches(match).iterkeys(), but without creating
549 549 an entirely new manifest.
550 550
551 551 It also reports nonexistent files by marking them bad with match.bad().
552 552 """
553 553 if match.always():
554 554 for f in iter(self):
555 555 yield f
556 556 return
557 557
558 558 fset = set(match.files())
559 559
560 560 # avoid the entire walk if we're only looking for specific files
561 561 if self._filesfastpath(match):
562 562 for fn in sorted(fset):
563 563 if fn in self:
564 564 yield fn
565 565 return
566 566
567 567 for fn in self:
568 568 if fn in fset:
569 569 # specified pattern is the exact name
570 570 fset.remove(fn)
571 571 if match(fn):
572 572 yield fn
573 573
574 574 # for dirstate.walk, files=[''] means "walk the whole tree".
575 575 # follow that here, too
576 576 fset.discard(b'')
577 577
578 578 for fn in sorted(fset):
579 579 if not self.hasdir(fn):
580 580 match.bad(fn, None)
581 581
582 582 def _matches(self, match):
583 583 '''generate a new manifest filtered by the match argument'''
584 584 if match.always():
585 585 return self.copy()
586 586
587 587 if self._filesfastpath(match):
588 588 m = manifestdict(self._nodelen)
589 589 lm = self._lm
590 590 for fn in match.files():
591 591 if fn in lm:
592 592 m._lm[fn] = lm[fn]
593 593 return m
594 594
595 595 m = manifestdict(self._nodelen)
596 596 m._lm = self._lm.filtercopy(match)
597 597 return m
598 598
599 599 def diff(self, m2, match=None, clean=False):
600 600 """Finds changes between the current manifest and m2.
601 601
602 602 Args:
603 603 m2: the manifest to which this manifest should be compared.
604 604 clean: if true, include files unchanged between these manifests
605 605 with a None value in the returned dictionary.
606 606
607 607 The result is returned as a dict with filename as key and
608 608 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
609 609 nodeid in the current/other manifest and fl1/fl2 is the flag
610 610 in the current/other manifest. Where the file does not exist,
611 611 the nodeid will be None and the flags will be the empty
612 612 string.
613 613 """
614 614 if match:
615 615 m1 = self._matches(match)
616 616 m2 = m2._matches(match)
617 617 return m1.diff(m2, clean=clean)
618 618 return self._lm.diff(m2._lm, clean)
619 619
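    # Hedged sketch of consuming diff() output (hypothetical manifests):
    #
    #   for fn, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
    #       if n1 is None:
    #           ...                  # fn exists only in m2
    #       elif n2 is None:
    #           ...                  # fn exists only in m1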
620 620 def setflag(self, key, flag):
621 621 if flag not in _manifestflags:
622 622 raise TypeError(b"Invalid manifest flag set.")
623 623 self._lm[key] = self[key], flag
624 624
625 625 def get(self, key, default=None):
626 626 try:
627 627 return self._lm[key][0]
628 628 except KeyError:
629 629 return default
630 630
631 631 def flags(self, key):
632 632 try:
633 633 return self._lm[key][1]
634 634 except KeyError:
635 635 return b''
636 636
637 637 def copy(self):
638 638 c = manifestdict(self._nodelen)
639 639 c._lm = self._lm.copy()
640 640 return c
641 641
642 642 def items(self):
643 643 return (x[:2] for x in self._lm.iterentries())
644 644
645 645 def iteritems(self):
646 646 return (x[:2] for x in self._lm.iterentries())
647 647
648 648 def iterentries(self):
649 649 return self._lm.iterentries()
650 650
651 651 def text(self):
652 652 # most likely uses native version
653 653 return self._lm.text()
654 654
655 655 def fastdelta(self, base, changes):
656 656 """Given a base manifest text as a bytearray and a list of changes
657 657 relative to that text, compute a delta that can be used by revlog.
658 658 """
659 659 delta = []
660 660 dstart = None
661 661 dend = None
662 662 dline = [b""]
663 663 start = 0
664 664 # zero copy representation of base as a buffer
665 665 addbuf = util.buffer(base)
666 666
667 667 changes = list(changes)
668 668 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
669 669 # start with a readonly loop that finds the offset of
670 670 # each line and creates the deltas
671 671 for f, todelete in changes:
672 672 # start/end will delimit the existing line, or both mark the insert point
673 673 start, end = _msearch(addbuf, f, start)
674 674 if not todelete:
675 675 h, fl = self._lm[f]
676 676 l = b"%s\0%s%s\n" % (f, hex(h), fl)
677 677 else:
678 678 if start == end:
679 679 # item we want to delete was not found, error out
680 680 raise AssertionError(
681 681 _(b"failed to remove %s from manifest") % f
682 682 )
683 683 l = b""
684 684 if dstart is not None and dstart <= start and dend >= start:
685 685 if dend < end:
686 686 dend = end
687 687 if l:
688 688 dline.append(l)
689 689 else:
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 dstart = start
693 693 dend = end
694 694 dline = [l]
695 695
696 696 if dstart is not None:
697 697 delta.append([dstart, dend, b"".join(dline)])
698 698 # apply the delta to the base, and get a delta for addrevision
699 699 deltatext, arraytext = _addlistdelta(base, delta)
700 700 else:
701 701 # For large changes, it's much cheaper to just build the text and
702 702 # diff it.
703 703 arraytext = bytearray(self.text())
704 704 deltatext = mdiff.textdiff(
705 705 util.buffer(base), util.buffer(arraytext)
706 706 )
707 707
708 708 return arraytext, deltatext
709 709
710 710
711 711 def _msearch(m, s, lo=0, hi=None):
712 712 """return a tuple (start, end) that says where to find s within m.
713 713
714 714 If the string is found m[start:end] are the line containing
715 715 that string. If start == end the string was not found and
716 716 they indicate the proper sorted insertion point.
717 717
718 718 m should be a buffer, a memoryview or a byte string.
719 719 s is a byte string."""
720 720
721 721 def advance(i, c):
722 722 while i < lenm and m[i : i + 1] != c:
723 723 i += 1
724 724 return i
725 725
726 726 if not s:
727 727 return (lo, lo)
728 728 lenm = len(m)
729 729 if not hi:
730 730 hi = lenm
731 731 while lo < hi:
732 732 mid = (lo + hi) // 2
733 733 start = mid
734 734 while start > 0 and m[start - 1 : start] != b'\n':
735 735 start -= 1
736 736 end = advance(start, b'\0')
737 737 if bytes(m[start:end]) < s:
738 738 # we know that after the null there are 40 bytes of sha1
739 739 # this translates to the bisect lo = mid + 1
740 740 lo = advance(end + 40, b'\n') + 1
741 741 else:
742 742 # this translates to the bisect hi = mid
743 743 hi = start
744 744 end = advance(lo, b'\0')
745 745 found = m[lo:end]
746 746 if s == found:
747 747 # we know that after the null there are 40 bytes of sha1
748 748 end = advance(end + 40, b'\n')
749 749 return (lo, end + 1)
750 750 else:
751 751 return (lo, lo)
752 752
753 753
754 754 def _checkforbidden(l):
755 755 """Check filenames for illegal characters."""
756 756 for f in l:
757 757 if b'\n' in f or b'\r' in f:
758 758 raise error.StorageError(
759 759 _(b"'\\n' and '\\r' disallowed in filenames: %r")
760 760 % pycompat.bytestr(f)
761 761 )
762 762
763 763
764 764 # apply the changes collected during the bisect loop to our addlist
765 765 # return a delta suitable for addrevision
766 766 def _addlistdelta(addlist, x):
767 767 # for large addlist arrays, building a new array is cheaper
768 768 # than repeatedly modifying the existing one
769 769 currentposition = 0
770 770 newaddlist = bytearray()
771 771
772 772 for start, end, content in x:
773 773 newaddlist += addlist[currentposition:start]
774 774 if content:
775 775 newaddlist += bytearray(content)
776 776
777 777 currentposition = end
778 778
779 779 newaddlist += addlist[currentposition:]
780 780
781 781 deltatext = b"".join(
782 782 struct.pack(b">lll", start, end, len(content)) + content
783 783 for start, end, content in x
784 784 )
785 785 return deltatext, newaddlist
786 786
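# Worked example of the delta encoding above (hypothetical numbers): a
# fragment replacing bytes [10, 25) of the base with 12 new bytes becomes
# struct.pack(b'>lll', 10, 25, 12) + content -- the start/end/length
# framing of a delta suitable for addrevision.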
787 787
788 788 def _splittopdir(f):
789 789 if b'/' in f:
790 790 dir, subpath = f.split(b'/', 1)
791 791 return dir + b'/', subpath
792 792 else:
793 793 return b'', f
794 794
795 795
796 796 _noop = lambda s: None
797 797
798 798
799 799 @interfaceutil.implementer(repository.imanifestdict)
800 800 class treemanifest(object):
801 801 def __init__(self, nodeconstants, dir=b'', text=b''):
802 802 self._dir = dir
803 803 self.nodeconstants = nodeconstants
804 804 self._node = self.nodeconstants.nullid
805 805 self._nodelen = self.nodeconstants.nodelen
806 806 self._loadfunc = _noop
807 807 self._copyfunc = _noop
808 808 self._dirty = False
809 809 self._dirs = {}
810 810 self._lazydirs = {}
811 811 # Using _lazymanifest here is a little slower than plain old dicts
812 812 self._files = {}
813 813 self._flags = {}
814 814 if text:
815 815
816 816 def readsubtree(subdir, subm):
817 817 raise AssertionError(
818 818 b'treemanifest constructor only accepts flat manifests'
819 819 )
820 820
821 821 self.parse(text, readsubtree)
822 822 self._dirty = True # Mark flat manifest dirty after parsing
823 823
824 824 def _subpath(self, path):
825 825 return self._dir + path
826 826
827 827 def _loadalllazy(self):
828 828 selfdirs = self._dirs
829 829 subpath = self._subpath
830 830 for d, (node, readsubtree, docopy) in pycompat.iteritems(
831 831 self._lazydirs
832 832 ):
833 833 if docopy:
834 834 selfdirs[d] = readsubtree(subpath(d), node).copy()
835 835 else:
836 836 selfdirs[d] = readsubtree(subpath(d), node)
837 837 self._lazydirs = {}
838 838
839 839 def _loadlazy(self, d):
840 840 v = self._lazydirs.get(d)
841 841 if v:
842 842 node, readsubtree, docopy = v
843 843 if docopy:
844 844 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
845 845 else:
846 846 self._dirs[d] = readsubtree(self._subpath(d), node)
847 847 del self._lazydirs[d]
848 848
849 849 def _loadchildrensetlazy(self, visit):
850 850 if not visit:
851 851 return None
852 852 if visit == b'all' or visit == b'this':
853 853 self._loadalllazy()
854 854 return None
855 855
856 856 loadlazy = self._loadlazy
857 857 for k in visit:
858 858 loadlazy(k + b'/')
859 859 return visit
860 860
861 861 def _loaddifflazy(self, t1, t2):
862 862 """load items in t1 and t2 if they're needed for diffing.
863 863
864 864 The criteria currently is:
865 865 - if it's not present in _lazydirs in either t1 or t2, load it in the
866 866 other (it may already be loaded or it may not exist, doesn't matter)
867 867 - if it's present in _lazydirs in both, compare the nodeid; if it
868 868 differs, load it in both
869 869 """
870 870 toloadlazy = []
871 871 for d, v1 in pycompat.iteritems(t1._lazydirs):
872 872 v2 = t2._lazydirs.get(d)
873 873 if not v2 or v2[0] != v1[0]:
874 874 toloadlazy.append(d)
875 875 for d, v1 in pycompat.iteritems(t2._lazydirs):
876 876 if d not in t1._lazydirs:
877 877 toloadlazy.append(d)
878 878
879 879 for d in toloadlazy:
880 880 t1._loadlazy(d)
881 881 t2._loadlazy(d)
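    # Worked example of the criteria above (a sketch): if t1 and t2 both
    # lazily reference b'sub/' at the same node, neither side is loaded;
    # if the nodes differ, or only one side has the lazy entry, b'sub/' is
    # loaded on both sides so the diff can descend into it.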
882 882
883 883 def __len__(self):
884 884 self._load()
885 885 size = len(self._files)
886 886 self._loadalllazy()
887 887 for m in self._dirs.values():
888 888 size += m.__len__()
889 889 return size
890 890
891 891 def __nonzero__(self):
892 892 # Faster than "__len__() != 0" since it avoids loading sub-manifests
893 893 return not self._isempty()
894 894
895 895 __bool__ = __nonzero__
896 896
897 897 def _isempty(self):
898 898 self._load() # for consistency; already loaded by all callers
899 899 # See if we can skip loading everything.
900 900 if self._files or (
901 901 self._dirs and any(not m._isempty() for m in self._dirs.values())
902 902 ):
903 903 return False
904 904 self._loadalllazy()
905 905 return not self._dirs or all(m._isempty() for m in self._dirs.values())
906 906
907 907 @encoding.strmethod
908 908 def __repr__(self):
909 909 return (
910 910 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
911 911 % (
912 912 self._dir,
913 913 hex(self._node),
914 914 bool(self._loadfunc is _noop),
915 915 self._dirty,
916 916 id(self),
917 917 )
918 918 )
919 919
920 920 def dir(self):
921 921 """The directory that this tree manifest represents, including a
922 922 trailing '/'. Empty string for the repo root directory."""
923 923 return self._dir
924 924
925 925 def node(self):
926 926 """The node of this instance. nullid for unsaved instances. Should
927 927 be updated when the instance is read from or written to a revlog.
928 928 """
929 929 assert not self._dirty
930 930 return self._node
931 931
932 932 def setnode(self, node):
933 933 self._node = node
934 934 self._dirty = False
935 935
936 936 def iterentries(self):
937 937 self._load()
938 938 self._loadalllazy()
939 939 for p, n in sorted(
940 940 itertools.chain(self._dirs.items(), self._files.items())
941 941 ):
942 942 if p in self._files:
943 943 yield self._subpath(p), n, self._flags.get(p, b'')
944 944 else:
945 945 for x in n.iterentries():
946 946 yield x
947 947
948 948 def items(self):
949 949 self._load()
950 950 self._loadalllazy()
951 951 for p, n in sorted(
952 952 itertools.chain(self._dirs.items(), self._files.items())
953 953 ):
954 954 if p in self._files:
955 955 yield self._subpath(p), n
956 956 else:
957 957 for f, sn in pycompat.iteritems(n):
958 958 yield f, sn
959 959
960 960 iteritems = items
961 961
962 962 def iterkeys(self):
963 963 self._load()
964 964 self._loadalllazy()
965 965 for p in sorted(itertools.chain(self._dirs, self._files)):
966 966 if p in self._files:
967 967 yield self._subpath(p)
968 968 else:
969 969 for f in self._dirs[p]:
970 970 yield f
971 971
972 972 def keys(self):
973 973 return list(self.iterkeys())
974 974
975 975 def __iter__(self):
976 976 return self.iterkeys()
977 977
978 978 def __contains__(self, f):
979 979 if f is None:
980 980 return False
981 981 self._load()
982 982 dir, subpath = _splittopdir(f)
983 983 if dir:
984 984 self._loadlazy(dir)
985 985
986 986 if dir not in self._dirs:
987 987 return False
988 988
989 989 return self._dirs[dir].__contains__(subpath)
990 990 else:
991 991 return f in self._files
992 992
993 993 def get(self, f, default=None):
994 994 self._load()
995 995 dir, subpath = _splittopdir(f)
996 996 if dir:
997 997 self._loadlazy(dir)
998 998
999 999 if dir not in self._dirs:
1000 1000 return default
1001 1001 return self._dirs[dir].get(subpath, default)
1002 1002 else:
1003 1003 return self._files.get(f, default)
1004 1004
1005 1005 def __getitem__(self, f):
1006 1006 self._load()
1007 1007 dir, subpath = _splittopdir(f)
1008 1008 if dir:
1009 1009 self._loadlazy(dir)
1010 1010
1011 1011 return self._dirs[dir].__getitem__(subpath)
1012 1012 else:
1013 1013 return self._files[f]
1014 1014
1015 1015 def flags(self, f):
1016 1016 self._load()
1017 1017 dir, subpath = _splittopdir(f)
1018 1018 if dir:
1019 1019 self._loadlazy(dir)
1020 1020
1021 1021 if dir not in self._dirs:
1022 1022 return b''
1023 1023 return self._dirs[dir].flags(subpath)
1024 1024 else:
1025 1025 if f in self._lazydirs or f in self._dirs:
1026 1026 return b''
1027 1027 return self._flags.get(f, b'')
1028 1028
1029 1029 def find(self, f):
1030 1030 self._load()
1031 1031 dir, subpath = _splittopdir(f)
1032 1032 if dir:
1033 1033 self._loadlazy(dir)
1034 1034
1035 1035 return self._dirs[dir].find(subpath)
1036 1036 else:
1037 1037 return self._files[f], self._flags.get(f, b'')
1038 1038
1039 1039 def __delitem__(self, f):
1040 1040 self._load()
1041 1041 dir, subpath = _splittopdir(f)
1042 1042 if dir:
1043 1043 self._loadlazy(dir)
1044 1044
1045 1045 self._dirs[dir].__delitem__(subpath)
1046 1046 # If the directory is now empty, remove it
1047 1047 if self._dirs[dir]._isempty():
1048 1048 del self._dirs[dir]
1049 1049 else:
1050 1050 del self._files[f]
1051 1051 if f in self._flags:
1052 1052 del self._flags[f]
1053 1053 self._dirty = True
1054 1054
1055 1055 def __setitem__(self, f, n):
1056 1056 assert n is not None
1057 1057 self._load()
1058 1058 dir, subpath = _splittopdir(f)
1059 1059 if dir:
1060 1060 self._loadlazy(dir)
1061 1061 if dir not in self._dirs:
1062 1062 self._dirs[dir] = treemanifest(
1063 1063 self.nodeconstants, self._subpath(dir)
1064 1064 )
1065 1065 self._dirs[dir].__setitem__(subpath, n)
1066 1066 else:
1067 1067 # manifest nodes are either 20 bytes or 32 bytes,
1068 1068 # depending on the hash in use. Assert this as historically
1069 1069 # sometimes extra bytes were added.
1070 1070 assert len(n) in (20, 32)
1071 1071 self._files[f] = n
1072 1072 self._dirty = True
1073 1073
1074 1074 def _load(self):
1075 1075 if self._loadfunc is not _noop:
1076 1076 lf, self._loadfunc = self._loadfunc, _noop
1077 1077 lf(self)
1078 1078 elif self._copyfunc is not _noop:
1079 1079 cf, self._copyfunc = self._copyfunc, _noop
1080 1080 cf(self)
1081 1081
1082 1082 def setflag(self, f, flags):
1083 1083 """Set the flags (symlink, executable) for path f."""
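        # The accepted values are assumed (from the module-level
        # _manifestflags set defined earlier in this file) to be b''
        # (none), b'l' (symlink), b'x' (executable) and b't' (tree).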
1084 1084 if flags not in _manifestflags:
1085 1085 raise TypeError(b"Invalid manifest flag set.")
1086 1086 self._load()
1087 1087 dir, subpath = _splittopdir(f)
1088 1088 if dir:
1089 1089 self._loadlazy(dir)
1090 1090 if dir not in self._dirs:
1091 1091 self._dirs[dir] = treemanifest(
1092 1092 self.nodeconstants, self._subpath(dir)
1093 1093 )
1094 1094 self._dirs[dir].setflag(subpath, flags)
1095 1095 else:
1096 1096 self._flags[f] = flags
1097 1097 self._dirty = True
1098 1098
1099 1099 def copy(self):
1100 1100 copy = treemanifest(self.nodeconstants, self._dir)
1101 1101 copy._node = self._node
1102 1102 copy._dirty = self._dirty
1103 1103 if self._copyfunc is _noop:
1104 1104
1105 1105 def _copyfunc(s):
1106 1106 self._load()
1107 1107 s._lazydirs = {
1108 1108 d: (n, r, True)
1109 1109 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1110 1110 }
1111 1111 sdirs = s._dirs
1112 1112 for d, v in pycompat.iteritems(self._dirs):
1113 1113 sdirs[d] = v.copy()
1114 1114 s._files = dict.copy(self._files)
1115 1115 s._flags = dict.copy(self._flags)
1116 1116
1117 1117 if self._loadfunc is _noop:
1118 1118 _copyfunc(copy)
1119 1119 else:
1120 1120 copy._copyfunc = _copyfunc
1121 1121 else:
1122 1122 copy._copyfunc = self._copyfunc
1123 1123 return copy
1124 1124
1125 1125 def filesnotin(self, m2, match=None):
1126 1126 '''Set of files in this manifest that are not in the other'''
1127 1127 if match and not match.always():
1128 1128 m1 = self._matches(match)
1129 1129 m2 = m2._matches(match)
1130 1130 return m1.filesnotin(m2)
1131 1131
1132 1132 files = set()
1133 1133
1134 1134 def _filesnotin(t1, t2):
1135 1135 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1136 1136 return
1137 1137 t1._load()
1138 1138 t2._load()
1139 1139 self._loaddifflazy(t1, t2)
1140 1140 for d, m1 in pycompat.iteritems(t1._dirs):
1141 1141 if d in t2._dirs:
1142 1142 m2 = t2._dirs[d]
1143 1143 _filesnotin(m1, m2)
1144 1144 else:
1145 1145 files.update(m1.iterkeys())
1146 1146
1147 1147 for fn in t1._files:
1148 1148 if fn not in t2._files:
1149 1149 files.add(t1._subpath(fn))
1150 1150
1151 1151 _filesnotin(self, m2)
1152 1152 return files
1153 1153
1154 1154 @propertycache
1155 1155 def _alldirs(self):
1156 1156 return pathutil.dirs(self)
1157 1157
1158 1158 def dirs(self):
1159 1159 return self._alldirs
1160 1160
1161 1161 def hasdir(self, dir):
1162 1162 self._load()
1163 1163 topdir, subdir = _splittopdir(dir)
1164 1164 if topdir:
1165 1165 self._loadlazy(topdir)
1166 1166 if topdir in self._dirs:
1167 1167 return self._dirs[topdir].hasdir(subdir)
1168 1168 return False
1169 1169 dirslash = dir + b'/'
1170 1170 return dirslash in self._dirs or dirslash in self._lazydirs
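    # Sketch: for a tree containing the file b'a/b/c', hasdir(b'a') and
    # hasdir(b'a/b') return True, while hasdir(b'a/b/c') returns False
    # because files are not directories.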
1171 1171
1172 1172 def walk(self, match):
1173 1173 """Generates matching file names.
1174 1174
1175 1175 It also reports nonexistent files by marking them bad with match.bad().
1176 1176 """
1177 1177 if match.always():
1178 1178 for f in iter(self):
1179 1179 yield f
1180 1180 return
1181 1181
1182 1182 fset = set(match.files())
1183 1183
1184 1184 for fn in self._walk(match):
1185 1185 if fn in fset:
1186 1186 # specified pattern is the exact name
1187 1187 fset.remove(fn)
1188 1188 yield fn
1189 1189
1190 1190 # for dirstate.walk, files=[''] means "walk the whole tree".
1191 1191 # follow that here, too
1192 1192 fset.discard(b'')
1193 1193
1194 1194 for fn in sorted(fset):
1195 1195 if not self.hasdir(fn):
1196 1196 match.bad(fn, None)
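    # Sketch: walking with a matcher built from files [b'a', b'missing']
    # yields b'a' (when present) and then calls match.bad(b'missing', None),
    # since b'missing' is neither a file nor a directory here.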
1197 1197
1198 1198 def _walk(self, match):
1199 1199 '''Recursively generates matching file names for walk().'''
1200 1200 visit = match.visitchildrenset(self._dir[:-1])
1201 1201 if not visit:
1202 1202 return
1203 1203
1204 1204 # yield this dir's files and walk its submanifests
1205 1205 self._load()
1206 1206 visit = self._loadchildrensetlazy(visit)
1207 1207 for p in sorted(list(self._dirs) + list(self._files)):
1208 1208 if p in self._files:
1209 1209 fullp = self._subpath(p)
1210 1210 if match(fullp):
1211 1211 yield fullp
1212 1212 else:
1213 1213 if not visit or p[:-1] in visit:
1214 1214 for f in self._dirs[p]._walk(match):
1215 1215 yield f
1216 1216
1217 1217 def _matches(self, match):
1218 1218 """recursively generate a new manifest filtered by the match argument."""
1219 1219 if match.always():
1220 1220 return self.copy()
1221 1221 return self._matches_inner(match)
1222 1222
1223 1223 def _matches_inner(self, match):
1224 1224 if match.always():
1225 1225 return self.copy()
1226 1226
1227 1227 visit = match.visitchildrenset(self._dir[:-1])
1228 1228 if visit == b'all':
1229 1229 return self.copy()
1230 1230 ret = treemanifest(self.nodeconstants, self._dir)
1231 1231 if not visit:
1232 1232 return ret
1233 1233
1234 1234 self._load()
1235 1235 for fn in self._files:
1236 1236 # While visitchildrenset *usually* lists only subdirs, this is
1237 1237 # actually up to the matcher and may have some files in the set().
1238 1238 # If visit == 'this', we should obviously look at the files in this
1239 1239 # directory; if visit is a set, and fn is in it, we should inspect
1240 1240 # fn (but no need to inspect things not in the set).
1241 1241 if visit != b'this' and fn not in visit:
1242 1242 continue
1243 1243 fullp = self._subpath(fn)
1244 1244 # visitchildrenset isn't perfect, we still need to call the regular
1245 1245 # matcher code to further filter results.
1246 1246 if not match(fullp):
1247 1247 continue
1248 1248 ret._files[fn] = self._files[fn]
1249 1249 if fn in self._flags:
1250 1250 ret._flags[fn] = self._flags[fn]
1251 1251
1252 1252 visit = self._loadchildrensetlazy(visit)
1253 1253 for dir, subm in pycompat.iteritems(self._dirs):
1254 1254 if visit and dir[:-1] not in visit:
1255 1255 continue
1256 1256 m = subm._matches_inner(match)
1257 1257 if not m._isempty():
1258 1258 ret._dirs[dir] = m
1259 1259
1260 1260 if not ret._isempty():
1261 1261 ret._dirty = True
1262 1262 return ret
1263 1263
1264 1264 def fastdelta(self, base, changes):
1265 1265 raise FastdeltaUnavailable()
1266 1266
1267 1267 def diff(self, m2, match=None, clean=False):
1268 1268 """Finds changes between the current manifest and m2.
1269 1269
1270 1270 Args:
1271 1271 m2: the manifest to which this manifest should be compared.
1272 1272 clean: if true, include files unchanged between these manifests
1273 1273 with a None value in the returned dictionary.
1274 1274
1275 1275 The result is returned as a dict with filename as key and
1276 1276 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1277 1277 nodeid in the current/other manifest and fl1/fl2 is the flag
1278 1278 in the current/other manifest. Where the file does not exist,
1279 1279 the nodeid will be None and the flags will be the empty
1280 1280 string.
1281 1281 """
1282 1282 if match and not match.always():
1283 1283 m1 = self._matches(match)
1284 1284 m2 = m2._matches(match)
1285 1285 return m1.diff(m2, clean=clean)
1286 1286 result = {}
1287 1287 emptytree = treemanifest(self.nodeconstants)
1288 1288
1289 1289 def _iterativediff(t1, t2, stack):
1290 1290 """compares two tree manifests and appends the new tree manifests
1291 1291 which need to be compared onto the stack"""
1292 1292 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1293 1293 return
1294 1294 t1._load()
1295 1295 t2._load()
1296 1296 self._loaddifflazy(t1, t2)
1297 1297
1298 1298 for d, m1 in pycompat.iteritems(t1._dirs):
1299 1299 m2 = t2._dirs.get(d, emptytree)
1300 1300 stack.append((m1, m2))
1301 1301
1302 1302 for d, m2 in pycompat.iteritems(t2._dirs):
1303 1303 if d not in t1._dirs:
1304 1304 stack.append((emptytree, m2))
1305 1305
1306 1306 for fn, n1 in pycompat.iteritems(t1._files):
1307 1307 fl1 = t1._flags.get(fn, b'')
1308 1308 n2 = t2._files.get(fn, None)
1309 1309 fl2 = t2._flags.get(fn, b'')
1310 1310 if n1 != n2 or fl1 != fl2:
1311 1311 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1312 1312 elif clean:
1313 1313 result[t1._subpath(fn)] = None
1314 1314
1315 1315 for fn, n2 in pycompat.iteritems(t2._files):
1316 1316 if fn not in t1._files:
1317 1317 fl2 = t2._flags.get(fn, b'')
1318 1318 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1319 1319
1320 1320 stackls = []
1321 1321 _iterativediff(self, m2, stackls)
1322 1322 while stackls:
1323 1323 t1, t2 = stackls.pop()
1324 1324 # stackls is populated in the function call
1325 1325 _iterativediff(t1, t2, stackls)
1326 1326 return result
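    # Example shape of the result (a sketch with made-up nodeids): if b'a'
    # changed and b'b' exists only in self, diff(m2) returns something like
    #   {b'a': ((n1, b''), (n2, b'')), b'b': ((nb, b'x'), (None, b''))}
    # and with clean=True unchanged files additionally map to None.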
1327 1327
1328 1328 def unmodifiedsince(self, m2):
1329 1329 return not self._dirty and not m2._dirty and self._node == m2._node
1330 1330
1331 1331 def parse(self, text, readsubtree):
1332 1332 selflazy = self._lazydirs
1333 1333 for f, n, fl in _parse(self._nodelen, text):
1334 1334 if fl == b't':
1335 1335 f = f + b'/'
1336 1336 # False below means "doesn't need to be copied" and can use the
1337 1337 # cached value from readsubtree directly.
1338 1338 selflazy[f] = (n, readsubtree, False)
1339 1339 elif b'/' in f:
1340 1340 # This is a flat manifest, so use __setitem__ and setflag rather
1341 1341 # than assigning directly to _files and _flags, so we can
1342 1342 # assign a path in a subdirectory, and to mark dirty (compared
1343 1343 # to nullid).
1344 1344 self[f] = n
1345 1345 if fl:
1346 1346 self.setflag(f, fl)
1347 1347 else:
1348 1348 # Assigning to _files and _flags avoids marking as dirty,
1349 1349 # and should be a little faster.
1350 1350 self._files[f] = n
1351 1351 if fl:
1352 1352 self._flags[f] = fl
1353 1353
1354 1354 def text(self):
1355 1355 """Get the full data of this manifest as a bytestring."""
1356 1356 self._load()
1357 1357 return _text(self.iterentries())
1358 1358
1359 1359 def dirtext(self):
1360 1360 """Get the full data of this directory as a bytestring. Make sure that
1361 1361 any submanifests have been written first, so their nodeids are correct.
1362 1362 """
1363 1363 self._load()
1364 1364 flags = self.flags
1365 1365 lazydirs = [
1366 1366 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1367 1367 ]
1368 1368 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1369 1369 files = [(f, self._files[f], flags(f)) for f in self._files]
1370 1370 return _text(sorted(dirs + files + lazydirs))
1371 1371
1372 1372 def read(self, gettext, readsubtree):
1373 1373 def _load_for_read(s):
1374 1374 s.parse(gettext(), readsubtree)
1375 1375 s._dirty = False
1376 1376
1377 1377 self._loadfunc = _load_for_read
1378 1378
1379 1379 def writesubtrees(self, m1, m2, writesubtree, match):
1380 1380 self._load() # for consistency; should never have any effect here
1381 1381 m1._load()
1382 1382 m2._load()
1383 1383 emptytree = treemanifest(self.nodeconstants)
1384 1384
1385 1385 def getnode(m, d):
1386 1386 ld = m._lazydirs.get(d)
1387 1387 if ld:
1388 1388 return ld[0]
1389 1389 return m._dirs.get(d, emptytree)._node
1390 1390
1391 1391 # let's skip investigating things that `match` says we do not need.
1392 1392 visit = match.visitchildrenset(self._dir[:-1])
1393 1393 visit = self._loadchildrensetlazy(visit)
1394 1394 if visit == b'this' or visit == b'all':
1395 1395 visit = None
1396 1396 for d, subm in pycompat.iteritems(self._dirs):
1397 1397 if visit and d[:-1] not in visit:
1398 1398 continue
1399 1399 subp1 = getnode(m1, d)
1400 1400 subp2 = getnode(m2, d)
1401 1401 if subp1 == self.nodeconstants.nullid:
1402 1402 subp1, subp2 = subp2, subp1
1403 1403 writesubtree(subm, subp1, subp2, match)
1404 1404
1405 1405 def walksubtrees(self, matcher=None):
1406 1406 """Returns an iterator of the subtrees of this manifest, including this
1407 1407 manifest itself.
1408 1408
1409 1409 If `matcher` is provided, it only returns subtrees that match.
1410 1410 """
1411 1411 if matcher and not matcher.visitdir(self._dir[:-1]):
1412 1412 return
1413 1413 if not matcher or matcher(self._dir[:-1]):
1414 1414 yield self
1415 1415
1416 1416 self._load()
1417 1417 # OPT: use visitchildrenset to avoid loading everything.
1418 1418 self._loadalllazy()
1419 1419 for d, subm in pycompat.iteritems(self._dirs):
1420 1420 for subtree in subm.walksubtrees(matcher=matcher):
1421 1421 yield subtree
1422 1422
1423 1423
1424 1424 class manifestfulltextcache(util.lrucachedict):
1425 1425 """File-backed LRU cache for the manifest cache
1426 1426
1427 1427 File consists of entries, up to EOF:
1428 1428
1429 1429 - 20 bytes node, 4 bytes length, <length> manifest data
1430 1430
1431 1431 These are written in reverse cache order (oldest to newest).
1432 1432
1433 1433 """
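    # Record layout sketch: a manifest fulltext b'data' cached under a
    # 20-byte node `n` is stored on disk as
    #   n + struct.pack(b'>L', 4) + b'data'
    # i.e. a fixed 24-byte header followed by the payload, matching what
    # read() and write() below implement.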
1434 1434
1435 1435 _file = b'manifestfulltextcache'
1436 1436
1437 1437 def __init__(self, max):
1438 1438 super(manifestfulltextcache, self).__init__(max)
1439 1439 self._dirty = False
1440 1440 self._read = False
1441 1441 self._opener = None
1442 1442
1443 1443 def read(self):
1444 1444 if self._read or self._opener is None:
1445 1445 return
1446 1446
1447 1447 try:
1448 1448 with self._opener(self._file) as fp:
1449 1449 set = super(manifestfulltextcache, self).__setitem__
1450 1450 # ignore trailing data; this is a cache, so corruption is simply skipped
1451 1451 while True:
1452 1452 # TODO do we need to do work here for sha1 portability?
1453 1453 node = fp.read(20)
1454 1454 if len(node) < 20:
1455 1455 break
1456 1456 try:
1457 1457 size = struct.unpack(b'>L', fp.read(4))[0]
1458 1458 except struct.error:
1459 1459 break
1460 1460 value = bytearray(fp.read(size))
1461 1461 if len(value) != size:
1462 1462 break
1463 1463 set(node, value)
1464 1464 except IOError:
1465 1465 # the file is allowed to be missing
1466 1466 pass
1467 1467
1468 1468 self._read = True
1469 1469 self._dirty = False
1470 1470
1471 1471 def write(self):
1472 1472 if not self._dirty or self._opener is None:
1473 1473 return
1474 1474 # rotate backwards to the first used node
1475 1475 try:
1476 1476 with self._opener(
1477 1477 self._file, b'w', atomictemp=True, checkambig=True
1478 1478 ) as fp:
1479 1479 node = self._head.prev
1480 1480 while True:
1481 1481 if node.key in self._cache:
1482 1482 fp.write(node.key)
1483 1483 fp.write(struct.pack(b'>L', len(node.value)))
1484 1484 fp.write(node.value)
1485 1485 if node is self._head:
1486 1486 break
1487 1487 node = node.prev
1488 1488 except IOError:
1489 1489 # We could not write the cache (eg: permission error)
1490 1490 # the content can be missing.
1491 1491 #
1492 1492 # We could try harder and see if we could recreate a wcache
1493 1493 # directory where we could write to.
1494 1494 #
1495 1495 # XXX the error passes silently; having some way to issue an error
1496 1496 # log via `ui.log` would be nice.
1497 1497 pass
1498 1498
1499 1499 def __len__(self):
1500 1500 if not self._read:
1501 1501 self.read()
1502 1502 return super(manifestfulltextcache, self).__len__()
1503 1503
1504 1504 def __contains__(self, k):
1505 1505 if not self._read:
1506 1506 self.read()
1507 1507 return super(manifestfulltextcache, self).__contains__(k)
1508 1508
1509 1509 def __iter__(self):
1510 1510 if not self._read:
1511 1511 self.read()
1512 1512 return super(manifestfulltextcache, self).__iter__()
1513 1513
1514 1514 def __getitem__(self, k):
1515 1515 if not self._read:
1516 1516 self.read()
1517 1517 # the cache lru order can change on read
1518 1518 setdirty = self._cache.get(k) is not self._head
1519 1519 value = super(manifestfulltextcache, self).__getitem__(k)
1520 1520 if setdirty:
1521 1521 self._dirty = True
1522 1522 return value
1523 1523
1524 1524 def __setitem__(self, k, v):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 super(manifestfulltextcache, self).__setitem__(k, v)
1528 1528 self._dirty = True
1529 1529
1530 1530 def __delitem__(self, k):
1531 1531 if not self._read:
1532 1532 self.read()
1533 1533 super(manifestfulltextcache, self).__delitem__(k)
1534 1534 self._dirty = True
1535 1535
1536 1536 def get(self, k, default=None):
1537 1537 if not self._read:
1538 1538 self.read()
1539 1539 return super(manifestfulltextcache, self).get(k, default=default)
1540 1540
1541 1541 def clear(self, clear_persisted_data=False):
1542 1542 super(manifestfulltextcache, self).clear()
1543 1543 if clear_persisted_data:
1544 1544 self._dirty = True
1545 1545 self.write()
1546 1546 self._read = False
1547 1547
1548 1548
1549 1549 # an upper bound of what we expect from compression
1550 1550 # (the real-life value seems to be "3")
1551 1551 MAXCOMPRESSION = 3
1552 1552
1553 1553
1554 1554 class FastdeltaUnavailable(Exception):
1555 1555 """Exception raised when fastdelta isn't usable on a manifest."""
1556 1556
1557 1557
1558 1558 @interfaceutil.implementer(repository.imanifeststorage)
1559 1559 class manifestrevlog(object):
1560 1560 """A revlog that stores manifest texts. This is responsible for caching the
1561 1561 full-text manifest contents.
1562 1562 """
1563 1563
1564 1564 def __init__(
1565 1565 self,
1566 1566 nodeconstants,
1567 1567 opener,
1568 1568 tree=b'',
1569 1569 dirlogcache=None,
1570 1570 indexfile=None,
1571 1571 treemanifest=False,
1572 1572 ):
1573 1573 """Constructs a new manifest revlog
1574 1574
1575 1575 `indexfile` - used by extensions to have two manifests at once, like
1576 1576 when transitioning between flat manifests and tree manifests.
1577 1577
1578 1578 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1579 1579 options can also be used to make this a tree manifest revlog. The opener
1580 1580 option takes precedence, so if it is set to True, we ignore whatever
1581 1581 value is passed in to the constructor.
1582 1582 """
1583 1583 self.nodeconstants = nodeconstants
1584 1584 # During normal operations, we expect to deal with not more than four
1585 1585 # revs at a time (such as during commit --amend). When rebasing large
1586 1586 # stacks of commits, the number can go up, hence the config knob below.
1587 1587 cachesize = 4
1588 1588 optiontreemanifest = False
1589 1589 opts = getattr(opener, 'options', None)
1590 1590 if opts is not None:
1591 1591 cachesize = opts.get(b'manifestcachesize', cachesize)
1592 1592 optiontreemanifest = opts.get(b'treemanifest', False)
1593 1593
1594 1594 self._treeondisk = optiontreemanifest or treemanifest
1595 1595
1596 1596 self._fulltextcache = manifestfulltextcache(cachesize)
1597 1597
1598 1598 if tree:
1599 1599 assert self._treeondisk, b'opts is %r' % opts
1600 1600
1601 1601 if indexfile is None:
1602 1602 indexfile = b'00manifest.i'
1603 1603 if tree:
1604 1604 indexfile = b"meta/" + tree + indexfile
1605 1605
1606 1606 self.tree = tree
1607 1607
1608 1608 # The dirlogcache is kept on the root manifest log
1609 1609 if tree:
1610 1610 self._dirlogcache = dirlogcache
1611 1611 else:
1612 1612 self._dirlogcache = {b'': self}
1613 1613
1614 1614 self._revlog = revlog.revlog(
1615 1615 opener,
1616 1616 target=(revlog_constants.KIND_MANIFESTLOG, self.tree),
1617 1617 indexfile=indexfile,
1618 1618 # only root indexfile is cached
1619 1619 checkambig=not bool(tree),
1620 1620 mmaplargeindex=True,
1621 1621 upperboundcomp=MAXCOMPRESSION,
1622 1622 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1623 1623 )
1624 1624
1625 1625 self.index = self._revlog.index
1626 self.version = self._revlog.version
1627 1626 self._generaldelta = self._revlog._generaldelta
1628 1627
1629 1628 def _setupmanifestcachehooks(self, repo):
1630 1629 """Persist the manifestfulltextcache on lock release"""
1631 1630 if not util.safehasattr(repo, b'_wlockref'):
1632 1631 return
1633 1632
1634 1633 self._fulltextcache._opener = repo.wcachevfs
1635 1634 if repo._currentlock(repo._wlockref) is None:
1636 1635 return
1637 1636
1638 1637 reporef = weakref.ref(repo)
1639 1638 manifestrevlogref = weakref.ref(self)
1640 1639
1641 1640 def persistmanifestcache(success):
1642 1641 # Repo is in an unknown state, do not persist.
1643 1642 if not success:
1644 1643 return
1645 1644
1646 1645 repo = reporef()
1647 1646 self = manifestrevlogref()
1648 1647 if repo is None or self is None:
1649 1648 return
1650 1649 if repo.manifestlog.getstorage(b'') is not self:
1651 1650 # there's a different manifest in play now, abort
1652 1651 return
1653 1652 self._fulltextcache.write()
1654 1653
1655 1654 repo._afterlock(persistmanifestcache)
1656 1655
1657 1656 @property
1658 1657 def fulltextcache(self):
1659 1658 return self._fulltextcache
1660 1659
1661 1660 def clearcaches(self, clear_persisted_data=False):
1662 1661 self._revlog.clearcaches()
1663 1662 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1664 1663 self._dirlogcache = {self.tree: self}
1665 1664
1666 1665 def dirlog(self, d):
1667 1666 if d:
1668 1667 assert self._treeondisk
1669 1668 if d not in self._dirlogcache:
1670 1669 mfrevlog = manifestrevlog(
1671 1670 self.nodeconstants,
1672 1671 self.opener,
1673 1672 d,
1674 1673 self._dirlogcache,
1675 1674 treemanifest=self._treeondisk,
1676 1675 )
1677 1676 self._dirlogcache[d] = mfrevlog
1678 1677 return self._dirlogcache[d]
1679 1678
1680 1679 def add(
1681 1680 self,
1682 1681 m,
1683 1682 transaction,
1684 1683 link,
1685 1684 p1,
1686 1685 p2,
1687 1686 added,
1688 1687 removed,
1689 1688 readtree=None,
1690 1689 match=None,
1691 1690 ):
1692 1691 """add a manifest entry to the manifest log
1693 1692
1694 1693 input:
1695 1694
1696 1695 m: the manifest dict we want to store
1697 1696 transaction: the open transaction
1698 1697 p1: manifest-node of p1
1699 1698 p2: manifest-node of p2
1700 1699 added: files added/changed compared to the parents
1701 1700 removed: files removed compared to the parents
1702 1701
1703 1702 tree manifest input:
1704 1703
1705 1704 readtree: a function to read a subtree
1706 1705 match: a filematcher for the subpart of the tree manifest
1707 1706 """
1708 1707 try:
1709 1708 if p1 not in self.fulltextcache:
1710 1709 raise FastdeltaUnavailable()
1711 1710 # If our first parent is in the manifest cache, we can
1712 1711 # compute a delta here using properties we know about the
1713 1712 # manifest up-front, which may save time later for the
1714 1713 # revlog layer.
1715 1714
1716 1715 _checkforbidden(added)
1717 1716 # combine the changed lists into one sorted iterator
1718 1717 work = heapq.merge(
1719 1718 [(x, False) for x in sorted(added)],
1720 1719 [(x, True) for x in sorted(removed)],
1721 1720 )
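            # e.g. added=[b'b'], removed=[b'a'] merges to the sorted stream
            # (b'a', True), (b'b', False), with removals flagged True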
1722 1721
1723 1722 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1724 1723 cachedelta = self._revlog.rev(p1), deltatext
1725 1724 text = util.buffer(arraytext)
1726 1725 rev = self._revlog.addrevision(
1727 1726 text, transaction, link, p1, p2, cachedelta
1728 1727 )
1729 1728 n = self._revlog.node(rev)
1730 1729 except FastdeltaUnavailable:
1731 1730 # The first parent manifest isn't already loaded or the
1732 1731 # manifest implementation doesn't support fastdelta, so
1733 1732 # we'll just encode a fulltext of the manifest and pass
1734 1733 # that through to the revlog layer, and let it handle the
1735 1734 # delta process.
1736 1735 if self._treeondisk:
1737 1736 assert readtree, b"readtree must be set for treemanifest writes"
1738 1737 assert match, b"match must be specified for treemanifest writes"
1739 1738 m1 = readtree(self.tree, p1)
1740 1739 m2 = readtree(self.tree, p2)
1741 1740 n = self._addtree(
1742 1741 m, transaction, link, m1, m2, readtree, match=match
1743 1742 )
1744 1743 arraytext = None
1745 1744 else:
1746 1745 text = m.text()
1747 1746 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1748 1747 n = self._revlog.node(rev)
1749 1748 arraytext = bytearray(text)
1750 1749
1751 1750 if arraytext is not None:
1752 1751 self.fulltextcache[n] = arraytext
1753 1752
1754 1753 return n
1755 1754
1756 1755 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1757 1756 # If the manifest is unchanged compared to one parent,
1758 1757 # don't write a new revision
1759 1758 if self.tree != b'' and (
1760 1759 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1761 1760 ):
1762 1761 return m.node()
1763 1762
1764 1763 def writesubtree(subm, subp1, subp2, match):
1765 1764 sublog = self.dirlog(subm.dir())
1766 1765 sublog.add(
1767 1766 subm,
1768 1767 transaction,
1769 1768 link,
1770 1769 subp1,
1771 1770 subp2,
1772 1771 None,
1773 1772 None,
1774 1773 readtree=readtree,
1775 1774 match=match,
1776 1775 )
1777 1776
1778 1777 m.writesubtrees(m1, m2, writesubtree, match)
1779 1778 text = m.dirtext()
1780 1779 n = None
1781 1780 if self.tree != b'':
1782 1781 # Double-check whether contents are unchanged compared to one parent
1783 1782 if text == m1.dirtext():
1784 1783 n = m1.node()
1785 1784 elif text == m2.dirtext():
1786 1785 n = m2.node()
1787 1786
1788 1787 if not n:
1789 1788 rev = self._revlog.addrevision(
1790 1789 text, transaction, link, m1.node(), m2.node()
1791 1790 )
1792 1791 n = self._revlog.node(rev)
1793 1792
1794 1793 # Save nodeid so parent manifest can calculate its nodeid
1795 1794 m.setnode(n)
1796 1795 return n
1797 1796
1798 1797 def __len__(self):
1799 1798 return len(self._revlog)
1800 1799
1801 1800 def __iter__(self):
1802 1801 return self._revlog.__iter__()
1803 1802
1804 1803 def rev(self, node):
1805 1804 return self._revlog.rev(node)
1806 1805
1807 1806 def node(self, rev):
1808 1807 return self._revlog.node(rev)
1809 1808
1810 1809 def lookup(self, value):
1811 1810 return self._revlog.lookup(value)
1812 1811
1813 1812 def parentrevs(self, rev):
1814 1813 return self._revlog.parentrevs(rev)
1815 1814
1816 1815 def parents(self, node):
1817 1816 return self._revlog.parents(node)
1818 1817
1819 1818 def linkrev(self, rev):
1820 1819 return self._revlog.linkrev(rev)
1821 1820
1822 1821 def checksize(self):
1823 1822 return self._revlog.checksize()
1824 1823
1825 1824 def revision(self, node, _df=None, raw=False):
1826 1825 return self._revlog.revision(node, _df=_df, raw=raw)
1827 1826
1828 1827 def rawdata(self, node, _df=None):
1829 1828 return self._revlog.rawdata(node, _df=_df)
1830 1829
1831 1830 def revdiff(self, rev1, rev2):
1832 1831 return self._revlog.revdiff(rev1, rev2)
1833 1832
1834 1833 def cmp(self, node, text):
1835 1834 return self._revlog.cmp(node, text)
1836 1835
1837 1836 def deltaparent(self, rev):
1838 1837 return self._revlog.deltaparent(rev)
1839 1838
1840 1839 def emitrevisions(
1841 1840 self,
1842 1841 nodes,
1843 1842 nodesorder=None,
1844 1843 revisiondata=False,
1845 1844 assumehaveparentrevisions=False,
1846 1845 deltamode=repository.CG_DELTAMODE_STD,
1847 1846 sidedata_helpers=None,
1848 1847 ):
1849 1848 return self._revlog.emitrevisions(
1850 1849 nodes,
1851 1850 nodesorder=nodesorder,
1852 1851 revisiondata=revisiondata,
1853 1852 assumehaveparentrevisions=assumehaveparentrevisions,
1854 1853 deltamode=deltamode,
1855 1854 sidedata_helpers=sidedata_helpers,
1856 1855 )
1857 1856
1858 1857 def addgroup(
1859 1858 self,
1860 1859 deltas,
1861 1860 linkmapper,
1862 1861 transaction,
1863 1862 alwayscache=False,
1864 1863 addrevisioncb=None,
1865 1864 duplicaterevisioncb=None,
1866 1865 ):
1867 1866 return self._revlog.addgroup(
1868 1867 deltas,
1869 1868 linkmapper,
1870 1869 transaction,
1871 1870 alwayscache=alwayscache,
1872 1871 addrevisioncb=addrevisioncb,
1873 1872 duplicaterevisioncb=duplicaterevisioncb,
1874 1873 )
1875 1874
1876 1875 def rawsize(self, rev):
1877 1876 return self._revlog.rawsize(rev)
1878 1877
1879 1878 def getstrippoint(self, minlink):
1880 1879 return self._revlog.getstrippoint(minlink)
1881 1880
1882 1881 def strip(self, minlink, transaction):
1883 1882 return self._revlog.strip(minlink, transaction)
1884 1883
1885 1884 def files(self):
1886 1885 return self._revlog.files()
1887 1886
1888 1887 def clone(self, tr, destrevlog, **kwargs):
1889 1888 if not isinstance(destrevlog, manifestrevlog):
1890 1889 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1891 1890
1892 1891 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1893 1892
1894 1893 def storageinfo(
1895 1894 self,
1896 1895 exclusivefiles=False,
1897 1896 sharedfiles=False,
1898 1897 revisionscount=False,
1899 1898 trackedsize=False,
1900 1899 storedsize=False,
1901 1900 ):
1902 1901 return self._revlog.storageinfo(
1903 1902 exclusivefiles=exclusivefiles,
1904 1903 sharedfiles=sharedfiles,
1905 1904 revisionscount=revisionscount,
1906 1905 trackedsize=trackedsize,
1907 1906 storedsize=storedsize,
1908 1907 )
1909 1908
1910 1909 @property
1911 1910 def indexfile(self):
1912 1911 return self._revlog.indexfile
1913 1912
1914 1913 @indexfile.setter
1915 1914 def indexfile(self, value):
1916 1915 self._revlog.indexfile = value
1917 1916
1918 1917 @property
1919 1918 def opener(self):
1920 1919 return self._revlog.opener
1921 1920
1922 1921 @opener.setter
1923 1922 def opener(self, value):
1924 1923 self._revlog.opener = value
1925 1924
1926 1925
1927 1926 @interfaceutil.implementer(repository.imanifestlog)
1928 1927 class manifestlog(object):
1929 1928 """A collection class representing the manifest snapshots referenced
1930 1929 by commits in the repository.
1931 1930
1932 1931 In this situation, 'manifest' refers to the abstract concept of a snapshot
1933 1932 of the list of files in the given commit. Consumers of the output of this
1934 1933 class do not care about the implementation details of the actual manifests
1935 1934 they receive (i.e. tree or flat or lazily loaded, etc)."""
1936 1935
1937 1936 def __init__(self, opener, repo, rootstore, narrowmatch):
1938 1937 self.nodeconstants = repo.nodeconstants
1939 1938 usetreemanifest = False
1940 1939 cachesize = 4
1941 1940
1942 1941 opts = getattr(opener, 'options', None)
1943 1942 if opts is not None:
1944 1943 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1945 1944 cachesize = opts.get(b'manifestcachesize', cachesize)
1946 1945
1947 1946 self._treemanifests = usetreemanifest
1948 1947
1949 1948 self._rootstore = rootstore
1950 1949 self._rootstore._setupmanifestcachehooks(repo)
1951 1950 self._narrowmatch = narrowmatch
1952 1951
1953 1952 # A cache of the manifestctx or treemanifestctx for each directory
1954 1953 self._dirmancache = {}
1955 1954 self._dirmancache[b''] = util.lrucachedict(cachesize)
1956 1955
1957 1956 self._cachesize = cachesize
1958 1957
1959 1958 def __getitem__(self, node):
1960 1959 """Retrieves the manifest instance for the given node. Throws a
1961 1960 LookupError if not found.
1962 1961 """
1963 1962 return self.get(b'', node)
1964 1963
1965 1964 def get(self, tree, node, verify=True):
1966 1965 """Retrieves the manifest instance for the given node. Throws a
1967 1966 LookupError if not found.
1968 1967
1969 1968 `verify` - if True an exception will be thrown if the node is not in
1970 1969 the revlog
1971 1970 """
1972 1971 if node in self._dirmancache.get(tree, ()):
1973 1972 return self._dirmancache[tree][node]
1974 1973
1975 1974 if not self._narrowmatch.always():
1976 1975 if not self._narrowmatch.visitdir(tree[:-1]):
1977 1976 return excludeddirmanifestctx(self.nodeconstants, tree, node)
1978 1977 if tree:
1979 1978 if self._rootstore._treeondisk:
1980 1979 if verify:
1981 1980 # Side-effect is LookupError is raised if node doesn't
1982 1981 # exist.
1983 1982 self.getstorage(tree).rev(node)
1984 1983
1985 1984 m = treemanifestctx(self, tree, node)
1986 1985 else:
1987 1986 raise error.Abort(
1988 1987 _(
1989 1988 b"cannot ask for manifest directory '%s' in a flat "
1990 1989 b"manifest"
1991 1990 )
1992 1991 % tree
1993 1992 )
1994 1993 else:
1995 1994 if verify:
1996 1995 # Side-effect is LookupError is raised if node doesn't exist.
1997 1996 self._rootstore.rev(node)
1998 1997
1999 1998 if self._treemanifests:
2000 1999 m = treemanifestctx(self, b'', node)
2001 2000 else:
2002 2001 m = manifestctx(self, node)
2003 2002
2004 2003 if node != self.nodeconstants.nullid:
2005 2004 mancache = self._dirmancache.get(tree)
2006 2005 if not mancache:
2007 2006 mancache = util.lrucachedict(self._cachesize)
2008 2007 self._dirmancache[tree] = mancache
2009 2008 mancache[node] = m
2010 2009 return m
2011 2010
2012 2011 def getstorage(self, tree):
2013 2012 return self._rootstore.dirlog(tree)
2014 2013
2015 2014 def clearcaches(self, clear_persisted_data=False):
2016 2015 self._dirmancache.clear()
2017 2016 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
2018 2017
2019 2018 def rev(self, node):
2020 2019 return self._rootstore.rev(node)
2021 2020
2022 2021 def update_caches(self, transaction):
2023 2022 return self._rootstore._revlog.update_caches(transaction=transaction)
2024 2023
2025 2024
2026 2025 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2027 2026 class memmanifestctx(object):
2028 2027 def __init__(self, manifestlog):
2029 2028 self._manifestlog = manifestlog
2030 2029 self._manifestdict = manifestdict(manifestlog.nodeconstants.nodelen)
2031 2030
2032 2031 def _storage(self):
2033 2032 return self._manifestlog.getstorage(b'')
2034 2033
2035 2034 def copy(self):
2036 2035 memmf = memmanifestctx(self._manifestlog)
2037 2036 memmf._manifestdict = self.read().copy()
2038 2037 return memmf
2039 2038
2040 2039 def read(self):
2041 2040 return self._manifestdict
2042 2041
2043 2042 def write(self, transaction, link, p1, p2, added, removed, match=None):
2044 2043 return self._storage().add(
2045 2044 self._manifestdict,
2046 2045 transaction,
2047 2046 link,
2048 2047 p1,
2049 2048 p2,
2050 2049 added,
2051 2050 removed,
2052 2051 match=match,
2053 2052 )
2054 2053
2055 2054
2056 2055 @interfaceutil.implementer(repository.imanifestrevisionstored)
2057 2056 class manifestctx(object):
2058 2057 """A class representing a single revision of a manifest, including its
2059 2058 contents, its parent revs, and its linkrev.
2060 2059 """
2061 2060
2062 2061 def __init__(self, manifestlog, node):
2063 2062 self._manifestlog = manifestlog
2064 2063 self._data = None
2065 2064
2066 2065 self._node = node
2067 2066
2068 2067 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2069 2068 # but let's add it later when something needs it and we can load it
2070 2069 # lazily.
2071 2070 # self.p1, self.p2 = store.parents(node)
2072 2071 # rev = store.rev(node)
2073 2072 # self.linkrev = store.linkrev(rev)
2074 2073
2075 2074 def _storage(self):
2076 2075 return self._manifestlog.getstorage(b'')
2077 2076
2078 2077 def node(self):
2079 2078 return self._node
2080 2079
2081 2080 def copy(self):
2082 2081 memmf = memmanifestctx(self._manifestlog)
2083 2082 memmf._manifestdict = self.read().copy()
2084 2083 return memmf
2085 2084
2086 2085 @propertycache
2087 2086 def parents(self):
2088 2087 return self._storage().parents(self._node)
2089 2088
2090 2089 def read(self):
2091 2090 if self._data is None:
2092 2091 nc = self._manifestlog.nodeconstants
2093 2092 if self._node == nc.nullid:
2094 2093 self._data = manifestdict(nc.nodelen)
2095 2094 else:
2096 2095 store = self._storage()
2097 2096 if self._node in store.fulltextcache:
2098 2097 text = pycompat.bytestr(store.fulltextcache[self._node])
2099 2098 else:
2100 2099 text = store.revision(self._node)
2101 2100 arraytext = bytearray(text)
2102 2101 store.fulltextcache[self._node] = arraytext
2103 2102 self._data = manifestdict(nc.nodelen, text)
2104 2103 return self._data
2105 2104
2106 2105 def readfast(self, shallow=False):
2107 2106 """Calls either readdelta or read, based on which would be less work.
2108 2107 readdelta is called if the delta is against the p1, and therefore can be
2109 2108 read quickly.
2110 2109
2111 2110 If `shallow` is True, nothing changes since this is a flat manifest.
2112 2111 """
2113 2112 store = self._storage()
2114 2113 r = store.rev(self._node)
2115 2114 deltaparent = store.deltaparent(r)
2116 2115 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2117 2116 return self.readdelta()
2118 2117 return self.read()
2119 2118
2120 2119 def readdelta(self, shallow=False):
2121 2120 """Returns a manifest containing just the entries that are present
2122 2121 in this manifest, but not in its p1 manifest. This is efficient to read
2123 2122 if the revlog delta is already p1.
2124 2123
2125 2124 Changing the value of `shallow` has no effect on flat manifests.
2126 2125 """
2127 2126 store = self._storage()
2128 2127 r = store.rev(self._node)
2129 2128 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2130 2129 return manifestdict(store.nodeconstants.nodelen, d)
2131 2130
2132 2131 def find(self, key):
2133 2132 return self.read().find(key)
2134 2133
2135 2134
2136 2135 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2137 2136 class memtreemanifestctx(object):
2138 2137 def __init__(self, manifestlog, dir=b''):
2139 2138 self._manifestlog = manifestlog
2140 2139 self._dir = dir
2141 2140 self._treemanifest = treemanifest(manifestlog.nodeconstants)
2142 2141
2143 2142 def _storage(self):
2144 2143 return self._manifestlog.getstorage(b'')
2145 2144
2146 2145 def copy(self):
2147 2146 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2148 2147 memmf._treemanifest = self._treemanifest.copy()
2149 2148 return memmf
2150 2149
2151 2150 def read(self):
2152 2151 return self._treemanifest
2153 2152
2154 2153 def write(self, transaction, link, p1, p2, added, removed, match=None):
2155 2154 def readtree(dir, node):
2156 2155 return self._manifestlog.get(dir, node).read()
2157 2156
2158 2157 return self._storage().add(
2159 2158 self._treemanifest,
2160 2159 transaction,
2161 2160 link,
2162 2161 p1,
2163 2162 p2,
2164 2163 added,
2165 2164 removed,
2166 2165 readtree=readtree,
2167 2166 match=match,
2168 2167 )
2169 2168
2170 2169
2171 2170 @interfaceutil.implementer(repository.imanifestrevisionstored)
2172 2171 class treemanifestctx(object):
2173 2172 def __init__(self, manifestlog, dir, node):
2174 2173 self._manifestlog = manifestlog
2175 2174 self._dir = dir
2176 2175 self._data = None
2177 2176
2178 2177 self._node = node
2179 2178
2180 2179 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2181 2180 # we can instantiate treemanifestctx objects for directories we don't
2182 2181 # have on disk.
2183 2182 # self.p1, self.p2 = store.parents(node)
2184 2183 # rev = store.rev(node)
2185 2184 # self.linkrev = store.linkrev(rev)
2186 2185
2187 2186 def _storage(self):
2188 2187 narrowmatch = self._manifestlog._narrowmatch
2189 2188 if not narrowmatch.always():
2190 2189 if not narrowmatch.visitdir(self._dir[:-1]):
2191 2190 return excludedmanifestrevlog(
2192 2191 self._manifestlog.nodeconstants, self._dir
2193 2192 )
2194 2193 return self._manifestlog.getstorage(self._dir)
2195 2194
2196 2195 def read(self):
2197 2196 if self._data is None:
2198 2197 store = self._storage()
2199 2198 if self._node == self._manifestlog.nodeconstants.nullid:
2200 2199 self._data = treemanifest(self._manifestlog.nodeconstants)
2201 2200 # TODO accessing non-public API
2202 2201 elif store._treeondisk:
2203 2202 m = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2204 2203
2205 2204 def gettext():
2206 2205 return store.revision(self._node)
2207 2206
2208 2207 def readsubtree(dir, subm):
2209 2208 # Set verify to False since we need to be able to create
2210 2209 # subtrees for trees that don't exist on disk.
2211 2210 return self._manifestlog.get(dir, subm, verify=False).read()
2212 2211
2213 2212 m.read(gettext, readsubtree)
2214 2213 m.setnode(self._node)
2215 2214 self._data = m
2216 2215 else:
2217 2216 if self._node in store.fulltextcache:
2218 2217 text = pycompat.bytestr(store.fulltextcache[self._node])
2219 2218 else:
2220 2219 text = store.revision(self._node)
2221 2220 arraytext = bytearray(text)
2222 2221 store.fulltextcache[self._node] = arraytext
2223 2222 self._data = treemanifest(
2224 2223 self._manifestlog.nodeconstants, dir=self._dir, text=text
2225 2224 )
2226 2225
2227 2226 return self._data
2228 2227
2229 2228 def node(self):
2230 2229 return self._node
2231 2230
2232 2231 def copy(self):
2233 2232 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2234 2233 memmf._treemanifest = self.read().copy()
2235 2234 return memmf
2236 2235
2237 2236 @propertycache
2238 2237 def parents(self):
2239 2238 return self._storage().parents(self._node)
2240 2239
2241 2240 def readdelta(self, shallow=False):
2242 2241 """Returns a manifest containing just the entries that are present
2243 2242 in this manifest, but not in its p1 manifest. This is efficient to read
2244 2243 if the revlog delta is already p1.
2245 2244
2246 2245 If `shallow` is True, this will read the delta for this directory,
2247 2246 without recursively reading subdirectory manifests. Instead, any
2248 2247 subdirectory entry will be reported as it appears in the manifest, i.e.
2249 2248 the subdirectory will be reported among files and distinguished only by
2250 2249 its 't' flag.
2251 2250 """
2252 2251 store = self._storage()
2253 2252 if shallow:
2254 2253 r = store.rev(self._node)
2255 2254 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2256 2255 return manifestdict(store.nodeconstants.nodelen, d)
2257 2256 else:
2258 2257 # Need to perform a slow delta
2259 2258 r0 = store.deltaparent(store.rev(self._node))
2260 2259 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2261 2260 m1 = self.read()
2262 2261 md = treemanifest(self._manifestlog.nodeconstants, dir=self._dir)
2263 2262 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2264 2263 if n1:
2265 2264 md[f] = n1
2266 2265 if fl1:
2267 2266 md.setflag(f, fl1)
2268 2267 return md
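    # Sketch: with shallow=True a changed subdirectory surfaces as a single
    # b't'-flagged entry, e.g. (b'dir', <node>, b't'), in the returned
    # manifestdict instead of being recursed into.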
2269 2268
2270 2269 def readfast(self, shallow=False):
2271 2270 """Calls either readdelta or read, based on which would be less work.
2272 2271 readdelta is called if the delta is against the p1, and therefore can be
2273 2272 read quickly.
2274 2273
2275 2274 If `shallow` is True, it only returns the entries from this manifest,
2276 2275 and not any submanifests.
2277 2276 """
2278 2277 store = self._storage()
2279 2278 r = store.rev(self._node)
2280 2279 deltaparent = store.deltaparent(r)
2281 2280 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2282 2281 return self.readdelta(shallow=shallow)
2283 2282
2284 2283 if shallow:
2285 2284 return manifestdict(
2286 2285 store.nodeconstants.nodelen, store.revision(self._node)
2287 2286 )
2288 2287 else:
2289 2288 return self.read()
2290 2289
2291 2290 def find(self, key):
2292 2291 return self.read().find(key)
2293 2292
2294 2293
2295 2294 class excludeddir(treemanifest):
2296 2295 """Stand-in for a directory that is excluded from the repository.
2297 2296
2298 2297 With narrowing active on a repository that uses treemanifests,
2299 2298 some of the directory revlogs will be excluded from the resulting
2300 2299 clone. This is a huge storage win for clients, but means we need
2301 2300 some sort of pseudo-manifest to surface to internals so we can
2302 2301 detect a merge conflict outside the narrowspec. That's what this
2303 2302 class is: it stands in for a directory whose node is known, but
2304 2303 whose contents are unknown.
2305 2304 """
2306 2305
2307 2306 def __init__(self, nodeconstants, dir, node):
2308 2307 super(excludeddir, self).__init__(nodeconstants, dir)
2309 2308 self._node = node
2310 2309 # Add an empty file, which will be included by iterators and such,
2311 2310 # appearing as the directory itself (i.e. something like "dir/")
2312 2311 self._files[b''] = node
2313 2312 self._flags[b''] = b't'
2314 2313
2315 2314 # Manifests outside the narrowspec should never be modified, so avoid
2316 2315 # copying. This makes a noticeable difference when there are very many
2317 2316 # directories outside the narrowspec. Also, it makes sense for the copy to
2318 2317 # be of the same type as the original, which would not happen with the
2319 2318 # super type's copy().
2320 2319 def copy(self):
2321 2320 return self
2322 2321
2323 2322
2324 2323 class excludeddirmanifestctx(treemanifestctx):
2325 2324 """context wrapper for excludeddir - see that docstring for rationale"""
2326 2325
2327 2326 def __init__(self, nodeconstants, dir, node):
2328 2327 self.nodeconstants = nodeconstants
2329 2328 self._dir = dir
2330 2329 self._node = node
2331 2330
2332 2331 def read(self):
2333 2332 return excludeddir(self.nodeconstants, self._dir, self._node)
2334 2333
2335 2334 def readfast(self, shallow=False):
2336 2335 # special version of readfast since we don't have underlying storage
2337 2336 return self.read()
2338 2337
2339 2338 def write(self, *args):
2340 2339 raise error.ProgrammingError(
2341 2340 b'attempt to write manifest from excluded dir %s' % self._dir
2342 2341 )
2343 2342
2344 2343
2345 2344 class excludedmanifestrevlog(manifestrevlog):
2346 2345 """Stand-in for excluded treemanifest revlogs.
2347 2346
2348 2347 When narrowing is active on a treemanifest repository, we'll have
2349 2348 references to directories we can't see due to the revlog being
2350 2349 skipped. This class exists to conform to the manifestrevlog
2351 2350 interface for those directories and proactively prevent writes to
2352 2351 outside the narrowspec.
2353 2352 """
2354 2353
2355 2354 def __init__(self, nodeconstants, dir):
2356 2355 self.nodeconstants = nodeconstants
2357 2356 self._dir = dir
2358 2357
2359 2358 def __len__(self):
2360 2359 raise error.ProgrammingError(
2361 2360 b'attempt to get length of excluded dir %s' % self._dir
2362 2361 )
2363 2362
2364 2363 def rev(self, node):
2365 2364 raise error.ProgrammingError(
2366 2365 b'attempt to get rev from excluded dir %s' % self._dir
2367 2366 )
2368 2367
2369 2368 def linkrev(self, node):
2370 2369 raise error.ProgrammingError(
2371 2370 b'attempt to get linkrev from excluded dir %s' % self._dir
2372 2371 )
2373 2372
2374 2373 def node(self, rev):
2375 2374 raise error.ProgrammingError(
2376 2375 b'attempt to get node from excluded dir %s' % self._dir
2377 2376 )
2378 2377
2379 2378 def add(self, *args, **kwargs):
2380 2379 # We should never write entries in dirlogs outside the narrow clone.
2381 2380 # However, the method still gets called from writesubtree() in
2382 2381 # _addtree(), so we need to handle it. We should possibly make that
2383 2382 # avoid calling add() with a clean manifest (_dirty is always False
2384 2383 # in excludeddir instances).
2385 2384 pass
@@ -1,3142 +1,3145
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import binascii
17 17 import collections
18 18 import contextlib
19 19 import errno
20 20 import io
21 21 import os
22 22 import struct
23 23 import zlib
24 24
25 25 # import stuff from node for others to import from revlog
26 26 from .node import (
27 27 bin,
28 28 hex,
29 29 nullrev,
30 30 sha1nodeconstants,
31 31 short,
32 32 wdirrev,
33 33 )
34 34 from .i18n import _
35 35 from .pycompat import getattr
36 36 from .revlogutils.constants import (
37 37 ALL_KINDS,
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 INDEX_HEADER,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_HASCOPIESINFO,
56 56 REVIDX_ISCENSORED,
57 57 REVIDX_RAWTEXT_CHANGING_FLAGS,
58 58 )
59 59 from .thirdparty import attr
60 60 from . import (
61 61 ancestor,
62 62 dagop,
63 63 error,
64 64 mdiff,
65 65 policy,
66 66 pycompat,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .interfaces import (
71 71 repository,
72 72 util as interfaceutil,
73 73 )
74 74 from .revlogutils import (
75 75 deltas as deltautil,
76 76 flagutil,
77 77 nodemap as nodemaputil,
78 78 revlogv0,
79 79 sidedata as sidedatautil,
80 80 )
81 81 from .utils import (
82 82 storageutil,
83 83 stringutil,
84 84 )
85 85
86 86 # blanket usage of all the names to prevent pyflakes complaints
87 87 # We need these names available in the module for extensions.
88 88
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_HASCOPIESINFO
102 102 REVIDX_EXTSTORED
103 103 REVIDX_DEFAULT_FLAGS
104 104 REVIDX_FLAGS_ORDER
105 105 REVIDX_RAWTEXT_CHANGING_FLAGS
106 106
107 107 parsers = policy.importmod('parsers')
108 108 rustancestor = policy.importrust('ancestor')
109 109 rustdagop = policy.importrust('dagop')
110 110 rustrevlog = policy.importrust('revlog')
111 111
112 112 # Aliased for performance.
113 113 _zlibdecompress = zlib.decompress
114 114
115 115 # max size of revlog with inline data
116 116 _maxinline = 131072
117 117 _chunksize = 1048576
118 118
119 119 # Flag processors for REVIDX_ELLIPSIS.
120 120 def ellipsisreadprocessor(rl, text):
121 121 return text, False
122 122
123 123
124 124 def ellipsiswriteprocessor(rl, text):
125 125 return text, False
126 126
127 127
128 128 def ellipsisrawprocessor(rl, text):
129 129 return False
130 130
131 131
132 132 ellipsisprocessor = (
133 133 ellipsisreadprocessor,
134 134 ellipsiswriteprocessor,
135 135 ellipsisrawprocessor,
136 136 )
137 137
138 138
139 139 def offset_type(offset, type):
140 140 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
141 141 raise ValueError(b'unknown revlog index flags')
142 142 return int(int(offset) << 16 | type)
143 143
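A quick round-trip sketch of the packing `offset_type` performs (hypothetical values; `REVIDX_ISCENSORED` is one of the known flags imported above). The offset lands in the upper bits and the flags in the lower 16, matching the unpacking done by the `start()` and `flags()` accessors later in this file:

    packed = offset_type(4096, REVIDX_ISCENSORED)
    assert packed >> 16 == 4096                   # recover the data offset
    assert packed & 0xFFFF == REVIDX_ISCENSORED   # recover the flag bits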
144 144
145 145 def _verify_revision(rl, skipflags, state, node):
146 146 """Verify the integrity of the given revlog ``node`` while providing a hook
147 147 point for extensions to influence the operation."""
148 148 if skipflags:
149 149 state[b'skipread'].add(node)
150 150 else:
151 151 # Side-effect: read content and verify hash.
152 152 rl.revision(node)
153 153
154 154
155 155 # True if a fast implementation for persistent-nodemap is available
156 156 #
157 157 # We also consider we have a "fast" implementation in "pure" python because
158 158 # people using pure don't really have performance considerations (and a
159 159 # wheelbarrow of other slowness sources)
160 160 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
161 161 parsers, 'BaseIndexObject'
162 162 )
163 163
164 164
165 165 @attr.s(slots=True, frozen=True)
166 166 class _revisioninfo(object):
167 167 """Information about a revision that allows building its fulltext
168 168 node: expected hash of the revision
169 169 p1, p2: parent revs of the revision
170 170 btext: built text cache consisting of a one-element list
171 171 cachedelta: (baserev, uncompressed_delta) or None
172 172 flags: flags associated with the revision storage
173 173
174 174 One of btext[0] or cachedelta must be set.
175 175 """
176 176
177 177 node = attr.ib()
178 178 p1 = attr.ib()
179 179 p2 = attr.ib()
180 180 btext = attr.ib()
181 181 textlen = attr.ib()
182 182 cachedelta = attr.ib()
183 183 flags = attr.ib()
184 184
185 185
186 186 @interfaceutil.implementer(repository.irevisiondelta)
187 187 @attr.s(slots=True)
188 188 class revlogrevisiondelta(object):
189 189 node = attr.ib()
190 190 p1node = attr.ib()
191 191 p2node = attr.ib()
192 192 basenode = attr.ib()
193 193 flags = attr.ib()
194 194 baserevisionsize = attr.ib()
195 195 revision = attr.ib()
196 196 delta = attr.ib()
197 197 sidedata = attr.ib()
198 198 protocol_flags = attr.ib()
199 199 linknode = attr.ib(default=None)
200 200
201 201
202 202 @interfaceutil.implementer(repository.iverifyproblem)
203 203 @attr.s(frozen=True)
204 204 class revlogproblem(object):
205 205 warning = attr.ib(default=None)
206 206 error = attr.ib(default=None)
207 207 node = attr.ib(default=None)
208 208
209 209
210 210 def parse_index_v1(data, inline):
211 211 # call the C implementation to parse the index data
212 212 index, cache = parsers.parse_index2(data, inline)
213 213 return index, cache
214 214
215 215
216 216 def parse_index_v2(data, inline):
217 217 # call the C implementation to parse the index data
218 218 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
219 219 return index, cache
220 220
221 221
222 222 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
223 223
224 224 def parse_index_v1_nodemap(data, inline):
225 225 index, cache = parsers.parse_index_devel_nodemap(data, inline)
226 226 return index, cache
227 227
228 228
229 229 else:
230 230 parse_index_v1_nodemap = None
231 231
232 232
233 233 def parse_index_v1_mixed(data, inline):
234 234 index, cache = parse_index_v1(data, inline)
235 235 return rustrevlog.MixedIndex(index), cache
236 236
237 237
238 238 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
239 239 # signed integer)
240 240 _maxentrysize = 0x7FFFFFFF
241 241
242 242
243 243 class revlog(object):
244 244 """
245 245 the underlying revision storage object
246 246
247 247 A revlog consists of two parts, an index and the revision data.
248 248
249 249 The index is a file with a fixed record size containing
250 250 information on each revision, including its nodeid (hash), the
251 251 nodeids of its parents, the position and offset of its data within
252 252 the data file, and the revision it's based on. Finally, each entry
253 253 contains a linkrev entry that can serve as a pointer to external
254 254 data.
255 255
256 256 The revision data itself is a linear collection of data chunks.
257 257 Each chunk represents a revision and is usually represented as a
258 258 delta against the previous chunk. To bound lookup time, runs of
259 259 deltas are limited to about 2 times the length of the original
260 260 version data. This makes retrieval of a version proportional to
261 261 its size, or O(1) relative to the number of revisions.
262 262
263 263 Both pieces of the revlog are written to in an append-only
264 264 fashion, which means we never need to rewrite a file to insert or
265 265 remove data, and can use some simple techniques to avoid the need
266 266 for locking while reading.
267 267
268 268 If checkambig, indexfile is opened with checkambig=True at
269 269 writing, to avoid file stat ambiguity.
270 270
271 271 If mmaplargeindex is True, and an mmapindexthreshold is set, the
272 272 index will be mmapped rather than read if it is larger than the
273 273 configured threshold.
274 274
275 275 If censorable is True, the revlog can have censored revisions.
276 276
277 277 If `upperboundcomp` is not None, this is the expected maximal gain from
278 278 compression for the data content.
279 279
280 280 `concurrencychecker` is an optional function that receives 3 arguments: a
281 281 file handle, a filename, and an expected position. It should check whether
282 282 the current position in the file handle is valid, and log/warn/fail (by
283 283 raising).
284 284 """
285 285
286 286 _flagserrorclass = error.RevlogError
287 287
288 288 def __init__(
289 289 self,
290 290 opener,
291 291 target,
292 292 indexfile=None,
293 293 datafile=None,
294 294 checkambig=False,
295 295 mmaplargeindex=False,
296 296 censorable=False,
297 297 upperboundcomp=None,
298 298 persistentnodemap=False,
299 299 concurrencychecker=None,
300 300 ):
301 301 """
302 302 create a revlog object
303 303
304 304 opener is a function that abstracts the file opening operation
305 305 and can be used to implement COW semantics or the like.
306 306
307 307 `target`: a (KIND, ID) tuple that identifies the content stored in
308 308 this revlog. It helps the rest of the code understand what the revlog
309 309 is about without having to resort to heuristics and index filename
310 310 analysis. Note that this must reliably be set by normal code, but
311 311 test, debug, or performance measurement code might not set this to
312 312 an accurate value.
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + b".d")
317 317 self.nodemap_file = None
318 318 if persistentnodemap:
319 319 self.nodemap_file = nodemaputil.get_nodemap_file(
320 320 opener, self.indexfile
321 321 )
322 322
323 323 self.opener = opener
324 324 assert target[0] in ALL_KINDS
325 325 assert len(target) == 2
326 326 self.target = target
327 327 # When True, indexfile is opened with checkambig=True at writing, to
328 328 # avoid file stat ambiguity.
329 329 self._checkambig = checkambig
330 330 self._mmaplargeindex = mmaplargeindex
331 331 self._censorable = censorable
332 332 # 3-tuple of (node, rev, text) for a raw revision.
333 333 self._revisioncache = None
334 334 # Maps rev to chain base rev.
335 335 self._chainbasecache = util.lrucachedict(100)
336 336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 337 self._chunkcache = (0, b'')
338 338 # How much data to read and cache into the raw revlog data cache.
339 339 self._chunkcachesize = 65536
340 340 self._maxchainlen = None
341 341 self._deltabothparents = True
342 342 self.index = None
343 343 self._nodemap_docket = None
344 344 # Mapping of partial identifiers to full nodes.
345 345 self._pcache = {}
346 346 # Mapping of revision integer to full node.
347 347 self._compengine = b'zlib'
348 348 self._compengineopts = {}
349 349 self._maxdeltachainspan = -1
350 350 self._withsparseread = False
351 351 self._sparserevlog = False
352 352 self._srdensitythreshold = 0.50
353 353 self._srmingapsize = 262144
354 354
355 355 # Make copy of flag processors so each revlog instance can support
356 356 # custom flags.
357 357 self._flagprocessors = dict(flagutil.flagprocessors)
358 358
359 359 # 2-tuple of file handles being used for active writing.
360 360 self._writinghandles = None
361 361
362 362 self._loadindex()
363 363
364 364 self._concurrencychecker = concurrencychecker
365 365
366 366 def _loadindex(self):
367 367 mmapindexthreshold = None
368 368 opts = self.opener.options
369 369
370 370 if b'revlogv2' in opts:
371 371 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
372 372 elif b'revlogv1' in opts:
373 373 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
374 374 if b'generaldelta' in opts:
375 375 newversionflags |= FLAG_GENERALDELTA
376 376 elif b'revlogv0' in self.opener.options:
377 377 newversionflags = REVLOGV0
378 378 else:
379 379 newversionflags = REVLOG_DEFAULT_VERSION
380 380
381 381 if b'chunkcachesize' in opts:
382 382 self._chunkcachesize = opts[b'chunkcachesize']
383 383 if b'maxchainlen' in opts:
384 384 self._maxchainlen = opts[b'maxchainlen']
385 385 if b'deltabothparents' in opts:
386 386 self._deltabothparents = opts[b'deltabothparents']
387 387 self._lazydelta = bool(opts.get(b'lazydelta', True))
388 388 self._lazydeltabase = False
389 389 if self._lazydelta:
390 390 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
391 391 if b'compengine' in opts:
392 392 self._compengine = opts[b'compengine']
393 393 if b'zlib.level' in opts:
394 394 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
395 395 if b'zstd.level' in opts:
396 396 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
397 397 if b'maxdeltachainspan' in opts:
398 398 self._maxdeltachainspan = opts[b'maxdeltachainspan']
399 399 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
400 400 mmapindexthreshold = opts[b'mmapindexthreshold']
401 401 self.hassidedata = bool(opts.get(b'side-data', False))
402 402 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
403 403 withsparseread = bool(opts.get(b'with-sparse-read', False))
404 404 # sparse-revlog forces sparse-read
405 405 self._withsparseread = self._sparserevlog or withsparseread
406 406 if b'sparse-read-density-threshold' in opts:
407 407 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
408 408 if b'sparse-read-min-gap-size' in opts:
409 409 self._srmingapsize = opts[b'sparse-read-min-gap-size']
410 410 if opts.get(b'enableellipsis'):
411 411 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
412 412
413 413 # revlog v0 doesn't have flag processors
414 414 for flag, processor in pycompat.iteritems(
415 415 opts.get(b'flagprocessors', {})
416 416 ):
417 417 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
418 418
419 419 if self._chunkcachesize <= 0:
420 420 raise error.RevlogError(
421 421 _(b'revlog chunk cache size %r is not greater than 0')
422 422 % self._chunkcachesize
423 423 )
424 424 elif self._chunkcachesize & (self._chunkcachesize - 1):
425 425 raise error.RevlogError(
426 426 _(b'revlog chunk cache size %r is not a power of 2')
427 427 % self._chunkcachesize
428 428 )
429 429
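The `x & (x - 1)` test above is the usual power-of-two check: subtracting one flips the lowest set bit and everything below it, so the AND is zero exactly when a single bit was set. A small illustration (not part of the module):

    for size in (1, 2, 65536):
        assert size & (size - 1) == 0   # powers of two are accepted
    for size in (3, 12, 65535):
        assert size & (size - 1) != 0   # anything else raises RevlogError above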
430 430 indexdata = b''
431 431 self._initempty = True
432 432 try:
433 433 with self._indexfp() as f:
434 434 if (
435 435 mmapindexthreshold is not None
436 436 and self.opener.fstat(f).st_size >= mmapindexthreshold
437 437 ):
438 438 # TODO: we should .close() the mmap to release resources without
439 439 # relying on Python GC
440 440 indexdata = util.buffer(util.mmapread(f))
441 441 else:
442 442 indexdata = f.read()
443 443 if len(indexdata) > 0:
444 444 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
445 445 self._initempty = False
446 446 else:
447 447 versionflags = newversionflags
448 448 except IOError as inst:
449 449 if inst.errno != errno.ENOENT:
450 450 raise
451 451
452 452 versionflags = newversionflags
453 453
454 self.version = versionflags
455
456 flags = versionflags & ~0xFFFF
457 fmt = versionflags & 0xFFFF
454 flags = self._format_flags = versionflags & ~0xFFFF
455 fmt = self._format_version = versionflags & 0xFFFF
458 456
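This hunk is the point of the change: the combined `self.version` integer is replaced by its two components. A sketch of the decomposition for a default v1 inline header (values follow from the constants imported above; not part of the module):

    header = REVLOGV1 | FLAG_INLINE_DATA          # 0x00010001 on disk
    assert header & ~0xFFFF == FLAG_INLINE_DATA   # -> self._format_flags
    assert header & 0xFFFF == REVLOGV1            # -> self._format_version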
459 457 if fmt == REVLOGV0:
460 458 if flags:
461 459 raise error.RevlogError(
462 460 _(b'unknown flags (%#04x) in version %d revlog %s')
463 461 % (flags >> 16, fmt, self.indexfile)
464 462 )
465 463
466 464 self._inline = False
467 465 self._generaldelta = False
468 466
469 467 elif fmt == REVLOGV1:
470 468 if flags & ~REVLOGV1_FLAGS:
471 469 raise error.RevlogError(
472 470 _(b'unknown flags (%#04x) in version %d revlog %s')
473 471 % (flags >> 16, fmt, self.indexfile)
474 472 )
475 473
476 474 self._inline = versionflags & FLAG_INLINE_DATA
477 475 self._generaldelta = versionflags & FLAG_GENERALDELTA
478 476
479 477 elif fmt == REVLOGV2:
480 478 if flags & ~REVLOGV2_FLAGS:
481 479 raise error.RevlogError(
482 480 _(b'unknown flags (%#04x) in version %d revlog %s')
483 481 % (flags >> 16, fmt, self.indexfile)
484 482 )
485 483
486 484 # There is a bug in the transaction handling when going from an
487 485 # inline revlog to a separate index and data file. Turn it off until
488 486 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
489 487 # See issue6485
490 488 self._inline = False
491 489 # generaldelta implied by version 2 revlogs.
492 490 self._generaldelta = True
493 491
494 492 else:
495 493 raise error.RevlogError(
496 494 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
497 495 )
498 496
499 497 self.nodeconstants = sha1nodeconstants
500 498 self.nullid = self.nodeconstants.nullid
501 499
502 500 # sparse-revlog can't be on without general-delta (issue6056)
503 501 if not self._generaldelta:
504 502 self._sparserevlog = False
505 503
506 504 self._storedeltachains = True
507 505
508 506 devel_nodemap = (
509 507 self.nodemap_file
510 508 and opts.get(b'devel-force-nodemap', False)
511 509 and parse_index_v1_nodemap is not None
512 510 )
513 511
514 512 use_rust_index = False
515 513 if rustrevlog is not None:
516 514 if self.nodemap_file is not None:
517 515 use_rust_index = True
518 516 else:
519 517 use_rust_index = self.opener.options.get(b'rust.index')
520 518
521 519 self._parse_index = parse_index_v1
522 if self.version == REVLOGV0:
520 if self._format_version == REVLOGV0:
523 521 self._parse_index = revlogv0.parse_index_v0
524 522 elif fmt == REVLOGV2:
525 523 self._parse_index = parse_index_v2
526 524 elif devel_nodemap:
527 525 self._parse_index = parse_index_v1_nodemap
528 526 elif use_rust_index:
529 527 self._parse_index = parse_index_v1_mixed
530 528 try:
531 529 d = self._parse_index(indexdata, self._inline)
532 530 index, _chunkcache = d
533 531 use_nodemap = (
534 532 not self._inline
535 533 and self.nodemap_file is not None
536 534 and util.safehasattr(index, 'update_nodemap_data')
537 535 )
538 536 if use_nodemap:
539 537 nodemap_data = nodemaputil.persisted_data(self)
540 538 if nodemap_data is not None:
541 539 docket = nodemap_data[0]
542 540 if (
543 541 len(d[0]) > docket.tip_rev
544 542 and d[0][docket.tip_rev][7] == docket.tip_node
545 543 ):
546 544 # no changelog tampering
547 545 self._nodemap_docket = docket
548 546 index.update_nodemap_data(*nodemap_data)
549 547 except (ValueError, IndexError):
550 548 raise error.RevlogError(
551 549 _(b"index %s is corrupted") % self.indexfile
552 550 )
553 551 self.index, self._chunkcache = d
554 552 if not self._chunkcache:
555 553 self._chunkclear()
556 554 # revnum -> (chain-length, sum-delta-length)
557 555 self._chaininfocache = util.lrucachedict(500)
558 556 # revlog header -> revlog compressor
559 557 self._decompressors = {}
560 558
561 559 @util.propertycache
562 560 def revlog_kind(self):
563 561 return self.target[0]
564 562
565 563 @util.propertycache
566 564 def _compressor(self):
567 565 engine = util.compengines[self._compengine]
568 566 return engine.revlogcompressor(self._compengineopts)
569 567
570 568 def _indexfp(self, mode=b'r'):
571 569 """file object for the revlog's index file"""
572 570 args = {'mode': mode}
573 571 if mode != b'r':
574 572 args['checkambig'] = self._checkambig
575 573 if mode == b'w':
576 574 args['atomictemp'] = True
577 575 return self.opener(self.indexfile, **args)
578 576
579 577 def _datafp(self, mode=b'r'):
580 578 """file object for the revlog's data file"""
581 579 return self.opener(self.datafile, mode=mode)
582 580
583 581 @contextlib.contextmanager
584 582 def _datareadfp(self, existingfp=None):
585 583 """file object suitable to read data"""
586 584 # Use explicit file handle, if given.
587 585 if existingfp is not None:
588 586 yield existingfp
589 587
590 588 # Use a file handle being actively used for writes, if available.
591 589 # There is some danger in doing this because reads will seek the
592 590 # file. However, _writeentry() performs a SEEK_END before all writes,
593 591 # so we should be safe.
594 592 elif self._writinghandles:
595 593 if self._inline:
596 594 yield self._writinghandles[0]
597 595 else:
598 596 yield self._writinghandles[1]
599 597
600 598 # Otherwise open a new file handle.
601 599 else:
602 600 if self._inline:
603 601 func = self._indexfp
604 602 else:
605 603 func = self._datafp
606 604 with func() as fp:
607 605 yield fp
608 606
609 607 def tiprev(self):
610 608 return len(self.index) - 1
611 609
612 610 def tip(self):
613 611 return self.node(self.tiprev())
614 612
615 613 def __contains__(self, rev):
616 614 return 0 <= rev < len(self)
617 615
618 616 def __len__(self):
619 617 return len(self.index)
620 618
621 619 def __iter__(self):
622 620 return iter(pycompat.xrange(len(self)))
623 621
624 622 def revs(self, start=0, stop=None):
625 623 """iterate over all rev in this revlog (from start to stop)"""
626 624 return storageutil.iterrevs(len(self), start=start, stop=stop)
627 625
628 626 @property
629 627 def nodemap(self):
630 628 msg = (
631 629 b"revlog.nodemap is deprecated, "
632 630 b"use revlog.index.[has_node|rev|get_rev]"
633 631 )
634 632 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
635 633 return self.index.nodemap
636 634
637 635 @property
638 636 def _nodecache(self):
639 637 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
640 638 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
641 639 return self.index.nodemap
642 640
643 641 def hasnode(self, node):
644 642 try:
645 643 self.rev(node)
646 644 return True
647 645 except KeyError:
648 646 return False
649 647
650 648 def candelta(self, baserev, rev):
651 649 """whether two revisions (baserev, rev) can be delta-ed or not"""
652 650 # Disable delta if either rev requires a content-changing flag
653 651 # processor (ex. LFS). This is because such flag processors can alter
654 652 # the rawtext content that the delta will be based on, and two clients
655 653 # could have the same revlog node with different flags (i.e. different
656 654 # rawtext contents) and the delta could be incompatible.
657 655 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
658 656 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
659 657 ):
660 658 return False
661 659 return True
662 660
663 661 def update_caches(self, transaction):
664 662 if self.nodemap_file is not None:
665 663 if transaction is None:
666 664 nodemaputil.update_persistent_nodemap(self)
667 665 else:
668 666 nodemaputil.setup_persistent_nodemap(transaction, self)
669 667
670 668 def clearcaches(self):
671 669 self._revisioncache = None
672 670 self._chainbasecache.clear()
673 671 self._chunkcache = (0, b'')
674 672 self._pcache = {}
675 673 self._nodemap_docket = None
676 674 self.index.clearcaches()
677 675 # The python code is the one responsible for validating the docket, so
678 676 # we end up having to refresh it here.
679 677 use_nodemap = (
680 678 not self._inline
681 679 and self.nodemap_file is not None
682 680 and util.safehasattr(self.index, 'update_nodemap_data')
683 681 )
684 682 if use_nodemap:
685 683 nodemap_data = nodemaputil.persisted_data(self)
686 684 if nodemap_data is not None:
687 685 self._nodemap_docket = nodemap_data[0]
688 686 self.index.update_nodemap_data(*nodemap_data)
689 687
690 688 def rev(self, node):
691 689 try:
692 690 return self.index.rev(node)
693 691 except TypeError:
694 692 raise
695 693 except error.RevlogError:
696 694 # parsers.c radix tree lookup failed
697 695 if (
698 696 node == self.nodeconstants.wdirid
699 697 or node in self.nodeconstants.wdirfilenodeids
700 698 ):
701 699 raise error.WdirUnsupported
702 700 raise error.LookupError(node, self.indexfile, _(b'no node'))
703 701
704 702 # Accessors for index entries.
705 703
706 704 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
707 705 # are flags.
708 706 def start(self, rev):
709 707 return int(self.index[rev][0] >> 16)
710 708
711 709 def flags(self, rev):
712 710 return self.index[rev][0] & 0xFFFF
713 711
714 712 def length(self, rev):
715 713 return self.index[rev][1]
716 714
717 715 def sidedata_length(self, rev):
718 716 if not self.hassidedata:
719 717 return 0
720 718 return self.index[rev][9]
721 719
722 720 def rawsize(self, rev):
723 721 """return the length of the uncompressed text for a given revision"""
724 722 l = self.index[rev][2]
725 723 if l >= 0:
726 724 return l
727 725
728 726 t = self.rawdata(rev)
729 727 return len(t)
730 728
731 729 def size(self, rev):
732 730 """length of non-raw text (processed by a "read" flag processor)"""
733 731 # fast path: if no "read" flag processor could change the content,
734 732 # size is rawsize. note: ELLIPSIS is known to not change the content.
735 733 flags = self.flags(rev)
736 734 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
737 735 return self.rawsize(rev)
738 736
739 737 return len(self.revision(rev, raw=False))
740 738
741 739 def chainbase(self, rev):
742 740 base = self._chainbasecache.get(rev)
743 741 if base is not None:
744 742 return base
745 743
746 744 index = self.index
747 745 iterrev = rev
748 746 base = index[iterrev][3]
749 747 while base != iterrev:
750 748 iterrev = base
751 749 base = index[iterrev][3]
752 750
753 751 self._chainbasecache[rev] = base
754 752 return base
755 753
756 754 def linkrev(self, rev):
757 755 return self.index[rev][4]
758 756
759 757 def parentrevs(self, rev):
760 758 try:
761 759 entry = self.index[rev]
762 760 except IndexError:
763 761 if rev == wdirrev:
764 762 raise error.WdirUnsupported
765 763 raise
766 764 if entry[5] == nullrev:
767 765 return entry[6], entry[5]
768 766 else:
769 767 return entry[5], entry[6]
770 768
771 769 # fast parentrevs(rev) where rev isn't filtered
772 770 _uncheckedparentrevs = parentrevs
773 771
774 772 def node(self, rev):
775 773 try:
776 774 return self.index[rev][7]
777 775 except IndexError:
778 776 if rev == wdirrev:
779 777 raise error.WdirUnsupported
780 778 raise
781 779
782 780 # Derived from index values.
783 781
784 782 def end(self, rev):
785 783 return self.start(rev) + self.length(rev)
786 784
787 785 def parents(self, node):
788 786 i = self.index
789 787 d = i[self.rev(node)]
790 788 # inline node() to avoid function call overhead
791 789 if d[5] == nullrev:
792 790 return i[d[6]][7], i[d[5]][7]
793 791 else:
794 792 return i[d[5]][7], i[d[6]][7]
795 793
796 794 def chainlen(self, rev):
797 795 return self._chaininfo(rev)[0]
798 796
799 797 def _chaininfo(self, rev):
800 798 chaininfocache = self._chaininfocache
801 799 if rev in chaininfocache:
802 800 return chaininfocache[rev]
803 801 index = self.index
804 802 generaldelta = self._generaldelta
805 803 iterrev = rev
806 804 e = index[iterrev]
807 805 clen = 0
808 806 compresseddeltalen = 0
809 807 while iterrev != e[3]:
810 808 clen += 1
811 809 compresseddeltalen += e[1]
812 810 if generaldelta:
813 811 iterrev = e[3]
814 812 else:
815 813 iterrev -= 1
816 814 if iterrev in chaininfocache:
817 815 t = chaininfocache[iterrev]
818 816 clen += t[0]
819 817 compresseddeltalen += t[1]
820 818 break
821 819 e = index[iterrev]
822 820 else:
823 821 # Add text length of base since decompressing that also takes
824 822 # work. For cache hits the length is already included.
825 823 compresseddeltalen += e[1]
826 824 r = (clen, compresseddeltalen)
827 825 chaininfocache[rev] = r
828 826 return r
829 827
830 828 def _deltachain(self, rev, stoprev=None):
831 829 """Obtain the delta chain for a revision.
832 830
833 831 ``stoprev`` specifies a revision to stop at. If not specified, we
834 832 stop at the base of the chain.
835 833
836 834 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
837 835 revs in ascending order and ``stopped`` is a bool indicating whether
838 836 ``stoprev`` was hit.
839 837 """
840 838 # Try C implementation.
841 839 try:
842 840 return self.index.deltachain(rev, stoprev, self._generaldelta)
843 841 except AttributeError:
844 842 pass
845 843
846 844 chain = []
847 845
848 846 # Alias to prevent attribute lookup in tight loop.
849 847 index = self.index
850 848 generaldelta = self._generaldelta
851 849
852 850 iterrev = rev
853 851 e = index[iterrev]
854 852 while iterrev != e[3] and iterrev != stoprev:
855 853 chain.append(iterrev)
856 854 if generaldelta:
857 855 iterrev = e[3]
858 856 else:
859 857 iterrev -= 1
860 858 e = index[iterrev]
861 859
862 860 if iterrev == stoprev:
863 861 stopped = True
864 862 else:
865 863 chain.append(iterrev)
866 864 stopped = False
867 865
868 866 chain.reverse()
869 867 return chain, stopped
870 868
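A hedged usage sketch (assuming `rl` is an open revlog and `rev` a valid revision): the returned list names every revision whose chunk must be applied, in order, to rebuild `rev`'s fulltext.

    chain, stopped = rl._deltachain(rev)
    assert chain[-1] == rev              # the chain always ends at the request
    if not stopped:                      # no stoprev hit: starts at the chain base
        assert chain[0] == rl.chainbase(rev)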
871 869 def ancestors(self, revs, stoprev=0, inclusive=False):
872 870 """Generate the ancestors of 'revs' in reverse revision order.
873 871 Does not generate revs lower than stoprev.
874 872
875 873 See the documentation for ancestor.lazyancestors for more details."""
876 874
877 875 # first, make sure start revisions aren't filtered
878 876 revs = list(revs)
879 877 checkrev = self.node
880 878 for r in revs:
881 879 checkrev(r)
882 880 # and we're sure ancestors aren't filtered as well
883 881
884 882 if rustancestor is not None:
885 883 lazyancestors = rustancestor.LazyAncestors
886 884 arg = self.index
887 885 else:
888 886 lazyancestors = ancestor.lazyancestors
889 887 arg = self._uncheckedparentrevs
890 888 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
891 889
892 890 def descendants(self, revs):
893 891 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
894 892
895 893 def findcommonmissing(self, common=None, heads=None):
896 894 """Return a tuple of the ancestors of common and the ancestors of heads
897 895 that are not ancestors of common. In revset terminology, we return the
898 896 tuple:
899 897
900 898 ::common, (::heads) - (::common)
901 899
902 900 The list is sorted by revision number, meaning it is
903 901 topologically sorted.
904 902
905 903 'heads' and 'common' are both lists of node IDs. If heads is
906 904 not supplied, uses all of the revlog's heads. If common is not
907 905 supplied, uses nullid."""
908 906 if common is None:
909 907 common = [self.nullid]
910 908 if heads is None:
911 909 heads = self.heads()
912 910
913 911 common = [self.rev(n) for n in common]
914 912 heads = [self.rev(n) for n in heads]
915 913
916 914 # we want the ancestors, but inclusive
917 915 class lazyset(object):
918 916 def __init__(self, lazyvalues):
919 917 self.addedvalues = set()
920 918 self.lazyvalues = lazyvalues
921 919
922 920 def __contains__(self, value):
923 921 return value in self.addedvalues or value in self.lazyvalues
924 922
925 923 def __iter__(self):
926 924 added = self.addedvalues
927 925 for r in added:
928 926 yield r
929 927 for r in self.lazyvalues:
930 928 if r not in added:
931 929 yield r
932 930
933 931 def add(self, value):
934 932 self.addedvalues.add(value)
935 933
936 934 def update(self, values):
937 935 self.addedvalues.update(values)
938 936
939 937 has = lazyset(self.ancestors(common))
940 938 has.add(nullrev)
941 939 has.update(common)
942 940
943 941 # take all ancestors from heads that aren't in has
944 942 missing = set()
945 943 visit = collections.deque(r for r in heads if r not in has)
946 944 while visit:
947 945 r = visit.popleft()
948 946 if r in missing:
949 947 continue
950 948 else:
951 949 missing.add(r)
952 950 for p in self.parentrevs(r):
953 951 if p not in has:
954 952 visit.append(p)
955 953 missing = list(missing)
956 954 missing.sort()
957 955 return has, [self.node(miss) for miss in missing]
958 956
959 957 def incrementalmissingrevs(self, common=None):
960 958 """Return an object that can be used to incrementally compute the
961 959 revision numbers of the ancestors of arbitrary sets that are not
962 960 ancestors of common. This is an ancestor.incrementalmissingancestors
963 961 object.
964 962
965 963 'common' is a list of revision numbers. If common is not supplied, uses
966 964 nullrev.
967 965 """
968 966 if common is None:
969 967 common = [nullrev]
970 968
971 969 if rustancestor is not None:
972 970 return rustancestor.MissingAncestors(self.index, common)
973 971 return ancestor.incrementalmissingancestors(self.parentrevs, common)
974 972
975 973 def findmissingrevs(self, common=None, heads=None):
976 974 """Return the revision numbers of the ancestors of heads that
977 975 are not ancestors of common.
978 976
979 977 More specifically, return a list of revision numbers corresponding to
980 978 nodes N such that every N satisfies the following constraints:
981 979
982 980 1. N is an ancestor of some node in 'heads'
983 981 2. N is not an ancestor of any node in 'common'
984 982
985 983 The list is sorted by revision number, meaning it is
986 984 topologically sorted.
987 985
988 986 'heads' and 'common' are both lists of revision numbers. If heads is
989 987 not supplied, uses all of the revlog's heads. If common is not
990 988 supplied, uses nullid."""
991 989 if common is None:
992 990 common = [nullrev]
993 991 if heads is None:
994 992 heads = self.headrevs()
995 993
996 994 inc = self.incrementalmissingrevs(common=common)
997 995 return inc.missingancestors(heads)
998 996
999 997 def findmissing(self, common=None, heads=None):
1000 998 """Return the ancestors of heads that are not ancestors of common.
1001 999
1002 1000 More specifically, return a list of nodes N such that every N
1003 1001 satisfies the following constraints:
1004 1002
1005 1003 1. N is an ancestor of some node in 'heads'
1006 1004 2. N is not an ancestor of any node in 'common'
1007 1005
1008 1006 The list is sorted by revision number, meaning it is
1009 1007 topologically sorted.
1010 1008
1011 1009 'heads' and 'common' are both lists of node IDs. If heads is
1012 1010 not supplied, uses all of the revlog's heads. If common is not
1013 1011 supplied, uses nullid."""
1014 1012 if common is None:
1015 1013 common = [self.nullid]
1016 1014 if heads is None:
1017 1015 heads = self.heads()
1018 1016
1019 1017 common = [self.rev(n) for n in common]
1020 1018 heads = [self.rev(n) for n in heads]
1021 1019
1022 1020 inc = self.incrementalmissingrevs(common=common)
1023 1021 return [self.node(r) for r in inc.missingancestors(heads)]
1024 1022
1025 1023 def nodesbetween(self, roots=None, heads=None):
1026 1024 """Return a topological path from 'roots' to 'heads'.
1027 1025
1028 1026 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1029 1027 topologically sorted list of all nodes N that satisfy both of
1030 1028 these constraints:
1031 1029
1032 1030 1. N is a descendant of some node in 'roots'
1033 1031 2. N is an ancestor of some node in 'heads'
1034 1032
1035 1033 Every node is considered to be both a descendant and an ancestor
1036 1034 of itself, so every reachable node in 'roots' and 'heads' will be
1037 1035 included in 'nodes'.
1038 1036
1039 1037 'outroots' is the list of reachable nodes in 'roots', i.e., the
1040 1038 subset of 'roots' that is returned in 'nodes'. Likewise,
1041 1039 'outheads' is the subset of 'heads' that is also in 'nodes'.
1042 1040
1043 1041 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1044 1042 unspecified, uses nullid as the only root. If 'heads' is
1045 1043 unspecified, uses list of all of the revlog's heads."""
1046 1044 nonodes = ([], [], [])
1047 1045 if roots is not None:
1048 1046 roots = list(roots)
1049 1047 if not roots:
1050 1048 return nonodes
1051 1049 lowestrev = min([self.rev(n) for n in roots])
1052 1050 else:
1053 1051 roots = [self.nullid] # Everybody's a descendant of nullid
1054 1052 lowestrev = nullrev
1055 1053 if (lowestrev == nullrev) and (heads is None):
1056 1054 # We want _all_ the nodes!
1057 1055 return (
1058 1056 [self.node(r) for r in self],
1059 1057 [self.nullid],
1060 1058 list(self.heads()),
1061 1059 )
1062 1060 if heads is None:
1063 1061 # All nodes are ancestors, so the latest ancestor is the last
1064 1062 # node.
1065 1063 highestrev = len(self) - 1
1066 1064 # Set ancestors to None to signal that every node is an ancestor.
1067 1065 ancestors = None
1068 1066 # Set heads to an empty dictionary for later discovery of heads
1069 1067 heads = {}
1070 1068 else:
1071 1069 heads = list(heads)
1072 1070 if not heads:
1073 1071 return nonodes
1074 1072 ancestors = set()
1075 1073 # Turn heads into a dictionary so we can remove 'fake' heads.
1076 1074 # Also, later we will be using it to filter out the heads we can't
1077 1075 # find from roots.
1078 1076 heads = dict.fromkeys(heads, False)
1079 1077 # Start at the top and keep marking parents until we're done.
1080 1078 nodestotag = set(heads)
1081 1079 # Remember where the top was so we can use it as a limit later.
1082 1080 highestrev = max([self.rev(n) for n in nodestotag])
1083 1081 while nodestotag:
1084 1082 # grab a node to tag
1085 1083 n = nodestotag.pop()
1086 1084 # Never tag nullid
1087 1085 if n == self.nullid:
1088 1086 continue
1089 1087 # A node's revision number represents its place in a
1090 1088 # topologically sorted list of nodes.
1091 1089 r = self.rev(n)
1092 1090 if r >= lowestrev:
1093 1091 if n not in ancestors:
1094 1092 # If we are possibly a descendant of one of the roots
1095 1093 # and we haven't already been marked as an ancestor
1096 1094 ancestors.add(n) # Mark as ancestor
1097 1095 # Add non-nullid parents to list of nodes to tag.
1098 1096 nodestotag.update(
1099 1097 [p for p in self.parents(n) if p != self.nullid]
1100 1098 )
1101 1099 elif n in heads: # We've seen it before, is it a fake head?
1102 1100 # So it is, real heads should not be the ancestors of
1103 1101 # any other heads.
1104 1102 heads.pop(n)
1105 1103 if not ancestors:
1106 1104 return nonodes
1107 1105 # Now that we have our set of ancestors, we want to remove any
1108 1106 # roots that are not ancestors.
1109 1107
1110 1108 # If one of the roots was nullid, everything is included anyway.
1111 1109 if lowestrev > nullrev:
1112 1110 # But, since we weren't, let's recompute the lowest rev to not
1113 1111 # include roots that aren't ancestors.
1114 1112
1115 1113 # Filter out roots that aren't ancestors of heads
1116 1114 roots = [root for root in roots if root in ancestors]
1117 1115 # Recompute the lowest revision
1118 1116 if roots:
1119 1117 lowestrev = min([self.rev(root) for root in roots])
1120 1118 else:
1121 1119 # No more roots? Return empty list
1122 1120 return nonodes
1123 1121 else:
1124 1122 # We are descending from nullid, and don't need to care about
1125 1123 # any other roots.
1126 1124 lowestrev = nullrev
1127 1125 roots = [self.nullid]
1128 1126 # Transform our roots list into a set.
1129 1127 descendants = set(roots)
1130 1128 # Also, keep the original roots so we can filter out roots that aren't
1131 1129 # 'real' roots (i.e. are descended from other roots).
1132 1130 roots = descendants.copy()
1133 1131 # Our topologically sorted list of output nodes.
1134 1132 orderedout = []
1135 1133 # Don't start at nullid since we don't want nullid in our output list,
1136 1134 # and if nullid shows up in descendants, empty parents will look like
1137 1135 # they're descendants.
1138 1136 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1139 1137 n = self.node(r)
1140 1138 isdescendant = False
1141 1139 if lowestrev == nullrev: # Everybody is a descendant of nullid
1142 1140 isdescendant = True
1143 1141 elif n in descendants:
1144 1142 # n is already a descendant
1145 1143 isdescendant = True
1146 1144 # This check only needs to be done here because all the roots
1147 1145 # will start being marked as descendants before the loop.
1148 1146 if n in roots:
1149 1147 # If n was a root, check if it's a 'real' root.
1150 1148 p = tuple(self.parents(n))
1151 1149 # If any of its parents are descendants, it's not a root.
1152 1150 if (p[0] in descendants) or (p[1] in descendants):
1153 1151 roots.remove(n)
1154 1152 else:
1155 1153 p = tuple(self.parents(n))
1156 1154 # A node is a descendant if either of its parents are
1157 1155 # descendants. (We seeded the descendants set with the roots
1158 1156 # up there, remember?)
1159 1157 if (p[0] in descendants) or (p[1] in descendants):
1160 1158 descendants.add(n)
1161 1159 isdescendant = True
1162 1160 if isdescendant and ((ancestors is None) or (n in ancestors)):
1163 1161 # Only include nodes that are both descendants and ancestors.
1164 1162 orderedout.append(n)
1165 1163 if (ancestors is not None) and (n in heads):
1166 1164 # We're trying to figure out which heads are reachable
1167 1165 # from roots.
1168 1166 # Mark this head as having been reached
1169 1167 heads[n] = True
1170 1168 elif ancestors is None:
1171 1169 # Otherwise, we're trying to discover the heads.
1172 1170 # Assume this is a head because if it isn't, the next step
1173 1171 # will eventually remove it.
1174 1172 heads[n] = True
1175 1173 # But, obviously its parents aren't.
1176 1174 for p in self.parents(n):
1177 1175 heads.pop(p, None)
1178 1176 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1179 1177 roots = list(roots)
1180 1178 assert orderedout
1181 1179 assert roots
1182 1180 assert heads
1183 1181 return (orderedout, roots, heads)
1184 1182
1185 1183 def headrevs(self, revs=None):
1186 1184 if revs is None:
1187 1185 try:
1188 1186 return self.index.headrevs()
1189 1187 except AttributeError:
1190 1188 return self._headrevs()
1191 1189 if rustdagop is not None:
1192 1190 return rustdagop.headrevs(self.index, revs)
1193 1191 return dagop.headrevs(revs, self._uncheckedparentrevs)
1194 1192
1195 1193 def computephases(self, roots):
1196 1194 return self.index.computephasesmapsets(roots)
1197 1195
1198 1196 def _headrevs(self):
1199 1197 count = len(self)
1200 1198 if not count:
1201 1199 return [nullrev]
1202 1200 # we won't iterate over filtered revs so nobody is a head at start
1203 1201 ishead = [0] * (count + 1)
1204 1202 index = self.index
1205 1203 for r in self:
1206 1204 ishead[r] = 1 # I may be a head
1207 1205 e = index[r]
1208 1206 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1209 1207 return [r for r, val in enumerate(ishead) if val]
1210 1208
1211 1209 def heads(self, start=None, stop=None):
1212 1210 """return the list of all nodes that have no children
1213 1211
1214 1212 if start is specified, only heads that are descendants of
1215 1213 start will be returned
1216 1214 if stop is specified, it will consider all the revs from stop
1217 1215 as if they had no children
1218 1216 """
1219 1217 if start is None and stop is None:
1220 1218 if not len(self):
1221 1219 return [self.nullid]
1222 1220 return [self.node(r) for r in self.headrevs()]
1223 1221
1224 1222 if start is None:
1225 1223 start = nullrev
1226 1224 else:
1227 1225 start = self.rev(start)
1228 1226
1229 1227 stoprevs = {self.rev(n) for n in stop or []}
1230 1228
1231 1229 revs = dagop.headrevssubset(
1232 1230 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1233 1231 )
1234 1232
1235 1233 return [self.node(rev) for rev in revs]
1236 1234
1237 1235 def children(self, node):
1238 1236 """find the children of a given node"""
1239 1237 c = []
1240 1238 p = self.rev(node)
1241 1239 for r in self.revs(start=p + 1):
1242 1240 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1243 1241 if prevs:
1244 1242 for pr in prevs:
1245 1243 if pr == p:
1246 1244 c.append(self.node(r))
1247 1245 elif p == nullrev:
1248 1246 c.append(self.node(r))
1249 1247 return c
1250 1248
1251 1249 def commonancestorsheads(self, a, b):
1252 1250 """calculate all the heads of the common ancestors of nodes a and b"""
1253 1251 a, b = self.rev(a), self.rev(b)
1254 1252 ancs = self._commonancestorsheads(a, b)
1255 1253 return pycompat.maplist(self.node, ancs)
1256 1254
1257 1255 def _commonancestorsheads(self, *revs):
1258 1256 """calculate all the heads of the common ancestors of revs"""
1259 1257 try:
1260 1258 ancs = self.index.commonancestorsheads(*revs)
1261 1259 except (AttributeError, OverflowError): # C implementation failed
1262 1260 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1263 1261 return ancs
1264 1262
1265 1263 def isancestor(self, a, b):
1266 1264 """return True if node a is an ancestor of node b
1267 1265
1268 1266 A revision is considered an ancestor of itself."""
1269 1267 a, b = self.rev(a), self.rev(b)
1270 1268 return self.isancestorrev(a, b)
1271 1269
1272 1270 def isancestorrev(self, a, b):
1273 1271 """return True if revision a is an ancestor of revision b
1274 1272
1275 1273 A revision is considered an ancestor of itself.
1276 1274
1277 1275 The implementation of this is trivial but the use of
1278 1276 reachableroots is not."""
1279 1277 if a == nullrev:
1280 1278 return True
1281 1279 elif a == b:
1282 1280 return True
1283 1281 elif a > b:
1284 1282 return False
1285 1283 return bool(self.reachableroots(a, [b], [a], includepath=False))
1286 1284
1287 1285 def reachableroots(self, minroot, heads, roots, includepath=False):
1288 1286 """return (heads(::(<roots> and <roots>::<heads>)))
1289 1287
1290 1288 If includepath is True, return (<roots>::<heads>)."""
1291 1289 try:
1292 1290 return self.index.reachableroots2(
1293 1291 minroot, heads, roots, includepath
1294 1292 )
1295 1293 except AttributeError:
1296 1294 return dagop._reachablerootspure(
1297 1295 self.parentrevs, minroot, roots, heads, includepath
1298 1296 )
1299 1297
1300 1298 def ancestor(self, a, b):
1301 1299 """calculate the "best" common ancestor of nodes a and b"""
1302 1300
1303 1301 a, b = self.rev(a), self.rev(b)
1304 1302 try:
1305 1303 ancs = self.index.ancestors(a, b)
1306 1304 except (AttributeError, OverflowError):
1307 1305 ancs = ancestor.ancestors(self.parentrevs, a, b)
1308 1306 if ancs:
1309 1307 # choose a consistent winner when there's a tie
1310 1308 return min(map(self.node, ancs))
1311 1309 return self.nullid
1312 1310
1313 1311 def _match(self, id):
1314 1312 if isinstance(id, int):
1315 1313 # rev
1316 1314 return self.node(id)
1317 1315 if len(id) == self.nodeconstants.nodelen:
1318 1316 # possibly a binary node
1319 1317 # odds of a binary node being all hex in ASCII are 1 in 10**25
1320 1318 try:
1321 1319 node = id
1322 1320 self.rev(node) # quick search the index
1323 1321 return node
1324 1322 except error.LookupError:
1325 1323 pass # may be partial hex id
1326 1324 try:
1327 1325 # str(rev)
1328 1326 rev = int(id)
1329 1327 if b"%d" % rev != id:
1330 1328 raise ValueError
1331 1329 if rev < 0:
1332 1330 rev = len(self) + rev
1333 1331 if rev < 0 or rev >= len(self):
1334 1332 raise ValueError
1335 1333 return self.node(rev)
1336 1334 except (ValueError, OverflowError):
1337 1335 pass
1338 1336 if len(id) == 2 * self.nodeconstants.nodelen:
1339 1337 try:
1340 1338 # a full hex nodeid?
1341 1339 node = bin(id)
1342 1340 self.rev(node)
1343 1341 return node
1344 1342 except (TypeError, error.LookupError):
1345 1343 pass
1346 1344
1347 1345 def _partialmatch(self, id):
1348 1346 # we don't care about wdirfilenodeids as they should always be full hashes
1349 1347 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1350 1348 try:
1351 1349 partial = self.index.partialmatch(id)
1352 1350 if partial and self.hasnode(partial):
1353 1351 if maybewdir:
1354 1352 # single 'ff...' match in radix tree, ambiguous with wdir
1355 1353 raise error.RevlogError
1356 1354 return partial
1357 1355 if maybewdir:
1358 1356 # no 'ff...' match in radix tree, wdir identified
1359 1357 raise error.WdirUnsupported
1360 1358 return None
1361 1359 except error.RevlogError:
1362 1360 # parsers.c radix tree lookup gave multiple matches
1363 1361 # fast path: for unfiltered changelog, radix tree is accurate
1364 1362 if not getattr(self, 'filteredrevs', None):
1365 1363 raise error.AmbiguousPrefixLookupError(
1366 1364 id, self.indexfile, _(b'ambiguous identifier')
1367 1365 )
1368 1366 # fall through to slow path that filters hidden revisions
1369 1367 except (AttributeError, ValueError):
1370 1368 # we are pure python, or key was too short to search radix tree
1371 1369 pass
1372 1370
1373 1371 if id in self._pcache:
1374 1372 return self._pcache[id]
1375 1373
1376 1374 if len(id) <= 40:
1377 1375 try:
1378 1376 # hex(node)[:...]
1379 1377 l = len(id) // 2 # grab an even number of digits
1380 1378 prefix = bin(id[: l * 2])
1381 1379 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1382 1380 nl = [
1383 1381 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1384 1382 ]
1385 1383 if self.nodeconstants.nullhex.startswith(id):
1386 1384 nl.append(self.nullid)
1387 1385 if len(nl) > 0:
1388 1386 if len(nl) == 1 and not maybewdir:
1389 1387 self._pcache[id] = nl[0]
1390 1388 return nl[0]
1391 1389 raise error.AmbiguousPrefixLookupError(
1392 1390 id, self.indexfile, _(b'ambiguous identifier')
1393 1391 )
1394 1392 if maybewdir:
1395 1393 raise error.WdirUnsupported
1396 1394 return None
1397 1395 except TypeError:
1398 1396 pass
1399 1397
1400 1398 def lookup(self, id):
1401 1399 """locate a node based on:
1402 1400 - revision number or str(revision number)
1403 1401 - nodeid or subset of hex nodeid
1404 1402 """
1405 1403 n = self._match(id)
1406 1404 if n is not None:
1407 1405 return n
1408 1406 n = self._partialmatch(id)
1409 1407 if n:
1410 1408 return n
1411 1409
1412 1410 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1413 1411
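The identifier forms accepted by `lookup` in practice (sketch only; `rl` is an assumed non-empty revlog instance and the hex prefix is assumed unambiguous):

    node = rl.lookup(0)                       # revision number
    assert rl.lookup(b'0') == node            # str(revision number)
    assert rl.lookup(hex(node)) == node       # full hex nodeid
    assert rl.lookup(hex(node)[:12]) == node  # unambiguous hex prefix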
1414 1412 def shortest(self, node, minlength=1):
1415 1413 """Find the shortest unambiguous prefix that matches node."""
1416 1414
1417 1415 def isvalid(prefix):
1418 1416 try:
1419 1417 matchednode = self._partialmatch(prefix)
1420 1418 except error.AmbiguousPrefixLookupError:
1421 1419 return False
1422 1420 except error.WdirUnsupported:
1423 1421 # single 'ff...' match
1424 1422 return True
1425 1423 if matchednode is None:
1426 1424 raise error.LookupError(node, self.indexfile, _(b'no node'))
1427 1425 return True
1428 1426
1429 1427 def maybewdir(prefix):
1430 1428 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1431 1429
1432 1430 hexnode = hex(node)
1433 1431
1434 1432 def disambiguate(hexnode, minlength):
1435 1433 """Disambiguate against wdirid."""
1436 1434 for length in range(minlength, len(hexnode) + 1):
1437 1435 prefix = hexnode[:length]
1438 1436 if not maybewdir(prefix):
1439 1437 return prefix
1440 1438
1441 1439 if not getattr(self, 'filteredrevs', None):
1442 1440 try:
1443 1441 length = max(self.index.shortest(node), minlength)
1444 1442 return disambiguate(hexnode, length)
1445 1443 except error.RevlogError:
1446 1444 if node != self.nodeconstants.wdirid:
1447 1445 raise error.LookupError(node, self.indexfile, _(b'no node'))
1448 1446 except AttributeError:
1449 1447 # Fall through to pure code
1450 1448 pass
1451 1449
1452 1450 if node == self.nodeconstants.wdirid:
1453 1451 for length in range(minlength, len(hexnode) + 1):
1454 1452 prefix = hexnode[:length]
1455 1453 if isvalid(prefix):
1456 1454 return prefix
1457 1455
1458 1456 for length in range(minlength, len(hexnode) + 1):
1459 1457 prefix = hexnode[:length]
1460 1458 if isvalid(prefix):
1461 1459 return disambiguate(hexnode, length)
1462 1460
1463 1461 def cmp(self, node, text):
1464 1462 """compare text with a given file revision
1465 1463
1466 1464 returns True if text is different from what is stored.
1467 1465 """
1468 1466 p1, p2 = self.parents(node)
1469 1467 return storageutil.hashrevisionsha1(text, p1, p2) != node
1470 1468
1471 1469 def _cachesegment(self, offset, data):
1472 1470 """Add a segment to the revlog cache.
1473 1471
1474 1472 Accepts an absolute offset and the data that is at that location.
1475 1473 """
1476 1474 o, d = self._chunkcache
1477 1475 # try to add to existing cache
1478 1476 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1479 1477 self._chunkcache = o, d + data
1480 1478 else:
1481 1479 self._chunkcache = offset, data
1482 1480
1483 1481 def _readsegment(self, offset, length, df=None):
1484 1482 """Load a segment of raw data from the revlog.
1485 1483
1486 1484 Accepts an absolute offset, length to read, and an optional existing
1487 1485 file handle to read from.
1488 1486
1489 1487 If an existing file handle is passed, it will be seeked and the
1490 1488 original seek position will NOT be restored.
1491 1489
1492 1490 Returns a str or buffer of raw byte data.
1493 1491
1494 1492 Raises if the requested number of bytes could not be read.
1495 1493 """
1496 1494 # Cache data both forward and backward around the requested
1497 1495 # data, in a fixed size window. This helps speed up operations
1498 1496 # involving reading the revlog backwards.
1499 1497 cachesize = self._chunkcachesize
1500 1498 realoffset = offset & ~(cachesize - 1)
1501 1499 reallength = (
1502 1500 (offset + length + cachesize) & ~(cachesize - 1)
1503 1501 ) - realoffset
1504 1502 with self._datareadfp(df) as df:
1505 1503 df.seek(realoffset)
1506 1504 d = df.read(reallength)
1507 1505
1508 1506 self._cachesegment(realoffset, d)
1509 1507 if offset != realoffset or reallength != length:
1510 1508 startoffset = offset - realoffset
1511 1509 if len(d) - startoffset < length:
1512 1510 raise error.RevlogError(
1513 1511 _(
1514 1512 b'partial read of revlog %s; expected %d bytes from '
1515 1513 b'offset %d, got %d'
1516 1514 )
1517 1515 % (
1518 1516 self.indexfile if self._inline else self.datafile,
1519 1517 length,
1520 1518 realoffset,
1521 1519 len(d) - startoffset,
1522 1520 )
1523 1521 )
1524 1522
1525 1523 return util.buffer(d, startoffset, length)
1526 1524
1527 1525 if len(d) < length:
1528 1526 raise error.RevlogError(
1529 1527 _(
1530 1528 b'partial read of revlog %s; expected %d bytes from offset '
1531 1529 b'%d, got %d'
1532 1530 )
1533 1531 % (
1534 1532 self.indexfile if self._inline else self.datafile,
1535 1533 length,
1536 1534 offset,
1537 1535 len(d),
1538 1536 )
1539 1537 )
1540 1538
1541 1539 return d
1542 1540
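To make the alignment arithmetic concrete, a worked example with the default 64 KiB cache size (illustrative numbers only): a 1000-byte read straddling a window boundary is widened to two full cache-aligned windows.

    cachesize = 65536                       # default _chunkcachesize
    offset, length = 65000, 1000            # crosses the 65536 boundary
    realoffset = offset & ~(cachesize - 1)  # rounds down to 0
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    assert (realoffset, reallength) == (0, 131072)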
1543 1541 def _getsegment(self, offset, length, df=None):
1544 1542 """Obtain a segment of raw data from the revlog.
1545 1543
1546 1544 Accepts an absolute offset, length of bytes to obtain, and an
1547 1545 optional file handle to the already-opened revlog. If the file
1548 1546 handle is used, its original seek position will not be preserved.
1549 1547
1550 1548 Requests for data may be returned from a cache.
1551 1549
1552 1550 Returns a str or a buffer instance of raw byte data.
1553 1551 """
1554 1552 o, d = self._chunkcache
1555 1553 l = len(d)
1556 1554
1557 1555 # is it in the cache?
1558 1556 cachestart = offset - o
1559 1557 cacheend = cachestart + length
1560 1558 if cachestart >= 0 and cacheend <= l:
1561 1559 if cachestart == 0 and cacheend == l:
1562 1560 return d # avoid a copy
1563 1561 return util.buffer(d, cachestart, cacheend - cachestart)
1564 1562
1565 1563 return self._readsegment(offset, length, df=df)
1566 1564
1567 1565 def _getsegmentforrevs(self, startrev, endrev, df=None):
1568 1566 """Obtain a segment of raw data corresponding to a range of revisions.
1569 1567
1570 1568 Accepts the start and end revisions and an optional already-open
1571 1569 file handle to be used for reading. If the file handle is read, its
1572 1570 seek position will not be preserved.
1573 1571
1574 1572 Requests for data may be satisfied by a cache.
1575 1573
1576 1574 Returns a 2-tuple of (offset, data) for the requested range of
1577 1575 revisions. Offset is the integer offset from the beginning of the
1578 1576 revlog and data is a str or buffer of the raw byte data.
1579 1577
1580 1578 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1581 1579 to determine where each revision's data begins and ends.
1582 1580 """
1583 1581 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1584 1582 # (functions are expensive).
1585 1583 index = self.index
1586 1584 istart = index[startrev]
1587 1585 start = int(istart[0] >> 16)
1588 1586 if startrev == endrev:
1589 1587 end = start + istart[1]
1590 1588 else:
1591 1589 iend = index[endrev]
1592 1590 end = int(iend[0] >> 16) + iend[1]
1593 1591
1594 1592 if self._inline:
1595 1593 start += (startrev + 1) * self.index.entry_size
1596 1594 end += (endrev + 1) * self.index.entry_size
1597 1595 length = end - start
1598 1596
1599 1597 return start, self._getsegment(start, length, df=df)
1600 1598
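Recovering one revision's chunk from the returned segment mirrors the loop in `_chunks` below; for inline revlogs the interleaved index records must be skipped first (sketch, assuming `rl` is an open revlog and firstrev <= rev <= lastrev):

    offset, data = rl._getsegmentforrevs(firstrev, lastrev)
    chunkstart = rl.start(rev)
    if rl._inline:
        # each revision's data is preceded by its index entry in the same file
        chunkstart += (rev + 1) * rl.index.entry_size
    chunk = rl.decompress(util.buffer(data, chunkstart - offset, rl.length(rev)))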
1601 1599 def _chunk(self, rev, df=None):
1602 1600 """Obtain a single decompressed chunk for a revision.
1603 1601
1604 1602 Accepts an integer revision and an optional already-open file handle
1605 1603 to be used for reading. If used, the seek position of the file will not
1606 1604 be preserved.
1607 1605
1608 1606 Returns a str holding uncompressed data for the requested revision.
1609 1607 """
1610 1608 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1611 1609
1612 1610 def _chunks(self, revs, df=None, targetsize=None):
1613 1611 """Obtain decompressed chunks for the specified revisions.
1614 1612
1615 1613 Accepts an iterable of numeric revisions that are assumed to be in
1616 1614 ascending order. Also accepts an optional already-open file handle
1617 1615 to be used for reading. If used, the seek position of the file will
1618 1616 not be preserved.
1619 1617
1620 1618 This function is similar to calling ``self._chunk()`` multiple times,
1621 1619 but is faster.
1622 1620
1623 1621 Returns a list with decompressed data for each requested revision.
1624 1622 """
1625 1623 if not revs:
1626 1624 return []
1627 1625 start = self.start
1628 1626 length = self.length
1629 1627 inline = self._inline
1630 1628 iosize = self.index.entry_size
1631 1629 buffer = util.buffer
1632 1630
1633 1631 l = []
1634 1632 ladd = l.append
1635 1633
1636 1634 if not self._withsparseread:
1637 1635 slicedchunks = (revs,)
1638 1636 else:
1639 1637 slicedchunks = deltautil.slicechunk(
1640 1638 self, revs, targetsize=targetsize
1641 1639 )
1642 1640
1643 1641 for revschunk in slicedchunks:
1644 1642 firstrev = revschunk[0]
1645 1643 # Skip trailing revisions with empty diff
1646 1644 for lastrev in revschunk[::-1]:
1647 1645 if length(lastrev) != 0:
1648 1646 break
1649 1647
1650 1648 try:
1651 1649 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1652 1650 except OverflowError:
1653 1651 # issue4215 - we can't cache a run of chunks greater than
1654 1652 # 2G on Windows
1655 1653 return [self._chunk(rev, df=df) for rev in revschunk]
1656 1654
1657 1655 decomp = self.decompress
1658 1656 for rev in revschunk:
1659 1657 chunkstart = start(rev)
1660 1658 if inline:
1661 1659 chunkstart += (rev + 1) * iosize
1662 1660 chunklength = length(rev)
1663 1661 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1664 1662
1665 1663 return l
1666 1664
1667 1665 def _chunkclear(self):
1668 1666 """Clear the raw chunk cache."""
1669 1667 self._chunkcache = (0, b'')
1670 1668
1671 1669 def deltaparent(self, rev):
1672 1670 """return deltaparent of the given revision"""
1673 1671 base = self.index[rev][3]
1674 1672 if base == rev:
1675 1673 return nullrev
1676 1674 elif self._generaldelta:
1677 1675 return base
1678 1676 else:
1679 1677 return rev - 1
1680 1678
1681 1679 def issnapshot(self, rev):
1682 1680 """tells whether rev is a snapshot"""
1683 1681 if not self._sparserevlog:
1684 1682 return self.deltaparent(rev) == nullrev
1685 1683 elif util.safehasattr(self.index, 'issnapshot'):
1686 1684 # directly assign the method to cache the testing and access
1687 1685 self.issnapshot = self.index.issnapshot
1688 1686 return self.issnapshot(rev)
1689 1687 if rev == nullrev:
1690 1688 return True
1691 1689 entry = self.index[rev]
1692 1690 base = entry[3]
1693 1691 if base == rev:
1694 1692 return True
1695 1693 if base == nullrev:
1696 1694 return True
1697 1695 p1 = entry[5]
1698 1696 p2 = entry[6]
1699 1697 if base == p1 or base == p2:
1700 1698 return False
1701 1699 return self.issnapshot(base)
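        # Editor's note (illustrative, not from the commit): in a sparse
        # revlog a snapshot is a revision whose delta base is not one of its
        # parents. E.g. with delta bases 8 -> 3 -> nullrev, where neither
        # base is a parent of its child, revs 3 and 8 are both snapshots and
        # the recursion above walks 8 -> 3 -> nullrev, returning True.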
1702 1700
1703 1701 def snapshotdepth(self, rev):
1704 1702         """number of snapshots in the chain before this one"""
1705 1703 if not self.issnapshot(rev):
1706 1704             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1707 1705 return len(self._deltachain(rev)[0]) - 1
1708 1706
1709 1707 def revdiff(self, rev1, rev2):
1710 1708 """return or calculate a delta between two revisions
1711 1709
1712 1710 The delta calculated is in binary form and is intended to be written to
1713 1711 revlog data directly. So this function needs raw revision data.
1714 1712 """
1715 1713 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1716 1714 return bytes(self._chunk(rev2))
1717 1715
1718 1716 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1719 1717
1720 1718 def _processflags(self, text, flags, operation, raw=False):
1721 1719 """deprecated entry point to access flag processors"""
1722 1720 msg = b'_processflag(...) use the specialized variant'
1723 1721 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1724 1722 if raw:
1725 1723 return text, flagutil.processflagsraw(self, text, flags)
1726 1724 elif operation == b'read':
1727 1725 return flagutil.processflagsread(self, text, flags)
1728 1726 else: # write operation
1729 1727 return flagutil.processflagswrite(self, text, flags)
1730 1728
1731 1729 def revision(self, nodeorrev, _df=None, raw=False):
1732 1730 """return an uncompressed revision of a given node or revision
1733 1731 number.
1734 1732
1735 1733 _df - an existing file handle to read from. (internal-only)
1736 1734 raw - an optional argument specifying if the revision data is to be
1737 1735 treated as raw data when applying flag transforms. 'raw' should be set
1738 1736 to True when generating changegroups or in debug commands.
1739 1737 """
1740 1738 if raw:
1741 1739 msg = (
1742 1740 b'revlog.revision(..., raw=True) is deprecated, '
1743 1741 b'use revlog.rawdata(...)'
1744 1742 )
1745 1743 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1746 1744 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1747 1745
1748 1746 def sidedata(self, nodeorrev, _df=None):
1749 1747 """a map of extra data related to the changeset but not part of the hash
1750 1748
1751 1749         This function currently returns a dictionary. However, a more
1752 1750         advanced mapping object will likely be used in the future for
1753 1751         more efficient/lazy code.
1754 1752 """
1755 1753 return self._revisiondata(nodeorrev, _df)[1]
1756 1754
1757 1755 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1758 1756 # deal with <nodeorrev> argument type
1759 1757 if isinstance(nodeorrev, int):
1760 1758 rev = nodeorrev
1761 1759 node = self.node(rev)
1762 1760 else:
1763 1761 node = nodeorrev
1764 1762 rev = None
1765 1763
1766 1764 # fast path the special `nullid` rev
1767 1765 if node == self.nullid:
1768 1766 return b"", {}
1769 1767
1770 1768 # ``rawtext`` is the text as stored inside the revlog. Might be the
1771 1769 # revision or might need to be processed to retrieve the revision.
1772 1770 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1773 1771
1774 1772 if self.hassidedata:
1775 1773 if rev is None:
1776 1774 rev = self.rev(node)
1777 1775 sidedata = self._sidedata(rev)
1778 1776 else:
1779 1777 sidedata = {}
1780 1778
1781 1779 if raw and validated:
1782 1780             # if we don't want to process the raw text and the raw
1783 1781             # text is cached, we can exit early.
1784 1782 return rawtext, sidedata
1785 1783 if rev is None:
1786 1784 rev = self.rev(node)
1787 1785         # the revlog's flags for this revision
1788 1786         # (usually altering its state or content)
1789 1787 flags = self.flags(rev)
1790 1788
1791 1789 if validated and flags == REVIDX_DEFAULT_FLAGS:
1792 1790 # no extra flags set, no flag processor runs, text = rawtext
1793 1791 return rawtext, sidedata
1794 1792
1795 1793 if raw:
1796 1794 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1797 1795 text = rawtext
1798 1796 else:
1799 1797 r = flagutil.processflagsread(self, rawtext, flags)
1800 1798 text, validatehash = r
1801 1799 if validatehash:
1802 1800 self.checkhash(text, node, rev=rev)
1803 1801 if not validated:
1804 1802 self._revisioncache = (node, rev, rawtext)
1805 1803
1806 1804 return text, sidedata
1807 1805
1808 1806 def _rawtext(self, node, rev, _df=None):
1809 1807 """return the possibly unvalidated rawtext for a revision
1810 1808
1811 1809 returns (rev, rawtext, validated)
1812 1810 """
1813 1811
1814 1812 # revision in the cache (could be useful to apply delta)
1815 1813 cachedrev = None
1816 1814 # An intermediate text to apply deltas to
1817 1815 basetext = None
1818 1816
1819 1817 # Check if we have the entry in cache
1820 1818 # The cache entry looks like (node, rev, rawtext)
1821 1819 if self._revisioncache:
1822 1820 if self._revisioncache[0] == node:
1823 1821 return (rev, self._revisioncache[2], True)
1824 1822 cachedrev = self._revisioncache[1]
1825 1823
1826 1824 if rev is None:
1827 1825 rev = self.rev(node)
1828 1826
1829 1827 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1830 1828 if stopped:
1831 1829 basetext = self._revisioncache[2]
1832 1830
1833 1831             # drop cache to save memory; the caller is expected to
1834 1832             # update self._revisioncache after validating the text
1835 1833 self._revisioncache = None
1836 1834
1837 1835 targetsize = None
1838 1836 rawsize = self.index[rev][2]
1839 1837 if 0 <= rawsize:
1840 1838 targetsize = 4 * rawsize
1841 1839
1842 1840 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1843 1841 if basetext is None:
1844 1842 basetext = bytes(bins[0])
1845 1843 bins = bins[1:]
1846 1844
1847 1845 rawtext = mdiff.patches(basetext, bins)
1848 1846 del basetext # let us have a chance to free memory early
1849 1847 return (rev, rawtext, False)
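        # Editor's sketch (assumed chain, not from the commit): for a delta
        # chain [10, 13, 17], self._chunks returns three decompressed blobs;
        # bins[0] is rev 10's full snapshot and the rest are deltas, so the
        # reconstruction above amounts to:
        #
        #     rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
        #
        # When the walk is stopped early by a cached revision, basetext comes
        # from the cache instead and every returned chunk is a delta.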
1850 1848
1851 1849 def _sidedata(self, rev):
1852 1850 """Return the sidedata for a given revision number."""
1853 1851 index_entry = self.index[rev]
1854 1852 sidedata_offset = index_entry[8]
1855 1853 sidedata_size = index_entry[9]
1856 1854
1857 1855 if self._inline:
1858 1856 sidedata_offset += self.index.entry_size * (1 + rev)
1859 1857 if sidedata_size == 0:
1860 1858 return {}
1861 1859
1862 1860 segment = self._getsegment(sidedata_offset, sidedata_size)
1863 1861 sidedata = sidedatautil.deserialize_sidedata(segment)
1864 1862 return sidedata
1865 1863
1866 1864 def rawdata(self, nodeorrev, _df=None):
1867 1865         """return the uncompressed raw data of a given node or revision number.
1868 1866
1869 1867 _df - an existing file handle to read from. (internal-only)
1870 1868 """
1871 1869 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1872 1870
1873 1871 def hash(self, text, p1, p2):
1874 1872 """Compute a node hash.
1875 1873
1876 1874 Available as a function so that subclasses can replace the hash
1877 1875 as needed.
1878 1876 """
1879 1877 return storageutil.hashrevisionsha1(text, p1, p2)
1880 1878
1881 1879 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1882 1880 """Check node hash integrity.
1883 1881
1884 1882 Available as a function so that subclasses can extend hash mismatch
1885 1883 behaviors as needed.
1886 1884 """
1887 1885 try:
1888 1886 if p1 is None and p2 is None:
1889 1887 p1, p2 = self.parents(node)
1890 1888 if node != self.hash(text, p1, p2):
1891 1889 # Clear the revision cache on hash failure. The revision cache
1892 1890 # only stores the raw revision and clearing the cache does have
1893 1891 # the side-effect that we won't have a cache hit when the raw
1894 1892 # revision data is accessed. But this case should be rare and
1895 1893 # it is extra work to teach the cache about the hash
1896 1894 # verification state.
1897 1895 if self._revisioncache and self._revisioncache[0] == node:
1898 1896 self._revisioncache = None
1899 1897
1900 1898 revornode = rev
1901 1899 if revornode is None:
1902 1900 revornode = templatefilters.short(hex(node))
1903 1901 raise error.RevlogError(
1904 1902 _(b"integrity check failed on %s:%s")
1905 1903 % (self.indexfile, pycompat.bytestr(revornode))
1906 1904 )
1907 1905 except error.RevlogError:
1908 1906 if self._censorable and storageutil.iscensoredtext(text):
1909 1907 raise error.CensoredNodeError(self.indexfile, node, text)
1910 1908 raise
1911 1909
1912 1910 def _enforceinlinesize(self, tr, fp=None):
1913 1911 """Check if the revlog is too big for inline and convert if so.
1914 1912
1915 1913 This should be called after revisions are added to the revlog. If the
1916 1914 revlog has grown too large to be an inline revlog, it will convert it
1917 1915 to use multiple index and data files.
1918 1916 """
1919 1917 tiprev = len(self) - 1
1920 1918 if (
1921 1919 not self._inline
1922 1920 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
1923 1921 ):
1924 1922 return
1925 1923
1926 1924 troffset = tr.findoffset(self.indexfile)
1927 1925 if troffset is None:
1928 1926 raise error.RevlogError(
1929 1927 _(b"%s not found in the transaction") % self.indexfile
1930 1928 )
1931 1929 trindex = 0
1932 1930 tr.add(self.datafile, 0)
1933 1931
1934 1932 if fp:
1935 1933 fp.flush()
1936 1934 fp.close()
1937 1935 # We can't use the cached file handle after close(). So prevent
1938 1936 # its usage.
1939 1937 self._writinghandles = None
1940 1938
1941 1939 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
1942 1940 for r in self:
1943 1941 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1944 1942 if troffset <= self.start(r):
1945 1943 trindex = r
1946 1944
1947 1945 with self._indexfp(b'w') as fp:
1948 self.version &= ~FLAG_INLINE_DATA
1946 self._format_flags &= ~FLAG_INLINE_DATA
1949 1947 self._inline = False
1950 1948 for i in self:
1951 1949 e = self.index.entry_binary(i)
1952 1950 if i == 0:
1953 header = self.index.pack_header(self.version)
1951 header = self._format_flags | self._format_version
1952 header = self.index.pack_header(header)
1954 1953 e = header + e
1955 1954 fp.write(e)
1956 1955
1957 1956         # the temp file replaces the real index when we exit the context
1958 1957         # manager
1959 1958
1960 1959 tr.replace(self.indexfile, trindex * self.index.entry_size)
1961 1960 nodemaputil.setup_persistent_nodemap(tr, self)
1962 1961 self._chunkclear()
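        # Editor's sketch (not part of the commit) of the header layout that
        # motivates the rewrite above: the old ``version`` attribute packed
        # both components into a single integer, the format version in the
        # low 16 bits and the feature flags (e.g. FLAG_INLINE_DATA) above
        # them, so the split attributes recombine losslessly:
        #
        #     header = self._format_flags | self._format_version
        #     assert header & 0xFFFF == self._format_version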
1963 1962
1964 1963 def _nodeduplicatecallback(self, transaction, node):
1965 1964 """called when trying to add a node already stored."""
1966 1965
1967 1966 def addrevision(
1968 1967 self,
1969 1968 text,
1970 1969 transaction,
1971 1970 link,
1972 1971 p1,
1973 1972 p2,
1974 1973 cachedelta=None,
1975 1974 node=None,
1976 1975 flags=REVIDX_DEFAULT_FLAGS,
1977 1976 deltacomputer=None,
1978 1977 sidedata=None,
1979 1978 ):
1980 1979 """add a revision to the log
1981 1980
1982 1981 text - the revision data to add
1983 1982 transaction - the transaction object used for rollback
1984 1983 link - the linkrev data to add
1985 1984 p1, p2 - the parent nodeids of the revision
1986 1985 cachedelta - an optional precomputed delta
1987 1986 node - nodeid of revision; typically node is not specified, and it is
1988 1987         computed by default as hash(text, p1, p2); however, subclasses might
1989 1988         use a different hashing method (and override checkhash() in that case)
1990 1989 flags - the known flags to set on the revision
1991 1990 deltacomputer - an optional deltacomputer instance shared between
1992 1991 multiple calls
1993 1992 """
1994 1993 if link == nullrev:
1995 1994 raise error.RevlogError(
1996 1995 _(b"attempted to add linkrev -1 to %s") % self.indexfile
1997 1996 )
1998 1997
1999 1998 if sidedata is None:
2000 1999 sidedata = {}
2001 2000 elif sidedata and not self.hassidedata:
2002 2001 raise error.ProgrammingError(
2003 2002                 _(b"trying to add sidedata to a revlog that doesn't support them")
2004 2003 )
2005 2004
2006 2005 if flags:
2007 2006 node = node or self.hash(text, p1, p2)
2008 2007
2009 2008 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2010 2009
2011 2010 # If the flag processor modifies the revision data, ignore any provided
2012 2011 # cachedelta.
2013 2012 if rawtext != text:
2014 2013 cachedelta = None
2015 2014
2016 2015 if len(rawtext) > _maxentrysize:
2017 2016 raise error.RevlogError(
2018 2017 _(
2019 2018 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2020 2019 )
2021 2020 % (self.indexfile, len(rawtext))
2022 2021 )
2023 2022
2024 2023 node = node or self.hash(rawtext, p1, p2)
2025 2024 rev = self.index.get_rev(node)
2026 2025 if rev is not None:
2027 2026 return rev
2028 2027
2029 2028 if validatehash:
2030 2029 self.checkhash(rawtext, node, p1=p1, p2=p2)
2031 2030
2032 2031 return self.addrawrevision(
2033 2032 rawtext,
2034 2033 transaction,
2035 2034 link,
2036 2035 p1,
2037 2036 p2,
2038 2037 node,
2039 2038 flags,
2040 2039 cachedelta=cachedelta,
2041 2040 deltacomputer=deltacomputer,
2042 2041 sidedata=sidedata,
2043 2042 )
2044 2043
2045 2044 def addrawrevision(
2046 2045 self,
2047 2046 rawtext,
2048 2047 transaction,
2049 2048 link,
2050 2049 p1,
2051 2050 p2,
2052 2051 node,
2053 2052 flags,
2054 2053 cachedelta=None,
2055 2054 deltacomputer=None,
2056 2055 sidedata=None,
2057 2056 ):
2058 2057 """add a raw revision with known flags, node and parents
2059 2058         useful when reusing a revision not stored in this revlog (ex: received
2060 2059         over the wire, or read from an external bundle).
2061 2060 """
2062 2061 dfh = None
2063 2062 if not self._inline:
2064 2063 dfh = self._datafp(b"a+")
2065 2064 ifh = self._indexfp(b"a+")
2066 2065 try:
2067 2066 return self._addrevision(
2068 2067 node,
2069 2068 rawtext,
2070 2069 transaction,
2071 2070 link,
2072 2071 p1,
2073 2072 p2,
2074 2073 flags,
2075 2074 cachedelta,
2076 2075 ifh,
2077 2076 dfh,
2078 2077 deltacomputer=deltacomputer,
2079 2078 sidedata=sidedata,
2080 2079 )
2081 2080 finally:
2082 2081 if dfh:
2083 2082 dfh.close()
2084 2083 ifh.close()
2085 2084
2086 2085 def compress(self, data):
2087 2086 """Generate a possibly-compressed representation of data."""
2088 2087 if not data:
2089 2088 return b'', data
2090 2089
2091 2090 compressed = self._compressor.compress(data)
2092 2091
2093 2092 if compressed:
2094 2093 # The revlog compressor added the header in the returned data.
2095 2094 return b'', compressed
2096 2095
2097 2096 if data[0:1] == b'\0':
2098 2097 return b'', data
2099 2098 return b'u', data
2100 2099
2101 2100 def decompress(self, data):
2102 2101 """Decompress a revlog chunk.
2103 2102
2104 2103 The chunk is expected to begin with a header identifying the
2105 2104 format type so it can be routed to an appropriate decompressor.
2106 2105 """
2107 2106 if not data:
2108 2107 return data
2109 2108
2110 2109 # Revlogs are read much more frequently than they are written and many
2111 2110 # chunks only take microseconds to decompress, so performance is
2112 2111 # important here.
2113 2112 #
2114 2113 # We can make a few assumptions about revlogs:
2115 2114 #
2116 2115 # 1) the majority of chunks will be compressed (as opposed to inline
2117 2116 # raw data).
2118 2117         # 2) decompressing *any* data will likely be at least 10x slower than
2119 2118 # returning raw inline data.
2120 2119 # 3) we want to prioritize common and officially supported compression
2121 2120 # engines
2122 2121 #
2123 2122         # It follows that we want to optimize for the "decompress compressed
2124 2123         # data encoded with common and officially supported compression engines"
2125 2124 # case over "raw data" and "data encoded by less common or non-official
2126 2125 # compression engines." That is why we have the inline lookup first
2127 2126 # followed by the compengines lookup.
2128 2127 #
2129 2128 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2130 2129 # compressed chunks. And this matters for changelog and manifest reads.
2131 2130 t = data[0:1]
2132 2131
2133 2132 if t == b'x':
2134 2133 try:
2135 2134 return _zlibdecompress(data)
2136 2135 except zlib.error as e:
2137 2136 raise error.RevlogError(
2138 2137 _(b'revlog decompress error: %s')
2139 2138 % stringutil.forcebytestr(e)
2140 2139 )
2141 2140 # '\0' is more common than 'u' so it goes first.
2142 2141 elif t == b'\0':
2143 2142 return data
2144 2143 elif t == b'u':
2145 2144 return util.buffer(data, 1)
2146 2145
2147 2146 try:
2148 2147 compressor = self._decompressors[t]
2149 2148 except KeyError:
2150 2149 try:
2151 2150 engine = util.compengines.forrevlogheader(t)
2152 2151 compressor = engine.revlogcompressor(self._compengineopts)
2153 2152 self._decompressors[t] = compressor
2154 2153 except KeyError:
2155 2154 raise error.RevlogError(
2156 2155 _(b'unknown compression type %s') % binascii.hexlify(t)
2157 2156 )
2158 2157
2159 2158 return compressor.decompress(data)
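        # Editor's summary (not from the commit) of the header routing above:
        #
        #     b'x'   -> zlib-compressed chunk, inflated via _zlibdecompress
        #     b'\0'  -> stored raw, returned as-is
        #     b'u'   -> stored uncompressed, the leading byte is stripped
        #     other  -> resolved through util.compengines.forrevlogheader()
        #               and the resulting compressor is cached per header byte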
2160 2159
2161 2160 def _addrevision(
2162 2161 self,
2163 2162 node,
2164 2163 rawtext,
2165 2164 transaction,
2166 2165 link,
2167 2166 p1,
2168 2167 p2,
2169 2168 flags,
2170 2169 cachedelta,
2171 2170 ifh,
2172 2171 dfh,
2173 2172 alwayscache=False,
2174 2173 deltacomputer=None,
2175 2174 sidedata=None,
2176 2175 ):
2177 2176 """internal function to add revisions to the log
2178 2177
2179 2178 see addrevision for argument descriptions.
2180 2179
2181 2180 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2182 2181
2183 2182 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2184 2183 be used.
2185 2184
2186 2185 invariants:
2187 2186 - rawtext is optional (can be None); if not set, cachedelta must be set.
2188 2187 if both are set, they must correspond to each other.
2189 2188 """
2190 2189 if node == self.nullid:
2191 2190 raise error.RevlogError(
2192 2191 _(b"%s: attempt to add null revision") % self.indexfile
2193 2192 )
2194 2193 if (
2195 2194 node == self.nodeconstants.wdirid
2196 2195 or node in self.nodeconstants.wdirfilenodeids
2197 2196 ):
2198 2197 raise error.RevlogError(
2199 2198 _(b"%s: attempt to add wdir revision") % self.indexfile
2200 2199 )
2201 2200
2202 2201 if self._inline:
2203 2202 fh = ifh
2204 2203 else:
2205 2204 fh = dfh
2206 2205
2207 2206 btext = [rawtext]
2208 2207
2209 2208 curr = len(self)
2210 2209 prev = curr - 1
2211 2210
2212 2211 offset = self._get_data_offset(prev)
2213 2212
2214 2213 if self._concurrencychecker:
2215 2214 if self._inline:
2216 2215 # offset is "as if" it were in the .d file, so we need to add on
2217 2216 # the size of the entry metadata.
2218 2217 self._concurrencychecker(
2219 2218 ifh, self.indexfile, offset + curr * self.index.entry_size
2220 2219 )
2221 2220 else:
2222 2221 # Entries in the .i are a consistent size.
2223 2222 self._concurrencychecker(
2224 2223 ifh, self.indexfile, curr * self.index.entry_size
2225 2224 )
2226 2225 self._concurrencychecker(dfh, self.datafile, offset)
2227 2226
2228 2227 p1r, p2r = self.rev(p1), self.rev(p2)
2229 2228
2230 2229 # full versions are inserted when the needed deltas
2231 2230 # become comparable to the uncompressed text
2232 2231 if rawtext is None:
2233 2232 # need rawtext size, before changed by flag processors, which is
2234 2233 # the non-raw size. use revlog explicitly to avoid filelog's extra
2235 2234 # logic that might remove metadata size.
2236 2235 textlen = mdiff.patchedsize(
2237 2236 revlog.size(self, cachedelta[0]), cachedelta[1]
2238 2237 )
2239 2238 else:
2240 2239 textlen = len(rawtext)
2241 2240
2242 2241 if deltacomputer is None:
2243 2242 deltacomputer = deltautil.deltacomputer(self)
2244 2243
2245 2244 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2246 2245
2247 2246 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2248 2247
2249 2248 if sidedata and self.hassidedata:
2250 2249 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2251 2250 sidedata_offset = offset + deltainfo.deltalen
2252 2251 else:
2253 2252 serialized_sidedata = b""
2254 2253             # Don't store the offset if the sidedata is empty; that way
2255 2254             # we can easily detect empty sidedata, and it will be no different
2256 2255             # from sidedata we add manually.
2257 2256 sidedata_offset = 0
2258 2257
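        # Editor's annotation: the fields below are, in order: packed
        # offset+flags, compressed length, raw text length, delta-base rev,
        # linkrev, p1 rev, p2 rev, node, sidedata offset and sidedata length
        # (matching the index[...] accesses made elsewhere in this file).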
2259 2258 e = (
2260 2259 offset_type(offset, flags),
2261 2260 deltainfo.deltalen,
2262 2261 textlen,
2263 2262 deltainfo.base,
2264 2263 link,
2265 2264 p1r,
2266 2265 p2r,
2267 2266 node,
2268 2267 sidedata_offset,
2269 2268 len(serialized_sidedata),
2270 2269 )
2271 2270
2272 if self.version & 0xFFFF != REVLOGV2:
2271 if self._format_version != REVLOGV2:
2273 2272 e = e[:8]
2274 2273
2275 2274 self.index.append(e)
2276 2275 entry = self.index.entry_binary(curr)
2277 2276 if curr == 0:
2278 header = self.index.pack_header(self.version)
2277 header = self._format_flags | self._format_version
2278 header = self.index.pack_header(header)
2279 2279 entry = header + entry
2280 2280 self._writeentry(
2281 2281 transaction,
2282 2282 ifh,
2283 2283 dfh,
2284 2284 entry,
2285 2285 deltainfo.data,
2286 2286 link,
2287 2287 offset,
2288 2288 serialized_sidedata,
2289 2289 )
2290 2290
2291 2291 rawtext = btext[0]
2292 2292
2293 2293 if alwayscache and rawtext is None:
2294 2294 rawtext = deltacomputer.buildtext(revinfo, fh)
2295 2295
2296 2296 if type(rawtext) == bytes: # only accept immutable objects
2297 2297 self._revisioncache = (node, curr, rawtext)
2298 2298 self._chainbasecache[curr] = deltainfo.chainbase
2299 2299 return curr
2300 2300
2301 2301 def _get_data_offset(self, prev):
2302 2302 """Returns the current offset in the (in-transaction) data file.
2303 2303         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2304 2304 file to store that information: since sidedata can be rewritten to the
2305 2305 end of the data file within a transaction, you can have cases where, for
2306 2306 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2307 2307 to `n - 1`'s sidedata being written after `n`'s data.
2308 2308
2309 2309 TODO cache this in a docket file before getting out of experimental."""
2310 if self.version & 0xFFFF != REVLOGV2:
2310 if self._format_version != REVLOGV2:
2311 2311 return self.end(prev)
2312 2312
2313 2313 offset = 0
2314 2314 for rev, entry in enumerate(self.index):
2315 2315 sidedata_end = entry[8] + entry[9]
2316 2316 # Sidedata for a previous rev has potentially been written after
2317 2317 # this rev's end, so take the max.
2318 2318 offset = max(self.end(rev), offset, sidedata_end)
2319 2319 return offset
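        # Editor's worked example (assumed numbers): with end(0) == 100 and
        # end(1) == 180, a rewrite of rev 0's sidedata into [180, 220) makes
        # entry[8] + entry[9] == 220 for rev 0, so the max() above yields
        # 220 rather than end(tip) == 180 and the next write cannot clobber
        # the relocated sidedata.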
2320 2320
2321 2321 def _writeentry(
2322 2322 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2323 2323 ):
2324 2324 # Files opened in a+ mode have inconsistent behavior on various
2325 2325 # platforms. Windows requires that a file positioning call be made
2326 2326 # when the file handle transitions between reads and writes. See
2327 2327 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2328 2328 # platforms, Python or the platform itself can be buggy. Some versions
2329 2329 # of Solaris have been observed to not append at the end of the file
2330 2330 # if the file was seeked to before the end. See issue4943 for more.
2331 2331 #
2332 2332 # We work around this issue by inserting a seek() before writing.
2333 2333 # Note: This is likely not necessary on Python 3. However, because
2334 2334 # the file handle is reused for reads and may be seeked there, we need
2335 2335 # to be careful before changing this.
2336 2336 ifh.seek(0, os.SEEK_END)
2337 2337 if dfh:
2338 2338 dfh.seek(0, os.SEEK_END)
2339 2339
2340 2340 curr = len(self) - 1
2341 2341 if not self._inline:
2342 2342 transaction.add(self.datafile, offset)
2343 2343 transaction.add(self.indexfile, curr * len(entry))
2344 2344 if data[0]:
2345 2345 dfh.write(data[0])
2346 2346 dfh.write(data[1])
2347 2347 if sidedata:
2348 2348 dfh.write(sidedata)
2349 2349 ifh.write(entry)
2350 2350 else:
2351 2351 offset += curr * self.index.entry_size
2352 2352 transaction.add(self.indexfile, offset)
2353 2353 ifh.write(entry)
2354 2354 ifh.write(data[0])
2355 2355 ifh.write(data[1])
2356 2356 if sidedata:
2357 2357 ifh.write(sidedata)
2358 2358 self._enforceinlinesize(transaction, ifh)
2359 2359 nodemaputil.setup_persistent_nodemap(transaction, self)
2360 2360
2361 2361 def addgroup(
2362 2362 self,
2363 2363 deltas,
2364 2364 linkmapper,
2365 2365 transaction,
2366 2366 alwayscache=False,
2367 2367 addrevisioncb=None,
2368 2368 duplicaterevisioncb=None,
2369 2369 ):
2370 2370 """
2371 2371 add a delta group
2372 2372
2373 2373         Given a set of deltas, add them to the revision log. The
2374 2374         first delta is against its parent, which should be in our
2375 2375         log; the rest are against the previous delta.
2376 2376
2377 2377 If ``addrevisioncb`` is defined, it will be called with arguments of
2378 2378 this revlog and the node that was added.
2379 2379 """
2380 2380
2381 2381 if self._writinghandles:
2382 2382 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2383 2383
2384 2384 r = len(self)
2385 2385 end = 0
2386 2386 if r:
2387 2387 end = self.end(r - 1)
2388 2388 ifh = self._indexfp(b"a+")
2389 2389 isize = r * self.index.entry_size
2390 2390 if self._inline:
2391 2391 transaction.add(self.indexfile, end + isize)
2392 2392 dfh = None
2393 2393 else:
2394 2394 transaction.add(self.indexfile, isize)
2395 2395 transaction.add(self.datafile, end)
2396 2396 dfh = self._datafp(b"a+")
2397 2397
2398 2398 def flush():
2399 2399 if dfh:
2400 2400 dfh.flush()
2401 2401 ifh.flush()
2402 2402
2403 2403 self._writinghandles = (ifh, dfh)
2404 2404 empty = True
2405 2405
2406 2406 try:
2407 2407 deltacomputer = deltautil.deltacomputer(self)
2408 2408 # loop through our set of deltas
2409 2409 for data in deltas:
2410 2410 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2411 2411 link = linkmapper(linknode)
2412 2412 flags = flags or REVIDX_DEFAULT_FLAGS
2413 2413
2414 2414 rev = self.index.get_rev(node)
2415 2415 if rev is not None:
2416 2416 # this can happen if two branches make the same change
2417 2417 self._nodeduplicatecallback(transaction, rev)
2418 2418 if duplicaterevisioncb:
2419 2419 duplicaterevisioncb(self, rev)
2420 2420 empty = False
2421 2421 continue
2422 2422
2423 2423 for p in (p1, p2):
2424 2424 if not self.index.has_node(p):
2425 2425 raise error.LookupError(
2426 2426 p, self.indexfile, _(b'unknown parent')
2427 2427 )
2428 2428
2429 2429 if not self.index.has_node(deltabase):
2430 2430 raise error.LookupError(
2431 2431 deltabase, self.indexfile, _(b'unknown delta base')
2432 2432 )
2433 2433
2434 2434 baserev = self.rev(deltabase)
2435 2435
2436 2436 if baserev != nullrev and self.iscensored(baserev):
2437 2437                 # if base is censored, the delta must be a full replacement
2438 2438                 # in a single patch operation
2439 2439 hlen = struct.calcsize(b">lll")
2440 2440 oldlen = self.rawsize(baserev)
2441 2441 newlen = len(delta) - hlen
2442 2442 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2443 2443 raise error.CensoredBaseError(
2444 2444 self.indexfile, self.node(baserev)
2445 2445 )
2446 2446
2447 2447 if not flags and self._peek_iscensored(baserev, delta, flush):
2448 2448 flags |= REVIDX_ISCENSORED
2449 2449
2450 2450 # We assume consumers of addrevisioncb will want to retrieve
2451 2451 # the added revision, which will require a call to
2452 2452 # revision(). revision() will fast path if there is a cache
2453 2453 # hit. So, we tell _addrevision() to always cache in this case.
2454 2454 # We're only using addgroup() in the context of changegroup
2455 2455 # generation so the revision data can always be handled as raw
2456 2456 # by the flagprocessor.
2457 2457 rev = self._addrevision(
2458 2458 node,
2459 2459 None,
2460 2460 transaction,
2461 2461 link,
2462 2462 p1,
2463 2463 p2,
2464 2464 flags,
2465 2465 (baserev, delta),
2466 2466 ifh,
2467 2467 dfh,
2468 2468 alwayscache=alwayscache,
2469 2469 deltacomputer=deltacomputer,
2470 2470 sidedata=sidedata,
2471 2471 )
2472 2472
2473 2473 if addrevisioncb:
2474 2474 addrevisioncb(self, rev)
2475 2475 empty = False
2476 2476
2477 2477 if not dfh and not self._inline:
2478 2478 # addrevision switched from inline to conventional
2479 2479 # reopen the index
2480 2480 ifh.close()
2481 2481 dfh = self._datafp(b"a+")
2482 2482 ifh = self._indexfp(b"a+")
2483 2483 self._writinghandles = (ifh, dfh)
2484 2484 finally:
2485 2485 self._writinghandles = None
2486 2486
2487 2487 if dfh:
2488 2488 dfh.close()
2489 2489 ifh.close()
2490 2490 return not empty
2491 2491
2492 2492 def iscensored(self, rev):
2493 2493 """Check if a file revision is censored."""
2494 2494 if not self._censorable:
2495 2495 return False
2496 2496
2497 2497 return self.flags(rev) & REVIDX_ISCENSORED
2498 2498
2499 2499 def _peek_iscensored(self, baserev, delta, flush):
2500 2500 """Quickly check if a delta produces a censored revision."""
2501 2501 if not self._censorable:
2502 2502 return False
2503 2503
2504 2504 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2505 2505
2506 2506 def getstrippoint(self, minlink):
2507 2507 """find the minimum rev that must be stripped to strip the linkrev
2508 2508
2509 2509 Returns a tuple containing the minimum rev and a set of all revs that
2510 2510 have linkrevs that will be broken by this strip.
2511 2511 """
2512 2512 return storageutil.resolvestripinfo(
2513 2513 minlink,
2514 2514 len(self) - 1,
2515 2515 self.headrevs(),
2516 2516 self.linkrev,
2517 2517 self.parentrevs,
2518 2518 )
2519 2519
2520 2520 def strip(self, minlink, transaction):
2521 2521 """truncate the revlog on the first revision with a linkrev >= minlink
2522 2522
2523 2523 This function is called when we're stripping revision minlink and
2524 2524 its descendants from the repository.
2525 2525
2526 2526 We have to remove all revisions with linkrev >= minlink, because
2527 2527 the equivalent changelog revisions will be renumbered after the
2528 2528 strip.
2529 2529
2530 2530 So we truncate the revlog on the first of these revisions, and
2531 2531 trust that the caller has saved the revisions that shouldn't be
2532 2532 removed and that it'll re-add them after this truncation.
2533 2533 """
2534 2534 if len(self) == 0:
2535 2535 return
2536 2536
2537 2537 rev, _ = self.getstrippoint(minlink)
2538 2538 if rev == len(self):
2539 2539 return
2540 2540
2541 2541 # first truncate the files on disk
2542 2542 end = self.start(rev)
2543 2543 if not self._inline:
2544 2544 transaction.add(self.datafile, end)
2545 2545 end = rev * self.index.entry_size
2546 2546 else:
2547 2547 end += rev * self.index.entry_size
2548 2548
2549 2549 transaction.add(self.indexfile, end)
2550 2550
2551 2551 # then reset internal state in memory to forget those revisions
2552 2552 self._revisioncache = None
2553 2553 self._chaininfocache = util.lrucachedict(500)
2554 2554 self._chunkclear()
2555 2555
2556 2556 del self.index[rev:-1]
2557 2557
2558 2558 def checksize(self):
2559 2559 """Check size of index and data files
2560 2560
2561 2561 return a (dd, di) tuple.
2562 2562 - dd: extra bytes for the "data" file
2563 2563 - di: extra bytes for the "index" file
2564 2564
2565 2565 A healthy revlog will return (0, 0).
2566 2566 """
2567 2567 expected = 0
2568 2568 if len(self):
2569 2569 expected = max(0, self.end(len(self) - 1))
2570 2570
2571 2571 try:
2572 2572 with self._datafp() as f:
2573 2573 f.seek(0, io.SEEK_END)
2574 2574 actual = f.tell()
2575 2575 dd = actual - expected
2576 2576 except IOError as inst:
2577 2577 if inst.errno != errno.ENOENT:
2578 2578 raise
2579 2579 dd = 0
2580 2580
2581 2581 try:
2582 2582 f = self.opener(self.indexfile)
2583 2583 f.seek(0, io.SEEK_END)
2584 2584 actual = f.tell()
2585 2585 f.close()
2586 2586 s = self.index.entry_size
2587 2587 i = max(0, actual // s)
2588 2588 di = actual - (i * s)
2589 2589 if self._inline:
2590 2590 databytes = 0
2591 2591 for r in self:
2592 2592 databytes += max(0, self.length(r))
2593 2593 dd = 0
2594 2594 di = actual - len(self) * s - databytes
2595 2595 except IOError as inst:
2596 2596 if inst.errno != errno.ENOENT:
2597 2597 raise
2598 2598 di = 0
2599 2599
2600 2600 return (dd, di)
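        # Editor's example (assumed sizes): with 3 index entries at an
        # entry_size of 64, a healthy non-inline index is exactly 192 bytes;
        # a 200-byte index file reports di == 8 extra bytes, while a data
        # file extending past end(tip) reports the surplus as dd.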
2601 2601
2602 2602 def files(self):
2603 2603 res = [self.indexfile]
2604 2604 if not self._inline:
2605 2605 res.append(self.datafile)
2606 2606 return res
2607 2607
2608 2608 def emitrevisions(
2609 2609 self,
2610 2610 nodes,
2611 2611 nodesorder=None,
2612 2612 revisiondata=False,
2613 2613 assumehaveparentrevisions=False,
2614 2614 deltamode=repository.CG_DELTAMODE_STD,
2615 2615 sidedata_helpers=None,
2616 2616 ):
2617 2617 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2618 2618 raise error.ProgrammingError(
2619 2619 b'unhandled value for nodesorder: %s' % nodesorder
2620 2620 )
2621 2621
2622 2622 if nodesorder is None and not self._generaldelta:
2623 2623 nodesorder = b'storage'
2624 2624
2625 2625 if (
2626 2626 not self._storedeltachains
2627 2627 and deltamode != repository.CG_DELTAMODE_PREV
2628 2628 ):
2629 2629 deltamode = repository.CG_DELTAMODE_FULL
2630 2630
2631 2631 return storageutil.emitrevisions(
2632 2632 self,
2633 2633 nodes,
2634 2634 nodesorder,
2635 2635 revlogrevisiondelta,
2636 2636 deltaparentfn=self.deltaparent,
2637 2637 candeltafn=self.candelta,
2638 2638 rawsizefn=self.rawsize,
2639 2639 revdifffn=self.revdiff,
2640 2640 flagsfn=self.flags,
2641 2641 deltamode=deltamode,
2642 2642 revisiondata=revisiondata,
2643 2643 assumehaveparentrevisions=assumehaveparentrevisions,
2644 2644 sidedata_helpers=sidedata_helpers,
2645 2645 )
2646 2646
2647 2647 DELTAREUSEALWAYS = b'always'
2648 2648 DELTAREUSESAMEREVS = b'samerevs'
2649 2649 DELTAREUSENEVER = b'never'
2650 2650
2651 2651 DELTAREUSEFULLADD = b'fulladd'
2652 2652
2653 2653 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2654 2654
2655 2655 def clone(
2656 2656 self,
2657 2657 tr,
2658 2658 destrevlog,
2659 2659 addrevisioncb=None,
2660 2660 deltareuse=DELTAREUSESAMEREVS,
2661 2661 forcedeltabothparents=None,
2662 2662 sidedata_helpers=None,
2663 2663 ):
2664 2664 """Copy this revlog to another, possibly with format changes.
2665 2665
2666 2666 The destination revlog will contain the same revisions and nodes.
2667 2667 However, it may not be bit-for-bit identical due to e.g. delta encoding
2668 2668 differences.
2669 2669
2670 2670         The ``deltareuse`` argument controls how deltas from the existing revlog
2671 2671 are preserved in the destination revlog. The argument can have the
2672 2672 following values:
2673 2673
2674 2674 DELTAREUSEALWAYS
2675 2675 Deltas will always be reused (if possible), even if the destination
2676 2676 revlog would not select the same revisions for the delta. This is the
2677 2677 fastest mode of operation.
2678 2678 DELTAREUSESAMEREVS
2679 2679 Deltas will be reused if the destination revlog would pick the same
2680 2680 revisions for the delta. This mode strikes a balance between speed
2681 2681 and optimization.
2682 2682 DELTAREUSENEVER
2683 2683 Deltas will never be reused. This is the slowest mode of execution.
2684 2684 This mode can be used to recompute deltas (e.g. if the diff/delta
2685 2685 algorithm changes).
2686 2686 DELTAREUSEFULLADD
2687 2687           Revisions will be re-added as if they were new content. This is
2688 2688           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2689 2689           e.g. large file detection and handling.
2690 2690
2691 2691 Delta computation can be slow, so the choice of delta reuse policy can
2692 2692 significantly affect run time.
2693 2693
2694 2694 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2695 2695 two extremes. Deltas will be reused if they are appropriate. But if the
2696 2696 delta could choose a better revision, it will do so. This means if you
2697 2697 are converting a non-generaldelta revlog to a generaldelta revlog,
2698 2698 deltas will be recomputed if the delta's parent isn't a parent of the
2699 2699 revision.
2700 2700
2701 2701 In addition to the delta policy, the ``forcedeltabothparents``
2702 2702         argument controls whether to force computing deltas against both parents
2703 2703         for merges. If unset, the destination revlog's existing setting is kept.
2704 2704
2705 2705 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
2706 2706 `sidedata_helpers`.
2707 2707 """
2708 2708 if deltareuse not in self.DELTAREUSEALL:
2709 2709 raise ValueError(
2710 2710 _(b'value for deltareuse invalid: %s') % deltareuse
2711 2711 )
2712 2712
2713 2713 if len(destrevlog):
2714 2714 raise ValueError(_(b'destination revlog is not empty'))
2715 2715
2716 2716 if getattr(self, 'filteredrevs', None):
2717 2717 raise ValueError(_(b'source revlog has filtered revisions'))
2718 2718 if getattr(destrevlog, 'filteredrevs', None):
2719 2719 raise ValueError(_(b'destination revlog has filtered revisions'))
2720 2720
2721 2721 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2722 2722 # if possible.
2723 2723 oldlazydelta = destrevlog._lazydelta
2724 2724 oldlazydeltabase = destrevlog._lazydeltabase
2725 2725 oldamd = destrevlog._deltabothparents
2726 2726
2727 2727 try:
2728 2728 if deltareuse == self.DELTAREUSEALWAYS:
2729 2729 destrevlog._lazydeltabase = True
2730 2730 destrevlog._lazydelta = True
2731 2731 elif deltareuse == self.DELTAREUSESAMEREVS:
2732 2732 destrevlog._lazydeltabase = False
2733 2733 destrevlog._lazydelta = True
2734 2734 elif deltareuse == self.DELTAREUSENEVER:
2735 2735 destrevlog._lazydeltabase = False
2736 2736 destrevlog._lazydelta = False
2737 2737
2738 2738 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2739 2739
2740 2740 self._clone(
2741 2741 tr,
2742 2742 destrevlog,
2743 2743 addrevisioncb,
2744 2744 deltareuse,
2745 2745 forcedeltabothparents,
2746 2746 sidedata_helpers,
2747 2747 )
2748 2748
2749 2749 finally:
2750 2750 destrevlog._lazydelta = oldlazydelta
2751 2751 destrevlog._lazydeltabase = oldlazydeltabase
2752 2752 destrevlog._deltabothparents = oldamd
2753 2753
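    # Editor's sketch of a typical call (hypothetical setup, not from the
    # commit): with an open transaction ``tr`` and a freshly created, empty
    # ``dest`` revlog,
    #
    #     src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
    #
    # forces every delta to be recomputed under the destination's own
    # policy, e.g. when the diff/delta algorithm changes.
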
2754 2754 def _clone(
2755 2755 self,
2756 2756 tr,
2757 2757 destrevlog,
2758 2758 addrevisioncb,
2759 2759 deltareuse,
2760 2760 forcedeltabothparents,
2761 2761 sidedata_helpers,
2762 2762 ):
2763 2763 """perform the core duty of `revlog.clone` after parameter processing"""
2764 2764 deltacomputer = deltautil.deltacomputer(destrevlog)
2765 2765 index = self.index
2766 2766 for rev in self:
2767 2767 entry = index[rev]
2768 2768
2769 2769 # Some classes override linkrev to take filtered revs into
2770 2770 # account. Use raw entry from index.
2771 2771 flags = entry[0] & 0xFFFF
2772 2772 linkrev = entry[4]
2773 2773 p1 = index[entry[5]][7]
2774 2774 p2 = index[entry[6]][7]
2775 2775 node = entry[7]
2776 2776
2777 2777 # (Possibly) reuse the delta from the revlog if allowed and
2778 2778 # the revlog chunk is a delta.
2779 2779 cachedelta = None
2780 2780 rawtext = None
2781 2781 if deltareuse == self.DELTAREUSEFULLADD:
2782 2782 text, sidedata = self._revisiondata(rev)
2783 2783
2784 2784 if sidedata_helpers is not None:
2785 2785 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2786 2786 self, sidedata_helpers, sidedata, rev
2787 2787 )
2788 2788 flags = flags | new_flags[0] & ~new_flags[1]
2789 2789
2790 2790 destrevlog.addrevision(
2791 2791 text,
2792 2792 tr,
2793 2793 linkrev,
2794 2794 p1,
2795 2795 p2,
2796 2796 cachedelta=cachedelta,
2797 2797 node=node,
2798 2798 flags=flags,
2799 2799 deltacomputer=deltacomputer,
2800 2800 sidedata=sidedata,
2801 2801 )
2802 2802 else:
2803 2803 if destrevlog._lazydelta:
2804 2804 dp = self.deltaparent(rev)
2805 2805 if dp != nullrev:
2806 2806 cachedelta = (dp, bytes(self._chunk(rev)))
2807 2807
2808 2808 sidedata = None
2809 2809 if not cachedelta:
2810 2810 rawtext, sidedata = self._revisiondata(rev)
2811 2811 if sidedata is None:
2812 2812 sidedata = self.sidedata(rev)
2813 2813
2814 2814 if sidedata_helpers is not None:
2815 2815 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
2816 2816 self, sidedata_helpers, sidedata, rev
2817 2817 )
2818 2818 flags = flags | new_flags[0] & ~new_flags[1]
2819 2819
2820 2820 ifh = destrevlog.opener(
2821 2821 destrevlog.indexfile, b'a+', checkambig=False
2822 2822 )
2823 2823 dfh = None
2824 2824 if not destrevlog._inline:
2825 2825 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2826 2826 try:
2827 2827 destrevlog._addrevision(
2828 2828 node,
2829 2829 rawtext,
2830 2830 tr,
2831 2831 linkrev,
2832 2832 p1,
2833 2833 p2,
2834 2834 flags,
2835 2835 cachedelta,
2836 2836 ifh,
2837 2837 dfh,
2838 2838 deltacomputer=deltacomputer,
2839 2839 sidedata=sidedata,
2840 2840 )
2841 2841 finally:
2842 2842 if dfh:
2843 2843 dfh.close()
2844 2844 ifh.close()
2845 2845
2846 2846 if addrevisioncb:
2847 2847 addrevisioncb(self, rev, node)
2848 2848
2849 2849 def censorrevision(self, tr, censornode, tombstone=b''):
2850 if (self.version & 0xFFFF) == REVLOGV0:
2850 if self._format_version == REVLOGV0:
2851 2851 raise error.RevlogError(
2852 _(b'cannot censor with version %d revlogs') % self.version
2852 _(b'cannot censor with version %d revlogs')
2853 % self._format_version
2853 2854 )
2854 2855
2855 2856 censorrev = self.rev(censornode)
2856 2857 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2857 2858
2858 2859 if len(tombstone) > self.rawsize(censorrev):
2859 2860 raise error.Abort(
2860 2861 _(b'censor tombstone must be no longer than censored data')
2861 2862 )
2862 2863
2863 2864 # Rewriting the revlog in place is hard. Our strategy for censoring is
2864 2865 # to create a new revlog, copy all revisions to it, then replace the
2865 2866 # revlogs on transaction close.
2866 2867
2867 2868 newindexfile = self.indexfile + b'.tmpcensored'
2868 2869 newdatafile = self.datafile + b'.tmpcensored'
2869 2870
2870 2871 # This is a bit dangerous. We could easily have a mismatch of state.
2871 2872 newrl = revlog(
2872 2873 self.opener,
2873 2874 target=self.target,
2874 2875 indexfile=newindexfile,
2875 2876 datafile=newdatafile,
2876 2877 censorable=True,
2877 2878 )
2878 newrl.version = self.version
2879 newrl._format_version = self._format_version
2880 newrl._format_flags = self._format_flags
2879 2881 newrl._generaldelta = self._generaldelta
2880 2882 newrl._parse_index = self._parse_index
2881 2883
2882 2884 for rev in self.revs():
2883 2885 node = self.node(rev)
2884 2886 p1, p2 = self.parents(node)
2885 2887
2886 2888 if rev == censorrev:
2887 2889 newrl.addrawrevision(
2888 2890 tombstone,
2889 2891 tr,
2890 2892 self.linkrev(censorrev),
2891 2893 p1,
2892 2894 p2,
2893 2895 censornode,
2894 2896 REVIDX_ISCENSORED,
2895 2897 )
2896 2898
2897 2899 if newrl.deltaparent(rev) != nullrev:
2898 2900 raise error.Abort(
2899 2901 _(
2900 2902 b'censored revision stored as delta; '
2901 2903 b'cannot censor'
2902 2904 ),
2903 2905 hint=_(
2904 2906 b'censoring of revlogs is not '
2905 2907 b'fully implemented; please report '
2906 2908 b'this bug'
2907 2909 ),
2908 2910 )
2909 2911 continue
2910 2912
2911 2913 if self.iscensored(rev):
2912 2914 if self.deltaparent(rev) != nullrev:
2913 2915 raise error.Abort(
2914 2916 _(
2915 2917 b'cannot censor due to censored '
2916 2918 b'revision having delta stored'
2917 2919 )
2918 2920 )
2919 2921 rawtext = self._chunk(rev)
2920 2922 else:
2921 2923 rawtext = self.rawdata(rev)
2922 2924
2923 2925 newrl.addrawrevision(
2924 2926 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2925 2927 )
2926 2928
2927 2929 tr.addbackup(self.indexfile, location=b'store')
2928 2930 if not self._inline:
2929 2931 tr.addbackup(self.datafile, location=b'store')
2930 2932
2931 2933 self.opener.rename(newrl.indexfile, self.indexfile)
2932 2934 if not self._inline:
2933 2935 self.opener.rename(newrl.datafile, self.datafile)
2934 2936
2935 2937 self.clearcaches()
2936 2938 self._loadindex()
2937 2939
2938 2940 def verifyintegrity(self, state):
2939 2941 """Verifies the integrity of the revlog.
2940 2942
2941 2943 Yields ``revlogproblem`` instances describing problems that are
2942 2944 found.
2943 2945 """
2944 2946 dd, di = self.checksize()
2945 2947 if dd:
2946 2948 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2947 2949 if di:
2948 2950 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2949 2951
2950 version = self.version & 0xFFFF
2952 version = self._format_version
2951 2953
2952 2954 # The verifier tells us what version revlog we should be.
2953 2955 if version != state[b'expectedversion']:
2954 2956 yield revlogproblem(
2955 2957 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2956 2958 % (self.indexfile, version, state[b'expectedversion'])
2957 2959 )
2958 2960
2959 2961 state[b'skipread'] = set()
2960 2962 state[b'safe_renamed'] = set()
2961 2963
2962 2964 for rev in self:
2963 2965 node = self.node(rev)
2964 2966
2965 2967 # Verify contents. 4 cases to care about:
2966 2968 #
2967 2969 # common: the most common case
2968 2970 # rename: with a rename
2969 2971 # meta: file content starts with b'\1\n', the metadata
2970 2972 # header defined in filelog.py, but without a rename
2971 2973 # ext: content stored externally
2972 2974 #
2973 2975 # More formally, their differences are shown below:
2974 2976 #
2975 2977 # | common | rename | meta | ext
2976 2978 # -------------------------------------------------------
2977 2979 # flags() | 0 | 0 | 0 | not 0
2978 2980 # renamed() | False | True | False | ?
2979 2981 # rawtext[0:2]=='\1\n'| False | True | True | ?
2980 2982 #
2981 2983 # "rawtext" means the raw text stored in revlog data, which
2982 2984 # could be retrieved by "rawdata(rev)". "text"
2983 2985 # mentioned below is "revision(rev)".
2984 2986 #
2985 2987 # There are 3 different lengths stored physically:
2986 2988 # 1. L1: rawsize, stored in revlog index
2987 2989 # 2. L2: len(rawtext), stored in revlog data
2988 2990 # 3. L3: len(text), stored in revlog data if flags==0, or
2989 2991 # possibly somewhere else if flags!=0
2990 2992 #
2991 2993 # L1 should be equal to L2. L3 could be different from them.
2992 2994 # "text" may or may not affect commit hash depending on flag
2993 2995 # processors (see flagutil.addflagprocessor).
2994 2996 #
2995 2997 # | common | rename | meta | ext
2996 2998 # -------------------------------------------------
2997 2999 # rawsize() | L1 | L1 | L1 | L1
2998 3000 # size() | L1 | L2-LM | L1(*) | L1 (?)
2999 3001 # len(rawtext) | L2 | L2 | L2 | L2
3000 3002 # len(text) | L2 | L2 | L2 | L3
3001 3003 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3002 3004 #
3003 3005 # LM: length of metadata, depending on rawtext
3004 3006 # (*): not ideal, see comment in filelog.size
3005 3007 # (?): could be "- len(meta)" if the resolved content has
3006 3008 # rename metadata
3007 3009 #
3008 3010 # Checks needed to be done:
3009 3011 # 1. length check: L1 == L2, in all cases.
3010 3012 # 2. hash check: depending on flag processor, we may need to
3011 3013 # use either "text" (external), or "rawtext" (in revlog).
3012 3014
3013 3015 try:
3014 3016 skipflags = state.get(b'skipflags', 0)
3015 3017 if skipflags:
3016 3018 skipflags &= self.flags(rev)
3017 3019
3018 3020 _verify_revision(self, skipflags, state, node)
3019 3021
3020 3022 l1 = self.rawsize(rev)
3021 3023 l2 = len(self.rawdata(node))
3022 3024
3023 3025 if l1 != l2:
3024 3026 yield revlogproblem(
3025 3027 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3026 3028 node=node,
3027 3029 )
3028 3030
3029 3031 except error.CensoredNodeError:
3030 3032 if state[b'erroroncensored']:
3031 3033 yield revlogproblem(
3032 3034 error=_(b'censored file data'), node=node
3033 3035 )
3034 3036 state[b'skipread'].add(node)
3035 3037 except Exception as e:
3036 3038 yield revlogproblem(
3037 3039 error=_(b'unpacking %s: %s')
3038 3040 % (short(node), stringutil.forcebytestr(e)),
3039 3041 node=node,
3040 3042 )
3041 3043 state[b'skipread'].add(node)
3042 3044
3043 3045 def storageinfo(
3044 3046 self,
3045 3047 exclusivefiles=False,
3046 3048 sharedfiles=False,
3047 3049 revisionscount=False,
3048 3050 trackedsize=False,
3049 3051 storedsize=False,
3050 3052 ):
3051 3053 d = {}
3052 3054
3053 3055 if exclusivefiles:
3054 3056 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3055 3057 if not self._inline:
3056 3058 d[b'exclusivefiles'].append((self.opener, self.datafile))
3057 3059
3058 3060 if sharedfiles:
3059 3061 d[b'sharedfiles'] = []
3060 3062
3061 3063 if revisionscount:
3062 3064 d[b'revisionscount'] = len(self)
3063 3065
3064 3066 if trackedsize:
3065 3067 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3066 3068
3067 3069 if storedsize:
3068 3070 d[b'storedsize'] = sum(
3069 3071 self.opener.stat(path).st_size for path in self.files()
3070 3072 )
3071 3073
3072 3074 return d
3073 3075
3074 3076 def rewrite_sidedata(self, helpers, startrev, endrev):
3075 3077 if not self.hassidedata:
3076 3078 return
3077 3079         # inline revlogs are not yet supported because they suffer from an
3078 3080         # issue when rewriting them (since it's not an append-only
3079 3081         # operation). See issue6485.
3080 3082 assert not self._inline
3081 3083 if not helpers[1] and not helpers[2]:
3082 3084 # Nothing to generate or remove
3083 3085 return
3084 3086
3085 3087         # The changelog implements a "delayed" writing mechanism that
3086 3088         # assumes all index data is written in append mode and is therefore
3087 3089         # incompatible with the seeked writes done in this method. The use
3088 3090         # of such "delayed" writing will soon be removed for revlog versions
3089 3091         # that support sidedata, so for now we only keep this simple assert
3090 3092         # to highlight the situation.
3091 3093 delayed = getattr(self, '_delayed', False)
3092 3094 diverted = getattr(self, '_divert', False)
3093 3095 if delayed and not diverted:
3094 3096 msg = "cannot rewrite_sidedata of a delayed revlog"
3095 3097 raise error.ProgrammingError(msg)
3096 3098
3097 3099 new_entries = []
3098 3100 # append the new sidedata
3099 3101 with self._datafp(b'a+') as fp:
3100 3102 # Maybe this bug still exists, see revlog._writeentry
3101 3103 fp.seek(0, os.SEEK_END)
3102 3104 current_offset = fp.tell()
3103 3105 for rev in range(startrev, endrev + 1):
3104 3106 entry = self.index[rev]
3105 3107 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3106 3108 store=self,
3107 3109 sidedata_helpers=helpers,
3108 3110 sidedata={},
3109 3111 rev=rev,
3110 3112 )
3111 3113
3112 3114 serialized_sidedata = sidedatautil.serialize_sidedata(
3113 3115 new_sidedata
3114 3116 )
3115 3117 if entry[8] != 0 or entry[9] != 0:
3116 3118 # rewriting entries that already have sidedata is not
3117 3119 # supported yet, because it introduces garbage data in the
3118 3120 # revlog.
3119 3121 msg = b"Rewriting existing sidedata is not supported yet"
3120 3122 raise error.Abort(msg)
3121 3123
3122 3124 # Apply (potential) flags to add and to remove after running
3123 3125 # the sidedata helpers
3124 3126 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3125 3127 entry = (new_offset_flags,) + entry[1:8]
3126 3128 entry += (current_offset, len(serialized_sidedata))
3127 3129
3128 3130 fp.write(serialized_sidedata)
3129 3131 new_entries.append(entry)
3130 3132 current_offset += len(serialized_sidedata)
3131 3133
3132 3134 # rewrite the new index entries
3133 3135 with self._indexfp(b'r+') as fp:
3134 3136 fp.seek(startrev * self.index.entry_size)
3135 3137 for i, e in enumerate(new_entries):
3136 3138 rev = startrev + i
3137 3139 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3138 3140 packed = self.index.entry_binary(rev)
3139 3141 if rev == 0:
3140 header = self.index.pack_header(self.version)
3142 header = self._format_flags | self._format_version
3143 header = self.index.pack_header(header)
3141 3144 packed = header + packed
3142 3145 fp.write(packed)
@@ -1,627 +1,627
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import short
14 14 from .utils import stringutil
15 15
16 16 from . import (
17 17 error,
18 18 pycompat,
19 19 revlog,
20 20 util,
21 21 )
22 22
23 23 VERIFY_DEFAULT = 0
24 24 VERIFY_FULL = 1
25 25
26 26
27 27 def verify(repo, level=None):
28 28 with repo.lock():
29 29 v = verifier(repo, level)
30 30 return v.verify()
31 31
32 32
33 33 def _normpath(f):
34 34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 35 # converted repo may contain repeated slashes
36 36 while b'//' in f:
37 37 f = f.replace(b'//', b'/')
38 38 return f
39 39
40 40
41 41 class verifier(object):
42 42 def __init__(self, repo, level=None):
43 43 self.repo = repo.unfiltered()
44 44 self.ui = repo.ui
45 45 self.match = repo.narrowmatch()
46 46 if level is None:
47 47 level = VERIFY_DEFAULT
48 48 self._level = level
49 49 self.badrevs = set()
50 50 self.errors = 0
51 51 self.warnings = 0
52 52 self.havecl = len(repo.changelog) > 0
53 53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
54 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
55 55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
56 56 self.refersmf = False
57 57 self.fncachewarned = False
58 58 # developer config: verify.skipflags
59 59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
60 60 self.warnorphanstorefiles = True
61 61
62 62 def _warn(self, msg):
63 63 """record a "warning" level issue"""
64 64 self.ui.warn(msg + b"\n")
65 65 self.warnings += 1
66 66
67 67 def _err(self, linkrev, msg, filename=None):
68 68         """record an "error" level issue"""
69 69 if linkrev is not None:
70 70 self.badrevs.add(linkrev)
71 71 linkrev = b"%d" % linkrev
72 72 else:
73 73 linkrev = b'?'
74 74 msg = b"%s: %s" % (linkrev, msg)
75 75 if filename:
76 76 msg = b"%s@%s" % (filename, msg)
77 77 self.ui.warn(b" " + msg + b"\n")
78 78 self.errors += 1
79 79
80 80 def _exc(self, linkrev, msg, inst, filename=None):
81 81         """record an exception raised during the verify process"""
82 82 fmsg = stringutil.forcebytestr(inst)
83 83 if not fmsg:
84 84 fmsg = pycompat.byterepr(inst)
85 85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86 86
87 87 def _checkrevlog(self, obj, name, linkrev):
88 88 """verify high level property of a revlog
89 89
90 90 - revlog is present,
91 91 - revlog is non-empty,
92 92 - sizes (index and data) are correct,
93 93 - revlog's format version is correct.
94 94 """
95 95 if not len(obj) and (self.havecl or self.havemf):
96 96 self._err(linkrev, _(b"empty or missing %s") % name)
97 97 return
98 98
99 99 d = obj.checksize()
100 100 if d[0]:
101 101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 102 if d[1]:
103 103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104 104
105 if obj.version != revlog.REVLOGV0:
105 if obj._format_version != revlog.REVLOGV0:
106 106 if not self.revlogv1:
107 107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 108 elif self.revlogv1:
109 109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110 110
111 111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
112 112 """verify a single revlog entry
113 113
114 114 arguments are:
115 115 - obj: the source revlog
116 116 - i: the revision number
117 117 - node: the revision node id
118 118 - seen: nodes previously seen for this revlog
119 119 - linkrevs: [changelog-revisions] introducing "node"
120 120 - f: string label ("changelog", "manifest", or filename)
121 121
122 122 Performs the following checks:
123 123 - linkrev points to an existing changelog revision,
124 124 - linkrev points to a changelog revision that introduces this revision,
125 125 - linkrev points to the lowest of these changesets,
126 126 - both parents exist in the revlog,
127 127 - the revision is not duplicated.
128 128
129 129 Return the linkrev of the revision (or None for changelog's revisions).
130 130 """
131 131 lr = obj.linkrev(obj.rev(node))
132 132 if lr < 0 or (self.havecl and lr not in linkrevs):
133 133 if lr < 0 or lr >= len(self.repo.changelog):
134 134 msg = _(b"rev %d points to nonexistent changeset %d")
135 135 else:
136 136 msg = _(b"rev %d points to unexpected changeset %d")
137 137 self._err(None, msg % (i, lr), f)
138 138 if linkrevs:
139 139 if f and len(linkrevs) > 1:
140 140 try:
141 141 # attempt to filter down to real linkrevs
142 142 linkrevs = [
143 143 l
144 144 for l in linkrevs
145 145 if self.lrugetctx(l)[f].filenode() == node
146 146 ]
147 147 except Exception:
148 148 pass
149 149 self._warn(
150 150 _(b" (expected %s)")
151 151 % b" ".join(map(pycompat.bytestr, linkrevs))
152 152 )
153 153 lr = None # can't be trusted
154 154
155 155 try:
156 156 p1, p2 = obj.parents(node)
157 157 if p1 not in seen and p1 != self.repo.nullid:
158 158 self._err(
159 159 lr,
160 160 _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
161 161 f,
162 162 )
163 163 if p2 not in seen and p2 != self.repo.nullid:
164 164 self._err(
165 165 lr,
166 166 _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
167 167 f,
168 168 )
169 169 except Exception as inst:
170 170 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
171 171
172 172 if node in seen:
173 173 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
174 174 seen[node] = i
175 175 return lr
176 176
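# NOTE (editor): a linkrev is the changelog revision that introduced a
# filelog or manifest revision. A hedged sketch of the core check above
# (hypothetical file and node):
#
#     fl = repo.file(b'a.txt')
#     lr = fl.linkrev(fl.rev(node))  # changelog revision introducing `node`
#     assert 0 <= lr < len(repo.changelog) and lr in linkrevs
#
# A linkrev outside the changelog range triggers "points to nonexistent
# changeset"; one missing from `linkrevs` triggers "unexpected changeset".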
177 177 def verify(self):
178 178 """verify the content of the Mercurial repository
179 179
180 180 This method runs all verifications, displaying issues as they are found.
181 181
182 182 return 1 if any errors have been encountered, 0 otherwise."""
183 183 # initial validation and generic report
184 184 repo = self.repo
185 185 ui = repo.ui
186 186 if not repo.url().startswith(b'file:'):
187 187 raise error.Abort(_(b"cannot verify bundle or remote repos"))
188 188
189 189 if os.path.exists(repo.sjoin(b"journal")):
190 190 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
191 191
192 192 if ui.verbose or not self.revlogv1:
193 193 ui.status(
194 194 _(b"repository uses revlog format %d\n")
195 195 % (self.revlogv1 and 1 or 0)
196 196 )
197 197
198 198 # data verification
199 199 mflinkrevs, filelinkrevs = self._verifychangelog()
200 200 filenodes = self._verifymanifest(mflinkrevs)
201 201 del mflinkrevs
202 202 self._crosscheckfiles(filelinkrevs, filenodes)
203 203 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
204 204
205 205 # final report
206 206 ui.status(
207 207 _(b"checked %d changesets with %d changes to %d files\n")
208 208 % (len(repo.changelog), filerevisions, totalfiles)
209 209 )
210 210 if self.warnings:
211 211 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
212 212 if self.fncachewarned:
213 213 ui.warn(
214 214 _(
215 215 b'hint: run "hg debugrebuildfncache" to recover from '
216 216 b'corrupt fncache\n'
217 217 )
218 218 )
219 219 if self.errors:
220 220 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
221 221 if self.badrevs:
222 222 ui.warn(
223 223 _(b"(first damaged changeset appears to be %d)\n")
224 224 % min(self.badrevs)
225 225 )
226 226 return 1
227 227 return 0
228 228
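# NOTE (editor): a minimal usage sketch for the method above, assuming this
# class is the `verifier` driven by `hg verify` (illustrative only):
#
#     from mercurial import verify as verifymod
#     checker = verifymod.verifier(repo)
#     ret = checker.verify()  # 0 on success, 1 if integrity errors were found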
229 229 def _verifychangelog(self):
230 230 """verify the changelog of a repository
231 231
232 232 The following checks are performed:
233 233 - all of `_checkrevlog` checks,
234 234 - all of `_checkentry` checks (for each revision),
235 235 - each revision can be read.
236 236
237 237 The function returns some of the data observed in the changesets as a
238 238 (mflinkrevs, filelinkrevs) tuple:
239 239 - mflinkrevs: a { manifest-node -> [changelog-rev] } mapping
240 240 - filelinkrevs: a { file-path -> [changelog-rev] } mapping
241 241
242 242 If a matcher was specified, filelinkrevs will only contain matched
243 243 files.
244 244 """
245 245 ui = self.ui
246 246 repo = self.repo
247 247 match = self.match
248 248 cl = repo.changelog
249 249
250 250 ui.status(_(b"checking changesets\n"))
251 251 mflinkrevs = {}
252 252 filelinkrevs = {}
253 253 seen = {}
254 254 self._checkrevlog(cl, b"changelog", 0)
255 255 progress = ui.makeprogress(
256 256 _(b'checking'), unit=_(b'changesets'), total=len(repo)
257 257 )
258 258 for i in repo:
259 259 progress.update(i)
260 260 n = cl.node(i)
261 261 self._checkentry(cl, i, n, seen, [i], b"changelog")
262 262
263 263 try:
264 264 changes = cl.read(n)
265 265 if changes[0] != self.repo.nullid:
266 266 mflinkrevs.setdefault(changes[0], []).append(i)
267 267 self.refersmf = True
268 268 for f in changes[3]:
269 269 if match(f):
270 270 filelinkrevs.setdefault(_normpath(f), []).append(i)
271 271 except Exception as inst:
272 272 self.refersmf = True
273 273 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
274 274 progress.complete()
275 275 return mflinkrevs, filelinkrevs
276 276
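# NOTE (editor): shape of the data returned above, with hypothetical values
# for illustration:
#
#     mflinkrevs = {manifest_node: [2, 5]}      # changesets using that manifest
#     filelinkrevs = {b'dir/a.txt': [0, 3, 5]}  # changesets touching that file
#
# `_verifymanifest` consumes (and empties) `mflinkrevs`, while
# `filelinkrevs` feeds `_crosscheckfiles` and `_verifyfiles`.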
277 277 def _verifymanifest(
278 278 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
279 279 ):
280 280 """verify the manifestlog content
281 281
282 282 Inputs:
283 283 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
284 284 - dir: a subdirectory to check (for tree manifest repo)
285 285 - storefiles: set of currently "orphan" files.
286 286 - subdirprogress: a progress object
287 287
288 288 This function checks:
289 289 * all of `_checkrevlog` checks (for all manifest-related revlogs)
290 290 * all of `_checkentry` checks (for all manifest-related revisions)
291 291 * nodes for subdirectories exist in the sub-directory manifest
292 292 * each manifest entry has a file path
293 293 * each manifest node referred to in mflinkrevs exists in the manifest log
294 294
295 295 If tree manifest is in use and a matcher is specified, only the
296 296 sub-directories matching it will be verified.
297 297
298 298 return a two-level mapping:
299 299 {"path" -> { filenode -> changelog-revision}}
300 300
301 301 This mapping primarily contains entries for every file in the
302 302 repository. In addition, when tree-manifest is used, it also contains
303 303 sub-directory entries.
304 304
305 305 If a matcher is provided, only matching paths will be included.
306 306 """
307 307 repo = self.repo
308 308 ui = self.ui
309 309 match = self.match
310 310 mfl = self.repo.manifestlog
311 311 mf = mfl.getstorage(dir)
312 312
313 313 if not dir:
314 314 self.ui.status(_(b"checking manifests\n"))
315 315
316 316 filenodes = {}
317 317 subdirnodes = {}
318 318 seen = {}
319 319 label = b"manifest"
320 320 if dir:
321 321 label = dir
322 322 revlogfiles = mf.files()
323 323 storefiles.difference_update(revlogfiles)
324 324 if subdirprogress: # should be true since we're in a subdirectory
325 325 subdirprogress.increment()
326 326 if self.refersmf:
327 327 # Do not check manifest if there are only changelog entries with
328 328 # null manifests.
329 329 self._checkrevlog(mf._revlog, label, 0)
330 330 progress = ui.makeprogress(
331 331 _(b'checking'), unit=_(b'manifests'), total=len(mf)
332 332 )
333 333 for i in mf:
334 334 if not dir:
335 335 progress.update(i)
336 336 n = mf.node(i)
337 337 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
338 338 if n in mflinkrevs:
339 339 del mflinkrevs[n]
340 340 elif dir:
341 341 self._err(
342 342 lr,
343 343 _(b"%s not in parent-directory manifest") % short(n),
344 344 label,
345 345 )
346 346 else:
347 347 self._err(lr, _(b"%s not in changesets") % short(n), label)
348 348
349 349 try:
350 350 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
351 351 for f, fn, fl in mfdelta.iterentries():
352 352 if not f:
353 353 self._err(lr, _(b"entry without name in manifest"))
354 354 elif f == b"/dev/null": # ignore this in very old repos
355 355 continue
356 356 fullpath = dir + _normpath(f)
357 357 if fl == b't':
358 358 if not match.visitdir(fullpath):
359 359 continue
360 360 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
361 361 fn, []
362 362 ).append(lr)
363 363 else:
364 364 if not match(fullpath):
365 365 continue
366 366 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
367 367 except Exception as inst:
368 368 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
369 369 if self._level >= VERIFY_FULL:
370 370 try:
371 371 # Various issues can affect manifests, so we read each full
372 372 # text from storage. This triggers the checks from the core
373 373 # code (e.g. hash verification, filenames are ordered, etc.)
374 374 mfdelta = mfl.get(dir, n).read()
375 375 except Exception as inst:
376 376 self._exc(
377 377 lr,
378 378 _(b"reading full manifest %s") % short(n),
379 379 inst,
380 380 label,
381 381 )
382 382
383 383 if not dir:
384 384 progress.complete()
385 385
386 386 if self.havemf:
387 387 # since we delete entries in `mflinkrevs` during iteration, any
388 388 # remaining entries are "missing". We need to issue errors for them.
389 389 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
390 390 for c, m in sorted(changesetpairs):
391 391 if dir:
392 392 self._err(
393 393 c,
394 394 _(
395 395 b"parent-directory manifest refers to unknown"
396 396 b" revision %s"
397 397 )
398 398 % short(m),
399 399 label,
400 400 )
401 401 else:
402 402 self._err(
403 403 c,
404 404 _(b"changeset refers to unknown revision %s")
405 405 % short(m),
406 406 label,
407 407 )
408 408
409 409 if not dir and subdirnodes:
410 410 self.ui.status(_(b"checking directory manifests\n"))
411 411 storefiles = set()
412 412 subdirs = set()
413 413 revlogv1 = self.revlogv1
414 414 for t, f, f2, size in repo.store.datafiles():
415 415 if not f:
416 416 self._err(None, _(b"cannot decode filename '%s'") % f2)
417 417 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
418 418 storefiles.add(_normpath(f))
419 419 subdirs.add(os.path.dirname(f))
420 420 subdirprogress = ui.makeprogress(
421 421 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
422 422 )
423 423
424 424 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
425 425 subdirfilenodes = self._verifymanifest(
426 426 linkrevs, subdir, storefiles, subdirprogress
427 427 )
428 428 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
429 429 filenodes.setdefault(f, {}).update(onefilenodes)
430 430
431 431 if not dir and subdirnodes:
432 432 assert subdirprogress is not None # help pytype
433 433 subdirprogress.complete()
434 434 if self.warnorphanstorefiles:
435 435 for f in sorted(storefiles):
436 436 self._warn(_(b"warning: orphan data file '%s'") % f)
437 437
438 438 return filenodes
439 439
440 440 def _crosscheckfiles(self, filelinkrevs, filenodes):
441 441 repo = self.repo
442 442 ui = self.ui
443 443 ui.status(_(b"crosschecking files in changesets and manifests\n"))
444 444
445 445 total = len(filelinkrevs) + len(filenodes)
446 446 progress = ui.makeprogress(
447 447 _(b'crosschecking'), unit=_(b'files'), total=total
448 448 )
449 449 if self.havemf:
450 450 for f in sorted(filelinkrevs):
451 451 progress.increment()
452 452 if f not in filenodes:
453 453 lr = filelinkrevs[f][0]
454 454 self._err(lr, _(b"in changeset but not in manifest"), f)
455 455
456 456 if self.havecl:
457 457 for f in sorted(filenodes):
458 458 progress.increment()
459 459 if f not in filelinkrevs:
460 460 try:
461 461 fl = repo.file(f)
462 462 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
463 463 except Exception:
464 464 lr = None
465 465 self._err(lr, _(b"in manifest but not in changeset"), f)
466 466
467 467 progress.complete()
468 468
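# NOTE (editor): the two passes above are symmetric; an illustrative pairing
# of the error messages (hypothetical file b'a.txt'):
#
#     in filelinkrevs but not in filenodes -> "in changeset but not in manifest"
#     in filenodes but not in filelinkrevs -> "in manifest but not in changeset"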
469 469 def _verifyfiles(self, filenodes, filelinkrevs):
470 470 repo = self.repo
471 471 ui = self.ui
472 472 lrugetctx = self.lrugetctx
473 473 revlogv1 = self.revlogv1
474 474 havemf = self.havemf
475 475 ui.status(_(b"checking files\n"))
476 476
477 477 storefiles = set()
478 478 for rl_type, f, f2, size in repo.store.datafiles():
479 479 if not f:
480 480 self._err(None, _(b"cannot decode filename '%s'") % f2)
481 481 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
482 482 storefiles.add(_normpath(f))
483 483
484 484 state = {
485 485 # TODO this assumes revlog storage for changelog.
486 b'expectedversion': self.repo.changelog.version & 0xFFFF,
486 b'expectedversion': self.repo.changelog._format_version,
487 487 b'skipflags': self.skipflags,
488 488 # experimental config: censor.policy
489 489 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
490 490 }
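# NOTE (editor): with the split attribute, `expectedversion` no longer needs
# the `& 0xFFFF` mask: `_format_version` already holds only the low 16 bits
# of the old packed `version` value. Illustrative equivalence (assumes the
# old attribute is still reachable for comparison during the transition):
#
#     cl = repo.changelog
#     assert cl._format_version == cl.version & 0xFFFF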
491 491
492 492 files = sorted(set(filenodes) | set(filelinkrevs))
493 493 revisions = 0
494 494 progress = ui.makeprogress(
495 495 _(b'checking'), unit=_(b'files'), total=len(files)
496 496 )
497 497 for i, f in enumerate(files):
498 498 progress.update(i, item=f)
499 499 try:
500 500 linkrevs = filelinkrevs[f]
501 501 except KeyError:
502 502 # in manifest but not in changelog
503 503 linkrevs = []
504 504
505 505 if linkrevs:
506 506 lr = linkrevs[0]
507 507 else:
508 508 lr = None
509 509
510 510 try:
511 511 fl = repo.file(f)
512 512 except error.StorageError as e:
513 513 self._err(lr, _(b"broken revlog! (%s)") % e, f)
514 514 continue
515 515
516 516 for ff in fl.files():
517 517 try:
518 518 storefiles.remove(ff)
519 519 except KeyError:
520 520 if self.warnorphanstorefiles:
521 521 self._warn(
522 522 _(b" warning: revlog '%s' not in fncache!") % ff
523 523 )
524 524 self.fncachewarned = True
525 525
526 526 if not len(fl) and (self.havecl or self.havemf):
527 527 self._err(lr, _(b"empty or missing %s") % f)
528 528 else:
529 529 # Guard against implementations not setting this.
530 530 state[b'skipread'] = set()
531 531 state[b'safe_renamed'] = set()
532 532
533 533 for problem in fl.verifyintegrity(state):
534 534 if problem.node is not None:
535 535 linkrev = fl.linkrev(fl.rev(problem.node))
536 536 else:
537 537 linkrev = None
538 538
539 539 if problem.warning:
540 540 self._warn(problem.warning)
541 541 elif problem.error:
542 542 self._err(
543 543 linkrev if linkrev is not None else lr,
544 544 problem.error,
545 545 f,
546 546 )
547 547 else:
548 548 raise error.ProgrammingError(
549 549 b'problem instance does not set warning or error '
550 550 b'attribute: %s' % problem.msg
551 551 )
552 552
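# NOTE (editor): `verifyintegrity()` yields problem objects (the
# `iverifyproblem` interface) carrying `warning`, `error` and `node`
# attributes; exactly one of warning/error is expected to be set, hence the
# ProgrammingError fallback above. Illustrative probe:
#
#     problem = next(iter(fl.verifyintegrity(state)), None)
#     if problem is not None:
#         assert (problem.warning is None) != (problem.error is None)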
553 553 seen = {}
554 554 for i in fl:
555 555 revisions += 1
556 556 n = fl.node(i)
557 557 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
558 558 if f in filenodes:
559 559 if havemf and n not in filenodes[f]:
560 560 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
561 561 else:
562 562 del filenodes[f][n]
563 563
564 564 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
565 565 continue
566 566
567 567 # check renames
568 568 try:
569 569 # This requires resolving fulltext (at least on revlogs,
570 570 # though not with LFS revisions). We may want
571 571 # ``verifyintegrity()`` to pass a set of nodes with
572 572 # rename metadata as an optimization.
573 573 rp = fl.renamed(n)
574 574 if rp:
575 575 if lr is not None and ui.verbose:
576 576 ctx = lrugetctx(lr)
577 577 if not any(rp[0] in pctx for pctx in ctx.parents()):
578 578 self._warn(
579 579 _(
580 580 b"warning: copy source of '%s' not"
581 581 b" in parents of %s"
582 582 )
583 583 % (f, ctx)
584 584 )
585 585 fl2 = repo.file(rp[0])
586 586 if not len(fl2):
587 587 self._err(
588 588 lr,
589 589 _(
590 590 b"empty or missing copy source revlog "
591 591 b"%s:%s"
592 592 )
593 593 % (rp[0], short(rp[1])),
594 594 f,
595 595 )
596 596 elif rp[1] == self.repo.nullid:
597 597 ui.note(
598 598 _(
599 599 b"warning: %s@%s: copy source"
600 600 b" revision is nullid %s:%s\n"
601 601 )
602 602 % (f, lr, rp[0], short(rp[1]))
603 603 )
604 604 else:
605 605 fl2.rev(rp[1])
606 606 except Exception as inst:
607 607 self._exc(
608 608 lr, _(b"checking rename of %s") % short(n), inst, f
609 609 )
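# NOTE (editor): `fl.renamed(n)` returns False for ordinary revisions, or a
# (source-path, source-filenode) pair when the revision carries copy
# metadata; hypothetical example:
#
#     rp = fl.renamed(n)
#     if rp:
#         src_path, src_node = rp  # e.g. (b'old.txt', <20-byte node>)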
610 610
611 611 # cross-check
612 612 if f in filenodes:
613 613 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
614 614 for lr, node in sorted(fns):
615 615 self._err(
616 616 lr,
617 617 _(b"manifest refers to unknown revision %s")
618 618 % short(node),
619 619 f,
620 620 )
621 621 progress.complete()
622 622
623 623 if self.warnorphanstorefiles:
624 624 for f in sorted(storefiles):
625 625 self._warn(_(b"warning: orphan data file '%s'") % f)
626 626
627 627 return len(files), revisions